{
|
|
"id": "DswhuYzoemjA6iNN",
|
|
"meta": {
|
|
"instanceId": "a1ae5c8dc6c65e674f9c3947d083abcc749ef2546dff9f4ff01de4d6a36ebfe6",
|
|
"templateCredsSetupCompleted": true
|
|
},
|
|
"name": "Scrape Books from URL with Dumpling AI, Clean HTML, Save to Sheets, Email as CSV",
|
|
"tags": [
|
|
{
|
|
"id": "TlcNkmb96fUfZ2eA",
|
|
"name": "Tutorials",
|
|
"createdAt": "2025-04-15T17:02:00.249Z",
|
|
"updatedAt": "2025-04-15T17:02:00.249Z"
|
|
}
|
|
],
|
|
"nodes": [
|
|
{
|
|
"id": "2e4f64a5-353c-4dd3-9822-62df795d4940",
|
|
"name": "Convert to CSV File",
|
|
"type": "n8n-nodes-base.convertToFile",
|
|
"position": [
|
|
1640,
|
|
340
|
|
],
|
|
"parameters": {
|
|
"options": {}
|
|
},
|
|
"typeVersion": 1.1
|
|
},
|
|
{
|
|
"id": "472442d3-a691-4310-93f8-019579d0c473",
|
|
"name": "Extract all books from the page",
|
|
"type": "n8n-nodes-base.html",
|
|
"position": [
|
|
760,
|
|
340
|
|
],
|
|
"parameters": {
|
|
"options": {},
|
|
"operation": "extractHtmlContent",
|
|
"dataPropertyName": "content",
|
|
"extractionValues": {
|
|
"values": [
|
|
{
|
|
"key": "books",
|
|
"cssSelector": ".row > li",
|
|
"returnArray": true,
|
|
"returnValue": "html"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"typeVersion": 1.2
|
|
},
|
|
{
|
|
"id": "92765257-d64d-47c9-bd57-50914342138b",
|
|
"name": "Sort by price",
|
|
"type": "n8n-nodes-base.sort",
|
|
"position": [
|
|
1420,
|
|
340
|
|
],
|
|
"parameters": {
|
|
"options": {},
|
|
"sortFieldsUi": {
|
|
"sortField": [
|
|
{
|
|
"order": "descending",
|
|
"fieldName": "price"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"typeVersion": 1
|
|
},
|
|
{
|
|
"id": "efc2f33f-1bef-4906-b3b7-b02868080a54",
|
|
"name": "Extract individual book price",
|
|
"type": "n8n-nodes-base.html",
|
|
"position": [
|
|
1200,
|
|
340
|
|
],
|
|
"parameters": {
|
|
"options": {},
|
|
"operation": "extractHtmlContent",
|
|
"dataPropertyName": "books",
|
|
"extractionValues": {
|
|
"values": [
|
|
{
|
|
"key": "title",
|
|
"attribute": "title",
|
|
"cssSelector": "h3 > a",
|
|
"returnValue": "attribute"
|
|
},
|
|
{
|
|
"key": "price",
|
|
"cssSelector": ".price_color"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"typeVersion": 1.2
|
|
},
|
|
{
|
|
"id": "74c7c3af-d63c-4b6c-95a0-15f45b19134b",
|
|
"name": "Send CSV via e-mail",
|
|
"type": "n8n-nodes-base.gmail",
|
|
"position": [
|
|
1860,
|
|
340
|
|
],
|
|
"webhookId": "40f2d609-52ed-40bf-b190-1f1cebbe3fb7",
|
|
"parameters": {
|
|
"sendTo": "",
|
|
"message": "Hey, here's the scraped data from the online bookstore!",
|
|
"options": {
|
|
"attachmentsUi": {
|
|
"attachmentsBinary": [
|
|
{}
|
|
]
|
|
}
|
|
},
|
|
"subject": "bookstore csv",
|
|
"emailType": "text"
|
|
},
|
|
"credentials": {
|
|
"gmailOAuth2": {
|
|
"id": "j70r3RTMED1pgN3R",
|
|
"name": "Gmail account 2"
|
|
}
|
|
},
|
|
"typeVersion": 2.1
|
|
},
|
|
{
|
|
"id": "95c7998b-ece0-4dea-b99e-97ac22fb8a59",
|
|
"name": "Sticky Note3",
|
|
"type": "n8n-nodes-base.stickyNote",
|
|
"position": [
|
|
140,
|
|
-260
|
|
],
|
|
"parameters": {
|
|
"width": 619,
|
|
"height": 297,
|
|
"content": "### Scrape Books from URL with Dumpling AI, Clean HTML, Save to Sheets, Email as CSV\n\n📌 This workflow scrapes book data from a website, turns it into a CSV, saves it, and sends it by email.\n\n🔧 It starts from a Google Sheets trigger, fetches the page using Dumpling AI, extracts the books, sorts them by price, and emails the CSV.\n\n✅ Make sure the APIs for Gmail, Sheets & Drive are enabled in Google Cloud. Update the URL in the \"Scrape Website Content with Dumpling AI\" node.\n"
|
|
},
|
|
"typeVersion": 1
|
|
},
|
|
{
|
|
"id": "f599028a-49a9-4b85-b484-5abf1229e373",
|
|
"name": "Sticky Note",
|
|
"type": "n8n-nodes-base.stickyNote",
|
|
"position": [
|
|
140,
|
|
60
|
|
],
|
|
"parameters": {
|
|
"color": 4,
|
|
"width": 900,
|
|
"height": 300,
|
|
"content": "### 🔁 Trigger to Raw Book HTML\n\n1. **Google Sheets Trigger** \n Watches a sheet for new row entries. Once a new URL is added, the workflow starts.\n\n2. **Scrape Website Content (Dumpling AI)** \n Makes an HTTP POST request to Dumpling AI to scrape and return the full HTML of the target URL.\n\n3. **Extract All Books** \n Uses a CSS selector to isolate the list items (`.row > li`) containing book entries.\n\n4. **Split Out Node** \n Breaks the array of book HTML blocks into individual items, so each book can be processed separately in the next steps.\n"
|
|
},
|
|
"typeVersion": 1
|
|
},
|
|
{
|
|
"id": "bc6ab72c-de03-4e79-9da0-ca12ddf31811",
|
|
"name": "Sticky Note1",
|
|
"type": "n8n-nodes-base.stickyNote",
|
|
"position": [
|
|
1140,
|
|
60
|
|
],
|
|
"parameters": {
|
|
"color": 6,
|
|
"width": 840,
|
|
"height": 300,
|
|
"content": "### 📦 Parse, Sort, Export & Email\n\n5. **Extract Individual Book Data** \n From each book, extract the title (`h3 > a` title attribute) and price (`.price_color` content).\n\n6. **Sort by Price** \n Organizes the extracted data in descending order using the price field.\n\n7. **Convert to CSV File** \n Transforms the sorted JSON data into a downloadable CSV file format.\n\n8. **Send CSV via Gmail** \n Automatically sends an email with the CSV file attached to the predefined address.\n"
|
|
},
|
|
"typeVersion": 1
|
|
},
|
|
{
|
|
"id": "a1246b4e-212f-4bd3-970b-b0ff8db2f834",
|
|
"name": "Trigger- Watches For new URL in Spreadsheet",
|
|
"type": "n8n-nodes-base.googleSheetsTrigger",
|
|
"position": [
|
|
320,
|
|
340
|
|
],
|
|
"parameters": {
|
|
"event": "rowAdded",
|
|
"options": {},
|
|
"pollTimes": {
|
|
"item": [
|
|
{
|
|
"mode": "everyMinute"
|
|
}
|
|
]
|
|
},
|
|
"sheetName": {
|
|
"__rl": true,
|
|
"mode": "list",
|
|
"value": "",
|
|
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1pb4WLqv2EruLM1z9-utehcINolSj0vlUqZionyLoRUs/edit#gid=0",
|
|
"cachedResultName": "Sheet1"
|
|
},
|
|
"documentId": {
|
|
"__rl": true,
|
|
"mode": "list",
|
|
"value": "",
|
|
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1pb4WLqv2EruLM1z9-utehcINolSj0vlUqZionyLoRUs/edit?usp=drivesdk",
|
|
"cachedResultName": "URLs"
|
|
}
|
|
},
|
|
"credentials": {
|
|
"googleSheetsTriggerOAuth2Api": {
|
|
"id": "qDzHSzTkclwDHpSR",
|
|
"name": "Google Sheets Trigger account"
|
|
}
|
|
},
|
|
"typeVersion": 1
|
|
},
|
|
{
|
|
"id": "b19aa287-3be4-4e16-908d-b0cb484519e3",
|
|
"name": "Scrape Website Content with Dumpling AI",
|
|
"type": "n8n-nodes-base.httpRequest",
|
|
"position": [
|
|
540,
|
|
340
|
|
],
|
|
"parameters": {
|
|
"url": "https://app.dumplingai.com/api/v1/scrape",
|
|
"method": "POST",
|
|
"options": {
|
|
"allowUnauthorizedCerts": false
|
|
},
|
|
"jsonBody": "={\n \"url\": \"{{ $('Trigger- Watches For new URL in Spreadsheet').item.json.URL }}\", \n \"format\": \"html\",\n \"cleaned\": true\n }",
|
|
"sendBody": true,
|
|
"sendHeaders": true,
|
|
"specifyBody": "json",
|
|
"authentication": "genericCredentialType",
|
|
"genericAuthType": "httpHeaderAuth",
|
|
"headerParameters": {
|
|
"parameters": [
|
|
{
|
|
"name": "Content-Type",
|
|
"value": "application/json"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"credentials": {
|
|
"httpBasicAuth": {
|
|
"id": "mznexGH3YDtrUTAk",
|
|
"name": "Unnamed credential"
|
|
},
|
|
"httpHeaderAuth": {
|
|
"id": "xamyMqCpAech5BeT",
|
|
"name": "Header Auth account"
|
|
}
|
|
},
|
|
"typeVersion": 4.1
|
|
},
|
|
{
|
|
"id": "02cbc6f9-bdcb-45fc-9973-ded42346ffbc",
|
|
"name": "Split HTML Array into Individual Books",
|
|
"type": "n8n-nodes-base.splitOut",
|
|
"position": [
|
|
980,
|
|
340
|
|
],
|
|
"parameters": {
|
|
"options": {},
|
|
"fieldToSplitOut": "books"
|
|
},
|
|
"typeVersion": 1
|
|
}
|
|
],
|
|
"active": false,
|
|
"pinData": {},
|
|
"settings": {
|
|
"executionOrder": "v1"
|
|
},
|
|
"versionId": "264412ff-9d74-443c-a2ff-69be1e042a82",
|
|
"connections": {
|
|
"Sort by price": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "Convert to CSV File",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"Convert to CSV File": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "Send CSV via e-mail",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"Extract individual book price": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "Sort by price",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"Extract all books from the page": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "Split HTML Array into Individual Books",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"Split HTML Array into Individual Books": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "Extract individual book price",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"Scrape Website Content with Dumpling AI": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "Extract all books from the page",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"Trigger- Watches For new URL in Spreadsheet": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "Scrape Website Content with Dumpling AI",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
}
|
|
}
|
|
}