Merge pull request #83 from PraveenMudalgeri/add-folder-support

fix: enable recursive workflow discovery and normalize categories (Issue #82)
Eliad Shahar
2025-08-13 00:59:29 +03:00
committed by GitHub
6 changed files with 8428 additions and 23 deletions

View File

@@ -1,10 +1,12 @@
# n8n-workflows Repository
## Overview
This repository contains a collection of n8n workflow automation files. n8n is a workflow automation tool that lets you build complex automations through a visual, node-based interface. Each workflow is stored as a JSON file containing node definitions, connections, and configuration.
## Repository Structure
```bash
n8n-workflows/
├── workflows/          # Main directory containing all n8n workflow JSON files
│   ├── *.json          # Individual workflow files
@@ -14,7 +16,9 @@ n8n-workflows/
```
## Workflow File Format
Each workflow JSON file contains (see the sketch after this list):
- **name**: Workflow identifier
- **nodes**: Array of node objects defining operations
- **connections**: Object defining how nodes are linked together
@@ -24,6 +28,7 @@ n8n-workflows/
- **createdAt/updatedAt**: Timestamps
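As a quick illustration, here is a minimal sketch of loading one of these files and reading the core fields (the filename is hypothetical):
```python
import json

# Load a workflow file and inspect its required fields (path is hypothetical)
with open("workflows/example_workflow.json", "r", encoding="utf-8") as f:
    workflow = json.load(f)

print(workflow["name"])                      # workflow identifier
print(len(workflow["nodes"]))                # how many nodes it defines
print(list(workflow["connections"].keys()))  # nodes with outgoing connections
```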
## Common Node Types
- **Trigger nodes**: webhook, cron, manual
- **Integration nodes**: HTTP requests, database connectors, API integrations
- **Logic nodes**: IF, Switch, Merge, Loop
@@ -33,14 +38,18 @@ n8n-workflows/
## Using This Repository
### Suggestions for Analysis Tasks
When analyzing workflows in this repository (see the sketch after this list):
1. Parse the JSON files to understand the workflow structure
2. Examine the node chain to determine what the workflow implements
3. Identify external integrations and dependencies
4. Consider the business logic implemented by the node connections
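A minimal sketch of steps 1, 2, and 4, assuming the standard fields described above and the usual `"main"` connection layout (the path is hypothetical):
```python
import json
from pathlib import Path

def summarize(path: Path) -> None:
    """Print each connection as 'source (type) -> target'."""
    workflow = json.loads(path.read_text(encoding="utf-8"))
    node_types = {n["name"]: n["type"] for n in workflow.get("nodes", [])}
    for source, outputs in workflow.get("connections", {}).items():
        # "main" holds the ordinary execution links between nodes
        for branch in outputs.get("main", []):
            for link in branch:
                print(f"{source} ({node_types.get(source)}) -> {link['node']}")

summarize(Path("workflows/example_workflow.json"))  # hypothetical file
```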
### Suggestions for Documentation Tasks
When documenting workflows (a sketch follows this list):
1. Verify that existing descriptions match the actual implementation
2. Identify trigger mechanisms and schedules
3. List all external services and APIs used
@@ -48,7 +57,9 @@ n8n-workflows/
5. Highlight any error handling or retry mechanisms
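For steps 2 and 3, a sketch that separates trigger nodes from the external services a workflow touches; the `n8n-nodes-base.` prefix check reflects common n8n node naming and is an assumption here:
```python
def triggers_and_services(workflow: dict) -> tuple[list, set]:
    """Collect trigger node names and the integrations a workflow uses."""
    triggers, services = [], set()
    for node in workflow.get("nodes", []):
        node_type = node.get("type", "")  # e.g. "n8n-nodes-base.slack"
        if "trigger" in node_type.lower() or "webhook" in node_type.lower():
            triggers.append(node.get("name"))
        elif node_type.startswith("n8n-nodes-base."):
            services.add(node_type.split(".", 1)[1])
    return triggers, services
```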
### Suggestions for Modification Tasks
When modifying workflows:
1. Preserve the JSON structure and required fields
2. Keep node IDs unique
3. Update connections when adding or removing nodes
@@ -57,17 +68,20 @@ n8n-workflows/
## Key Considerations
### Security
- Workflow files may contain sensitive information in webhook URLs or API configurations
- Credentials are usually stored separately in n8n, not in the workflow files
- Handle any hardcoded values or endpoints with care
### Best Practices
- Workflows should have clear, descriptive names
- Complex workflows benefit from documentation nodes or comments
- Error-handling nodes improve reliability
- Modular workflows (calling sub-workflows) improve maintainability
### Common Patterns
- **Data pipeline**: Trigger → Fetch data → Transform → Store/Send
- **Integration sync**: Scheduled job → API call → Compare → Update systems
- **Automation**: Webhook → Process → Conditional logic → Take action
@@ -82,27 +96,32 @@ n8n-workflows/
2. **Documentation generation**: Write descriptions that explain what a workflow accomplishes, not just which nodes it contains.
3. **Troubleshooting**: Common issues include:
   - Incorrectly connected nodes
   - Missing error handling
   - Inefficient data processing inside loops
   - Hardcoded values that should be parameterized
4. **Optimization suggestions**:
   - Identify redundant operations
   - Suggest batching where appropriate
   - Recommend adding error handling
   - Suggest splitting overly complex workflows
5. **Code generation**: When building tools that analyze these workflows (see the sketch after this list):
   - Handle different n8n format versions
   - Account for custom nodes
   - Parse expressions in node parameters
   - Respect node execution order
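For expression parsing in particular, n8n node parameters typically embed expressions as `{{ ... }}` referencing values such as `$json` or `$node`, so a simple scan can surface them; this is a rough sketch with a hypothetical parameter value:
```python
import re

# Matches the {{ ... }} expression syntax used in n8n node parameters
EXPRESSION = re.compile(r"\{\{(.+?)\}\}")

param = "=Order {{ $json.order_id }} for {{ $json.customer_name }} shipped"
for match in EXPRESSION.finditer(param):
    print(match.group(1).strip())  # -> $json.order_id, $json.customer_name
```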
## Repository-Specific Information
[Add any specific details about the workflows, naming conventions, or special considerations here]
## Version Compatibility
- n8n version: [specify which n8n versions these workflows are compatible with]
- Last updated: [date of the last major update]
- Migration notes: [any version-specific considerations]

File diff suppressed because it is too large

View File

@@ -1,3 +1,18 @@
[
"Uncategorized"
"AI Agent Development",
"Business Process Automation",
"CRM & Sales",
"Cloud Storage & File Management",
"Communication & Messaging",
"Creative Content & Video Automation",
"Creative Design Automation",
"Data Processing & Analysis",
"E-commerce & Retail",
"Financial & Accounting",
"Marketing & Advertising Automation",
"Project Management",
"Social Media Management",
"Technical Infrastructure & DevOps",
"Uncategorized",
"Web Scraping & Data Extraction"
]

View File

@@ -1,20 +1,20 @@
import json
import os
from pathlib import Path
import glob
import re
def load_def_categories():
    """Load the definition categories from def_categories.json"""
    def_categories_path = Path("context/def_categories.json")
    with open(def_categories_path, 'r', encoding='utf-8') as f:
        categories_data = json.load(f)
    # Create a mapping from integration name (lowercase) to category
    integration_to_category = {}
    for item in categories_data:
        integration = item['integration'].lower()
        category = item['category']
        integration_to_category[integration] = category
        raw_map = json.load(f)
    # Normalize keys: strip non-alphanumerics and lowercase
    integration_to_category = {
        re.sub(r"[^a-z0-9]", "", item["integration"].lower()): item["category"]
        for item in raw_map
    }
    return integration_to_category
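# Example (hypothetical data): an entry such as
#   {"integration": "Google Sheets", "category": "Data Processing & Analysis"}
# normalizes to the key "googlesheets", so filename tokens like "google-sheets"
# or "GoogleSheets" all resolve to the same category.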
def extract_tokens_from_filename(filename):
@@ -33,30 +33,134 @@ def extract_tokens_from_filename(filename):
def find_matching_category(tokens, integration_to_category):
    """Find the first matching category for the given tokens"""
    for token in tokens:
        if token in integration_to_category:
            return integration_to_category[token]
        # Normalize token same as keys
        norm = re.sub(r"[^a-z0-9]", "", token.lower())
        if norm in integration_to_category:
            return integration_to_category[norm]
    # Try partial matches for common variations
    for token in tokens:
        for integration in integration_to_category:
            if token in integration or integration in token:
                return integration_to_category[integration]
        norm = re.sub(r"[^a-z0-9]", "", token.lower())
        for integration_key in integration_to_category:
            if norm in integration_key or integration_key in norm:
                return integration_to_category[integration_key]
    return ""
def categorize_by_filename(filename):
    """
    Categorize workflow based on filename patterns.
    Returns the most likely category, or an empty string if no pattern matches.
    """
    filename_lower = filename.lower()
    # Security & Authentication
    if any(word in filename_lower for word in ['totp', 'bitwarden', 'auth', 'security']):
        return "Technical Infrastructure & DevOps"
    # Data Processing & File Operations
    if any(word in filename_lower for word in ['process', 'writebinaryfile', 'readbinaryfile', 'extractfromfile', 'converttofile', 'googlefirebasecloudfirestore', 'supabase', 'surveymonkey', 'renamekeys', 'readpdf', 'wufoo', 'splitinbatches', 'airtop', 'comparedatasets', 'spreadsheetfile']):
        return "Data Processing & Analysis"
    # Utility & Business Process Automation
    if any(word in filename_lower for word in ['noop', 'code', 'schedule', 'filter', 'splitout', 'wait', 'limit', 'aggregate', 'acuityscheduling', 'eventbrite', 'philipshue', 'stickynote', 'n8ntrainingcustomerdatastore', 'n8n']):
        return "Business Process Automation"
    # Webhook & API related
    if any(word in filename_lower for word in ['webhook', 'respondtowebhook', 'http', 'rssfeedread']):
        return "Web Scraping & Data Extraction"
    # Form & Data Collection
    if any(word in filename_lower for word in ['form', 'typeform', 'jotform']):
        return "Data Processing & Analysis"
    # Local file operations
    if any(word in filename_lower for word in ['localfile', 'filemaker']):
        return "Cloud Storage & File Management"
    # Database operations
    if any(word in filename_lower for word in ['postgres', 'mysql', 'mongodb', 'redis', 'elasticsearch', 'snowflake']):
        return "Data Processing & Analysis"
    # AI & Machine Learning
    if any(word in filename_lower for word in ['openai', 'awstextract', 'awsrekognition', 'humanticai', 'openthesaurus', 'googletranslate', 'summarize']):
        return "AI Agent Development"
    # E-commerce specific
    if any(word in filename_lower for word in ['woocommerce', 'gumroad']):
        return "E-commerce & Retail"
    # Social media specific
    if any(word in filename_lower for word in ['facebook', 'linkedin', 'instagram']):
        return "Social Media Management"
    # Customer support
    if any(word in filename_lower for word in ['zendesk', 'intercom', 'drift', 'pagerduty']):
        return "Communication & Messaging"
    # Analytics & Tracking
    if any(word in filename_lower for word in ['googleanalytics', 'segment', 'mixpanel']):
        return "Data Processing & Analysis"
    # Development tools
    if any(word in filename_lower for word in ['git', 'github', 'gitlab', 'travisci', 'jenkins', 'uptimerobot', 'gsuiteadmin', 'debughelper', 'bitbucket']):
        return "Technical Infrastructure & DevOps"
    # CRM & Sales tools
    if any(word in filename_lower for word in ['pipedrive', 'hubspot', 'salesforce', 'copper', 'orbit', 'agilecrm']):
        return "CRM & Sales"
    # Marketing tools
    if any(word in filename_lower for word in ['mailchimp', 'convertkit', 'sendgrid', 'mailerlite', 'lemlist', 'sendy', 'postmark', 'mailgun']):
        return "Marketing & Advertising Automation"
    # Project management
    if any(word in filename_lower for word in ['asana', 'mondaycom', 'clickup', 'trello', 'notion', 'toggl', 'microsofttodo', 'calendly', 'jira']):
        return "Project Management"
    # Communication
    if any(word in filename_lower for word in ['slack', 'telegram', 'discord', 'mattermost', 'twilio', 'emailreadimap', 'teams', 'gotowebinar']):
        return "Communication & Messaging"
    # Cloud storage
    if any(word in filename_lower for word in ['dropbox', 'googledrive', 'onedrive', 'awss3', 'googledocs']):
        return "Cloud Storage & File Management"
    # Creative tools
    if any(word in filename_lower for word in ['canva', 'figma', 'bannerbear', 'editimage']):
        return "Creative Design Automation"
    # Video & content
    if any(word in filename_lower for word in ['youtube', 'vimeo', 'storyblok', 'strapi']):
        return "Creative Content & Video Automation"
    # Financial tools
    if any(word in filename_lower for word in ['stripe', 'chargebee', 'quickbooks', 'harvest']):
        return "Financial & Accounting"
    # Weather & external APIs
    if any(word in filename_lower for word in ['openweathermap', 'nasa', 'crypto', 'coingecko']):
        return "Web Scraping & Data Extraction"
    return ""
def main():
    # Load definition categories
    integration_to_category = load_def_categories()
    # Get all JSON files from workflows directory
    workflows_dir = Path("workflows")
    json_files = list(workflows_dir.glob("*.json"))
    json_files = glob.glob(
        os.path.join(workflows_dir, "**", "*.json"),
        recursive=True
    )
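    # Note: pathlib offers an equivalent recursive search, as used in
    # workflow_db.py: json_files = [str(p) for p in workflows_dir.rglob("*.json")]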
    # Process each file
    search_categories = []
    for json_file in json_files:
        filename = json_file.name
        path_obj = Path(json_file)
        filename = path_obj.name
        tokens = extract_tokens_from_filename(filename)
        category = find_matching_category(tokens, integration_to_category)
@@ -64,6 +168,11 @@ def main():
"filename": filename,
"category": category
})
# Second pass for categorization
for item in search_categories:
if not item['category']:
item['category'] = categorize_by_filename(item['filename'])
# Sort by filename for consistency
search_categories.sort(key=lambda x: x['filename'])
@@ -136,4 +245,4 @@ def main():
print("="*50)
if __name__ == "__main__":
main()
main()

View File

@@ -10,6 +10,21 @@ import sys
from pathlib import Path
from typing import List, Dict, Any
from categorize_workflows import categorize_by_filename
def load_categories():
    """Load the search categories file."""
    try:
        with open('context/search_categories.json', 'r', encoding='utf-8') as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return []
def save_categories(data):
    """Save the search categories file."""
    with open('context/search_categories.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
class WorkflowImporter:
    """Import n8n workflows with progress tracking and error handling."""
@@ -56,6 +71,32 @@ class WorkflowImporter:
        if result.returncode == 0:
            print(f"✅ Imported: {file_path.name}")
            # Categorize the workflow and update search_categories.json
            suggested_category = categorize_by_filename(file_path.name)
            all_workflows_data = load_categories()
            found = False
            for workflow_entry in all_workflows_data:
                if workflow_entry.get('filename') == file_path.name:
                    workflow_entry['category'] = suggested_category
                    found = True
                    break
            if not found:
                # Add new workflow entry if not found (e.g., first import)
                all_workflows_data.append({
                    "filename": file_path.name,
                    "category": suggested_category,
                    "name": file_path.stem,  # Assuming workflow name is filename without extension
                    "description": "",  # Placeholder, can be updated manually
                    "nodes": []  # Placeholder, can be updated manually
                })
            save_categories(all_workflows_data)
            print(f"   Categorized '{file_path.name}' as '{suggested_category or 'Uncategorized'}'")
            return True
        else:
            error_msg = result.stderr.strip() or result.stdout.strip()
@@ -141,6 +182,7 @@ def check_n8n_available() -> bool:
def main():
    """Main entry point."""
    sys.stdout.reconfigure(encoding='utf-8')
    print("🔧 N8N Workflow Importer")
    print("=" * 40)

View File

@@ -434,10 +434,9 @@ class WorkflowDatabase:
        if not os.path.exists(self.workflows_dir):
            print(f"Warning: Workflows directory '{self.workflows_dir}' not found.")
            return {'processed': 0, 'skipped': 0, 'errors': 0}
        workflows_path = Path(self.workflows_dir)
        json_files = list(workflows_path.rglob("*.json"))
        # json_files = glob.glob(os.path.join(self.workflows_dir, "*.json"), recursive=True)
        json_files = [str(p) for p in workflows_path.rglob("*.json")]
        if not json_files:
            print(f"Warning: No JSON files found in '{self.workflows_dir}' directory.")