This commit is contained in:
kgod
2026-05-26 21:02:17 +08:00
commit 8697477a53
10000 changed files with 1541403 additions and 0 deletions
+108
View File
@@ -0,0 +1,108 @@
"""入口"""
import asyncio
import sys
from pathlib import Path
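# Allow running this file directly as a script: add the directory three levels up to sys.path so the `src.` imports below resolve.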
if __name__ == "__main__":
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from playwright.async_api import async_playwright
from src.detail_analysis_graph.graph import create_graph
async def analyze_job_detail(url: str, job_item_selector: str, change_type: str, headless: bool = False) -> dict:
    """Analyze a job detail view and extract selectors for its fields.

    Launches Chromium through Playwright, creates a page with a 1280x800
    viewport, and invokes the graph built by ``create_graph()`` with the
    given inputs plus that page. The browser is always closed before
    returning.

    Args:
        url: URL of the job listing page.
        job_item_selector: Selector matching a job item on the listing page.
        change_type: What happens after a job item is clicked
            (``redirect`` / ``new_tab`` / ``in_page``).
        headless: Forwarded to ``chromium.launch``; defaults to ``False``.

    Returns:
        A dict whose ``"status"`` is ``"success"`` or ``"failed"``.

        On success (graph state has a truthy ``"is_valid"``)::

            {
                "status": "success",
                "detail_area_selector": str,
                "fields": {
                    "job_title": {"selector": str, "sample": str},
                    "description": {"selectors": list[str], "sample": str},
                    "salary": {"selector": str | None, "sample": str | None},
                    "location": {"selector": str | None, "sample": str | None},
                    "company": {"selector": str | None, "sample": str | None},
                    "experience": {"selector": str | None, "sample": str | None},
                    "education": {"selector": str | None, "sample": str | None},
                    "detail_url": {"sample": str}
                }
            }

        ``detail_area_selector`` and ``fields`` are copied from the graph
        state with ``.get`` and are therefore ``None`` if the graph did not
        set them.

        On failure, either ``{"status": "failed", "error": <graph error>}``
        when the graph state carries a truthy ``"error"``, or a dict with a
        fixed validation-failure message plus ``"failed_attempts"`` when
        ``"is_valid"`` is falsy.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        # Context/page creation lives inside the try so that a failure during
        # setup still reaches the explicit browser.close() below.
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 800})
            page = await context.new_page()

            graph = create_graph()
            result = await graph.ainvoke({
                "url": url,
                "job_item_selector": job_item_selector,
                "change_type": change_type,
                "page": page
            })

            # An explicit graph error takes precedence over the validity flag.
            if result.get("error"):
                return {
                    "status": "failed",
                    "error": result["error"]
                }

            if result.get("is_valid"):
                return {
                    "status": "success",
                    "detail_area_selector": result.get("detail_area_selector"),
                    "fields": result.get("fields")
                }

            # No error reported, but the graph never produced a valid result.
            return {
                "status": "failed",
                "error": "验证失败,已达最大重试次数",
                "failed_attempts": result.get("failed_attempts")
            }
        finally:
            await browser.close()
async def main():
    """Run a manual test of ``analyze_job_detail`` and print the outcome.

    Uses a hard-coded listing URL, job-item selector and change type. On
    success, prints the detail-area selector and one line per field; every
    sample except ``detail_url`` is truncated to 50 characters. On failure,
    prints the reported error.
    """
    url = "https://dearsamsung.zhiye.com/#/samsung/pc/szzw"
    job_item_selector = ".BHGkB li"
    change_type = "in_page"

    result = await analyze_job_detail(url, job_item_selector, change_type)

    print("\n最终结果:")
    if result["status"] != "success":
        print("❌ 失败")
        print(f" 原因: {result.get('error')}")
        return

    print("✅ 成功")
    print(f" 详情区域: {result['detail_area_selector']}")
    print(" 字段:")
    # analyze_job_detail forwards result.get("fields"), which may be None;
    # treat that as "no fields" instead of failing on None.items().
    fields = result.get("fields") or {}
    for name, info in fields.items():
        sample = str(info.get('sample', ''))
        # detail_url is shown in full; other samples are shortened for display.
        if name != "detail_url":
            sample = sample[:50]
        if "selector" in info:
            print(f" {name}: {info['selector']} -> {sample}")
        elif "selectors" in info:
            print(f" {name}: {info['selectors']} -> {sample}")
        else:
            print(f" {name}: {sample}")
# Script entry point: run the manual test in main() on a new asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())