generated from kgod/ai-review-template
提交
This commit is contained in:
@@ -0,0 +1,108 @@
|
||||
"""入口"""
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Allow running this file directly as a script
|
||||
if __name__ == "__main__":
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
from src.detail_analysis_graph.graph import create_graph
|
||||
|
||||
|
||||
async def analyze_job_detail(url: str, job_item_selector: str, change_type: str, headless: bool = False) -> dict:
    """Analyze a job detail view and extract selectors for its fields.

    Launches Chromium through Playwright, opens a new page in a 1280x800
    context, and passes that page together with the arguments to the graph
    returned by ``create_graph()``. The browser is always closed before
    returning.

    Args:
        url: URL of the job listing page.
        job_item_selector: Selector for a job item on the listing page.
        change_type: How the page changes after a job item is clicked
            (``redirect`` / ``new_tab`` / ``in_page``).
        headless: Passed to ``chromium.launch``; ``False`` (the default)
            shows the browser window.

    Returns:
        A dict of the following shape. ``detail_area_selector`` and
        ``fields`` are only present on success; ``failed_attempts`` is only
        present when validation failed::

            {
                "status": "success" | "failed",
                "detail_area_selector": str,
                "fields": {
                    "job_title": {"selector": str, "sample": str},
                    "description": {"selectors": list[str], "sample": str},
                    "salary": {"selector": str | None, "sample": str | None},
                    "location": {"selector": str | None, "sample": str | None},
                    "company": {"selector": str | None, "sample": str | None},
                    "experience": {"selector": str | None, "sample": str | None},
                    "education": {"selector": str | None, "sample": str | None},
                    "detail_url": {"sample": str}
                },
                "error": str | None
            }
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)

        # Context/page creation sits inside the try so that the browser is
        # closed even when one of those calls fails.
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 800})
            page = await context.new_page()

            graph = create_graph()
            result = await graph.ainvoke({
                "url": url,
                "job_item_selector": job_item_selector,
                "change_type": change_type,
                "page": page,
            })

            # An explicit error reported by the graph takes precedence.
            if result.get("error"):
                return {
                    "status": "failed",
                    "error": result["error"],
                }

            # No error but not validated either: report the fixed
            # "validation failed, retries exhausted" message.
            if not result.get("is_valid"):
                return {
                    "status": "failed",
                    "error": "验证失败,已达最大重试次数",
                    "failed_attempts": result.get("failed_attempts"),
                }

            return {
                "status": "success",
                "detail_area_selector": result.get("detail_area_selector"),
                "fields": result.get("fields"),
                # Present on success too, matching the documented shape.
                "error": None,
            }
        finally:
            await browser.close()
|
||||
|
||||
|
||||
async def main():
    """Run ``analyze_job_detail`` against a sample listing page and print the result.

    Manual test driver: the URL, job item selector and change type are
    hard-coded below.
    """
    url = "https://dearsamsung.zhiye.com/#/samsung/pc/szzw"
    job_item_selector = ".BHGkB li"
    change_type = "in_page"

    result = await analyze_job_detail(url, job_item_selector, change_type)

    print("\n最终结果:")
    if result["status"] != "success":
        print("❌ 失败")
        print(f" 原因: {result.get('error')}")
        return

    print("✅ 成功")
    print(f" 详情区域: {result['detail_area_selector']}")
    print(" 字段:")
    # "fields" is filled via result.get("fields") upstream and may be None;
    # fall back to an empty mapping instead of crashing on .items().
    fields = result.get("fields") or {}
    for name, info in fields.items():
        sample = str(info.get("sample", ""))
        # detail_url is printed in full; every other sample is cut to 50 chars.
        if name != "detail_url":
            sample = sample[:50]
        if "selector" in info:
            print(f" {name}: {info['selector']} -> {sample}")
        elif "selectors" in info:
            print(f" {name}: {info['selectors']} -> {sample}")
        else:
            print(f" {name}: {sample}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: build the coroutine, then run it to completion.
    entry_coro = main()
    asyncio.run(entry_coro)
|
||||
Reference in New Issue
Block a user