generated from kgod/ai-review-template
109 lines
3.7 KiB
Python
109 lines
3.7 KiB
Python
"""入口"""
|
|
import asyncio
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Support running this file directly as a script: in that case, prepend the
# directory three levels above this file to sys.path before the project
# import below is executed.
# NOTE(review): presumably that directory is the project root that contains
# the `src` package — confirm against the repository layout.
if __name__ == "__main__":
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
|
|
from playwright.async_api import async_playwright
|
|
from src.detail_analysis_graph.graph import create_graph
|
|
|
|
|
|
async def analyze_job_detail(url: str, job_item_selector: str, change_type: str, headless: bool = False) -> dict:
    """
    Analyze a job detail page and extract field selectors.

    Launches Chromium through Playwright, opens a single page in a
    1280x800 context, runs the graph returned by ``create_graph()`` with
    that page, and converts the final graph state into a result dict.
    The browser is closed in a ``finally`` clause once the graph has been
    attempted, whether it returned or raised.

    Args:
        url: URL of the job list page.
        job_item_selector: Selector for a job item.
        change_type: How the page changes after a click
            (redirect / new_tab / in_page).
        headless: Forwarded to ``chromium.launch``; defaults to False.

    Returns:
        A dict in one of three shapes, chosen from the final graph state:

        * the state has a truthy ``error``::

              {"status": "failed", "error": <that value>}

        * the state has a truthy ``is_valid``::

              {
                  "status": "success",
                  "detail_area_selector": <state value>,
                  "fields": <state value>,
              }

        * otherwise::

              {
                  "status": "failed",
                  "error": <fixed "validation failed" message>,
                  "failed_attempts": <state value>,
              }

        According to the original notes, ``fields`` is expected to map
        ``job_title``, ``salary``, ``location``, ``company``,
        ``experience`` and ``education`` to ``{"selector", "sample"}``
        dicts, ``description`` to ``{"selectors", "sample"}`` and
        ``detail_url`` to ``{"sample"}``. This function passes the value
        through unchanged and does not check that shape.
    """
    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=headless)
        context = await browser.new_context(viewport={"width": 1280, "height": 800})
        page = await context.new_page()

        try:
            workflow = create_graph()

            initial_state = {
                "url": url,
                "job_item_selector": job_item_selector,
                "change_type": change_type,
                "page": page
            }
            final_state = await workflow.ainvoke(initial_state)

            # An error reported by the graph takes precedence over validity.
            failure = final_state.get("error")
            if failure:
                return {
                    "status": "failed",
                    "error": failure
                }

            # No error but no valid result either: report the fixed
            # validation-failure message together with the recorded attempts.
            if not final_state.get("is_valid"):
                return {
                    "status": "failed",
                    "error": "验证失败,已达最大重试次数",
                    "failed_attempts": final_state.get("failed_attempts")
                }

            return {
                "status": "success",
                "detail_area_selector": final_state.get("detail_area_selector"),
                "fields": final_state.get("fields")
            }

        finally:
            await browser.close()
|
|
|
|
|
|
async def main():
    """Manual test: run the analysis on a hard-coded site and print the result."""
    url = "https://dearsamsung.zhiye.com/#/samsung/pc/szzw"
    job_item_selector = ".BHGkB li"
    change_type = "in_page"

    result = await analyze_job_detail(url, job_item_selector, change_type)

    print("\n最终结果:")

    # Failure path first, so the success report below stays flat.
    if result["status"] != "success":
        print("❌ 失败")
        print(f" 原因: {result.get('error')}")
        return

    print("✅ 成功")
    print(f" 详情区域: {result['detail_area_selector']}")
    print(" 字段:")

    for name, info in result["fields"].items():
        text = str(info.get('sample', ''))
        # detail_url is shown in full; every other sample is cut to 50 chars.
        shown = text if name == "detail_url" else text[:50]

        # Prefer "selector", then "selectors"; fall back to the sample alone.
        for key in ("selector", "selectors"):
            if key in info:
                print(f" {name}: {info[key]} -> {shown}")
                break
        else:
            print(f" {name}: {shown}")
|
|
|
|
|
|
# Script entry point: when this file is executed directly, run the manual
# test coroutine `main()` to completion on a new asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())
|