"""入口""" import asyncio import sys from pathlib import Path # 支持直接运行 if __name__ == "__main__": sys.path.insert(0, str(Path(__file__).parent.parent.parent)) from playwright.async_api import async_playwright from src.detail_analysis_graph.graph import create_graph async def analyze_job_detail(url: str, job_item_selector: str, change_type: str, headless: bool = False) -> dict: """ 分析岗位详情页,提取字段选择器 Args: url: 列表页 URL job_item_selector: 岗位项选择器 change_type: 点击后变化类型 (redirect / new_tab / in_page) Returns: { "status": "success" | "failed", "detail_area_selector": str, "fields": { "job_title": {"selector": str, "sample": str}, "description": {"selectors": list[str], "sample": str}, "salary": {"selector": str | None, "sample": str | None}, "location": {"selector": str | None, "sample": str | None}, "company": {"selector": str | None, "sample": str | None}, "experience": {"selector": str | None, "sample": str | None}, "education": {"selector": str | None, "sample": str | None}, "detail_url": {"sample": str} }, "error": str | None } """ async with async_playwright() as p: browser = await p.chromium.launch(headless=headless) context = await browser.new_context(viewport={"width": 1280, "height": 800}) page = await context.new_page() try: graph = create_graph() result = await graph.ainvoke({ "url": url, "job_item_selector": job_item_selector, "change_type": change_type, "page": page }) if result.get("error"): return { "status": "failed", "error": result["error"] } if result.get("is_valid"): return { "status": "success", "detail_area_selector": result.get("detail_area_selector"), "fields": result.get("fields") } else: return { "status": "failed", "error": "验证失败,已达最大重试次数", "failed_attempts": result.get("failed_attempts") } finally: await browser.close() async def main(): """测试""" url = "https://dearsamsung.zhiye.com/#/samsung/pc/szzw" job_item_selector = ".BHGkB li" change_type = "in_page" result = await analyze_job_detail(url, job_item_selector, change_type) print("\n最终结果:") if result["status"] == "success": print(f"✅ 成功") print(f" 详情区域: {result['detail_area_selector']}") print(" 字段:") for name, info in result["fields"].items(): sample = str(info.get('sample', '')) # detail_url 不截断,其他字段截断显示 if name != "detail_url": sample = sample[:50] if "selector" in info: print(f" {name}: {info['selector']} -> {sample}") elif "selectors" in info: print(f" {name}: {info['selectors']} -> {sample}") else: print(f" {name}: {sample}") else: print(f"❌ 失败") print(f" 原因: {result.get('error')}") if __name__ == "__main__": asyncio.run(main())