"""Crawler 测试脚本""" import asyncio from src.crawler import crawl, CrawlerConfig # 中兴招聘测试配置 # test_config: CrawlerConfig = { # "url": "https://app.mokahr.com/social-recruitment/zte/47588#/jobs", # "job_item_selector": ".jobs-list-WmE84RgZxp .container-aOp138AX_X.normal-TBuWTpDMcE.list-oR2doUijv4", # "item_change_type": "redirect", # "next_page_selector": ".sd-Pagination-pagination-2kuN2 .sd-Pagination-forward-3z80f", # "page_change_type": "url_change", # "field_selectors": { # "job_title": {"selector": [".title-ROUQFdjmhP"]}, # "description": {"selector": [".job-description-VvfEUGocNE"]}, # "location": {"selector": [".info-UcB_mxJq8y span:first-child"]}, # "company": {"selector": [".basic-info-dB86EjV5uU span:nth-child(2)"]}, # }, # "detail_area_selector": None, # } # 美宜佳招聘测试配置 # test_config: CrawlerConfig = { # "url": "https://meiyijia.jobs.feishu.cn/social/position/list", # "job_item_selector": ".listItems__fca8c0 a", # "item_change_type": "new_tab", # "next_page_selector": ".pager__fca8c0 .atsx-pagination-next:not(.atsx-pagination-disabled)", # "page_change_type": "url_change", # "field_selectors": { # "job_title": {"selector": [".positionItem-title-text"]}, # "description": {"selector": [".positionItem-jobDesc"]}, # "location": {"selector": [".positionItem-subTitle span"]}, # }, # "detail_area_selector": None, # } # 三星招聘测试配置 test_config: CrawlerConfig = { "url": "https://dearsamsung.zhiye.com/#/samsung/pc/szzw", "job_item_selector": ".BHGkB li", "item_change_type": "in_page", "next_page_selector": "._8x6MD .ant-pagination-next:not([aria-disabled='true']) .ant-pagination-item-link", "page_change_type": "content_change", "field_selectors": { "job_title": {"selector": ["h2"]}, "description": {"selector": ['.aCl-8 p', '.aCl-8 pre']}, }, "detail_area_selector": ".FLf6j", } async def main(): results = await crawl(test_config, headless=False) print(f"\n爬取完成,共 {len(results)} 条数据") for i, item in enumerate(results): print(f"\n--- 岗位 {i+1} ---") for k, v in item.items(): print(f"{k}: {v}") if __name__ == "__main__": asyncio.run(main())