generated from kgod/ai-review-template
c06f595559
- btyy (倍特药业), fullsemi (富芯半导体): 北森平台爬虫 - hotjob (中国五矿): hotjob平台爬虫 - leinao (中科类脑): 静态HTML爬虫 - task_fetcher: 原子锁获取任务 - post.md: 抓取技能文档 - export_har: mitmproxy HAR导出工具
74 lines
1.7 KiB
Python
74 lines
1.7 KiB
Python
import os
from datetime import datetime

import pymysql
|
|
|
|
|
|
# Connection settings passed to pymysql.connect().
# Each value can be overridden through an environment variable so that
# credentials do not have to live in source control; the literals below are
# the original hard-coded values, kept as fallbacks for backward compatibility.
# NOTE(review): the fallback is a root account with a trivial password --
# set TASK_DB_USER / TASK_DB_PASSWORD anywhere outside local development.
DB_CONFIG = {
    "host": os.environ.get("TASK_DB_HOST", "192.168.31.105"),
    # The driver expects an integer port, so convert the environment string.
    "port": int(os.environ.get("TASK_DB_PORT", "3306")),
    "user": os.environ.get("TASK_DB_USER", "root"),
    "password": os.environ.get("TASK_DB_PASSWORD", "123456"),
    "database": os.environ.get("TASK_DB_NAME", "table_comple"),
    "charset": "utf8mb4",
}
|
|
|
|
|
|
def fetch_next_task():
    """Claim the next pending task from ``app_url_list``.

    Opens a new connection using ``DB_CONFIG`` and, inside one transaction,
    selects a single row whose ``status`` is not ``'processing'`` (ordered by
    ``finished_at`` and then ``id``, both ascending) with
    ``SELECT ... FOR UPDATE``. The selected row is then updated to
    ``status = 'processing'`` with ``started_at`` set to the current local
    time, and the transaction is committed.

    :return: ``{"id": ..., "url": ..., "company": ...}`` built from the row's
        ``id``, ``input_url`` and ``input_company_name`` columns, or ``None``
        when no row matches.
    :raises Exception: any error raised while querying is re-raised unchanged
        after a rollback has been attempted.
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        conn.begin()

        # Using the cursor as a context manager closes it on every exit path.
        with conn.cursor(pymysql.cursors.DictCursor) as cursor:
            cursor.execute("""
                SELECT id, input_url, input_company_name
                FROM app_url_list
                WHERE status != 'processing'
                ORDER BY finished_at ASC, id ASC
                LIMIT 1
                FOR UPDATE
            """)
            row = cursor.fetchone()

            if not row:
                # Nothing to claim: end the transaction without changes.
                conn.rollback()
                return None

            cursor.execute("""
                UPDATE app_url_list
                SET started_at = %s, status = 'processing'
                WHERE id = %s
            """, (datetime.now(), row["id"]))

        conn.commit()

        return {
            "id": row["id"],
            "url": row["input_url"],
            "company": row["input_company_name"],
        }
    except Exception:
        try:
            conn.rollback()
        except pymysql.MySQLError:
            # A failed rollback (e.g. the connection is already gone) must not
            # replace the original error; the connection is closed below.
            pass
        # Bare raise re-raises the original exception with its traceback.
        raise
    finally:
        conn.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: claim one task and print what was claimed.
    import sys

    # NOTE(review): presumably forces UTF-8 so the Chinese messages below
    # print on consoles with a different default encoding -- confirm.
    sys.stdout.reconfigure(encoding="utf-8")

    claimed = fetch_next_task()
    if not claimed:
        print("没有可用任务")
    else:
        print("获取任务成功:")
        # (label, key) pairs, printed one per line in this order.
        for label, key in ((" ID", "id"), (" URL", "url"), (" 公司", "company")):
            print(f"{label}: {claimed[key]}")
|