From fa7d87e435bf03db26738a7cb3ba9cc4c09720ed Mon Sep 17 00:00:00 2001 From: zk Date: Thu, 23 Apr 2026 15:37:26 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A4=84=E7=90=86=E5=85=BC=E5=AE=B9=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/ai/resume_extractor/extractor.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/app/ai/resume_extractor/extractor.py b/app/ai/resume_extractor/extractor.py index 0093900..c05c1e5 100644 --- a/app/ai/resume_extractor/extractor.py +++ b/app/ai/resume_extractor/extractor.py @@ -1,8 +1,10 @@ """简历并行提取:将完整简历文本拆分为5个AI任务并行提取""" import asyncio +import re -from langchain_core.output_parsers import JsonOutputParser +from json_repair import repair_json +from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate from app.ai.models import LLM @@ -13,12 +15,19 @@ from app.ai.resume_extractor.prompts import ( from app.core.logger import log +def _parse_json(text: str) -> dict: + """解析 AI 输出的 JSON,自动去除 markdown 代码块包裹,容错处理""" + cleaned = re.sub(r"^```(?:json)?\s*\n?", "", text.strip()) + cleaned = re.sub(r"\n?```\s*$", "", cleaned) + return repair_json(cleaned, return_objects=True) + + def _build_chain(prompt: str): - """构建单个提取链:prompt → LLM → JSON解析""" + """构建单个提取链:prompt → LLM → 文本输出""" return ( ChatPromptTemplate.from_messages([("system", prompt), ("human", "{text}")]) | LLM.JIAYU_CLAUDE_SONNET_4_5.create(temperature=0) - | JsonOutputParser() + | StrOutputParser() ) @@ -55,7 +64,8 @@ async def extract_all(text: str) -> dict: async def _safe_invoke(chain, inp: dict, label: str): """单个链调用,失败返回空""" try: - return await chain.ainvoke(inp) + raw = await chain.ainvoke(inp) + return _parse_json(raw) except Exception as e: log.warning(f"AI提取[{label}]失败: {e}") return {} if "个人信息" in label else []