修改简历上传为多路上传

This commit is contained in:
zk
2026-04-03 11:20:17 +08:00
parent e82159278a
commit 7ac1e723a4
6 changed files with 164 additions and 122 deletions
+3 -3
View File
@@ -22,9 +22,8 @@ class LLM(Enum):
"""所有可用模型,每个枚举值 = (模型名, api_key函数, base_url函数)"""
# 火山引擎
DOUBAO_PRO_256K = ("doubao-pro-256k", *_VOLCENGINE)
DOUBAO_PRO_32K = ("doubao-pro-32k", *_VOLCENGINE)
DOUBAO_LITE_128K = ("doubao-lite-128k", *_VOLCENGINE)
DOUBAO_PRO_32K = ("doubao-1-5-pro-32k-250115", *_VOLCENGINE)
DEEPSEEK_V3 = ("deepseek-v3-250324", *_VOLCENGINE)
DEEPSEEK_R1 = ("deepseek-r1-250528", *_VOLCENGINE)
DOUBAO_SEED_LITE = ("doubao-seed-2-0-lite-260215", *_VOLCENGINE)
@@ -34,6 +33,7 @@ class LLM(Enum):
GPT_4O = ("gpt-4o", *_CARDIAC)
GPT_4O_MINI = ("gpt-4o-mini", *_CARDIAC)
CLAUDE_SONNET_4 = ("claude-sonnet-4-20250514", *_CARDIAC)
GEMINI_FLASH = ("gemini-2.5-flash", *_CARDIAC)
def __init__(self, model_name: str, api_key_fn, base_url_fn):
self.model_name = model_name
+1
View File
@@ -0,0 +1 @@
+61
View File
@@ -0,0 +1,61 @@
"""简历并行提取:将完整简历文本拆分为5个AI任务并行提取"""
import asyncio
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
from app.ai.models import LLM
from app.ai.resume_extractor.prompts import (
PROFILE_PROMPT, EDUCATION_PROMPT, WORK_PROMPT,
PROJECT_PROMPT, COMPETITION_PROMPT,
)
from app.core.logger import log
def _build_chain(prompt: str):
    """Assemble one extraction pipeline: prompt template -> LLM -> JSON parser.

    Args:
        prompt: System-message text for the section to extract. It is handed
            to ``ChatPromptTemplate``, with the resume text supplied through
            the ``{text}`` placeholder of the human message.

    Returns:
        The composed runnable (template piped into the model piped into the
        JSON output parser).
    """
    template = ChatPromptTemplate.from_messages(
        [("system", prompt), ("human", "{text}")]
    )
    # temperature=0 is passed straight through to the model factory.
    model = LLM.DOUBAO_PRO_32K.create(temperature=0)
    parser = JsonOutputParser()
    return template | model | parser
# Five independent extraction chains, one per resume section. They are built
# once at import time, in the same order as the prompts listed below.
(
    _profile_chain,
    _education_chain,
    _work_chain,
    _project_chain,
    _competition_chain,
) = map(
    _build_chain,
    (
        PROFILE_PROMPT,
        EDUCATION_PROMPT,
        WORK_PROMPT,
        PROJECT_PROMPT,
        COMPETITION_PROMPT,
    ),
)
def _list_or_empty(value) -> list:
    """Return ``value`` unchanged when it is a list, otherwise a new empty list."""
    return value if isinstance(value, list) else []


async def extract_all(text: str) -> dict:
    """Extract every resume section concurrently and merge the results.

    The five extraction chains are awaited together with ``asyncio.gather``.
    The profile result (when it is a dict) becomes the base of the returned
    mapping and is updated in place with the section lists.

    Args:
        text: Full resume text, passed to every chain as ``{"text": text}``.

    Returns:
        A dict holding the profile fields plus the keys ``education``,
        ``work``, ``internship``, ``project`` and ``competition``. Each of
        those five keys always maps to a list; any section whose chain
        returned something other than the expected shape becomes ``[]``.
    """
    log.info("开始5路并行AI提取")
    inp = {"text": text}
    profile, education, work_intern, project, competition = await asyncio.gather(
        _safe_invoke(_profile_chain, inp, "个人信息"),
        _safe_invoke(_education_chain, inp, "教育经历"),
        _safe_invoke(_work_chain, inp, "工作+实习经历"),
        _safe_invoke(_project_chain, inp, "项目经历"),
        _safe_invoke(_competition_chain, inp, "竞赛经历"),
    )

    result = profile if isinstance(profile, dict) else {}
    # The work chain is expected to return {"work": [...], "internship": [...]};
    # anything else is treated as "no work and no internship entries".
    work_sections = work_intern if isinstance(work_intern, dict) else {}

    result["education"] = _list_or_empty(education)
    # Validate the nested values too: a model reply such as {"work": null}
    # would otherwise leak None into a field the other sections guarantee
    # to be a list.
    result["work"] = _list_or_empty(work_sections.get("work"))
    result["internship"] = _list_or_empty(work_sections.get("internship"))
    result["project"] = _list_or_empty(project)
    result["competition"] = _list_or_empty(competition)
    return result
async def _safe_invoke(chain, inp: dict, label: str):
    """Invoke a single chain, converting any failure into an empty result.

    Args:
        chain: Object exposing an awaitable ``ainvoke(inp)``.
        inp: Input mapping forwarded to the chain unchanged.
        label: Section name used in the warning message; it also selects the
            fallback type on failure.

    Returns:
        Whatever ``chain.ainvoke`` returns on success. On any ``Exception``
        a warning is logged and ``{}`` is returned when the label contains
        "个人信息", otherwise ``[]``.
    """
    try:
        outcome = await chain.ainvoke(inp)
    except Exception as e:
        log.warning(f"AI提取[{label}]失败: {e}")
        if "个人信息" in label:
            return {}
        return []
    else:
        return outcome
+79
View File
@@ -0,0 +1,79 @@
"""简历各模块提取的 System Prompt
注意:prompt 中的 JSON 示例花括号必须用 {{ }} 转义,避免被 ChatPromptTemplate 当作变量。
"""
# System prompt for extracting personal profile information only.
# It asks for a single JSON object with the fields shown in the example,
# using null for missing values and [] for missing arrays.
# The braces of the JSON example are doubled ({{ }}) so that
# ChatPromptTemplate does not treat them as template variables.
PROFILE_PROMPT = """从简历文本中仅提取个人基本信息,原文提取不要改写,输出JSON:
```json
{{
"name": "姓名",
"email": "邮箱",
"mobileNumber": "手机号",
"city": "所在城市",
"wechatNumber": "微信号",
"portfolioUrl": "作品集链接",
"skills": ["技能1"],
"certificates": ["证书1"],
"summary": "个人概述原文"
}}
```
规则:只提取个人信息,不提取经历内容。summary只填"自我评价/个人概述"原文。没有的填null,数组填[]。只输出JSON。"""
# System prompt for extracting education history only.
# It asks for a JSON array of entries (school, major, degree, study type,
# start/end dates in YYYY.MM, description paragraphs), or [] when the
# resume has none. Braces are doubled ({{ }}) to escape them for
# ChatPromptTemplate.
EDUCATION_PROMPT = """从简历文本中仅提取教育经历,原文提取不要改写,输出JSON数组:
```json
[{{
"school": "学校",
"major": "专业",
"degree": "学历",
"studyType": "全日制/非全日制",
"startDate": "2020.09",
"endDate": "2024.06",
"description": ["原文段落"]
}}]
```
规则:只提取教育经历,不提取工作/实习/项目/竞赛。时间格式YYYY.MM。没有输出[]。只输出JSON。"""
# System prompt for extracting both work and internship experience.
# It asks for one JSON object with two arrays, "work" and "internship":
# entries marked as internships go to "internship", everything else to
# "work"; an empty section is []. Dates use YYYY.MM. Braces are doubled
# ({{ }}) to escape them for ChatPromptTemplate.
WORK_PROMPT = """从简历文本中仅提取工作经历和实习经历,原文提取不要改写,输出JSON:
```json
{{
"work": [{{
"companyName": "公司",
"position": "职位",
"startDate": "2024.07",
"endDate": "2025.03",
"description": ["原文段落"]
}}],
"internship": [{{
"companyName": "公司",
"position": "职位",
"startDate": "2023.06",
"endDate": "2023.09",
"description": ["原文段落"]
}}]
}}
```
规则:标注"实习"的归internship,其余归work。不提取项目/教育/竞赛。时间格式YYYY.MM。没有填[]。只输出JSON。"""
# System prompt for extracting project experience only.
# It asks for a JSON array of entries, or [] when there are none. The
# "role" field must hold only a short role name; responsibilities belong
# in "description". Dates use YYYY.MM. Braces are doubled ({{ }}) to
# escape them for ChatPromptTemplate.
PROJECT_PROMPT = """从简历文本中仅提取项目经历,原文提取不要改写,输出JSON数组:
```json
[{{
"companyName": "所属公司",
"projectName": "项目名",
"role": "角色名称(如:后端开发、项目经理、前端工程师,只填角色名不填职责描述)",
"startDate": "2023.03",
"endDate": "2023.12",
"description": ["原文段落"]
}}]
```
规则:只提取项目经历,不提取工作/实习/教育/竞赛。role只填简短角色名,职责内容放description。时间格式YYYY.MM。没有输出[]。只输出JSON。"""
# System prompt for extracting competition / award entries only.
# It asks for a JSON array of entries (competition name, award, award date
# in YYYY.MM, description paragraphs), or [] when there are none. Braces
# are doubled ({{ }}) to escape them for ChatPromptTemplate.
COMPETITION_PROMPT = """从简历文本中仅提取竞赛/获奖经历,原文提取不要改写,输出JSON数组:
```json
[{{
"competitionName": "竞赛名",
"award": "获奖情况",
"awardDate": "2023.07",
"description": ["原文段落"]
}}]
```
规则:只提取竞赛获奖,不提取其他经历。时间格式YYYY.MM。没有输出[]。只输出JSON。"""