添加 AI 简历解析

2026-04-02 16:01:08 +08:00
parent 4de721ffca
commit ff0993e431
14 changed files with 441 additions and 10 deletions
@@ -27,6 +27,8 @@ class LLM(Enum):
    DOUBAO_LITE_128K = ("doubao-lite-128k", *_VOLCENGINE)
    DEEPSEEK_V3 = ("deepseek-v3-250324", *_VOLCENGINE)
    DEEPSEEK_R1 = ("deepseek-r1-250528", *_VOLCENGINE)
+    DOUBAO_SEED_LIST = ("doubao-seed-2-0-lite-260215", *_VOLCENGINE)
+    DOUBAO_SEED_PRO = ("doubao-seed-2-0-pro-260215", *_VOLCENGINE)

    # 心缘
    GPT_4O = ("gpt-4o", *_CARDIAC)
@@ -0,0 +1,25 @@
+"""简历上传解析接口"""
+
+from fastapi import APIRouter, UploadFile, File
+
+from app.core.context import RequestContext
+from app.core.database import get_db
+from app.services.resume_parse_service import ResumeParseService
+
+router = APIRouter(prefix="/resume", tags=["简历"])
+
+
+@router.post("/upload", summary="上传简历文件并AI解析")
+async def upload_resume(file: UploadFile = File(...)):
+    """上传简历文件（PDF/Word/TXT），AI解析后生成结构化简历，返回简历ID"""
+    user_id = RequestContext.user_id.get()
+    content = await file.read()
+
+    service = ResumeParseService()
+    # 文件解析 + AI 结构化（不占数据库连接）
+    parsed = await service.parse_and_extract(file.filename, content)
+    # 短事务：只做数据库写入
+    resume_id = None
+    async for session in get_db():
+        resume_id = await service.save_resume(session, user_id, file.filename, parsed)
+    return {"resumeId": resume_id}
@@ -31,8 +31,10 @@ app.add_middleware(

 # ========== 路由注册 ==========
 from app.api.health import router as health_router
+from app.api.resume import router as resume_router

 app.include_router(health_router)
+app.include_router(resume_router)
 # ==============================

 if __name__ == "__main__":
@@ -0,0 +1,40 @@
+"""用户简历表（bg_user_resume）
+简历维度信息 + 个人基本信息合并存储
+"""
+
+from datetime import datetime
+from typing import Optional
+
+from sqlalchemy import BigInteger, Integer, String, DateTime, JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.core.database import Base
+
+
+class UserResume(Base):
+    """用户简历表 bg_user_resume"""
+    __tablename__ = "bg_user_resume"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
+
+    # 简历维度信息
+    resume_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="简历名称")
+    target_position: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="目标岗位")
+    is_default: Mapped[int] = mapped_column(Integer, default=0, comment="是否默认简历 0=否 1=是")
+    sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号")
+
+    # 个人信息
+    avatar_url: Mapped[Optional[str]] = mapped_column(String(512), nullable=True, comment="头像URL")
+    name: Mapped[Optional[str]] = mapped_column(String(64), nullable=True, comment="真实姓名")
+    email: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="邮箱")
+    mobile_number: Mapped[Optional[str]] = mapped_column(String(20), nullable=True, comment="手机号码")
+    city: Mapped[Optional[str]] = mapped_column(String(64), nullable=True, comment="所在城市")
+    wechat_number: Mapped[Optional[str]] = mapped_column(String(64), nullable=True, comment="微信号")
+    portfolio_url: Mapped[Optional[str]] = mapped_column(String(512), nullable=True, comment="作品集链接")
+    skills: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="技能标签列表")
+    certificates: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="证书标签列表")
+    summary: Mapped[Optional[str]] = mapped_column(String(2000), nullable=True, comment="个人概述")
+
+    create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间")
+    update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间")
@@ -0,0 +1,25 @@
+"""简历-竞赛经历表（bg_user_resume_competition）"""
+
+from datetime import datetime
+from typing import Optional
+
+from sqlalchemy import BigInteger, Integer, String, DateTime, JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.core.database import Base
+
+
+class UserResumeCompetition(Base):
+    """简历-竞赛经历表 bg_user_resume_competition"""
+    __tablename__ = "bg_user_resume_competition"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    resume_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联bg_user_resume.id")
+    user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
+    competition_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="竞赛名称")
+    award: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="获奖情况")
+    award_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="获奖时间，格式：2023.07")
+    description: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]")
+    sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号")
+    create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间")
+    update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间")
@@ -0,0 +1,28 @@
+"""简历-教育经历表（bg_user_resume_education）"""
+
+from datetime import datetime
+from typing import Optional
+
+from sqlalchemy import BigInteger, Integer, String, DateTime, JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.core.database import Base
+
+
+class UserResumeEducation(Base):
+    """简历-教育经历表 bg_user_resume_education"""
+    __tablename__ = "bg_user_resume_education"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    resume_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联bg_user_resume.id")
+    user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
+    school: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="学校名称")
+    major: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="专业")
+    degree: Mapped[Optional[str]] = mapped_column(String(32), nullable=True, comment="学历：大专/本科/硕士/博士")
+    study_type: Mapped[Optional[str]] = mapped_column(String(32), nullable=True, comment="学习形式：全日制/非全日制")
+    start_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="开始时间，格式：2023.09")
+    end_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="结束时间，格式：2024.06")
+    description: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]")
+    sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号")
+    create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间")
+    update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间")
@@ -0,0 +1,26 @@
+"""简历-实习经历表（bg_user_resume_internship）"""
+
+from datetime import datetime
+from typing import Optional
+
+from sqlalchemy import BigInteger, Integer, String, DateTime, JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.core.database import Base
+
+
+class UserResumeInternship(Base):
+    """简历-实习经历表 bg_user_resume_internship"""
+    __tablename__ = "bg_user_resume_internship"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    resume_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联bg_user_resume.id")
+    user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
+    company_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="公司名称")
+    position: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="职位")
+    start_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="开始时间，格式：2023.06")
+    end_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="结束时间，格式：2023.09")
+    description: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]")
+    sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号")
+    create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间")
+    update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间")
@@ -0,0 +1,27 @@
+"""简历-项目经历表（bg_user_resume_project）"""
+
+from datetime import datetime
+from typing import Optional
+
+from sqlalchemy import BigInteger, Integer, String, DateTime, JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.core.database import Base
+
+
+class UserResumeProject(Base):
+    """简历-项目经历表 bg_user_resume_project"""
+    __tablename__ = "bg_user_resume_project"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    resume_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联bg_user_resume.id")
+    user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
+    company_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="所属公司")
+    project_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="项目名称")
+    role: Mapped[Optional[str]] = mapped_column(String(64), nullable=True, comment="担任角色")
+    start_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="开始时间，格式：2023.06")
+    end_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="结束时间，格式：2023.09")
+    description: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]")
+    sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号")
+    create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间")
+    update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间")
@@ -0,0 +1,26 @@
+"""简历-工作经历表（bg_user_resume_work）"""
+
+from datetime import datetime
+from typing import Optional
+
+from sqlalchemy import BigInteger, Integer, String, DateTime, JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.core.database import Base
+
+
+class UserResumeWork(Base):
+    """简历-工作经历表 bg_user_resume_work"""
+    __tablename__ = "bg_user_resume_work"
+
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    resume_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联bg_user_resume.id")
+    user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
+    company_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="公司名称")
+    position: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="职位")
+    start_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="开始时间，格式：2023.06")
+    end_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="结束时间，格式：2023.09")
+    description: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]")
+    sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号")
+    create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间")
+    update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间")
@@ -0,0 +1,208 @@
+"""简历解析 Service
+
+上传简历文件 → 解析为纯文本 → AI 结构化 → 写入数据库。
+依赖：file_parser（文件解析工具）、LLM（AI模型）
+使用表：bg_user_resume（主表）、bg_user_resume_education/work/internship/project/competition（5张子表）
+"""
+
+import asyncio
+import json
+
+import shortuuid
+from langchain_core.messages import SystemMessage, HumanMessage
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.ai.models import LLM
+from app.core.logger import log
+from app.models.user_resume import UserResume
+from app.models.user_resume_competition import UserResumeCompetition
+from app.models.user_resume_education import UserResumeEducation
+from app.models.user_resume_internship import UserResumeInternship
+from app.models.user_resume_project import UserResumeProject
+from app.models.user_resume_work import UserResumeWork
+from app.tool.file_parser import parse_to_text
+from app.tool.snowflake import next_id
+
+
+# System prompt sent with every resume-structuring request. It pins the JSON
+# schema the model must emit; the camelCase keys listed here are the ones
+# ResumeParseService.save_resume reads back via parsed.get(...), so the two
+# must be kept in sync when a field is added or renamed.
+_SYSTEM_PROMPT = """你是一个专业的简历解析助手。请将用户提供的简历纯文本解析为结构化JSON。
+
+输出格式要求（严格按此JSON结构输出，不要输出任何其他内容）：
+```json
+{
+  "name": "姓名",
+  "email": "邮箱",
+  "mobileNumber": "手机号",
+  "city": "所在城市",
+  "wechatNumber": "微信号（如有）",
+  "portfolioUrl": "作品集链接（如有）",
+  "skills": ["技能1", "技能2"],
+  "certificates": ["证书1", "证书2"],
+  "summary": "个人概述/自我评价",
+  "education": [
+    {
+      "school": "学校名称",
+      "major": "专业",
+      "degree": "学历（大专/本科/硕士/博士）",
+      "studyType": "学习形式（全日制/非全日制）",
+      "startDate": "2020.09",
+      "endDate": "2024.06",
+      "description": ["描述段落1", "描述段落2"]
+    }
+  ],
+  "work": [
+    {
+      "companyName": "公司名称",
+      "position": "职位",
+      "startDate": "2024.07",
+      "endDate": "2025.03",
+      "description": ["工作描述段落1", "工作描述段落2"]
+    }
+  ],
+  "internship": [
+    {
+      "companyName": "公司名称",
+      "position": "实习职位",
+      "startDate": "2023.06",
+      "endDate": "2023.09",
+      "description": ["实习描述段落1"]
+    }
+  ],
+  "project": [
+    {
+      "companyName": "所属公司（如有）",
+      "projectName": "项目名称",
+      "role": "担任角色",
+      "startDate": "2023.03",
+      "endDate": "2023.12",
+      "description": ["项目描述段落1"]
+    }
+  ],
+  "competition": [
+    {
+      "competitionName": "竞赛名称",
+      "award": "获奖情况",
+      "awardDate": "2023.07",
+      "description": ["竞赛描述段落1"]
+    }
+  ]
+}
+```
+
+规则：
+1. 时间格式统一为 YYYY.MM（如 2023.09），如果只有年份则写 YYYY.01
+2. 没有的字段填 null，没有的数组填 []
+3. description 是字符串数组，每个元素是一个描述段落
+4. 区分工作经历和实习经历：明确标注"实习"的归入 internship，其余归入 work
+5. 只输出 JSON，不要输出任何解释文字"""
+
+
+class ResumeParseService:
+
+    async def parse_and_extract(self, filename: str, content: bytes) -> dict:
+        """文件解析 + AI 结构化，不涉及数据库操作"""
+
+        # 1. 文件解析为纯文本（同步操作丢线程池）
+        log.info(f"开始解析简历文件: {filename}")
+        text = await asyncio.to_thread(parse_to_text, filename, content)
+        if not text or not text.strip():
+            raise ValueError("文件内容为空，无法解析")
+        log.info(f"文件解析完成，文本长度: {len(text)}")
+
+        # 2. AI 结构化解析
+        log.info("开始AI结构化解析")
+        parsed = await self._ai_parse(text)
+        log.info("AI结构化解析完成")
+        return parsed
+
+    async def _ai_parse(self, text: str) -> dict:
+        """调用 AI 将纯文本解析为结构化 JSON"""
+        llm = LLM.DOUBAO_SEED_PRO.create(temperature=0)
+        messages = [SystemMessage(content=_SYSTEM_PROMPT), HumanMessage(content=text)]
+        response = await llm.ainvoke(messages)
+
+        # 提取 JSON（兼容 markdown 代码块包裹）
+        raw = response.content.strip()
+        if raw.startswith("```"):
+            raw = raw.split("\n", 1)[1] if "\n" in raw else raw[3:]
+            raw = raw.rsplit("```", 1)[0]
+        return json.loads(raw)
+
+    async def save_resume(self, session: AsyncSession, user_id: int, filename: str, parsed: dict) -> int:
+        """将解析结果写入主表 + 5张子表，返回简历ID"""
+        resume_id = next_id()
+
+        # 主表
+        resume = UserResume(
+            id=resume_id, user_id=user_id,
+            resume_name=filename.rsplit(".", 1)[0],
+            target_position=None, is_default=0, sort_order=0,
+            name=parsed.get("name"), email=parsed.get("email"),
+            mobile_number=parsed.get("mobileNumber"), city=parsed.get("city"),
+            wechat_number=parsed.get("wechatNumber"), portfolio_url=parsed.get("portfolioUrl"),
+            skills=parsed.get("skills") or [], certificates=parsed.get("certificates") or [],
+            summary=parsed.get("summary"),
+        )
+        session.add(resume)
+
+        # 教育经历
+        for i, edu in enumerate(parsed.get("education") or []):
+            session.add(UserResumeEducation(
+                id=next_id(), resume_id=resume_id, user_id=user_id,
+                school=edu.get("school"), major=edu.get("major"),
+                degree=edu.get("degree"), study_type=edu.get("studyType"),
+                start_date=edu.get("startDate"), end_date=edu.get("endDate"),
+                description=_to_description_paragraphs(edu.get("description")),
+                sort_order=i,
+            ))
+
+        # 工作经历
+        for i, work in enumerate(parsed.get("work") or []):
+            session.add(UserResumeWork(
+                id=next_id(), resume_id=resume_id, user_id=user_id,
+                company_name=work.get("companyName"), position=work.get("position"),
+                start_date=work.get("startDate"), end_date=work.get("endDate"),
+                description=_to_description_paragraphs(work.get("description")),
+                sort_order=i,
+            ))
+
+        # 实习经历
+        for i, intern in enumerate(parsed.get("internship") or []):
+            session.add(UserResumeInternship(
+                id=next_id(), resume_id=resume_id, user_id=user_id,
+                company_name=intern.get("companyName"), position=intern.get("position"),
+                start_date=intern.get("startDate"), end_date=intern.get("endDate"),
+                description=_to_description_paragraphs(intern.get("description")),
+                sort_order=i,
+            ))
+
+        # 项目经历
+        for i, proj in enumerate(parsed.get("project") or []):
+            session.add(UserResumeProject(
+                id=next_id(), resume_id=resume_id, user_id=user_id,
+                company_name=proj.get("companyName"), project_name=proj.get("projectName"),
+                role=proj.get("role"),
+                start_date=proj.get("startDate"), end_date=proj.get("endDate"),
+                description=_to_description_paragraphs(proj.get("description")),
+                sort_order=i,
+            ))
+
+        # 竞赛经历
+        for i, comp in enumerate(parsed.get("competition") or []):
+            session.add(UserResumeCompetition(
+                id=next_id(), resume_id=resume_id, user_id=user_id,
+                competition_name=comp.get("competitionName"), award=comp.get("award"),
+                award_date=comp.get("awardDate"),
+                description=_to_description_paragraphs(comp.get("description")),
+                sort_order=i,
+            ))
+
+        await session.flush()
+        log.info(f"简历保存完成，resumeId: {resume_id}")
+        return resume_id
+
+
+def _to_description_paragraphs(texts: list[str] | None) -> list[dict] | None:
+    """将字符串数组转为 [{id, text}] 格式的描述段落"""
+    if not texts:
+        return None
+    return [{"id": shortuuid.ShortUUID().random(length=8), "text": t} for t in texts if t]
@@ -60,9 +60,9 @@ def parse_to_text(filename: str, content: bytes) -> str:

    if suffix == ".pdf":
        return parse_pdf(content)
-    elif suffix in (".docx", ".doc"):
+    elif suffix == ".docx":
        return parse_docx(content)
    elif suffix == ".txt":
        return parse_txt(content)
    else:
-        raise ValueError(f"不支持的文件类型: {suffix}，支持: .pdf, .docx, .doc, .txt")
+        raise ValueError(f"不支持的文件类型: {suffix}，支持: .pdf, .docx, .txt")
@@ -0,0 +1,10 @@
+"""雪花 ID 生成工具"""
+
+from snowflake import SnowflakeGenerator
+
+_gen = SnowflakeGenerator(1)
+
+
+def next_id() -> int:
+    """生成一个雪花 ID"""
+    return next(_gen)