diff --git a/.kiro/steering/项目结构说明.md b/.kiro/steering/项目结构说明.md index febb406..a9a2516 100644 --- a/.kiro/steering/项目结构说明.md +++ b/.kiro/steering/项目结构说明.md @@ -34,18 +34,27 @@ offerpie_python_ai/ │ └─ models.py # LLM 模型枚举(LLM.DOUBAO_PRO_256K、DEEPSEEK_V3、GPT_4O 等),基于 LangChain ChatOpenAI │ ├─ api/ # **路由层**(REST API 接口) - │ └─ health.py # 健康检查接口 GET /health/ + │ ├─ health.py # 健康检查接口 GET /health/ + │ └─ resume.py # 简历接口 POST /resume/upload(上传文件AI解析) │ ├─ models/ # **ORM 模型层**(SQLAlchemy 声明式映射) │ ├─ func_permission.py # 功能权限定义表(bg_func_permission) │ ├─ user_func_permission_stock.py # 用户功能权限库存表(bg_user_func_permission_stock) - │ └─ user_func_usage_log.py # 用户功能使用记录表(bg_user_func_usage_log) + │ ├─ user_func_usage_log.py # 用户功能使用记录表(bg_user_func_usage_log) + │ ├─ user_resume.py # 用户简历主表(bg_user_resume) + │ ├─ user_resume_education.py # 简历-教育经历表(bg_user_resume_education) + │ ├─ user_resume_work.py # 简历-工作经历表(bg_user_resume_work) + │ ├─ user_resume_internship.py # 简历-实习经历表(bg_user_resume_internship) + │ ├─ user_resume_project.py # 简历-项目经历表(bg_user_resume_project) + │ └─ user_resume_competition.py # 简历-竞赛经历表(bg_user_resume_competition) │ ├─ tool/ # **工具层**(无状态、无业务依赖的通用工具) - │ └─ file_parser.py # 文件解析工具(PDF/Word/TXT → 纯文本,parse_to_text 入口方法) + │ ├─ file_parser.py # 文件解析工具(PDF/Word/TXT → 纯文本,parse_to_text 入口方法) + │ └─ snowflake.py # 雪花 ID 生成工具(next_id) │ └─ services/ # **业务逻辑层** - └─ func_permission_service.py # 功能权限服务(校验+扣减+回退,逻辑与Java端一致) + ├─ func_permission_service.py # 功能权限服务(校验+扣减+回退,逻辑与Java端一致) + └─ resume_parse_service.py # 简历解析服务(文件解析→AI结构化→写入主表+5张子表) ``` ## 2️⃣ 各层模块职责 @@ -54,10 +63,10 @@ offerpie_python_ai/ | **config** | 统一配置管理,基于 Pydantic Settings,支持 .env 文件加载 | `Settings`(数据库、Redis、LLM供应商、JWT、CORS、日志等全部配置项) | | **core** | 核心基础设施:数据库连接、Redis连接、鉴权、日志、中间件、异常处理、统一响应 | `database.py`、`redis.py`、`auth.py`、`middleware.py`、`exceptions.py`、`logger.py`、`StandardResponse` | | **ai** | AI 模型管理,封装多供应商 LLM 实例创建,基于 LangChain ChatOpenAI | `LLM` 
@router.post("/upload", summary="上传简历文件并AI解析")
async def upload_resume(file: UploadFile = File(...)):
    """Upload a resume file (PDF/Word/TXT), parse it with AI and store it.

    The work is split into two phases so that no database session is held
    while the slow file-parsing / LLM step runs:

    1. ``ResumeParseService.parse_and_extract`` turns the raw bytes into a
       structured dict.
    2. ``ResumeParseService.save_resume`` writes that dict using a session
       obtained from ``get_db``.

    Returns:
        ``{"resumeId": <id>}`` where the id is whatever ``save_resume``
        returned, or ``None`` if ``get_db`` yielded no session.
    """
    current_user = RequestContext.user_id.get()
    # NOTE(review): the whole upload is read into memory with no size cap;
    # confirm an upstream limit (proxy / middleware) exists.
    payload = await file.read()
    upload_name = file.filename

    parser = ResumeParseService()

    # Phase 1: file parsing + AI structuring (no database connection in use).
    structured = await parser.parse_and_extract(upload_name, payload)

    # Phase 2: short transaction, database writes only.  The generator is
    # iterated to exhaustion (no early return inside the loop) so whatever
    # get_db does after its yield still runs -- presumably commit/close,
    # TODO confirm against app.core.database.
    saved_id = None
    async for db in get_db():
        saved_id = await parser.save_resume(db, current_user, upload_name, structured)

    # NOTE(review): save_resume returns a snowflake ID; values above 2**53
    # lose precision in JavaScript clients when sent as a JSON number --
    # confirm whether this should be serialized as a string.
    return {"resumeId": saved_id}
class UserResumeCompetition(Base):
    """Resume competition-experience table (``bg_user_resume_competition``).

    One row per competition entry of a resume.  Rows are created by
    ``ResumeParseService.save_resume``, which supplies ``id`` explicitly
    (snowflake ID) and sets ``sort_order`` to the entry's position in the
    parsed list.
    """
    __tablename__ = "bg_user_resume_competition"

    # Primary key; no autoincrement is relied on -- the service passes the ID in.
    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # Owning resume (bg_user_resume.id).  Plain column: no ForeignKey is declared.
    resume_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联bg_user_resume.id")
    # Owning user, stored on the child row as well as on the resume.
    user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
    # Competition name.
    competition_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="竞赛名称")
    # Award / placement obtained.
    award: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="获奖情况")
    # Award date kept as text, e.g. "2023.07" (not a DATE column).
    award_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="获奖时间,格式:2023.07")
    # JSON list of description paragraphs shaped like [{id, text}].
    description: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]")
    # Display order within the resume.
    sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号")
    # Timestamps are filled client-side with naive datetime.now().
    create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间")
    update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间")
Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]") + sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号") + create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间") + update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间") diff --git a/app/models/user_resume_internship.py b/app/models/user_resume_internship.py new file mode 100644 index 0000000..8cc28ec --- /dev/null +++ b/app/models/user_resume_internship.py @@ -0,0 +1,26 @@ +"""简历-实习经历表(bg_user_resume_internship)""" + +from datetime import datetime +from typing import Optional + +from sqlalchemy import BigInteger, Integer, String, DateTime, JSON +from sqlalchemy.orm import Mapped, mapped_column + +from app.core.database import Base + + +class UserResumeInternship(Base): + """简历-实习经历表 bg_user_resume_internship""" + __tablename__ = "bg_user_resume_internship" + + id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + resume_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联bg_user_resume.id") + user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID") + company_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="公司名称") + position: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="职位") + start_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="开始时间,格式:2023.06") + end_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="结束时间,格式:2023.09") + description: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]") + sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号") + create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间") + update_time: Mapped[datetime] = mapped_column(DateTime, 
class UserResumeProject(Base):
    """Resume project-experience table (``bg_user_resume_project``).

    One row per project entry of a resume.  Rows are created by
    ``ResumeParseService.save_resume``, which supplies ``id`` explicitly
    (snowflake ID) and sets ``sort_order`` to the entry's position in the
    parsed list.
    """
    __tablename__ = "bg_user_resume_project"

    # Primary key; no autoincrement is relied on -- the service passes the ID in.
    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # Owning resume (bg_user_resume.id).  Plain column: no ForeignKey is declared.
    resume_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联bg_user_resume.id")
    # Owning user, stored on the child row as well as on the resume.
    user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
    # Company the project belonged to, if any.
    company_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="所属公司")
    # Project name.
    project_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="项目名称")
    # Role held in the project.
    role: Mapped[Optional[str]] = mapped_column(String(64), nullable=True, comment="担任角色")
    # Start / end kept as text, e.g. "2023.06" (not DATE columns).
    start_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="开始时间,格式:2023.06")
    end_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="结束时间,格式:2023.09")
    # JSON list of description paragraphs shaped like [{id, text}].
    description: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]")
    # Display order within the resume.
    sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号")
    # Timestamps are filled client-side with naive datetime.now().
    create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间")
    update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间")
class UserResumeWork(Base):
    """Resume work-experience table (``bg_user_resume_work``).

    One row per work entry of a resume.  Rows are created by
    ``ResumeParseService.save_resume``, which supplies ``id`` explicitly
    (snowflake ID) and sets ``sort_order`` to the entry's position in the
    parsed list.
    """
    __tablename__ = "bg_user_resume_work"

    # Primary key; no autoincrement is relied on -- the service passes the ID in.
    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # Owning resume (bg_user_resume.id).  Plain column: no ForeignKey is declared.
    resume_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="关联bg_user_resume.id")
    # Owning user, stored on the child row as well as on the resume.
    user_id: Mapped[int] = mapped_column(BigInteger, nullable=False, comment="用户ID")
    # Employer name.
    company_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="公司名称")
    # Job title.
    position: Mapped[Optional[str]] = mapped_column(String(128), nullable=True, comment="职位")
    # Start / end kept as text, e.g. "2023.06" (not DATE columns).
    start_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="开始时间,格式:2023.06")
    end_date: Mapped[Optional[str]] = mapped_column(String(16), nullable=True, comment="结束时间,格式:2023.09")
    # JSON list of description paragraphs shaped like [{id, text}].
    description: Mapped[Optional[list]] = mapped_column(JSON, nullable=True, comment="描述段落 [{id, text}]")
    # Display order within the resume.
    sort_order: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, comment="排序序号")
    # Timestamps are filled client-side with naive datetime.now().
    create_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, comment="创建时间")
    update_time: Mapped[datetime] = mapped_column(DateTime, default=datetime.now, onupdate=datetime.now, comment="更新时间")
app.ai.models import LLM +from app.core.logger import log +from app.models.user_resume import UserResume +from app.models.user_resume_competition import UserResumeCompetition +from app.models.user_resume_education import UserResumeEducation +from app.models.user_resume_internship import UserResumeInternship +from app.models.user_resume_project import UserResumeProject +from app.models.user_resume_work import UserResumeWork +from app.tool.file_parser import parse_to_text +from app.tool.snowflake import next_id + + +_SYSTEM_PROMPT = """你是一个专业的简历解析助手。请将用户提供的简历纯文本解析为结构化JSON。 + +输出格式要求(严格按此JSON结构输出,不要输出任何其他内容): +```json +{ + "name": "姓名", + "email": "邮箱", + "mobileNumber": "手机号", + "city": "所在城市", + "wechatNumber": "微信号(如有)", + "portfolioUrl": "作品集链接(如有)", + "skills": ["技能1", "技能2"], + "certificates": ["证书1", "证书2"], + "summary": "个人概述/自我评价", + "education": [ + { + "school": "学校名称", + "major": "专业", + "degree": "学历(大专/本科/硕士/博士)", + "studyType": "学习形式(全日制/非全日制)", + "startDate": "2020.09", + "endDate": "2024.06", + "description": ["描述段落1", "描述段落2"] + } + ], + "work": [ + { + "companyName": "公司名称", + "position": "职位", + "startDate": "2024.07", + "endDate": "2025.03", + "description": ["工作描述段落1", "工作描述段落2"] + } + ], + "internship": [ + { + "companyName": "公司名称", + "position": "实习职位", + "startDate": "2023.06", + "endDate": "2023.09", + "description": ["实习描述段落1"] + } + ], + "project": [ + { + "companyName": "所属公司(如有)", + "projectName": "项目名称", + "role": "担任角色", + "startDate": "2023.03", + "endDate": "2023.12", + "description": ["项目描述段落1"] + } + ], + "competition": [ + { + "competitionName": "竞赛名称", + "award": "获奖情况", + "awardDate": "2023.07", + "description": ["竞赛描述段落1"] + } + ] +} +``` + +规则: +1. 时间格式统一为 YYYY.MM(如 2023.09),如果只有年份则写 YYYY.01 +2. 没有的字段填 null,没有的数组填 [] +3. description 是字符串数组,每个元素是一个描述段落 +4. 区分工作经历和实习经历:明确标注"实习"的归入 internship,其余归入 work +5. 
只输出 JSON,不要输出任何解释文字""" + + +class ResumeParseService: + + async def parse_and_extract(self, filename: str, content: bytes) -> dict: + """文件解析 + AI 结构化,不涉及数据库操作""" + + # 1. 文件解析为纯文本(同步操作丢线程池) + log.info(f"开始解析简历文件: {filename}") + text = await asyncio.to_thread(parse_to_text, filename, content) + if not text or not text.strip(): + raise ValueError("文件内容为空,无法解析") + log.info(f"文件解析完成,文本长度: {len(text)}") + + # 2. AI 结构化解析 + log.info("开始AI结构化解析") + parsed = await self._ai_parse(text) + log.info("AI结构化解析完成") + return parsed + + async def _ai_parse(self, text: str) -> dict: + """调用 AI 将纯文本解析为结构化 JSON""" + llm = LLM.DOUBAO_SEED_PRO.create(temperature=0) + messages = [SystemMessage(content=_SYSTEM_PROMPT), HumanMessage(content=text)] + response = await llm.ainvoke(messages) + + # 提取 JSON(兼容 markdown 代码块包裹) + raw = response.content.strip() + if raw.startswith("```"): + raw = raw.split("\n", 1)[1] if "\n" in raw else raw[3:] + raw = raw.rsplit("```", 1)[0] + return json.loads(raw) + + async def save_resume(self, session: AsyncSession, user_id: int, filename: str, parsed: dict) -> int: + """将解析结果写入主表 + 5张子表,返回简历ID""" + resume_id = next_id() + + # 主表 + resume = UserResume( + id=resume_id, user_id=user_id, + resume_name=filename.rsplit(".", 1)[0], + target_position=None, is_default=0, sort_order=0, + name=parsed.get("name"), email=parsed.get("email"), + mobile_number=parsed.get("mobileNumber"), city=parsed.get("city"), + wechat_number=parsed.get("wechatNumber"), portfolio_url=parsed.get("portfolioUrl"), + skills=parsed.get("skills") or [], certificates=parsed.get("certificates") or [], + summary=parsed.get("summary"), + ) + session.add(resume) + + # 教育经历 + for i, edu in enumerate(parsed.get("education") or []): + session.add(UserResumeEducation( + id=next_id(), resume_id=resume_id, user_id=user_id, + school=edu.get("school"), major=edu.get("major"), + degree=edu.get("degree"), study_type=edu.get("studyType"), + start_date=edu.get("startDate"), end_date=edu.get("endDate"), + 
description=_to_description_paragraphs(edu.get("description")), + sort_order=i, + )) + + # 工作经历 + for i, work in enumerate(parsed.get("work") or []): + session.add(UserResumeWork( + id=next_id(), resume_id=resume_id, user_id=user_id, + company_name=work.get("companyName"), position=work.get("position"), + start_date=work.get("startDate"), end_date=work.get("endDate"), + description=_to_description_paragraphs(work.get("description")), + sort_order=i, + )) + + # 实习经历 + for i, intern in enumerate(parsed.get("internship") or []): + session.add(UserResumeInternship( + id=next_id(), resume_id=resume_id, user_id=user_id, + company_name=intern.get("companyName"), position=intern.get("position"), + start_date=intern.get("startDate"), end_date=intern.get("endDate"), + description=_to_description_paragraphs(intern.get("description")), + sort_order=i, + )) + + # 项目经历 + for i, proj in enumerate(parsed.get("project") or []): + session.add(UserResumeProject( + id=next_id(), resume_id=resume_id, user_id=user_id, + company_name=proj.get("companyName"), project_name=proj.get("projectName"), + role=proj.get("role"), + start_date=proj.get("startDate"), end_date=proj.get("endDate"), + description=_to_description_paragraphs(proj.get("description")), + sort_order=i, + )) + + # 竞赛经历 + for i, comp in enumerate(parsed.get("competition") or []): + session.add(UserResumeCompetition( + id=next_id(), resume_id=resume_id, user_id=user_id, + competition_name=comp.get("competitionName"), award=comp.get("award"), + award_date=comp.get("awardDate"), + description=_to_description_paragraphs(comp.get("description")), + sort_order=i, + )) + + await session.flush() + log.info(f"简历保存完成,resumeId: {resume_id}") + return resume_id + + +def _to_description_paragraphs(texts: list[str] | None) -> list[dict] | None: + """将字符串数组转为 [{id, text}] 格式的描述段落""" + if not texts: + return None + return [{"id": shortuuid.ShortUUID().random(length=8), "text": t} for t in texts if t] \ No newline at end of file diff --git 
def next_id() -> int:
    """Return the next snowflake ID from the module-level generator.

    The generator may yield ``None`` instead of an ID (presumably when the
    per-millisecond sequence is used up or the clock steps backwards --
    confirm against the snowflake-id documentation). Passing that ``None``
    on would break the ``-> int`` contract and end up as a primary key, so
    the call is retried after a 1 ms pause.

    NOTE(review): the generator is created with a fixed instance number; if
    several processes (or another service writing to the same tables) use
    the same number, IDs can collide -- confirm how instance numbers are
    assigned in deployment.

    Returns:
        A snowflake ID.

    Raises:
        RuntimeError: If no ID could be produced after 100 attempts.
    """
    import time  # function-scope: the module itself only imports the generator

    for _ in range(100):
        value = next(_gen)
        if value is not None:
            return value
        # Wait for the next millisecond tick before asking again.
        time.sleep(0.001)
    raise RuntimeError("雪花 ID 生成失败:时钟回拨或序列号耗尽")