106 lines
4.9 KiB
Python
106 lines
4.9 KiB
Python
"""简历解析 Service
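
Upload a resume file → parse it to plain text → structure it with parallel AI extraction → write it to the database.
Depends on: file_parser (file parsing utility), resume_extractor (parallel AI extraction)
Tables used: bg_user_resume (main table), bg_user_resume_education/work/internship/project/competition (5 child tables)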
"""
|
||
|
||
import asyncio
|
||
|
||
import shortuuid
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
|
||
from app.ai.resume_extractor.extractor import extract_all
|
||
from app.core.logger import log
|
||
from app.models.user_resume import UserResume
|
||
from app.models.user_resume_competition import UserResumeCompetition
|
||
from app.models.user_resume_education import UserResumeEducation
|
||
from app.models.user_resume_internship import UserResumeInternship
|
||
from app.models.user_resume_project import UserResumeProject
|
||
from app.models.user_resume_work import UserResumeWork
|
||
from app.tool.file_parser import parse_to_text
|
||
from app.tool.snowflake import next_id
|
||
|
||
|
||
class ResumeParseService:
    """Parse an uploaded resume into structured data and stage it for storage."""

    async def parse_and_extract(self, filename: str, content: bytes) -> dict:
        """Convert the file to text and run the AI extraction on that text.

        No database work happens in this method.

        Args:
            filename: Name of the uploaded file; forwarded to ``parse_to_text``.
            content: Raw bytes of the uploaded file.

        Returns:
            The result of ``extract_all`` for the extracted text.

        Raises:
            ValueError: If the extracted text is empty or whitespace-only.
        """
        log.info(f"开始解析简历文件: {filename}")
        # Run parse_to_text in a worker thread via asyncio.to_thread.
        raw_text = await asyncio.to_thread(parse_to_text, filename, content)
        if not (raw_text and raw_text.strip()):
            raise ValueError("文件内容为空,无法解析")
        log.info(f"文件解析完成,文本长度: {len(raw_text)}")

        log.info("开始AI并行结构化提取")
        structured = await extract_all(raw_text)
        log.info("AI并行结构化提取完成")
        return structured

    async def save_resume(self, session: AsyncSession, user_id: int, filename: str, parsed: dict) -> int:
        """Add the main resume row and its child rows to the session, then flush.

        One ``UserResume`` row is added, followed by one child row per entry
        under the ``education``, ``work``, ``internship``, ``project`` and
        ``competition`` keys of ``parsed``. Each child row's ``sort_order`` is
        its position in its list. Only ``session.flush()`` is called here; no
        commit is issued.

        Args:
            session: Session that receives the new rows.
            user_id: Owner ID written to every row.
            filename: Uploaded file name; the part before the last ``.``
                becomes ``resume_name``.
            parsed: Extraction result, read with camelCase keys.

        Returns:
            The ID generated for the main resume row.
        """
        resume_id = next_id()
        display_name = filename.rsplit(".", 1)[0]

        main_row = UserResume(
            id=resume_id,
            user_id=user_id,
            resume_name=display_name,
            target_position=None,
            is_default=0,
            sort_order=0,
            name=parsed.get("name"),
            email=parsed.get("email"),
            mobile_number=parsed.get("mobileNumber"),
            city=parsed.get("city"),
            wechat_number=parsed.get("wechatNumber"),
            portfolio_url=parsed.get("portfolioUrl"),
            skills=parsed.get("skills") or [],
            certificates=parsed.get("certificates") or [],
            summary=parsed.get("summary"),
        )
        session.add(main_row)

        # Every child row carries the same pair of owner keys.
        owner = {"resume_id": resume_id, "user_id": user_id}

        for order, item in enumerate(parsed.get("education") or []):
            education_row = UserResumeEducation(
                id=next_id(),
                **owner,
                school=item.get("school"),
                major=item.get("major"),
                degree=item.get("degree"),
                study_type=item.get("studyType"),
                start_date=item.get("startDate"),
                end_date=item.get("endDate"),
                description=_to_paragraphs(item.get("description")),
                sort_order=order,
            )
            session.add(education_row)

        for order, item in enumerate(parsed.get("work") or []):
            work_row = UserResumeWork(
                id=next_id(),
                **owner,
                company_name=item.get("companyName"),
                position=item.get("position"),
                start_date=item.get("startDate"),
                end_date=item.get("endDate"),
                description=_to_paragraphs(item.get("description")),
                sort_order=order,
            )
            session.add(work_row)

        for order, item in enumerate(parsed.get("internship") or []):
            internship_row = UserResumeInternship(
                id=next_id(),
                **owner,
                company_name=item.get("companyName"),
                position=item.get("position"),
                start_date=item.get("startDate"),
                end_date=item.get("endDate"),
                description=_to_paragraphs(item.get("description")),
                sort_order=order,
            )
            session.add(internship_row)

        for order, item in enumerate(parsed.get("project") or []):
            project_row = UserResumeProject(
                id=next_id(),
                **owner,
                company_name=item.get("companyName"),
                project_name=item.get("projectName"),
                role=item.get("role"),
                start_date=item.get("startDate"),
                end_date=item.get("endDate"),
                description=_to_paragraphs(item.get("description")),
                sort_order=order,
            )
            session.add(project_row)

        for order, item in enumerate(parsed.get("competition") or []):
            competition_row = UserResumeCompetition(
                id=next_id(),
                **owner,
                competition_name=item.get("competitionName"),
                award=item.get("award"),
                award_date=item.get("awardDate"),
                description=_to_paragraphs(item.get("description")),
                sort_order=order,
            )
            session.add(competition_row)

        await session.flush()
        log.info(f"简历保存完成,resumeId: {resume_id}")
        return resume_id
|
||
|
||
|
||
def _to_paragraphs(texts: list[str] | None) -> list[dict] | None:
|
||
"""将字符串数组转为 [{id, text}] 格式的描述段落"""
|
||
if not texts:
|
||
return None
|
||
return [{"id": shortuuid.ShortUUID().random(length=8), "text": t} for t in texts if t] |