修改岗位清洗
This commit is contained in:
@@ -1,8 +1,12 @@
|
||||
package org.jiayunet.mapper;
|
||||
|
||||
import org.apache.ibatis.annotations.Insert;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
import org.jiayunet.pojo.po.SkillTag;
|
||||
|
||||
import java.time.Instant;
|
||||
|
||||
/**
|
||||
* 技能标签Mapper
|
||||
*
|
||||
@@ -10,4 +14,11 @@ import org.jiayunet.pojo.po.SkillTag;
|
||||
*/
|
||||
@Mapper
|
||||
public interface SkillTagMapper extends CommonMapper<SkillTag> {
|
||||
|
||||
/**
|
||||
* INSERT IGNORE:依靠 name 唯一索引去重,并发安全
|
||||
* <p>id 由调用方传入(雪花算法生成),重复 name 时忽略插入</p>
|
||||
*/
|
||||
@Insert("INSERT IGNORE INTO bg_skill_tag (id, name, create_time) VALUES (#{id}, #{name}, #{createTime})")
|
||||
int insertIgnore(@Param("id") Long id, @Param("name") String name, @Param("createTime") Instant createTime);
|
||||
}
|
||||
|
||||
@@ -5,26 +5,24 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.jiayunet.mapper.ChinaRegionsCodeMapper;
|
||||
import org.jiayunet.mapper.IndustryMapper;
|
||||
import org.jiayunet.mapper.JobCategoryMapper;
|
||||
import org.jiayunet.mapper.SkillTagMapper;
|
||||
import org.jiayunet.mapper.MajorCategoryMapper;
|
||||
import org.jiayunet.pojo.po.ChinaRegionsCode;
|
||||
import org.jiayunet.pojo.po.Industry;
|
||||
import org.jiayunet.pojo.po.JobCategory;
|
||||
import org.jiayunet.pojo.po.SkillTag;
|
||||
import org.jiayunet.pojo.po.MajorCategory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 字典数据缓存服务
|
||||
* <p>启动时加载岗位分类、行业、地区、技能标签数据到内存,供清洗/推荐等业务使用</p>
|
||||
* <p>依赖:JobCategoryMapper、IndustryMapper、ChinaRegionsCodeMapper、SkillTagMapper</p>
|
||||
* <p>使用表:bg_job_category(全量缓存)、bg_industry(全量缓存)、bg_china_regions_code(市级缓存)、bg_skill_tag(按categoryId分组缓存)</p>
|
||||
* <p>启动时加载岗位分类、行业、地区、专业分类数据到内存,供清洗/推荐等业务使用</p>
|
||||
* <p>依赖:JobCategoryMapper、IndustryMapper、ChinaRegionsCodeMapper、MajorCategoryMapper</p>
|
||||
* <p>使用表:bg_job_category(全量缓存)、bg_industry(全量缓存)、bg_china_regions_code(市级缓存)、bg_major_category(全量缓存)</p>
|
||||
*
|
||||
* @author zk
|
||||
*/
|
||||
@@ -42,23 +40,23 @@ public class DictCacheService {
|
||||
private ChinaRegionsCodeMapper chinaRegionsCodeMapper;
|
||||
|
||||
@Autowired
|
||||
private SkillTagMapper skillTagMapper;
|
||||
private MajorCategoryMapper majorCategoryMapper;
|
||||
|
||||
private List<JobCategory> jobCategoryList;
|
||||
private List<Industry> industryList;
|
||||
private List<ChinaRegionsCode> regionList;
|
||||
|
||||
/** 技能标签按 categoryId 分组 */
|
||||
private Map<Long, List<SkillTag>> skillTagMap;
|
||||
private List<MajorCategory> majorCategoryList;
|
||||
|
||||
/** 岗位分类文本(叶子节点,带父级路径),供 AI prompt 使用 */
|
||||
private String jobCategoryText;
|
||||
/** 行业文本(叶子节点,带父级路径),供 AI prompt 使用 */
|
||||
private String industryText;
|
||||
/** 专业分类文本(三级叶子节点,带父级路径),供 AI prompt 使用 */
|
||||
private String majorCategoryText;
|
||||
|
||||
/**
|
||||
* 启动时加载全量字典数据
|
||||
* <p>分类/行业全量加载用于构建父级路径,文本只取叶子节点</p>
|
||||
* <p>分类/行业/专业全量加载用于构建父级路径,文本只取叶子节点</p>
|
||||
*/
|
||||
@PostConstruct
|
||||
public void refresh() {
|
||||
@@ -66,6 +64,7 @@ public class DictCacheService {
|
||||
|
||||
jobCategoryList = jobCategoryMapper.selectList(null);
|
||||
industryList = industryMapper.selectList(null);
|
||||
majorCategoryList = majorCategoryMapper.selectList(null);
|
||||
|
||||
// 只缓存省级+市级地区(provinceCode 为 null 是省,provinceCode 不为 null 且 cityCode 为 null 是市)
|
||||
regionList = chinaRegionsCodeMapper.selectList(
|
||||
@@ -76,7 +75,6 @@ public class DictCacheService {
|
||||
// 构建岗位分类文本:只取三级(叶子),格式 id:name(一级/二级)
|
||||
Map<Long, String> categoryNameMap = jobCategoryList.stream()
|
||||
.collect(Collectors.toMap(JobCategory::getId, JobCategory::getName));
|
||||
|
||||
jobCategoryText = jobCategoryList.stream()
|
||||
.filter(c -> c.getLevel() == 3)
|
||||
.map(c -> {
|
||||
@@ -97,17 +95,25 @@ public class DictCacheService {
|
||||
})
|
||||
.collect(Collectors.joining(", "));
|
||||
|
||||
// 构建专业分类文本:只取三级(叶子),格式 id:name(一级/二级)
|
||||
Map<Long, String> majorNameMap = majorCategoryList.stream()
|
||||
.collect(Collectors.toMap(MajorCategory::getId, MajorCategory::getName));
|
||||
majorCategoryText = majorCategoryList.stream()
|
||||
.filter(m -> m.getLevel() == 3)
|
||||
.map(m -> {
|
||||
String parentName = majorNameMap.getOrDefault(m.getParentId(), "");
|
||||
String rootName = majorNameMap.getOrDefault(m.getRootId(), "");
|
||||
return m.getId() + ":" + m.getName() + "(" + rootName + "/" + parentName + ")";
|
||||
})
|
||||
.collect(Collectors.joining(", "));
|
||||
|
||||
long categoryLeafCount = jobCategoryList.stream().filter(c -> c.getLevel() == 3).count();
|
||||
long industryLeafCount = industryList.stream().filter(i -> i.getLevel() == 2).count();
|
||||
long majorLeafCount = majorCategoryList.stream().filter(m -> m.getLevel() == 3).count();
|
||||
|
||||
// 加载技能标签,按 categoryId 分组
|
||||
List<SkillTag> skillTagList = skillTagMapper.selectList(null);
|
||||
skillTagMap = skillTagList.stream()
|
||||
.collect(Collectors.groupingBy(SkillTag::getCategoryId));
|
||||
|
||||
log.info("字典缓存加载完成: 岗位分类{}条(叶子{}条), 行业{}条(叶子{}条), 地区{}条, 技能标签{}条(覆盖{}个分类)",
|
||||
log.info("字典缓存加载完成: 岗位分类{}条(叶子{}条), 行业{}条(叶子{}条), 地区{}条, 专业分类{}条(叶子{}条)",
|
||||
jobCategoryList.size(), categoryLeafCount, industryList.size(), industryLeafCount,
|
||||
regionList.size(), skillTagList.size(), skillTagMap.size());
|
||||
regionList.size(), majorCategoryList.size(), majorLeafCount);
|
||||
}
|
||||
|
||||
/** 获取岗位分类文本(叶子节点,带父级路径,逗号分隔) */
|
||||
@@ -120,32 +126,9 @@ public class DictCacheService {
|
||||
return industryText;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取指定岗位类型下的技能标签文本(id:name 逗号分隔)
|
||||
*
|
||||
* @param categoryId 岗位类型ID
|
||||
* @return 标签文本,无标签返回 null
|
||||
*/
|
||||
public String getSkillTagText(Long categoryId) {
|
||||
List<SkillTag> tags = skillTagMap.getOrDefault(categoryId, Collections.emptyList());
|
||||
if (tags.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return tags.stream()
|
||||
.map(t -> t.getId() + ":" + t.getName())
|
||||
.collect(Collectors.joining(", "));
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取指定岗位类型下的技能标签ID集合(用于校验AI返回)
|
||||
*
|
||||
* @param categoryId 岗位类型ID
|
||||
* @return 标签ID集合
|
||||
*/
|
||||
public Set<Long> getSkillTagIds(Long categoryId) {
|
||||
return skillTagMap.getOrDefault(categoryId, Collections.emptyList()).stream()
|
||||
.map(SkillTag::getId)
|
||||
.collect(Collectors.toSet());
|
||||
/** 获取专业分类文本(三级叶子节点,带父级路径,逗号分隔) */
|
||||
public String getMajorCategoryText() {
|
||||
return majorCategoryText;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -5,10 +5,13 @@ import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jiayunet.ai.AiChatAbility;
|
||||
import com.baomidou.mybatisplus.core.toolkit.IdWorker;
|
||||
import org.jiayunet.mapper.AppJobDataMapper;
|
||||
import org.jiayunet.mapper.JobMapper;
|
||||
import org.jiayunet.mapper.SkillTagMapper;
|
||||
import org.jiayunet.pojo.po.AppJobData;
|
||||
import org.jiayunet.pojo.po.Job;
|
||||
import org.jiayunet.pojo.po.SkillTag;
|
||||
import org.jiayunet.tool.HttpTool;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
@@ -18,7 +21,6 @@ import org.springframework.stereotype.Service;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
@@ -26,7 +28,7 @@ import java.util.concurrent.Executors;
|
||||
* 岗位清洗服务
|
||||
* <p>定时从 app_job_data 捞取待清洗数据,调用 AI 清洗后写入业务表</p>
|
||||
* <p>依赖:AiChatAbility(AI调用)、DictCacheService(字典缓存)、JobCleanTransactionService(事务操作)</p>
|
||||
* <p>使用表:app_job_data(读取/更新状态)、bg_job(去重查询)</p>
|
||||
* <p>使用表:app_job_data(读取/更新状态)、bg_job(去重查询/更新专业)、bg_skill_tag(技能入库)</p>
|
||||
*
|
||||
* @author zk
|
||||
*/
|
||||
@@ -49,6 +51,9 @@ public class JobCleanService {
|
||||
@Autowired
|
||||
private JobMapper jobMapper;
|
||||
|
||||
@Autowired
|
||||
private SkillTagMapper skillTagMapper;
|
||||
|
||||
@Value("${app.job-clean.batch-size:20}")
|
||||
private int batchSize;
|
||||
|
||||
@@ -107,7 +112,7 @@ public class JobCleanService {
|
||||
|
||||
/**
|
||||
* 清洗单条岗位数据
|
||||
* <p>1. 前置校验 2. 拼prompt调AI 3. 解析结果 4. 写入业务表</p>
|
||||
* <p>1. 前置校验 2. 第一次AI提取结构化信息 3. 写入业务表 4. 第二次AI匹配专业 5. 第三次AI提取技能</p>
|
||||
*/
|
||||
public void cleanOne(AppJobData data) {
|
||||
// 1. 前置校验
|
||||
@@ -116,23 +121,14 @@ public class JobCleanService {
|
||||
return;
|
||||
}
|
||||
|
||||
// 2. 拼 prompt
|
||||
// 2. 第一次AI:提取岗位结构化信息
|
||||
String systemPrompt = buildSystemPrompt();
|
||||
String userMessage = buildUserMessage(data);
|
||||
|
||||
// 3. 调用 AI
|
||||
String aiResponse = aiChatAbility.chat(systemPrompt, userMessage);
|
||||
|
||||
// 4. 解析 JSON
|
||||
// 3. 解析JSON
|
||||
try {
|
||||
// 去掉可能的 markdown 代码块标记
|
||||
String json = aiResponse.trim();
|
||||
if (json.startsWith("```")) {
|
||||
json = json.replaceAll("^```\\w*\\n?", "").replaceAll("\\n?```$", "").trim();
|
||||
}
|
||||
// 清除控制字符(Tab等),保留换行符,防止 Jackson 解析失败
|
||||
json = json.replaceAll("[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]", "");
|
||||
|
||||
String json = cleanAiResponse(aiResponse);
|
||||
JsonNode root = HttpTool.objectMapper.readTree(json);
|
||||
|
||||
// valid 校验
|
||||
@@ -141,23 +137,22 @@ public class JobCleanService {
|
||||
return;
|
||||
}
|
||||
|
||||
// 5. 去重检查
|
||||
// 4. 去重检查
|
||||
String sourceId = String.valueOf(data.getId());
|
||||
Long existJob = jobMapper.selectCount(
|
||||
new LambdaQueryWrapper<Job>().eq(Job::getSourceId, sourceId));
|
||||
Long existJob = jobMapper.selectCount(new LambdaQueryWrapper<Job>().eq(Job::getSourceId, sourceId));
|
||||
if (existJob > 0) {
|
||||
jobCleanTransactionService.updateCleanStatus(data.getId(), 2);
|
||||
return;
|
||||
}
|
||||
|
||||
// 6. 公司处理(加锁防并发重复)
|
||||
// 5. 公司处理(加锁防并发重复)
|
||||
String companyShortName = root.path("companyShortName").asText("");
|
||||
if (companyShortName.isBlank()) {
|
||||
companyShortName = data.getCompany();
|
||||
}
|
||||
Long companyId = jobCleanTransactionService.findOrCreateCompany(companyShortName);
|
||||
|
||||
// 7. 地区处理
|
||||
// 6. 地区处理
|
||||
List<String> regionCodes = new ArrayList<>();
|
||||
JsonNode citiesNode = root.path("cities");
|
||||
if (citiesNode.isArray()) {
|
||||
@@ -169,84 +164,164 @@ public class JobCleanService {
|
||||
}
|
||||
}
|
||||
|
||||
// 8. 写入业务表(短事务,通过独立Service保证@Transactional生效)
|
||||
// 7. 写入业务表
|
||||
jobCleanTransactionService.saveJobData(root, data, companyId, sourceId, regionCodes);
|
||||
|
||||
// 9. 技能标签匹配(第二次AI调用,失败不影响岗位入库)
|
||||
// 拿到刚插入的 job
|
||||
Job insertedJob = jobMapper.selectOne(new LambdaQueryWrapper<Job>().eq(Job::getSourceId, sourceId).last("LIMIT 1"));
|
||||
if (insertedJob == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
// 8. 第二次AI:专业匹配(失败不影响岗位入库)
|
||||
try {
|
||||
Long categoryId = root.path("categoryId").asLong(0);
|
||||
String skillTagText = dictCacheService.getSkillTagText(categoryId);
|
||||
if (skillTagText != null) {
|
||||
String title = root.path("title").asText("");
|
||||
String desc = root.path("description").asText("");
|
||||
String req = root.path("requirement").asText("");
|
||||
List<Long> skillTagIds = matchSkillTags(title, desc, req, skillTagText, categoryId);
|
||||
if (!skillTagIds.isEmpty()) {
|
||||
// 查出刚插入的 job,拿 jobId
|
||||
Job insertedJob = jobMapper.selectOne(
|
||||
new LambdaQueryWrapper<Job>().eq(Job::getSourceId, sourceId).last("LIMIT 1"));
|
||||
if (insertedJob != null) {
|
||||
jobCleanTransactionService.saveSkillTagRelations(insertedJob.getId(), skillTagIds);
|
||||
}
|
||||
}
|
||||
}
|
||||
matchMajor(insertedJob.getId(), title, desc, req);
|
||||
} catch (Exception ex) {
|
||||
log.warn("技能标签匹配失败, id={}", data.getId(), ex);
|
||||
log.warn("专业匹配失败, id={}", data.getId(), ex);
|
||||
}
|
||||
|
||||
// 9. 第三次AI:技能提取(失败不影响岗位入库)
|
||||
try {
|
||||
String title = root.path("title").asText("");
|
||||
String desc = root.path("description").asText("");
|
||||
String req = root.path("requirement").asText("");
|
||||
extractSkillTags(insertedJob.getId(), title, desc, req);
|
||||
} catch (Exception ex) {
|
||||
log.warn("技能提取失败, id={}", data.getId(), ex);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("AI 返回解析失败, id={}, response={}", data.getId(), aiResponse, e);
|
||||
// 保持 clean_status=1,由僵尸恢复任务重置
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 第二次AI调用:匹配技能标签
|
||||
* <p>传入岗位信息和该分类下的标签列表,AI返回匹配的标签ID数组</p>
|
||||
* 第二次AI调用:匹配专业 + 专业敏感度
|
||||
* <p>传入岗位信息和三级专业分类列表,AI返回 requiredMajorIds + majorSensitivity → 更新 bg_job</p>
|
||||
*/
|
||||
private List<Long> matchSkillTags(String title, String description, String requirement,
|
||||
String skillTagText, Long categoryId) {
|
||||
private void matchMajor(Long jobId, String title, String description, String requirement) {
|
||||
String systemPrompt = """
|
||||
你是一个技能标签匹配助手。根据岗位信息,从给定的技能标签列表中选出匹配的标签ID。
|
||||
你是一个岗位专业匹配助手。根据岗位信息,判断该岗位对专业的要求。
|
||||
返回JSON格式:
|
||||
{
|
||||
"requiredMajorIds": [专业ID数组,从专业列表中选择最相关的,最多3个,无明确要求则空数组],
|
||||
"majorSensitivity": 0-2的数字(0=专业不限 1=优先相关专业 2=强制要求专业)
|
||||
}
|
||||
规则:
|
||||
1. 只能从给定列表中选择,不允许自创标签
|
||||
2. 选择与岗位核心技能要求相关的标签,不重复
|
||||
3. 只返回ID数组,如 [1, 3, 7],不要其他内容
|
||||
1. 只能从给定专业列表中选择ID
|
||||
2. 根据岗位描述判断专业敏感度:明确写"XX专业"→2,写"相关专业优先"→1,未提及→0
|
||||
3. majorSensitivity为0时,requiredMajorIds应为空数组
|
||||
4. 只返回JSON,不要其他内容
|
||||
""";
|
||||
|
||||
String userMessage = "【岗位信息】\n标题: " + title +
|
||||
"\n职责: " + description +
|
||||
"\n要求: " + requirement +
|
||||
"\n\n【可选标签列表】\n" + skillTagText;
|
||||
String userMessage = "【岗位信息】\n标题: " + title + "\n职责: " + description + "\n要求: " + requirement +
|
||||
"\n\n【专业分类列表】\n" + dictCacheService.getMajorCategoryText();
|
||||
|
||||
String aiResponse = aiChatAbility.chat(systemPrompt, userMessage);
|
||||
String json = cleanAiResponse(aiResponse);
|
||||
|
||||
// 解析返回的 ID 数组
|
||||
String json = aiResponse.trim();
|
||||
if (json.startsWith("```")) {
|
||||
json = json.replaceAll("^```\\w*\\n?", "").replaceAll("\\n?```$", "").trim();
|
||||
try {
|
||||
JsonNode root = HttpTool.objectMapper.readTree(json);
|
||||
int majorSensitivity = root.path("majorSensitivity").asInt(0);
|
||||
|
||||
List<Long> majorIds = new ArrayList<>();
|
||||
JsonNode idsNode = root.path("requiredMajorIds");
|
||||
if (idsNode.isArray()) {
|
||||
for (JsonNode node : idsNode) {
|
||||
long id = node.asLong(0);
|
||||
if (id > 0) {
|
||||
majorIds.add(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 更新 bg_job
|
||||
jobCleanTransactionService.updateJobMajor(jobId, majorIds.isEmpty() ? null : majorIds, majorSensitivity);
|
||||
} catch (Exception e) {
|
||||
log.warn("专业匹配AI返回解析失败: {}", json, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 第三次AI调用:自由提取技能标签
|
||||
* <p>AI返回技能名数组 → INSERT IGNORE 入 bg_skill_tag → 查ID → 写关联表</p>
|
||||
*/
|
||||
private void extractSkillTags(Long jobId, String title, String description, String requirement) {
|
||||
String systemPrompt = """
|
||||
你是一个技能提取助手。根据岗位信息,提取该岗位要求的核心专业能力和工具技能。
|
||||
返回JSON数组格式,如:["java", "spring boot", "mysql", "redis"]
|
||||
规则:
|
||||
1. 统一使用小写字母
|
||||
2. 尽量简短,使用业界通用缩写(如 js 而非 javascript,k8s 而非 kubernetes)
|
||||
3. 提取范围包括:技术栈、专业领域知识、行业工具、专业资质能力等
|
||||
4. 不提取纯软技能(如沟通能力、团队协作、学习能力、积极主动)
|
||||
5. 如果岗位完全没有专业能力要求(纯看态度和素质),返回空数组 []
|
||||
6. 最多15个,按重要性排序
|
||||
7. 只返回JSON数组,不要其他内容
|
||||
示例1(技术岗):
|
||||
输入:需要熟悉Java、Spring Boot框架,了解MySQL数据库和Redis缓存
|
||||
输出:["java", "spring boot", "mysql", "redis"]
|
||||
示例2(财务岗):
|
||||
输入:负责费用管理与审核,月度经营利润分析,要求财务管理、会计学相关专业
|
||||
输出:["财务管理", "会计核算", "经营分析", "费用审核"]
|
||||
示例3(科研岗):
|
||||
输入:开展生物合成等相关专业研究,啤酒生物发酵工程方向
|
||||
输出:["生物合成", "发酵工程"]
|
||||
示例4(纯素质岗):
|
||||
输入:具备较强的沟通能力和创新意识,积极主动,专业不限
|
||||
输出:[]
|
||||
""";
|
||||
|
||||
String userMessage = "【岗位信息】\n标题: " + title + "\n职责: " + description + "\n要求: " + requirement;
|
||||
|
||||
String aiResponse = aiChatAbility.chat(systemPrompt, userMessage);
|
||||
String json = cleanAiResponse(aiResponse);
|
||||
|
||||
try {
|
||||
JsonNode arrayNode = HttpTool.objectMapper.readTree(json);
|
||||
Set<Long> validIds = dictCacheService.getSkillTagIds(categoryId);
|
||||
List<Long> result = new ArrayList<>();
|
||||
if (arrayNode.isArray()) {
|
||||
if (!arrayNode.isArray() || arrayNode.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
List<Long> skillTagIds = new ArrayList<>();
|
||||
for (JsonNode node : arrayNode) {
|
||||
long id = node.asLong(0);
|
||||
if (id > 0 && validIds.contains(id) && !result.contains(id)) {
|
||||
result.add(id);
|
||||
String skillName = node.asText("").trim().toLowerCase();
|
||||
if (skillName.isBlank() || skillName.length() > 50) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
} catch (Exception e) {
|
||||
log.warn("技能标签AI返回解析失败: {}", json, e);
|
||||
return List.of();
|
||||
|
||||
// INSERT IGNORE + SELECT 获取ID
|
||||
Long tagId = findOrCreateSkillTag(skillName);
|
||||
if (tagId != null && !skillTagIds.contains(tagId)) {
|
||||
skillTagIds.add(tagId);
|
||||
}
|
||||
}
|
||||
|
||||
/** 构建系统提示词 */
|
||||
if (!skillTagIds.isEmpty()) {
|
||||
jobCleanTransactionService.saveSkillTagRelations(jobId, skillTagIds);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("技能提取AI返回解析失败: {}", json, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 查找或创建技能标签(依靠数据库唯一索引保证并发安全)
|
||||
* <p>INSERT IGNORE 后 SELECT,避免加锁</p>
|
||||
*/
|
||||
private Long findOrCreateSkillTag(String name) {
|
||||
// 先尝试插入(忽略重复),ID 由 IdWorker 生成
|
||||
skillTagMapper.insertIgnore(IdWorker.getId(), name, Instant.now());
|
||||
|
||||
// 再查询拿ID
|
||||
SkillTag tag = skillTagMapper.selectOne(new LambdaQueryWrapper<SkillTag>().eq(SkillTag::getName, name).last("LIMIT 1"));
|
||||
return tag != null ? tag.getId() : null;
|
||||
}
|
||||
|
||||
/** 构建第一次AI的系统提示词 */
|
||||
private String buildSystemPrompt() {
|
||||
return """
|
||||
你是一个岗位数据清洗助手。请根据提供的原始岗位数据,提取并结构化为JSON格式。
|
||||
@@ -284,7 +359,7 @@ public class JobCleanService {
|
||||
""";
|
||||
}
|
||||
|
||||
/** 构建用户消息 */
|
||||
/** 构建第一次AI的用户消息 */
|
||||
private String buildUserMessage(AppJobData data) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("【原始数据】\n");
|
||||
@@ -300,6 +375,16 @@ public class JobCleanService {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/** 清理AI返回的markdown代码块和控制字符 */
|
||||
private String cleanAiResponse(String response) {
|
||||
String json = response.trim();
|
||||
if (json.startsWith("```")) {
|
||||
json = json.replaceAll("^```\\w*\\n?", "").replaceAll("\\n?```$", "").trim();
|
||||
}
|
||||
json = json.replaceAll("[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]", "");
|
||||
return json;
|
||||
}
|
||||
|
||||
private String nullToEmpty(String s) {
|
||||
return s == null ? "" : s;
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ import java.util.List;
|
||||
* 岗位清洗事务服务
|
||||
* <p>独立出来解决 @Transactional 同类自调用失效问题</p>
|
||||
* <p>依赖:JobMapper、CompanyMapper、JobRegionRelationMapper、JobSkillTagRelationMapper、AppJobDataMapper</p>
|
||||
* <p>使用表:bg_job(写入)、bg_company(查询/创建)、bg_job_region_relation(写入)、bg_job_skill_tag_relation(写入)、app_job_data(更新状态)</p>
|
||||
* <p>使用表:bg_job(写入/更新)、bg_company(查询/创建)、bg_job_region_relation(写入)、bg_job_skill_tag_relation(写入)、app_job_data(更新状态)</p>
|
||||
*
|
||||
* @author zk
|
||||
*/
|
||||
@@ -149,6 +149,18 @@ public class JobCleanTransactionService {
|
||||
jobSkillTagRelationMapper.batchInsert(relations);
|
||||
}
|
||||
|
||||
/**
|
||||
* 更新岗位的专业要求和专业敏感度
|
||||
*/
|
||||
public void updateJobMajor(Long jobId, List<Long> requiredMajorIds, Integer majorSensitivity) {
|
||||
Job job = new Job();
|
||||
job.setId(jobId);
|
||||
job.setRequiredMajorIds(requiredMajorIds);
|
||||
job.setMajorSensitivity(majorSensitivity);
|
||||
job.setUpdateTime(Instant.now());
|
||||
jobMapper.updateById(job);
|
||||
}
|
||||
|
||||
/** 更新清洗状态 */
|
||||
public void updateCleanStatus(Long id, int status) {
|
||||
appJobDataMapper.update(null,
|
||||
|
||||
Reference in New Issue
Block a user