From b9d8c2724ab991540ac389f99f80d7b5f6710755 Mon Sep 17 00:00:00 2001 From: zk Date: Tue, 2 Jun 2026 14:45:05 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=B5=B7=E6=95=B0=E6=8D=AE=E6=B8=85?= =?UTF-8?q?=E6=B4=97=E7=9B=B8=E5=85=B3=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../org/jiayunet/mapper/AppJobDataMapper.java | 2 +- .../java/org/jiayunet/pojo/po/AppJobData.java | 35 +++++++++---------- .../main/java/org/jiayunet/pojo/po/Job.java | 6 ++++ .../org/jiayunet/service/JobCleanService.java | 13 +++---- .../service/JobCleanTransactionService.java | 26 +++++++++----- 5 files changed, 49 insertions(+), 33 deletions(-) diff --git a/manager/src/main/java/org/jiayunet/mapper/AppJobDataMapper.java b/manager/src/main/java/org/jiayunet/mapper/AppJobDataMapper.java index 5ee38a3..2685ed0 100644 --- a/manager/src/main/java/org/jiayunet/mapper/AppJobDataMapper.java +++ b/manager/src/main/java/org/jiayunet/mapper/AppJobDataMapper.java @@ -19,6 +19,6 @@ public interface AppJobDataMapper extends CommonMapper { * 查询待清洗数据并加行锁(SELECT ... FOR UPDATE) *

必须在事务内调用,配合状态更新实现原子锁定

*/ - @Select("SELECT * FROM app_job_data WHERE clean_status = 0 AND is_valid = 1 LIMIT #{limit} FOR UPDATE") + @Select("SELECT * FROM app_job_data WHERE clean_status = 'pending' LIMIT #{limit} FOR UPDATE") List selectForUpdate(@Param("limit") int limit); } diff --git a/manager/src/main/java/org/jiayunet/pojo/po/AppJobData.java b/manager/src/main/java/org/jiayunet/pojo/po/AppJobData.java index 8de648c..9f0a608 100644 --- a/manager/src/main/java/org/jiayunet/pojo/po/AppJobData.java +++ b/manager/src/main/java/org/jiayunet/pojo/po/AppJobData.java @@ -1,6 +1,7 @@ package org.jiayunet.pojo.po; import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableName; import lombok.Data; @@ -20,8 +21,9 @@ public class AppJobData { @TableId(type = IdType.AUTO) private Long id; - /** 关联爬取任务ID */ - private Long taskCrawlId; + /** 关联urllistid */ + @TableField("urllistid") + private Long urllistId; /** 职位名称 */ private String jobTitle; @@ -47,33 +49,30 @@ public class AppJobData { /** 详情页URL */ private String detailUrl; + /** 招聘分类: 0=校招, 1=实习, 2=社招, 3=其他 */ + private Integer recruitCategory; + /** 内容哈希值,用于查重 */ private String contentHash; /** 数据来源 0=官网 1=平台 */ private Integer sources; - /** 是否独立URL 0=页内展示 1=独立页面 */ - private Integer isIndependentUrl; - - /** 是否有效 0=无效 1=有效 */ - private Integer isValid; - - /** 有效期 */ + /** 发布日期 */ private Instant expireAt; - /** 验证状态 pending=待验证 checking=验证中 checked=已验证 */ - private String checkStatus; - - /** 清洗状态 0=待清洗 1=清洗中 2=已入库 3=已丢弃 */ - private Integer cleanStatus; - - /** 上次验证时间 */ - private Instant lastCheckAt; - /** 创建时间 */ private Instant createdAt; /** 更新时间 */ private Instant updatedAt; + + /** 清洗状态: pending=待清洗 cleaning=清洗中 cleaned=已清洗 discarded=已丢弃 */ + private String cleanStatus; + + /** 清洗开始时间 */ + private Instant cleanStartedAt; + + /** 清洗完成时间 */ + private Instant cleanedAt; } diff --git a/manager/src/main/java/org/jiayunet/pojo/po/Job.java b/manager/src/main/java/org/jiayunet/pojo/po/Job.java index 0a83ef4..c7448ac 100644 --- a/manager/src/main/java/org/jiayunet/pojo/po/Job.java +++ b/manager/src/main/java/org/jiayunet/pojo/po/Job.java @@ -77,6 +77,12 @@ public class Job { /** 爬虫原始数据ID,用于去重 */ private String sourceId; + /** 招聘分类 0=校招 1=实习 2=社招 3=其他 */ + private Integer recruitCategory; + + /** 发布日期 */ + private Instant expireAt; + /** 状态 0=上架 1=下架 2=已失效 */ private Integer status; diff --git a/manager/src/main/java/org/jiayunet/service/JobCleanService.java b/manager/src/main/java/org/jiayunet/service/JobCleanService.java index 1dc082c..3fac16d 100644 --- a/manager/src/main/java/org/jiayunet/service/JobCleanService.java +++ b/manager/src/main/java/org/jiayunet/service/JobCleanService.java @@ -102,9 +102,10 @@ public class JobCleanService { public void recoverZombie() { int recovered = appJobDataMapper.update(null, new LambdaUpdateWrapper() - .set(AppJobData::getCleanStatus, 0) - .eq(AppJobData::getCleanStatus, 1) - .lt(AppJobData::getUpdatedAt, Instant.now().minusSeconds(600))); + .set(AppJobData::getCleanStatus, "pending") + .set(AppJobData::getCleanStartedAt, null) + .eq(AppJobData::getCleanStatus, "cleaning") + .lt(AppJobData::getCleanStartedAt, Instant.now().minusSeconds(600))); if (recovered > 0) { log.info("僵尸恢复:重置{}条数据", recovered); @@ -118,7 +119,7 @@ public class JobCleanService { public void cleanOne(AppJobData data) { // 1. 前置校验 if (data.getDescription() == null || data.getDescription().length() < 20) { - jobCleanTransactionService.updateCleanStatus(data.getId(), 3); + jobCleanTransactionService.updateCleanStatus(data.getId(), "discarded"); return; } @@ -134,7 +135,7 @@ public class JobCleanService { // valid 校验 if (!root.path("valid").asBoolean(false)) { - jobCleanTransactionService.updateCleanStatus(data.getId(), 3); + jobCleanTransactionService.updateCleanStatus(data.getId(), "discarded"); return; } @@ -142,7 +143,7 @@ public class JobCleanService { String sourceId = String.valueOf(data.getId()); Long existJob = jobMapper.selectCount(new LambdaQueryWrapper().eq(Job::getSourceId, sourceId)); if (existJob > 0) { - jobCleanTransactionService.updateCleanStatus(data.getId(), 2); + jobCleanTransactionService.updateCleanStatus(data.getId(), "cleaned"); return; } diff --git a/manager/src/main/java/org/jiayunet/service/JobCleanTransactionService.java b/manager/src/main/java/org/jiayunet/service/JobCleanTransactionService.java index 8101baf..da65ab7 100644 --- a/manager/src/main/java/org/jiayunet/service/JobCleanTransactionService.java +++ b/manager/src/main/java/org/jiayunet/service/JobCleanTransactionService.java @@ -86,6 +86,10 @@ public class JobCleanTransactionService { Long requiredIndustryId = root.path("requiredIndustryId").asLong(0); job.setRequiredIndustryId(requiredIndustryId == 0 ? null : requiredIndustryId); + // 从原始数据透传 recruit_category 和 expire_at + job.setRecruitCategory(data.getRecruitCategory()); + job.setExpireAt(data.getExpireAt()); + job.setSourceUrl(data.getDetailUrl()); job.setSourceId(sourceId); job.setStatus(0); @@ -106,8 +110,8 @@ public class JobCleanTransactionService { jobRegionRelationMapper.batchInsert(relations); } - // 更新清洗状态 - updateCleanStatus(data.getId(), 2); + // 更新清洗状态为已清洗 + updateCleanStatus(data.getId(), "cleaned"); } /** @@ -162,11 +166,16 @@ public class JobCleanTransactionService { } /** 更新清洗状态 */ - public void updateCleanStatus(Long id, int status) { - appJobDataMapper.update(null, - new LambdaUpdateWrapper() - .set(AppJobData::getCleanStatus, status) - .eq(AppJobData::getId, id)); + public void updateCleanStatus(Long id, String status) { + LambdaUpdateWrapper wrapper = new LambdaUpdateWrapper() + .set(AppJobData::getCleanStatus, status) + .eq(AppJobData::getId, id); + + if ("cleaned".equals(status)) { + wrapper.set(AppJobData::getCleanedAt, Instant.now()); + } + + appJobDataMapper.update(null, wrapper); } /** @@ -185,7 +194,8 @@ public class JobCleanTransactionService { List ids = dataList.stream().map(AppJobData::getId).toList(); appJobDataMapper.update(null, new LambdaUpdateWrapper() - .set(AppJobData::getCleanStatus, 1) + .set(AppJobData::getCleanStatus, "cleaning") + .set(AppJobData::getCleanStartedAt, Instant.now()) .in(AppJobData::getId, ids)); return dataList;