Compare commits

2 Commits

Author SHA1 Message Date
dyf
4961ec47f8 Merge pull request 'feat(file): 新增文件哈希去重与文本提取功能- 在多个模块中引入 FileHashUtil 并用于文件上传前的哈希计算' (#19) from liwenlong/fys-Multi-tenant:jingquan into jingquan
Reviewed-on: #19
2025-11-07 17:06:27 +08:00
f25afe0e9d feat(file): 新增文件哈希去重与文本提取功能
- 在多个模块中引入 FileHashUtil 并用于文件上传前的哈希计算
- 优化文件上传逻辑，实现基于哈希的秒传机制
- 新增音频服务中的文本提取方法，支持 txt 和 docx 格式
- 使用流式解析技术处理大文件内容，避免内存溢出
- 为 AppVideoController 添加 /extract 接口用于文本内容提取
- 完善文件哈希工具类，增强线程安全性与异常处理
- 调整 SysOssService 的 updateHash 方法以支持复用逻辑
- 统一构建 SysOssVo 实体时的哈希字段设置逻辑
2025-11-07 16:59:07 +08:00
8 changed files with 187 additions and 39 deletions

View File

@ -1,5 +1,6 @@
package com.fuyuanshen.app.controller;
import cn.dev33.satoken.annotation.SaIgnore;
import com.fuyuanshen.app.service.AudioProcessService;
import com.fuyuanshen.app.service.VideoProcessService;
import com.fuyuanshen.common.core.domain.R;
@ -51,4 +52,13 @@ public class AppVideoController extends BaseController {
public R<List<String>> uploadAudioTTS(@RequestParam String text) throws IOException {
return R.ok(audioProcessService.generateStandardPcmData(text));
}
/**
 * Extracts the text content of an uploaded file; only txt/docx are supported.
 *
 * @param file the uploaded multipart file, bound from the {@code file} form field
 * @return the result of {@code R.ok("Success", text)}, where {@code text} is whatever
 *         {@code audioProcessService.extract(file)} returned
 * @throws Exception propagated unchanged from {@code audioProcessService.extract}
 */
@PostMapping(value = "/extract", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
@RepeatSubmit(interval = 2, timeUnit = TimeUnit.SECONDS, message = "请勿重复提交!")
public R<String> extract(@RequestParam("file") MultipartFile file) throws Exception {
    String extractedText = audioProcessService.extract(file);
    return R.ok("Success", extractedText);
}
}

View File

@ -7,11 +7,17 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.IOException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
* 音频处理服务
@ -170,5 +176,75 @@ public class AudioProcessService {
}
}
/**
 * Extracts the text content of an uploaded {@code .txt} or {@code .docx} file.
 *
 * <p>The file type is decided from the original filename's extension, compared
 * case-insensitively so that names such as {@code NOTES.TXT} are accepted. The content
 * is read from the upload's input stream, which is closed before this method returns.
 *
 * @param file the uploaded file
 * @return the extracted text (see {@code readTxt} / {@code readDocx} for the exact shape)
 * @throws IllegalArgumentException if the filename is missing, the extension is neither
 *                                  {@code .txt} nor {@code .docx}, or the size exceeds
 *                                  {@code MAX_AUDIO_SIZE}
 * @throws Exception                on any read or parse failure
 */
public String extract(MultipartFile file) throws Exception {
    String name = file.getOriginalFilename();
    if (name == null) {
        throw new IllegalArgumentException("仅支持 .txt 或 .docx");
    }

    // Locale.ROOT keeps the lower-casing independent of the server's default locale.
    String lowerName = name.toLowerCase(java.util.Locale.ROOT);
    boolean isTxt = lowerName.endsWith(".txt");
    boolean isDocx = lowerName.endsWith(".docx");
    if (!isTxt && !isDocx) {
        throw new IllegalArgumentException("仅支持 .txt 或 .docx");
    }

    // NOTE(review): the limit reuses MAX_AUDIO_SIZE (declared elsewhere in this class) while
    // the message states 5MB -- confirm the constant really is 5MB.
    if (file.getSize() > MAX_AUDIO_SIZE) {
        throw new IllegalArgumentException("文件超过5MB");
    }

    // Streamed end to end: nothing is written to disk and no full byte[] copy is made here.
    try (InputStream in = file.getInputStream()) {
        return isTxt ? readTxt(in) : readDocx(in);
    }
}
/**
 * Reads a plain-text stream as UTF-8, line by line.
 *
 * <p>Every line is terminated with {@code '\n'} in the result, so {@code \r\n} input is
 * normalised and the result ends with a newline whenever at least one line was read.
 * A leading UTF-8 byte-order mark is removed: {@link InputStreamReader} passes it through
 * as U+FEFF, which would otherwise become the first character of the extracted text.
 *
 * @param in the text stream; it is not closed here (the {@code extract} caller closes it)
 * @return the decoded text, or an empty string for an empty stream
 * @throws IOException if reading fails
 */
private String readTxt(InputStream in) throws IOException {
    BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
    StringBuilder sb = new StringBuilder(4096);
    String line;
    while ((line = br.readLine()) != null) {
        sb.append(line).append('\n');
    }
    // Drop the BOM that editors such as Windows Notepad prepend to UTF-8 files.
    if (sb.length() > 0 && sb.charAt(0) == '\uFEFF') {
        sb.deleteCharAt(0);
    }
    return sb.toString();
}
/**
 * Extracts text from a .docx stream by scanning the ZIP container for the
 * {@code word/document.xml} entry and streaming that entry through {@code staxExtract}.
 *
 * <p>The {@link ZipInputStream} is closed before returning so its inflater is released
 * promptly. Closing it also closes {@code in}; the {@code extract} caller's own close
 * afterwards is then a no-op.
 *
 * @param in the raw .docx (ZIP) stream
 * @return the text from {@code word/document.xml}, or an empty string if the archive
 *         has no such entry
 * @throws IOException if the archive cannot be read or the XML cannot be parsed
 */
private String readDocx(InputStream in) throws IOException {
    try (ZipInputStream zin = new ZipInputStream(in)) {
        for (ZipEntry entry = zin.getNextEntry(); entry != null; entry = zin.getNextEntry()) {
            if ("word/document.xml".equals(entry.getName())) {
                // The zip stream is positioned on this entry, so the XML is parsed as it inflates.
                return staxExtract(zin);
            }
        }
    }
    return "";
}
/**
 * Streams an XML document with StAX and concatenates the text of every element whose
 * local name is {@code t} (the {@code <w:t>} runs of a Word document), in document order.
 *
 * <p>No separator is inserted between runs or paragraphs. The namespace is not checked,
 * so any element with local name {@code t} contributes.
 *
 * <p>The XML comes from an uploaded file, so DTD processing and external entities are
 * disabled on the factory to prevent XXE (local file disclosure, SSRF, entity expansion).
 *
 * @param xml the XML stream; it is not closed by this method
 * @return the concatenated text, possibly empty
 * @throws IOException wrapping any {@link XMLStreamException} raised while parsing
 */
private String staxExtract(InputStream xml) throws IOException {
    XMLInputFactory factory = XMLInputFactory.newInstance();
    factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

    StringBuilder sb = new StringBuilder(4096);
    XMLStreamReader r = null;
    try {
        r = factory.createXMLStreamReader(xml);
        while (r.hasNext()) {
            if (r.next() == XMLStreamConstants.START_ELEMENT
                    && "t".equals(r.getLocalName())) {
                // getElementText() consumes up to the matching end tag.
                sb.append(r.getElementText());
            }
        }
    } catch (XMLStreamException ex) {
        throw new IOException(ex);
    } finally {
        if (r != null) {
            try {
                r.close();
            } catch (XMLStreamException ignored) {
                // Best-effort cleanup: a close failure must not mask the result or the real error.
            }
        }
    }
    return sb.toString();
}
}

View File

@ -39,6 +39,7 @@ public class DeviceDebugService {
private final IAppBusinessFileService appBusinessFileService;
private final IAppOperationVideoService appOperationVideoService;
private final DeviceService deviceService;
private final FileHashUtil fileHashUtil;
/**
* 文件上传并添加文件信息哈希去重
@ -62,26 +63,12 @@ public class DeviceDebugService {
Map<String, Long> hash2OssId = new LinkedHashMap<>(files.length);
for (MultipartFile file : files) {
// 1. 计算文件哈希
String hash = FileHashUtil.hash(file);
String hash = fileHashUtil.hash(file);
// 2. 先根据 hash 查库(秒传)
SysOssVo exist = sysOssService.selectByHash(hash);
Long ossId;
if (exist != null) {
// 2.1 已存在,直接复用
ossId = exist.getOssId();
hash2OssId.putIfAbsent(hash, ossId);
} else {
// 2.2 不存在,真正上传
SysOssVo upload = sysOssService.upload(file);
if (upload == null) {
return false;
}
ossId = upload.getOssId();
hash2OssId.putIfAbsent(hash, ossId);
// 2.3 把 hash 写回记录(供下次去重)
sysOssService.updateHashById(ossId, hash);
}
SysOssVo exist = sysOssService.updateHash(file, hash);
// 2.1 已存在,直接复用
long ossId = exist.getOssId();
hash2OssId.putIfAbsent(hash, ossId);
}
// 4. 组装业务中间表
List<AppBusinessFile> bizList = new ArrayList<>(bo.getDeviceIds().length * hash2OssId.size());

View File

@ -50,6 +50,7 @@ public class DeviceRepairRecordsServiceImpl extends ServiceImpl<DeviceRepairReco
private final DeviceRepairRecordsMapper baseMapper;
private final DeviceRepairImagesMapper imagesMapper;
private final ISysOssService ossService;
private final FileHashUtil fileHashUtil;
/**
* 查询设备维修记录
@ -210,19 +211,13 @@ public class DeviceRepairRecordsServiceImpl extends ServiceImpl<DeviceRepairReco
// 1. 计算文件哈希
String hash = null;
try {
hash = FileHashUtil.hash(file);
hash = fileHashUtil.hash(file);
} catch (IOException e) {
throw new RuntimeException(e);
}
// 2. 先根据 hash 查库(秒传)
SysOssVo exist = ossService.selectByHash(hash);
if (exist == null) {
// 2.2 不存在,真正上传
exist = ossService.upload(file);
// 2.3 把 hash 写回记录(供下次去重)
ossService.updateHashById(exist.getOssId(), hash);
}
SysOssVo exist = ossService.updateHash(file,hash);
DeviceRepairImages image = new DeviceRepairImages();
image.setRecordId(recordId);

View File

@ -34,6 +34,7 @@ import com.fuyuanshen.equipment.enums.CommunicationModeEnum;
import com.fuyuanshen.equipment.enums.DeviceActiveStatusEnum;
import com.fuyuanshen.equipment.mapper.*;
import com.fuyuanshen.equipment.service.*;
import com.fuyuanshen.equipment.utils.FileHashUtil;
import com.fuyuanshen.system.domain.vo.SysOssVo;
import com.fuyuanshen.system.domain.vo.SysRoleVo;
import com.fuyuanshen.system.service.ISysOssService;
@ -84,6 +85,7 @@ public class DeviceServiceImpl extends ServiceImpl<DeviceMapper, Device> impleme
private final DeviceTypeGrantsMapper deviceTypeGrantsMapper;
private final DeviceFenceAccessRecordMapper deviceFenceAccessRecordMapper;
private final FileHashUtil fileHashUtil;
/**
@ -209,7 +211,8 @@ public class DeviceServiceImpl extends ServiceImpl<DeviceMapper, Device> impleme
// 保存图片并获取URL
if (deviceForm.getFile() != null) {
SysOssVo upload = ossService.upload(deviceForm.getFile());
String fileHash = fileHashUtil.hash(deviceForm.getFile());
SysOssVo upload = ossService.updateHash(deviceForm.getFile(),fileHash);
// 设置图片路径
deviceForm.setDevicePic(upload.getUrl());
}
@ -283,8 +286,8 @@ public class DeviceServiceImpl extends ServiceImpl<DeviceMapper, Device> impleme
// 处理上传的图片
if (deviceForm.getFile() != null) {
// 设置图片路径
SysOssVo oss = ossService.upload(deviceForm.getFile());
String fileHash = fileHashUtil.hash(deviceForm.getFile());
SysOssVo oss = ossService.updateHash(deviceForm.getFile(),fileHash);
// 强制将HTTP替换为HTTPS
if (oss.getUrl() != null && oss.getUrl().startsWith("http://")) {
oss.setUrl(oss.getUrl().replaceFirst("^http://", "https://"));

View File

@ -1,28 +1,70 @@
package com.fuyuanshen.equipment.utils;
import org.apache.commons.codec.digest.DigestUtils;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HexFormat;
/**
* 文件哈希工具类
*/
@Component // 如果使用 Spring 可以注入
public class FileHashUtil {
/* 算法常量 */
private static final String ALGORITHM = "SHA-256";
/* 缓冲区大小 8 KB */
private static final int BUFFER_SIZE = 8192;
public static String hash(MultipartFile file) throws IOException {
MessageDigest digest = DigestUtils.getDigest(ALGORITHM);
/**
* 计算上传文件的 SHA-256 十六进制哈希
*
* @param file 上传文件;不能为 null且必须非空
* @return 64 位小写十六进制字符串
* @throws IllegalArgumentException 参数不合法
* @throws IOException 流读取失败
* @throws IllegalStateException 算法运行时异常(不会触发)
*/
public String hash(MultipartFile file) throws IOException {
validate(file);
/* 每个请求新建实例,保证线程安全 */
MessageDigest digest = newDigest();
/* try-with-resources 自动关闭流 */
try (InputStream in = file.getInputStream()) {
byte[] buf = new byte[8192];
byte[] buf = new byte[BUFFER_SIZE];
int len;
while ((len = in.read(buf)) != -1) {
digest.update(buf, 0, len);
}
}
/* JDK 17+ 的 HexFormat比 Apache Commons 更快且无需额外依赖 */
return HexFormat.of().formatHex(digest.digest());
}
/* -------------------- 私有辅助方法 -------------------- */
/**
 * Rejects an upload that is missing or has no content.
 *
 * @param file the upload to check
 * @throws IllegalArgumentException if {@code file} is {@code null} or {@code file.isEmpty()}
 */
private static void validate(MultipartFile file) {
    if (file != null && !file.isEmpty()) {
        return;
    }
    String reason = (file == null) ? "MultipartFile 不能为 null" : "上传文件不能为空";
    throw new IllegalArgumentException(reason);
}
/**
 * Creates a fresh {@link MessageDigest} for {@code ALGORITHM}.
 *
 * @return a new digest instance
 * @throws IllegalStateException if the algorithm is not available in this runtime
 */
private static MessageDigest newDigest() {
    final MessageDigest instance;
    try {
        instance = MessageDigest.getInstance(ALGORITHM);
    } catch (NoSuchAlgorithmException missing) {
        // SHA-256 is expected in every JDK; reaching this branch means the runtime is broken.
        throw new IllegalStateException("算法 " + ALGORITHM + " 不可用", missing);
    }
    return instance;
}
}

View File

@ -60,6 +60,14 @@ public interface ISysOssService {
*/
int updateHashById(long ossId,String fileHash);
/**
 * Returns the stored file record for the given content hash, uploading the file only
 * when no such record exists.
 *
 * <p>As implemented in {@code SysOssServiceImpl}: the hash is looked up first and an
 * existing record is returned as-is; otherwise the file is uploaded and a new record
 * is saved together with the hash.
 *
 * @param file the file to upload when no record with this hash exists
 * @param hash the content hash used for the lookup and stored on a newly created record
 * @return the existing or newly created {@code SysOssVo} (a single record, not a list)
 */
SysOssVo updateHash(MultipartFile file, String hash);
/**
* 上传 MultipartFile 到对象存储服务,并保存文件信息到数据库
*

View File

@ -191,6 +191,32 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
storage.download(sysOss.getFileName(), response.getOutputStream(), response::setContentLengthLong);
}
/**
 * Returns the stored file record for the given content hash, uploading the file to
 * object storage only when no record with that hash exists.
 *
 * <p>NOTE(review): the lookup and the later insert are separate steps with no visible
 * locking, so two concurrent uploads of the same new file may both be stored -- confirm
 * whether that is acceptable.
 *
 * @param file the MultipartFile to upload when no record with this hash exists
 * @param hash the content hash used for the lookup and saved on a newly created record
 * @return the existing record, or the record created for the new upload
 * @throws ServiceException if the file bytes cannot be read
 */
@Override
public SysOssVo updateHash(MultipartFile file, String hash) {
    // Look the hash up first so an already-stored file is reused without uploading.
    SysOssVo existing = baseMapper.selectByHash(hash);
    if (existing != null) {
        return existing;
    }

    // NOTE(review): getOriginalFilename() may return null, which fails on the next line
    // exactly as the previous code did -- confirm whether callers can send such uploads.
    String originalfileName = file.getOriginalFilename();
    // A name without a dot has no suffix. The previous substring call received -1 in that
    // case, which it treats as "count from the end", yielding the last character.
    int dotIndex = originalfileName.lastIndexOf('.');
    String suffix = dotIndex >= 0 ? originalfileName.substring(dotIndex) : "";

    OssClient storage = OssFactory.instance();
    UploadResult uploadResult;
    try {
        uploadResult = storage.uploadSuffix(file.getBytes(), suffix, file.getContentType());
    } catch (IOException e) {
        throw new ServiceException(e.getMessage());
    }
    // Save the file record together with its hash so the next upload can be deduplicated.
    return buildResultEntity(originalfileName, suffix, storage.getConfigKey(), uploadResult, hash);
}
/**
* 上传 MultipartFile 到对象存储服务,并保存文件信息到数据库
*
@ -210,7 +236,7 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
throw new ServiceException(e.getMessage());
}
// 保存文件信息
return buildResultEntity(originalfileName, suffix, storage.getConfigKey(), uploadResult);
return buildResultEntity(originalfileName, suffix, storage.getConfigKey(), uploadResult,null);
}
/**
@ -226,7 +252,7 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
OssClient storage = OssFactory.instance();
UploadResult uploadResult = storage.uploadSuffix(file, suffix);
// 保存文件信息
return buildResultEntity(originalfileName, suffix, storage.getConfigKey(), uploadResult);
return buildResultEntity(originalfileName, suffix, storage.getConfigKey(), uploadResult,null);
}
@ -255,18 +281,19 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
uploadResult = storage.uploadSuffix(data, suffix, "image/jpeg"); // 假设是图片类型,可以根据实际需要修改
// 保存文件信息
return buildResultEntity(fileName, suffix, storage.getConfigKey(), uploadResult);
return buildResultEntity(fileName, suffix, storage.getConfigKey(), uploadResult,null);
}
@NotNull
private SysOssVo buildResultEntity(String originalfileName, String suffix, String configKey, UploadResult uploadResult) {
private SysOssVo buildResultEntity(String originalfileName, String suffix, String configKey, UploadResult uploadResult, String hash) {
SysOss oss = new SysOss();
oss.setUrl(uploadResult.getUrl());
oss.setFileSuffix(suffix);
oss.setFileName(uploadResult.getFilename());
oss.setOriginalName(originalfileName);
oss.setService(configKey);
oss.setFileHash(hash); // 设置哈希值
baseMapper.insert(oss);
SysOssVo sysOssVo = MapstructUtils.convert(oss, SysOssVo.class);
return this.matchingUrl(sysOssVo);