feat(file): 新增文件哈希去重与文本提取功能- 在多个模块中引入 FileHashUtil 并用于文件上传前的哈希计算

- 优化文件上传逻辑,实现基于哈希的秒传机制
- 新增音频服务中的文本提取方法,支持 txt 和 docx 格式
- 使用流式解析技术处理大文件内容,避免内存溢出
-为 AppVideoController 添加 /extract 接口用于文本内容提取
- 完善文件哈希工具类,增强线程安全性与异常处理
- 调整 SysOssService 的 updateHash 方法以支持复用逻辑- 统一构建 SysOssVo 实体时的哈希字段设置逻辑
This commit is contained in:
2025-11-07 16:59:07 +08:00
parent 07e60cf7f0
commit f25afe0e9d
8 changed files with 187 additions and 39 deletions

View File

@ -1,5 +1,6 @@
package com.fuyuanshen.app.controller;
import cn.dev33.satoken.annotation.SaIgnore;
import com.fuyuanshen.app.service.AudioProcessService;
import com.fuyuanshen.app.service.VideoProcessService;
import com.fuyuanshen.common.core.domain.R;
@ -51,4 +52,13 @@ public class AppVideoController extends BaseController {
public R<List<String>> uploadAudioTTS(@RequestParam String text) throws IOException {
return R.ok(audioProcessService.generateStandardPcmData(text));
}
/**
* 提取文本内容只支持txt/docx
*/
@PostMapping(value = "/extract", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
@RepeatSubmit(interval = 2, timeUnit = TimeUnit.SECONDS,message = "请勿重复提交!")
public R<String> extract(@RequestParam("file") MultipartFile file) throws Exception {
return R.ok("Success",audioProcessService.extract(file));
}
}

View File

@ -7,11 +7,17 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.IOException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
* 音频处理服务
@ -170,5 +176,75 @@ public class AudioProcessService {
}
}
/**
* 提取文本
*/
public String extract(MultipartFile file) throws Exception {
String name = file.getOriginalFilename();
if (name == null ||
(!name.endsWith(".txt") && !name.endsWith(".docx"))) {
throw new IllegalArgumentException("仅支持 .txt 或 .docx");
}
if (file.getSize() > MAX_AUDIO_SIZE) {
throw new IllegalArgumentException("文件超过5MB");
}
String text;
/* 全程流式,不落地磁盘,不一次性读字节数组 */
try (InputStream in = file.getInputStream()) {
if (name.endsWith(".txt")) {
text = readTxt(in);
} else {
text = readDocx(in);
}
}
return text;
}
/* ---------- txt按行读StringBuilder 复用 ---------- */
private String readTxt(InputStream in) throws IOException {
BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
StringBuilder sb = new StringBuilder(4096);
String line;
while ((line = br.readLine()) != null) {
sb.append(line).append('\n');
}
return sb.toString();
}
/* ---------- docxZipInputStream 只扫 document.xml ---------- */
private String readDocx(InputStream in) throws IOException {
ZipInputStream zin = new ZipInputStream(in);
ZipEntry e;
while ((e = zin.getNextEntry()) != null) {
if ("word/document.xml".equals(e.getName())) {
return staxExtract(zin); // 流式读 XML
}
}
return "";
}
/* ---------- StAX 流式提取 <w:t> ---------- */
private String staxExtract(InputStream xml) throws IOException {
XMLStreamReader r = null;
StringBuilder sb = new StringBuilder(4096);
try {
//System.out.println(new String(xml.readAllBytes()));
r = XMLInputFactory.newInstance().createXMLStreamReader(xml);
while (r.hasNext()) {
if (r.next() == XMLStreamConstants.START_ELEMENT &&
"t".equals(r.getLocalName())) {
String elementText = r.getElementText();
sb.append(elementText);
}
}
} catch (XMLStreamException ex) {
throw new IOException(ex);
} finally {
if (r != null) try { r.close(); } catch (XMLStreamException ignore) {}
}
return sb.toString();
}
}