Merge branch 'main' into dyf-device

This commit is contained in:
2025-11-07 17:11:21 +08:00
8 changed files with 187 additions and 37 deletions

View File

@ -1,5 +1,6 @@
package com.fuyuanshen.app.controller;
import cn.dev33.satoken.annotation.SaIgnore;
import com.fuyuanshen.app.service.AudioProcessService;
import com.fuyuanshen.app.service.VideoProcessService;
import com.fuyuanshen.common.core.domain.R;
@ -51,4 +52,13 @@ public class AppVideoController extends BaseController {
public R<List<String>> uploadAudioTTS(@RequestParam String text) throws IOException {
    // Hand the text to the audio service and wrap whatever it produces via R.ok.
    final List<String> pcmData = audioProcessService.generateStandardPcmData(text);
    return R.ok(pcmData);
}
/**
 * Extracts the text content of an uploaded file; only txt/docx are supported.
 *
 * @param file the multipart upload bound to the {@code file} request parameter
 * @return the result of {@code R.ok("Success", ...)} carrying the text returned by the audio service
 * @throws Exception whatever {@code audioProcessService.extract} throws
 */
@PostMapping(value = "/extract", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
@RepeatSubmit(interval = 2, timeUnit = TimeUnit.SECONDS, message = "请勿重复提交!")
public R<String> extract(@RequestParam("file") MultipartFile file) throws Exception {
    // The controller only delegates; all validation and parsing happen in the service.
    final String extractedText = audioProcessService.extract(file);
    return R.ok("Success", extractedText);
}
}

View File

@ -10,9 +10,17 @@ import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
* 音频处理服务
@ -226,5 +234,75 @@ public class AudioProcessService {
}
}
/**
 * Extracts the text of an uploaded {@code .txt} or {@code .docx} file.
 *
 * <p>The upload is consumed as a stream: it is not written to disk and not read into a
 * single byte array. {@code .txt} files are handled by {@code readTxt}, {@code .docx}
 * files by {@code readDocx}.
 *
 * @param file the uploaded file
 * @return the extracted text
 * @throws IllegalArgumentException if the original file name is missing or does not end in
 *         {@code .txt} / {@code .docx} (compared case-insensitively), or if the file size
 *         exceeds {@code MAX_AUDIO_SIZE}
 * @throws Exception if the upload cannot be read or parsed
 */
public String extract(MultipartFile file) throws Exception {
    final String name = file.getOriginalFilename();

    // Compare the suffix case-insensitively so "NOTES.TXT" or "Report.Docx" are accepted too.
    // regionMatches returns false (rather than throwing) when the name is shorter than the
    // suffix, i.e. when the computed offset is negative.
    final boolean isTxt = name != null
            && name.regionMatches(true, name.length() - 4, ".txt", 0, 4);
    final boolean isDocx = name != null
            && name.regionMatches(true, name.length() - 5, ".docx", 0, 5);
    if (!isTxt && !isDocx) {
        throw new IllegalArgumentException("仅支持 .txt 或 .docx");
    }

    // NOTE(review): the limit reuses MAX_AUDIO_SIZE for a text upload and the message
    // states 5MB; the constant is defined elsewhere - confirm the two agree.
    if (file.getSize() > MAX_AUDIO_SIZE) {
        throw new IllegalArgumentException("文件超过5MB");
    }

    // Fully streamed: nothing is written to disk and the bytes are never read in one piece.
    try (InputStream in = file.getInputStream()) {
        return isTxt ? readTxt(in) : readDocx(in);
    }
}
/**
 * Reads a UTF-8 text stream line by line into one string.
 *
 * <p>Every line is terminated with {@code '\n'} in the result, whatever line separator the
 * input used. A leading UTF-8 byte order mark, if present, is dropped so that it does not
 * end up in the extracted text.
 *
 * @param in the text stream; it is left open for the caller to close
 * @return the stream content with normalized line endings, or an empty string for empty input
 * @throws IOException if reading from the stream fails
 */
private String readTxt(InputStream in) throws IOException {
    // Deliberately not closed here: closing the reader would close the caller-owned stream.
    BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
    StringBuilder text = new StringBuilder(4096);

    String line = reader.readLine();
    // The UTF-8 decoder passes a byte order mark through as U+FEFF; strip it from the first line.
    if (line != null && line.startsWith("\uFEFF")) {
        line = line.substring(1);
    }
    while (line != null) {
        text.append(line).append('\n');
        line = reader.readLine();
    }
    return text.toString();
}
/**
 * Extracts the text of a .docx archive by scanning it for {@code word/document.xml}.
 *
 * <p>Entries are visited in archive order through a {@link ZipInputStream}; the first entry
 * named {@code word/document.xml} is handed to {@code staxExtract} and all other entries
 * are skipped.
 *
 * @param in the raw .docx (zip) stream; it is closed together with the zip stream when this
 *           method returns
 * @return the text extracted from {@code word/document.xml}, or an empty string when the
 *         archive has no such entry
 * @throws IOException if the archive or the XML cannot be read
 */
private String readDocx(InputStream in) throws IOException {
    // try-with-resources closes the zip stream, which releases its Inflater right away
    // instead of leaving that to garbage collection. Closing also closes 'in'; a later
    // close() by the caller is harmless because Closeable.close() is idempotent.
    try (ZipInputStream zip = new ZipInputStream(in)) {
        for (ZipEntry entry = zip.getNextEntry(); entry != null; entry = zip.getNextEntry()) {
            if ("word/document.xml".equals(entry.getName())) {
                return staxExtract(zip); // stream the XML straight out of the archive
            }
        }
        return "";
    }
}
/**
 * Streams through an XML document with StAX and concatenates the text of every element
 * whose local name is {@code t} (for example {@code <w:t>}).
 *
 * <p>The input originates from an uploaded file, so the parser is created with DTD
 * processing and external entities switched off to rule out XXE and entity-expansion
 * attacks.
 *
 * <p>NOTE(review): nothing is emitted between elements, so the text of adjacent runs and
 * paragraphs is joined without any separator - confirm this is what callers expect.
 *
 * @param xml the XML stream; it is not closed by this method
 * @return the concatenated element text, or an empty string when no {@code t} element exists
 * @throws IOException if the XML is malformed or cannot be read; the underlying
 *         {@link XMLStreamException} is kept as the cause
 */
private String staxExtract(InputStream xml) throws IOException {
    final XMLInputFactory factory = XMLInputFactory.newInstance();
    // Untrusted input: never process DTDs or resolve external entities.
    factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

    final StringBuilder text = new StringBuilder(4096);
    XMLStreamReader reader = null;
    try {
        reader = factory.createXMLStreamReader(xml);
        while (reader.hasNext()) {
            if (reader.next() == XMLStreamConstants.START_ELEMENT
                    && "t".equals(reader.getLocalName())) {
                // getElementText() consumes up to the matching end tag.
                text.append(reader.getElementText());
            }
        }
    } catch (XMLStreamException ex) {
        throw new IOException(ex);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (XMLStreamException ignored) {
                // Best-effort cleanup: a failure while closing must not mask the real outcome.
            }
        }
    }
    return text.toString();
}
}