feat(equipment): 添加阿里巴巴TTS语音合成工具类

- 实现文本转语音功能,支持多种声音、语速、音量等参数调节
- 集成阿里云TTS服务,支持访问令牌自动刷新与缓存
- 提供HTTP客户端配置与请求处理逻辑
- 支持生成标准PCM数据及WAV格式音频文件
- 实现音频文件保存与错误处理机制
- 添加参数校验与日志记录功能
- 集成Redis缓存管理访问令牌
- 支持URL编码与请求构建逻辑
- 实现响应处理与音频数据写入文件功能
- 添加静默删除临时文件与错误响应处理机制
This commit is contained in:
2025-10-24 11:22:35 +08:00
parent 740a638444
commit 9bbddee1d5
9 changed files with 1398 additions and 219 deletions

View File

@ -1,249 +1,49 @@
package com.fuyuanshen.app.controller;
import com.fuyuanshen.app.service.AudioProcessService;
import com.fuyuanshen.app.service.VideoProcessService;
import com.fuyuanshen.common.core.domain.R;
import com.fuyuanshen.common.web.core.BaseController;
import lombok.RequiredArgsConstructor;
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.Frame;
import org.bytedeco.javacv.Java2DFrameUtils;
import org.springframework.http.MediaType;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.io.IOException;
import java.util.Base64;
import java.util.List;
/**
* APP 视频处理
* @date 2025-09-15
* APP 视频处理控制器
*/
@Validated
@RequiredArgsConstructor
@RestController
@RequestMapping("/app/video")
public class AppVideoController extends BaseController {
// 可配置项:建议从 application.yml 中读取
private static final int MAX_VIDEO_SIZE = 10 * 1024 * 1024; // 10 MB
private static final int FRAME_RATE = 15; // 每秒抽15帧
private static final int DURATION = 2; // 抽2秒
private static final int TOTAL_FRAMES = FRAME_RATE * DURATION;
private static final int WIDTH = 160;
private static final int HEIGHT = 80;
private static final char[] HEX_ARRAY = "0123456789ABCDEF".toCharArray();
private final VideoProcessService videoProcessService;
private final AudioProcessService audioProcessService;
@PostMapping(value = "/upload", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public R<List<String>> upload(@RequestParam("file") MultipartFile file) {
if (file == null || file.isEmpty()) {
return R.fail("上传文件不能为空");
}
if (!isVideo(file.getOriginalFilename())) {
return R.fail("只允许上传视频文件");
}
if (file.getSize() > MAX_VIDEO_SIZE) {
return R.fail("视频大小不能超过10MB");
}
File tempFile = null;
try {
// 创建临时文件保存上传的视频
tempFile = createTempVideoFile(file);
List<BufferedImage> frames = extractFramesFromVideo(tempFile);
if (frames.isEmpty()) {
return R.fail("无法提取任何帧");
}
// ✅ 新增:保存帧为图片
//saveFramesToLocal(frames, "extracted_frame");
byte[] binaryData = convertFramesToRGB565(frames);
// String base64Data = Base64.getEncoder().encodeToString(binaryData);
//
// return R.ok(base64Data);
// 构造响应头
// 将二进制数据转为 Hex 字符串
// 转换为 Hex 字符串列表
List<String> hexList = bytesToHexList(binaryData);
return R.ok(hexList);
} catch (Exception e) {
return R.fail("视频处理失败:" + e.getMessage());
} finally {
deleteTempFile(tempFile);
}
public R<List<String>> uploadVideo(@RequestParam("file") MultipartFile file) {
return R.ok(videoProcessService.processVideo(file));
}
/**
* rgb565 转 hex
* 上传音频文件并转码
*/
private List<String> bytesToHexList(byte[] bytes) {
List<String> hexList = new ArrayList<>();
for (byte b : bytes) {
int value = b & 0xFF;
char high = HEX_ARRAY[value >>> 4];
char low = HEX_ARRAY[value & 0x0F];
hexList.add(String.valueOf(high) + low);
}
return hexList;
@PostMapping(value = "/audio", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public R<List<String>> uploadAudio(@RequestParam("file") MultipartFile file) {
return R.ok(audioProcessService.processAudio(file));
}
/**
* 创建临时文件并保存上传的视频
* 文字转音频TTS服务
*/
/**
 * Copies the uploaded video into a newly created temporary file.
 *
 * @param file uploaded multipart video
 * @return the temporary file holding the upload; the caller must delete it
 * @throws Exception if the temporary file cannot be created or the upload cannot be written
 */
private File createTempVideoFile(MultipartFile file) throws Exception {
    File tempFile = Files.createTempFile("upload-", ".mp4").toFile();
    try {
        file.transferTo(tempFile);
    } catch (Exception e) {
        // The caller only receives the handle when this method returns, so a
        // failed transfer has to clean up here or the temp file is leaked.
        try {
            Files.deleteIfExists(tempFile.toPath());
        } catch (Exception cleanupFailure) {
            e.addSuppressed(cleanupFailure);
        }
        throw e;
    }
    return tempFile;
}
/**
 * Extracts {@code TOTAL_FRAMES} frames spread evenly over the whole video and
 * crops each one to {@code WIDTH} x {@code HEIGHT}.
 *
 * @param videoFile video file on local disk
 * @return the cropped frames in playback order
 * @throws IllegalArgumentException if the video is shorter than {@code DURATION} seconds,
 *         a computed frame index is out of range, or a frame cannot be decoded
 * @throws Exception if the grabber fails to open or read the file
 */
private List<BufferedImage> extractFramesFromVideo(File videoFile) throws Exception {
    List<BufferedImage> frames = new ArrayList<>();
    try (FFmpegFrameGrabber grabber = FFmpegFrameGrabber.createDefault(videoFile)) {
        grabber.start();

        long totalFramesInVideo = grabber.getLengthInFrames();
        int fps = (int) Math.round(grabber.getFrameRate());
        if (fps <= 0) {
            // Fall back to a nominal rate when the container reports none.
            fps = 30;
        }

        double durationSeconds = (double) totalFramesInVideo / fps;
        if (durationSeconds < DURATION) {
            throw new IllegalArgumentException("视频太短,至少需要 " + DURATION + " 秒");
        }

        // Fractional step so the sampled frames cover the full length evenly.
        double frameInterval = (double) totalFramesInVideo / TOTAL_FRAMES;
        for (int i = 0; i < TOTAL_FRAMES; i++) {
            int targetFrameNumber = (int) Math.round(i * frameInterval);
            if (targetFrameNumber >= totalFramesInVideo) {
                throw new IllegalArgumentException("目标帧超出范围: " + targetFrameNumber + " ");
            }

            grabber.setFrameNumber(targetFrameNumber);
            // grabImage() skips audio packets; grab() may hand back a frame
            // without image data when the file has an audio track.
            Frame frame = grabber.grabImage();
            if (frame == null || frame.image == null) {
                throw new IllegalArgumentException("无法获取第 " + targetFrameNumber + " 帧");
            }
            BufferedImage bufferedImage = Java2DFrameUtils.toBufferedImage(frame);
            frames.add(cropImage(bufferedImage, WIDTH, HEIGHT));
        }
        grabber.stop();
    }
    return frames;
}
/**
 * Writes the extracted frames as PNG files into the {@code output_frames}
 * directory (relative to the working directory). Debugging aid only.
 *
 * @param frames frames to write
 * @param prefix file-name prefix; files are named {@code <prefix>_<index>.png}
 * @throws IllegalArgumentException if any frame cannot be written
 */
private void saveFramesToLocal(List<BufferedImage> frames, String prefix) {
    File outputDir = new File("output_frames");
    if (!outputDir.exists()) {
        outputDir.mkdirs();
    }

    int index = 0;
    for (BufferedImage frame : frames) {
        try {
            File outputImage = new File(outputDir, prefix + "_" + (index++) + ".png");
            ImageIO.write(frame, "png", outputImage);
            System.out.println("保存帧图片成功: " + outputImage.getAbsolutePath());
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace survives.
            throw new IllegalArgumentException("保存帧图片失败 " + e, e);
        }
    }
}
/**
 * Concatenates the RGB565 encoding of every frame into a single byte array,
 * preserving frame order.
 *
 * @param frames frames to encode
 * @return all frames' RGB565 bytes back to back
 */
private byte[] convertFramesToRGB565(List<BufferedImage> frames) throws Exception {
    ByteArrayOutputStream packed = new ByteArrayOutputStream();
    for (BufferedImage frame : frames) {
        byte[] encoded = convertToRGB565(frame);
        packed.write(encoded, 0, encoded.length);
    }
    return packed.toByteArray();
}
/**
 * Tells whether the file name carries one of the accepted video extensions
 * ({@code .mp4}, {@code .avi}, {@code .mov}, {@code .mkv}), ignoring case.
 *
 * @param filename original file name; may be {@code null}
 * @return {@code true} only for a non-null name with an accepted extension
 */
private boolean isVideo(String filename) {
    if (filename == null) {
        return false;
    }
    int dot = filename.lastIndexOf('.');
    if (dot < 0) {
        // No extension at all; substring(-1) would throw.
        return false;
    }
    // Locale.ROOT keeps ".AVI" -> ".avi" even under locales such as Turkish.
    String ext = filename.substring(dot).toLowerCase(java.util.Locale.ROOT);
    return Arrays.asList(".mp4", ".avi", ".mov", ".mkv").contains(ext);
}
/**
 * Returns the top-left region of the image, limited to the target size.
 * No scaling is done: a source smaller than the target keeps its own
 * (smaller) dimension on that axis.
 *
 * @param img source image
 * @param targetWidth maximum width of the result
 * @param targetHeight maximum height of the result
 * @return sub-image anchored at (0, 0)
 */
private BufferedImage cropImage(BufferedImage img, int targetWidth, int targetHeight) {
    int croppedWidth = img.getWidth();
    if (croppedWidth > targetWidth) {
        croppedWidth = targetWidth;
    }
    int croppedHeight = img.getHeight();
    if (croppedHeight > targetHeight) {
        croppedHeight = targetHeight;
    }
    return img.getSubimage(0, 0, croppedWidth, croppedHeight);
}
/**
 * Encodes an image as RGB565: two bytes per pixel, high byte first, pixels
 * in row-major order.
 *
 * @param image image to encode
 * @return {@code width * height * 2} bytes
 */
private byte[] convertToRGB565(BufferedImage image) {
    final int cols = image.getWidth();
    final int rows = image.getHeight();
    final byte[] packed = new byte[cols * rows * 2];

    int offset = 0;
    for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
            final int argb = image.getRGB(col, row);
            // Keep the top 5/6/5 bits of the 8-bit red/green/blue channels.
            final int red5 = (argb >>> 19) & 0x1F;
            final int green6 = (argb >>> 10) & 0x3F;
            final int blue5 = (argb >>> 3) & 0x1F;
            final int rgb565 = (red5 << 11) | (green6 << 5) | blue5;
            packed[offset] = (byte) (rgb565 >>> 8);
            packed[offset + 1] = (byte) rgb565;
            offset += 2;
        }
    }
    return packed;
}
/**
* 删除临时文件
*/
private void deleteTempFile(File file) {
if (file != null && file.exists()) {
if (!file.delete()) {
throw new IllegalArgumentException("无法删除临时文件: " + file.getAbsolutePath());
}
}
@GetMapping("/audioTTS")
public R<List<String>> uploadAudioTTS(@RequestParam String text) throws IOException {
return R.ok(audioProcessService.generateStandardPcmData(text));
}
}

View File

@ -0,0 +1,174 @@
package com.fuyuanshen.app.service;
import com.fuyuanshen.equipment.utils.AlibabaTTSUtil;
import com.fuyuanshen.equipment.utils.AudioProcessUtil;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
/**
* 音频处理服务
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class AudioProcessService {
// 配置参数
private static final int MAX_AUDIO_SIZE = 5 * 1024 * 1024; // 5MB
private static final List<String> SUPPORTED_FORMATS = Arrays.asList(
".wav", ".mp3", ".aac", ".flac", ".m4a", ".ogg"
);
private final AudioProcessUtil audioProcessUtil;
private final AlibabaTTSUtil alibabaTTSUtil;
/**
 * Validates an uploaded audio file, converts it with
 * {@code audioProcessUtil.convertToStandardWav} and returns the converted
 * bytes as a list of hex strings.
 *
 * @param file uploaded audio file
 * @return hex representation produced by {@code audioProcessUtil.bytesToHexList}
 * @throws IllegalArgumentException if the upload fails validation
 * @throws RuntimeException if saving or converting the upload fails
 */
public List<String> processAudio(MultipartFile file) {
    validateAudioFile(file);

    File uploadCopy = null;
    try {
        uploadCopy = createTempAudioFile(file);

        final byte[] wavBytes = audioProcessUtil.convertToStandardWav(uploadCopy);
        log.info("音频处理成功,输出数据大小: {} bytes", wavBytes.length);

        final List<String> hexBytes = audioProcessUtil.bytesToHexList(wavBytes);
        log.info("音频处理完成,原始数据大小: {} bytes, 16进制数据长度: {}",
                wavBytes.length, hexBytes.size());
        return hexBytes;
    } catch (Exception cause) {
        log.error("音频处理失败", cause);
        throw new RuntimeException("音频处理失败", cause);
    } finally {
        // Runs on success and failure; tolerates a null handle.
        deleteTempFile(uploadCopy);
    }
}
/**
 * Synthesizes speech for the given text and returns the audio as a list of
 * hex strings.
 *
 * <p>The raw PCM returned by {@code alibabaTTSUtil.generateStandardPcmData}
 * is passed through {@code audioProcessUtil.rawPcmToStandardWav} and then
 * hex-encoded with {@code audioProcessUtil.bytesToHexList}.
 * NOTE(review): the previous Javadoc described mono / 16 kHz / 16-bit output
 * with a 44-byte WAV header and a 2 MB size limit; none of that is enforced
 * in this method — confirm against the two utility classes.
 *
 * @param text text to synthesize; must be non-blank and at most 100 characters
 * @return hex representation of the converted audio bytes
 * @throws IOException if processing fails
 * @throws IllegalArgumentException if {@code text} is blank or longer than 100 characters
 */
public List<String> generateStandardPcmData(String text) throws IOException {
    if (text == null || text.trim().isEmpty()) {
        throw new IllegalArgumentException("文本内容不能为空");
    }
    if (text.length() > 100) {
        throw new IllegalArgumentException("文本长度超过限制最大100字符");
    }
    log.info("输入文本长度: {}", text.length());

    // No temporary files are created on this path, so there is nothing to clean up.
    byte[] rawPcmData = alibabaTTSUtil.generateStandardPcmData(text);
    byte[] pcmData = audioProcessUtil.rawPcmToStandardWav(rawPcmData);

    List<String> hexList = audioProcessUtil.bytesToHexList(pcmData);
    log.info("generateStandardPcmData音频处理完成原始数据大小: {} bytes, 16进制数据长度: {}",
            pcmData.length, hexList.size());
    return hexList;
}
/**
 * Rejects uploads that are missing or empty, do not have a supported audio
 * extension, or exceed {@code MAX_AUDIO_SIZE}. Checks run in that order.
 *
 * @param file uploaded file to check
 * @throws IllegalArgumentException describing the first rule that failed
 */
private void validateAudioFile(MultipartFile file) {
    String rejection = null;
    if (file == null || file.isEmpty()) {
        rejection = "上传文件不能为空";
    } else if (!isAudioFile(file.getOriginalFilename())) {
        rejection = "只允许上传音频文件";
    } else if (file.getSize() > MAX_AUDIO_SIZE) {
        rejection = "音频大小不能超过5MB";
    }

    if (rejection != null) {
        throw new IllegalArgumentException(rejection);
    }
}
/**
 * Tells whether the file name ends in one of {@code SUPPORTED_FORMATS},
 * ignoring case.
 *
 * @param filename original file name; may be {@code null}
 * @return {@code false} for a null name or a name without a dot
 */
private boolean isAudioFile(String filename) {
    final int dotIndex = (filename == null) ? -1 : filename.lastIndexOf('.');
    return dotIndex != -1
            && SUPPORTED_FORMATS.contains(filename.substring(dotIndex).toLowerCase());
}
/**
 * Copies the uploaded audio into a new temporary file that keeps the
 * original file extension (or no extension if the name has none).
 *
 * @param file uploaded audio file
 * @return the temporary file holding the upload; the caller must delete it
 * @throws IOException if the temporary file cannot be created or written
 */
private File createTempAudioFile(MultipartFile file) throws IOException {
    String originalFilename = file.getOriginalFilename();
    String extension = "";
    if (originalFilename != null) {
        int dot = originalFilename.lastIndexOf('.');
        if (dot >= 0) {
            extension = originalFilename.substring(dot);
        }
    }

    File tempFile = File.createTempFile("audio-", extension);
    try {
        file.transferTo(tempFile);
    } catch (IOException | RuntimeException e) {
        // The caller only receives the handle when this method returns, so a
        // failed transfer has to clean up here or the temp file is leaked.
        deleteTempFile(tempFile);
        throw e;
    }
    log.debug("创建临时音频文件: {}", tempFile.getAbsolutePath());
    return tempFile;
}
/**
 * Deletes a temporary file on a best-effort basis: a failed delete is logged
 * as a warning, never thrown.
 *
 * @param file file to remove; {@code null} or a non-existent file is ignored
 */
private void deleteTempFile(File file) {
    if (file == null || !file.exists()) {
        return;
    }

    final boolean removed = file.delete();
    if (removed) {
        log.debug("删除临时文件成功: {}", file.getAbsolutePath());
        return;
    }
    log.warn("无法删除临时文件: {}", file.getAbsolutePath());
}
}

View File

@ -0,0 +1,84 @@
package com.fuyuanshen.app.service;
import com.fuyuanshen.web.util.VideoProcessUtil;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.util.Arrays;
import java.util.List;
/**
* 视频处理服务
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class VideoProcessService {
// 配置参数
private static final int MAX_VIDEO_SIZE = 10 * 1024 * 1024;
private static final List<String> SUPPORTED_FORMATS = Arrays.asList(".mp4", ".avi", ".mov", ".mkv");
private static final int FRAME_RATE = 15;
private static final int DURATION = 2;
private static final int WIDTH = 160;
private static final int HEIGHT = 80;
private final VideoProcessUtil videoProcessUtil;
/**
 * Validates an uploaded video, hands it to {@code videoProcessUtil} for
 * frame extraction and returns the resulting hex strings.
 *
 * @param file uploaded video file
 * @return hex data produced by {@code videoProcessUtil.processVideoToHex}
 * @throws IllegalArgumentException if the upload fails validation
 * @throws RuntimeException if saving or processing the upload fails
 */
public List<String> processVideo(MultipartFile file) {
    validateVideoFile(file);

    File uploadCopy = null;
    try {
        uploadCopy = videoProcessUtil.createTempVideoFile(file);

        final List<String> hexBytes = videoProcessUtil.processVideoToHex(
                uploadCopy, FRAME_RATE, DURATION, WIDTH, HEIGHT);
        log.info("视频处理成功生成Hex数据长度: {}", hexBytes.size());
        return hexBytes;
    } catch (Exception cause) {
        log.error("视频处理失败", cause);
        throw new RuntimeException("视频处理失败", cause);
    } finally {
        // Runs on success and failure; the util tolerates a null handle.
        videoProcessUtil.deleteTempFile(uploadCopy);
    }
}
/**
 * Rejects uploads that are missing or empty, do not have a supported video
 * extension, or exceed {@code MAX_VIDEO_SIZE}. Checks run in that order.
 *
 * @param file uploaded file to check
 * @throws IllegalArgumentException describing the first rule that failed
 */
private void validateVideoFile(MultipartFile file) {
    String rejection = null;
    if (file == null || file.isEmpty()) {
        rejection = "上传文件不能为空";
    } else if (!isVideoFile(file.getOriginalFilename())) {
        rejection = "只允许上传视频文件";
    } else if (file.getSize() > MAX_VIDEO_SIZE) {
        rejection = "视频大小不能超过10MB";
    }

    if (rejection != null) {
        throw new IllegalArgumentException(rejection);
    }
}
/**
 * Tells whether the file name ends in one of {@code SUPPORTED_FORMATS},
 * ignoring case.
 *
 * @param filename original file name; may be {@code null}
 * @return {@code false} for a null name or a name without a dot
 */
private boolean isVideoFile(String filename) {
    if (filename == null) {
        return false;
    }
    int dot = filename.lastIndexOf('.');
    if (dot == -1) {
        return false;
    }
    // Locale.ROOT keeps ".AVI" -> ".avi" even when the JVM default locale
    // (e.g. Turkish) would lower-case 'I' to a dotless 'ı'.
    String ext = filename.substring(dot).toLowerCase(java.util.Locale.ROOT);
    return SUPPORTED_FORMATS.contains(ext);
}
}

View File

@ -0,0 +1,194 @@
package com.fuyuanshen.web.util;
import lombok.extern.slf4j.Slf4j;
import org.bytedeco.javacv.FFmpegFrameGrabber;
import org.bytedeco.javacv.Frame;
import org.bytedeco.javacv.Java2DFrameUtils;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
/**
* 视频处理工具类
*/
@Slf4j
@Component
public class VideoProcessUtil {
private static final char[] HEX_ARRAY = "0123456789ABCDEF".toCharArray();
/**
 * Copies the uploaded video into a newly created temporary {@code .mp4} file.
 *
 * @param file uploaded multipart video
 * @return the temporary file holding the upload; the caller must delete it
 *         (see {@link #deleteTempFile(File)})
 * @throws Exception if the temporary file cannot be created or written
 */
public File createTempVideoFile(MultipartFile file) throws Exception {
    File tempFile = Files.createTempFile("upload-", ".mp4").toFile();
    try {
        file.transferTo(tempFile);
    } catch (Exception e) {
        // The caller only receives the handle when this method returns, so a
        // failed transfer has to clean up here or the temp file is leaked.
        deleteTempFile(tempFile);
        throw e;
    }
    log.debug("创建临时视频文件: {}", tempFile.getAbsolutePath());
    return tempFile;
}
/**
 * Runs the full pipeline on a video file: sample frames, encode them as
 * RGB565, and render every byte as a two-character hex string.
 *
 * @param videoFile video file on local disk
 * @param frameRate frames to sample per second of requested duration
 * @param duration requested duration in seconds
 * @param width crop width in pixels
 * @param height crop height in pixels
 * @return one hex string per encoded byte
 * @throws Exception if the video cannot be read or does not meet the requirements
 */
public List<String> processVideoToHex(File videoFile, int frameRate, int duration, int width, int height) throws Exception {
    final List<BufferedImage> sampledFrames =
            extractFramesFromVideo(videoFile, frameRate, duration, width, height);
    return bytesToHexList(convertFramesToRGB565(sampledFrames));
}
/**
 * Samples {@code frameRate * duration} frames spread evenly over the whole
 * video and crops each one to {@code width} x {@code height}.
 *
 * @param videoFile video file on local disk
 * @param frameRate frames to sample per second of requested duration
 * @param duration minimum video length in seconds
 * @param width crop width in pixels
 * @param height crop height in pixels
 * @return the cropped frames in playback order
 * @throws IllegalArgumentException if the video is shorter than {@code duration} seconds,
 *         a computed frame index is out of range, or a frame cannot be decoded
 * @throws Exception if the grabber fails to open or read the file
 */
private List<BufferedImage> extractFramesFromVideo(File videoFile, int frameRate, int duration, int width, int height) throws Exception {
    List<BufferedImage> frames = new ArrayList<>();
    int totalFramesToExtract = frameRate * duration;

    try (FFmpegFrameGrabber grabber = FFmpegFrameGrabber.createDefault(videoFile)) {
        grabber.start();

        long totalFramesInVideo = grabber.getLengthInFrames();
        int fps = (int) Math.round(grabber.getFrameRate());
        if (fps <= 0) {
            // Fall back to a nominal rate when the container reports none.
            fps = 30;
        }

        double durationSeconds = (double) totalFramesInVideo / fps;
        if (durationSeconds < duration) {
            throw new IllegalArgumentException("视频太短,至少需要 " + duration + " 秒");
        }

        // Fractional step so the sampled frames cover the full length evenly.
        double frameInterval = (double) totalFramesInVideo / totalFramesToExtract;
        for (int i = 0; i < totalFramesToExtract; i++) {
            int targetFrameNumber = (int) Math.round(i * frameInterval);
            if (targetFrameNumber >= totalFramesInVideo) {
                throw new IllegalArgumentException("目标帧超出范围: " + targetFrameNumber);
            }

            grabber.setFrameNumber(targetFrameNumber);
            // grabImage() skips audio packets; grab() may hand back a frame
            // without image data when the file has an audio track.
            Frame frame = grabber.grabImage();
            if (frame == null || frame.image == null) {
                throw new IllegalArgumentException("无法获取第 " + targetFrameNumber + " 帧");
            }
            BufferedImage bufferedImage = Java2DFrameUtils.toBufferedImage(frame);
            frames.add(cropImage(bufferedImage, width, height));
        }
        grabber.stop();
    }

    log.debug("从视频中提取了 {} 帧", frames.size());
    return frames;
}
/**
 * Concatenates the RGB565 encoding of every frame into a single byte array,
 * preserving frame order, and logs the total size.
 *
 * @param frames frames to encode
 * @return all frames' RGB565 bytes back to back
 */
private byte[] convertFramesToRGB565(List<BufferedImage> frames) throws Exception {
    final ByteArrayOutputStream packed = new ByteArrayOutputStream();
    for (BufferedImage frame : frames) {
        final byte[] encoded = convertToRGB565(frame);
        packed.write(encoded, 0, encoded.length);
    }

    final byte[] allBytes = packed.toByteArray();
    log.debug("转换RGB565数据完成总字节数: {}", allBytes.length);
    return allBytes;
}
/**
 * Renders each byte as a two-character upper-case hex string.
 *
 * @param bytes bytes to render
 * @return one string per input byte, in input order
 */
private List<String> bytesToHexList(byte[] bytes) {
    // The final size is known up front; presizing avoids repeated growth of
    // the backing array for the large buffers produced by frame encoding.
    List<String> hexList = new ArrayList<>(bytes.length);
    for (byte b : bytes) {
        int value = b & 0xFF;
        char high = HEX_ARRAY[value >>> 4];
        char low = HEX_ARRAY[value & 0x0F];
        hexList.add(String.valueOf(high) + low);
    }
    return hexList;
}
/**
 * Deletes a temporary file on a best-effort basis: a failed delete is logged
 * as a warning, never thrown.
 *
 * @param file file to remove; {@code null} or a non-existent file is ignored
 */
public void deleteTempFile(File file) {
    final boolean present = file != null && file.exists();
    if (!present) {
        return;
    }

    if (!file.delete()) {
        log.warn("无法删除临时文件: {}", file.getAbsolutePath());
    } else {
        log.debug("删除临时文件成功: {}", file.getAbsolutePath());
    }
}
/**
 * Returns the top-left region of the image, limited to the target size.
 * No scaling is done: a source smaller than the target keeps its own
 * (smaller) dimension on that axis.
 *
 * @param img source image
 * @param targetWidth maximum width of the result
 * @param targetHeight maximum height of the result
 * @return sub-image anchored at (0, 0)
 */
private BufferedImage cropImage(BufferedImage img, int targetWidth, int targetHeight) {
    final int sourceWidth = img.getWidth();
    final int sourceHeight = img.getHeight();
    final int croppedWidth = sourceWidth <= targetWidth ? sourceWidth : targetWidth;
    final int croppedHeight = sourceHeight <= targetHeight ? sourceHeight : targetHeight;
    return img.getSubimage(0, 0, croppedWidth, croppedHeight);
}
/**
 * Encodes an image as RGB565: two bytes per pixel, high byte first, pixels
 * in row-major order.
 *
 * @param image image to encode
 * @return {@code width * height * 2} bytes
 */
private byte[] convertToRGB565(BufferedImage image) {
    final int cols = image.getWidth();
    final int rows = image.getHeight();
    final byte[] packed = new byte[cols * rows * 2];

    for (int row = 0, offset = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++, offset += 2) {
            final int argb = image.getRGB(col, row);
            // Keep the top 5/6/5 bits of the 8-bit red/green/blue channels.
            final int red5 = (argb >>> 19) & 0x1F;
            final int green6 = (argb >>> 10) & 0x3F;
            final int blue5 = (argb >>> 3) & 0x1F;
            final int rgb565 = (red5 << 11) | (green6 << 5) | blue5;
            packed[offset] = (byte) (rgb565 >>> 8);
            packed[offset + 1] = (byte) rgb565;
        }
    }
    return packed;
}
/**
 * Writes the frames as PNG files into the {@code output_frames} directory
 * (relative to the working directory). Debugging aid: a frame that cannot be
 * written is logged and skipped, and the remaining frames are still attempted.
 *
 * @param frames frames to write
 * @param prefix file-name prefix; files are named {@code <prefix>_<index>.png}
 */
public void saveFramesToLocal(List<BufferedImage> frames, String prefix) {
    final File targetDir = new File("output_frames");
    if (!targetDir.exists()) {
        targetDir.mkdirs();
    }

    int sequence = 0;
    for (BufferedImage frame : frames) {
        // The sequence number advances even when the write below fails.
        final File pngFile = new File(targetDir, prefix + "_" + sequence + ".png");
        sequence++;
        try {
            ImageIO.write(frame, "png", pngFile);
            log.debug("保存帧图片成功: {}", pngFile.getAbsolutePath());
        } catch (Exception e) {
            log.error("保存帧图片失败", e);
        }
    }
}
}

View File

@ -287,6 +287,12 @@ mqtt:
pubTopic: B/#
pubClientId: fys_pubClient
# TTS voice-interaction settings (Alibaba Cloud).
# Credentials must not live in version control: they are read from the
# environment at startup. Any key pair that was ever committed in this file
# should be treated as leaked and rotated.
alibaba:
  tts:
    appKey: ${ALIBABA_TTS_APP_KEY}
    akId: ${ALIBABA_TTS_AK_ID}
    akSecret: ${ALIBABA_TTS_AK_SECRET}
# 文件存储路径
file: