当前位置:   article > 正文

JAVA变声器_java 变声代码

java 变声代码

需求

最近接到一个需求,为了保护客户隐私,需要对客户的录音文件进行变声处理。于是搞了一个变声器的公共组件以便于大家使用。

工作原理

通过 Java 代码改变音频的音调、速度、音量等参数来达成变声效果。

效果展示及代码可以参考:

链接:https://pan.baidu.com/s/1tHV4KHGXG4lD11rtB-2LkQ?pwd=jq98 

代码调用参考:
  1. package com.library.knowledge;
  2. import com.library.knowledge.change_sound.SoundUtils;
  3. /**
  4. * @author ll
  5. * @since 2023/7/20 14:27
  6. */
  7. public class ChangeSound {
  8. public static void main(String[] args) {
  9. String filePath = "C:\\Users\\DELL\\Desktop\\history\\变声前.wav";
  10. String newFilePath = "C:\\Users\\DELL\\Desktop\\history\\变声后.wav";
  11. try {
  12. SoundUtils.changeSound(filePath, newFilePath);
  13. } catch (Exception e) {
  14. throw new RuntimeException(e);
  15. }
  16. }
工具类主要的三个方法:
  1. /**
  2. * Voice change - simplified.
  3. *
  4. * @param oldFilePath path of the source audio file
  5. * @param newFilePath path of the file to produce
  6. */
  7. public static void changeSound(String oldFilePath, String newFilePath) ;
  8. /**
  9. * Voice change - standard.
  10. * Returns a byte array.
  11. * Default voice change: pitch, speed and volume use their default values.
  12. *
  13. * @param stream input stream
  14. * @return byte array
  15. */
  16. public static byte[] changeSound(AudioInputStream stream) throws Exception ;
  17. /**
  18. * Custom voice changer - advanced.
  19. *
  20. * @param stream audio input
  21. * @param speed speed
  22. * @param pitch pitch
  23. * @param volume volume
  24. * @return byte array
  25. */
  26. public static byte[] changeSound(AudioInputStream stream, float speed, float pitch, float volume) throws Exception

具体代码如下:

maven:
  1. <dependency>
  2. <groupId>com.google.guava</groupId>
  3. <artifactId>guava</artifactId>
  4. <version>31.1-jre</version>
  5. </dependency>
工具类:
  1. package com.library.knowledge.change_sound;
  2. import com.google.common.io.ByteArrayDataOutput;
  3. import com.google.common.io.ByteStreams;
  4. import com.sun.media.sound.WaveFileWriter;
  5. import javax.sound.sampled.*;
  6. import java.io.ByteArrayInputStream;
  7. import java.io.File;
  8. import java.io.OutputStream;
  9. import java.nio.file.Files;
  10. /**
  11. * @author ll
  12. * @since 2023/7/20 14:53
  13. */
  14. public class SoundUtils {
  15. public static ByteArrayDataOutput out = ByteStreams.newDataOutput();
  16. /**
  17. * 变声方法-简化
  18. *
  19. * @param oldFilePath 旧文件路劲
  20. * @param newFilePath 产生文件路径
  21. */
  22. public static void changeSound(String oldFilePath, String newFilePath) throws Exception {
  23. OutputStream outStream = null;
  24. try {
  25. File input = new File(oldFilePath);
  26. File output = new File(newFilePath);
  27. outStream = Files.newOutputStream(output.toPath());
  28. WaveFileWriter writer = new WaveFileWriter();
  29. AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
  30. byte[] bytes = SoundUtils.changeSound(AudioSystem.getAudioInputStream(input));
  31. ByteArrayInputStream bi = new ByteArrayInputStream(bytes);
  32. AudioInputStream in = new AudioInputStream(bi, audioFormat, bytes.length);
  33. writer.write(in, AudioFileFormat.Type.WAVE, outStream);
  34. outStream.close();
  35. } finally {
  36. if (outStream != null) {
  37. outStream.close();
  38. }
  39. }
  40. }
  41. /**
  42. * 变声方法-普通
  43. * 返回 byte数组
  44. * 默认变声方法,音调、速度、音量采用默认值
  45. *
  46. * @param stream 输入流
  47. * @return byte数组
  48. */
  49. public static byte[] changeSound(AudioInputStream stream) throws Exception {
  50. //速度
  51. float speed = 1.0f;
  52. //音调
  53. float pitch = 1.4f;
  54. //音量
  55. float volume = 0.5f;
  56. return changeSound(stream, speed, pitch, volume);
  57. }
  58. /**
  59. * 自定义变声器-进阶
  60. *
  61. * @param stream 音频输入
  62. * @param speed 速度
  63. * @param pitch 音调
  64. * @param volume 音量
  65. * @return byte数组
  66. */
  67. public static byte[] changeSound(AudioInputStream stream, float speed, float pitch, float volume) throws Exception {
  68. float rate = 1.0f;
  69. boolean emulateChordPitch = false;
  70. int quality = 0;
  71. AudioFormat format = stream.getFormat();
  72. int sampleRate = (int) format.getSampleRate();
  73. int numChannels = format.getChannels();
  74. SourceDataLine.Info info = new DataLine.Info(SourceDataLine.class, format,
  75. ((int) stream.getFrameLength() * format.getFrameSize()));
  76. SourceDataLine line = (SourceDataLine) AudioSystem.getLine(info);
  77. return runSonic(stream, line, speed, pitch, rate, volume, emulateChordPitch, quality,
  78. sampleRate, numChannels);
  79. }
  80. // Run sonic.
  81. private static byte[] runSonic(
  82. AudioInputStream audioStream,
  83. SourceDataLine line,
  84. float speed,
  85. float pitch,
  86. float rate,
  87. float volume,
  88. boolean emulateChordPitch,
  89. int quality,
  90. int sampleRate,
  91. int numChannels) throws Exception {
  92. Sound sound = new Sound(sampleRate, numChannels);
  93. int bufferSize = line.getBufferSize();
  94. byte[] inBuffer = new byte[bufferSize];
  95. byte[] outBuffer = new byte[bufferSize];
  96. int numRead, numWritten;
  97. sound.setSpeed(speed);
  98. sound.setPitch(pitch);
  99. sound.setRate(rate);
  100. sound.setVolume(volume);
  101. sound.setChordPitch(emulateChordPitch);
  102. sound.setQuality(quality);
  103. do {
  104. numRead = audioStream.read(inBuffer, 0, bufferSize);
  105. if (numRead <= 0) {
  106. sound.flushStream();
  107. } else {
  108. sound.writeBytesToStream(inBuffer, numRead);
  109. }
  110. do {
  111. numWritten = sound.readBytesFromStream(outBuffer, bufferSize);
  112. if (numWritten > 0) {
  113. line.write(outBuffer, 0, numWritten);
  114. byte[] target = new byte[numWritten];
  115. System.arraycopy(outBuffer, 0, target, 0, numWritten);
  116. out.write(target);
  117. }
  118. } while (numWritten > 0);
  119. } while (numRead > 0);
  120. return out.toByteArray();
  121. }
  122. }
实现类:
  1. package com.library.knowledge.change_sound;
  2. /**
  3. * @author ll
  4. * @since 2023/7/20 14:43
  5. */
  6. public class Sound {
  7. private static final int SONIC_MIN_PITCH = 65; // sampleRate is divided by this to get maxPeriod (presumably a frequency in Hz - confirm)
  8. private static final int SONIC_MAX_PITCH = 400; // sampleRate is divided by this to get minPeriod (presumably a frequency in Hz - confirm)
  9. // This is used to down-sample some inputs to improve speed: findPitchPeriod uses sampleRate / SONIC_AMDF_FREQ as its skip factor
  10. private static final int SONIC_AMDF_FREQ = 4000;
  11. // The number of points to use in the sinc FIR filter for resampling.
  12. private static final int SINC_FILTER_POINTS = 12;
  13. private static final int SINC_TABLE_SIZE = 601; // presumably the number of entries in sincTable - verify against the table
  14. // Lookup table for windowed sinc function of SINC_FILTER_POINTS points.
  15. private static final short[] sincTable = {
  16. 0, 0, 0, 0, 0, 0, 0, -1, -1, -2, -2, -3, -4, -6, -7, -9, -10, -12, -14,
  17. -17, -19, -21, -24, -26, -29, -32, -34, -37, -40, -42, -44, -47, -48, -50,
  18. -51, -52, -53, -53, -53, -52, -50, -48, -46, -43, -39, -34, -29, -22, -16,
  19. -8, 0, 9, 19, 29, 41, 53, 65, 79, 92, 107, 121, 137, 152, 168, 184, 200,
  20. 215, 231, 247, 262, 276, 291, 304, 317, 328, 339, 348, 357, 363, 369, 372,
  21. 374, 375, 373, 369, 363, 355, 345, 332, 318, 300, 281, 259, 234, 208, 178,
  22. 147, 113, 77, 39, 0, -41, -85, -130, -177, -225, -274, -324, -375, -426,
  23. -478, -530, -581, -632, -682, -731, -779, -825, -870, -912, -951, -989,
  24. -1023, -1053, -1080, -1104, -1123, -1138, -1149, -1154, -1155, -1151,
  25. -1141, -1125, -1105, -1078, -1046, -1007, -963, -913, -857, -796, -728,
  26. -655, -576, -492, -403, -309, -210, -107, 0, 111, 225, 342, 462, 584, 708,
  27. 833, 958, 1084, 1209, 1333, 1455, 1575, 1693, 1807, 1916, 2022, 2122, 2216,
  28. 2304, 2384, 2457, 2522, 2579, 2625, 2663, 2689, 2706, 2711, 2705, 2687,
  29. 2657, 2614, 2559, 2491, 2411, 2317, 2211, 2092, 1960, 1815, 1658, 1489,
  30. 1308, 1115, 912, 698, 474, 241, 0, -249, -506, -769, -1037, -1310, -1586,
  31. -1864, -2144, -2424, -2703, -2980, -3254, -3523, -3787, -4043, -4291,
  32. -4529, -4757, -4972, -5174, -5360, -5531, -5685, -5819, -5935, -6029,
  33. -6101, -6150, -6175, -6175, -6149, -6096, -6015, -5905, -5767, -5599,
  34. -5401, -5172, -4912, -4621, -4298, -3944, -3558, -3141, -2693, -2214,
  35. -1705, -1166, -597, 0, 625, 1277, 1955, 2658, 3386, 4135, 4906, 5697, 6506,
  36. 7332, 8173, 9027, 9893, 10769, 11654, 12544, 13439, 14335, 15232, 16128,
  37. 17019, 17904, 18782, 19649, 20504, 21345, 22170, 22977, 23763, 24527,
  38. 25268, 25982, 26669, 27327, 27953, 28547, 29107, 29632, 30119, 30569,
  39. 30979, 31349, 31678, 31964, 32208, 32408, 32565, 32677, 32744, 32767,
  40. 32744, 32677, 32565, 32408, 32208, 31964, 31678, 31349, 30979, 30569,
  41. 30119, 29632, 29107, 28547, 27953, 27327, 26669, 25982, 25268, 24527,
  42. 23763, 22977, 22170, 21345, 20504, 19649, 18782, 17904, 17019, 16128,
  43. 15232, 14335, 13439, 12544, 11654, 10769, 9893, 9027, 8173, 7332, 6506,
  44. 5697, 4906, 4135, 3386, 2658, 1955, 1277, 625, 0, -597, -1166, -1705,
  45. -2214, -2693, -3141, -3558, -3944, -4298, -4621, -4912, -5172, -5401,
  46. -5599, -5767, -5905, -6015, -6096, -6149, -6175, -6175, -6150, -6101,
  47. -6029, -5935, -5819, -5685, -5531, -5360, -5174, -4972, -4757, -4529,
  48. -4291, -4043, -3787, -3523, -3254, -2980, -2703, -2424, -2144, -1864,
  49. -1586, -1310, -1037, -769, -506, -249, 0, 241, 474, 698, 912, 1115, 1308,
  50. 1489, 1658, 1815, 1960, 2092, 2211, 2317, 2411, 2491, 2559, 2614, 2657,
  51. 2687, 2705, 2711, 2706, 2689, 2663, 2625, 2579, 2522, 2457, 2384, 2304,
  52. 2216, 2122, 2022, 1916, 1807, 1693, 1575, 1455, 1333, 1209, 1084, 958, 833,
  53. 708, 584, 462, 342, 225, 111, 0, -107, -210, -309, -403, -492, -576, -655,
  54. -728, -796, -857, -913, -963, -1007, -1046, -1078, -1105, -1125, -1141,
  55. -1151, -1155, -1154, -1149, -1138, -1123, -1104, -1080, -1053, -1023, -989,
  56. -951, -912, -870, -825, -779, -731, -682, -632, -581, -530, -478, -426,
  57. -375, -324, -274, -225, -177, -130, -85, -41, 0, 39, 77, 113, 147, 178,
  58. 208, 234, 259, 281, 300, 318, 332, 345, 355, 363, 369, 373, 375, 374, 372,
  59. 369, 363, 357, 348, 339, 328, 317, 304, 291, 276, 262, 247, 231, 215, 200,
  60. 184, 168, 152, 137, 121, 107, 92, 79, 65, 53, 41, 29, 19, 9, 0, -8, -16,
  61. -22, -29, -34, -39, -43, -46, -48, -50, -52, -53, -53, -53, -52, -51, -50,
  62. -48, -47, -44, -42, -40, -37, -34, -32, -29, -26, -24, -21, -19, -17, -14,
  63. -12, -10, -9, -7, -6, -4, -3, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0
  64. };
  65. private short[] inputBuffer; // samples appended by the add*ToInputBuffer methods, channels interleaved
  66. private short[] outputBuffer; // processed samples drained by the read*FromStream methods
  67. private short[] pitchBuffer; // samples parked by moveNewSamplesToPitchBuffer until adjustPitch consumes them
  68. private short[] downSampleBuffer; // scratch buffer written by downSampleInput
  69. private float speed;
  70. private float volume;
  71. private float pitch;
  72. private float rate;
  73. private int oldRatePosition; // reset to 0 by setRate and allocateStreamBuffers
  74. private int newRatePosition; // reset to 0 by setRate and allocateStreamBuffers
  75. private boolean useChordPitch;
  76. private int quality;
  77. private int numChannels;
  78. private int inputBufferSize; // capacity of inputBuffer, counted per channel
  79. private int pitchBufferSize; // capacity of pitchBuffer, counted per channel
  80. private int outputBufferSize; // capacity of outputBuffer, counted per channel
  81. private int numInputSamples; // per-channel samples currently held in inputBuffer
  82. private int numOutputSamples; // per-channel samples currently held in outputBuffer
  83. private int numPitchSamples; // per-channel samples currently held in pitchBuffer
  84. private int minPeriod; // sampleRate / SONIC_MAX_PITCH
  85. private int maxPeriod; // sampleRate / SONIC_MIN_PITCH
  86. private int maxRequired; // 2 * maxPeriod; initial per-channel size of every buffer
  87. private int remainingInputToCopy; // input samples still to be passed through by copyInputToOutput
  88. private int sampleRate;
  89. private int prevPeriod; // period computed by the previous findPitchPeriod call
  90. private int prevMinDiff; // minDiff recorded by the previous findPitchPeriod call
  91. private int minDiff; // best per-sample difference from the last findPitchPeriodInRange call
  92. private int maxDiff; // worst per-sample difference from the last findPitchPeriodInRange call
  /**
   * Creates a sonic stream for the given sample rate and channel count.
   * Speed, pitch, volume and rate start at 1.0, chord-pitch emulation is off and quality is 0.
   *
   * @param sampleRate sample rate of the audio to process
   * @param numChannels number of interleaved channels
   */
  public Sound(int sampleRate, int numChannels) {
    allocateStreamBuffers(sampleRate, numChannels);
    this.rate = 1.0f;
    this.volume = 1.0f;
    this.pitch = 1.0f;
    this.speed = 1.0f;
    this.newRatePosition = 0;
    this.oldRatePosition = 0;
    this.quality = 0;
    this.useChordPitch = false;
  }
  107. // This is a non-stream oriented interface to just change the speed of a sound sample
  108. public static int changeFloatSpeed(
  109. float[] samples,
  110. int numSamples,
  111. float speed,
  112. float pitch,
  113. float rate,
  114. float volume,
  115. boolean useChordPitch,
  116. int sampleRate,
  117. int numChannels) {
  118. Sound stream = new Sound(sampleRate, numChannels);
  119. stream.setSpeed(speed);
  120. stream.setPitch(pitch);
  121. stream.setRate(rate);
  122. stream.setVolume(volume);
  123. stream.setChordPitch(useChordPitch);
  124. stream.writeFloatToStream(samples, numSamples);
  125. stream.flushStream();
  126. numSamples = stream.samplesAvailable();
  127. stream.readFloatFromStream(samples, numSamples);
  128. return numSamples;
  129. }
  130. // Resize the array.
  131. private short[] resize(
  132. short[] oldArray,
  133. int newLength) {
  134. newLength *= numChannels;
  135. short[] newArray = new short[newLength];
  136. int length = oldArray.length <= newLength ? oldArray.length : newLength;
  137. System.arraycopy(oldArray, 0, newArray, 0, length);
  138. return newArray;
  139. }
  140. // Move samples from one array to another. May move samples down within an array, but not up.
  141. private void move(
  142. short[] dest,
  143. int destPos,
  144. short[] source,
  145. int sourcePos,
  146. int numSamples) {
  147. System.arraycopy(source, sourcePos * numChannels, dest, destPos * numChannels, numSamples * numChannels);
  148. }
  149. // Scale the samples by the factor.
  150. private void scaleSamples(
  151. short[] samples,
  152. int position,
  153. int numSamples,
  154. float volume) {
  155. int fixedPointVolume = (int) (volume * 4096.0f);
  156. int start = position * numChannels;
  157. int stop = start + numSamples * numChannels;
  158. for (int xSample = start; xSample < stop; xSample++) {
  159. int value = (samples[xSample] * fixedPointVolume) >> 12;
  160. if (value > 32767) {
  161. value = 32767;
  162. } else if (value < -32767) {
  163. value = -32767;
  164. }
  165. samples[xSample] = (short) value;
  166. }
  167. }
  168. // Get the speed of the stream.
  169. public float getSpeed() {
  170. return speed;
  171. }
  172. // Set the speed of the stream.
  173. public void setSpeed(
  174. float speed) {
  175. this.speed = speed;
  176. }
  177. // Get the pitch of the stream.
  178. public float getPitch() {
  179. return pitch;
  180. }
  181. // Set the pitch of the stream.
  182. public void setPitch(
  183. float pitch) {
  184. this.pitch = pitch;
  185. }
  186. // Get the rate of the stream.
  187. public float getRate() {
  188. return rate;
  189. }
  190. // Set the playback rate of the stream. This scales pitch and speed at the same time.
  191. public void setRate(
  192. float rate) {
  193. this.rate = rate;
  194. this.oldRatePosition = 0;
  195. this.newRatePosition = 0;
  196. }
  197. // Get the vocal chord pitch setting.
  198. public boolean getChordPitch() {
  199. return useChordPitch;
  200. }
  201. // Set the vocal chord mode for pitch computation. Default is off.
  202. public void setChordPitch(
  203. boolean useChordPitch) {
  204. this.useChordPitch = useChordPitch;
  205. }
  206. // Get the quality setting.
  207. public int getQuality() {
  208. return quality;
  209. }
  210. // Set the "quality". Default 0 is virtually as good as 1, but very much faster.
  211. public void setQuality(
  212. int quality) {
  213. this.quality = quality;
  214. }
  215. // Get the scaling factor of the stream.
  216. public float getVolume() {
  217. return volume;
  218. }
  219. // Set the scaling factor of the stream.
  220. public void setVolume(
  221. float volume) {
  222. this.volume = volume;
  223. }
  224. // Allocate stream buffers.
  225. private void allocateStreamBuffers(
  226. int sampleRate,
  227. int numChannels) {
  228. minPeriod = sampleRate / SONIC_MAX_PITCH;
  229. maxPeriod = sampleRate / SONIC_MIN_PITCH;
  230. maxRequired = 2 * maxPeriod;
  231. inputBufferSize = maxRequired;
  232. inputBuffer = new short[maxRequired * numChannels];
  233. outputBufferSize = maxRequired;
  234. outputBuffer = new short[maxRequired * numChannels];
  235. pitchBufferSize = maxRequired;
  236. pitchBuffer = new short[maxRequired * numChannels];
  237. downSampleBuffer = new short[maxRequired];
  238. this.sampleRate = sampleRate;
  239. this.numChannels = numChannels;
  240. oldRatePosition = 0;
  241. newRatePosition = 0;
  242. prevPeriod = 0;
  243. }
  244. // Get the sample rate of the stream.
  245. public int getSampleRate() {
  246. return sampleRate;
  247. }
  248. // Set the sample rate of the stream. This will cause samples buffered in the stream to be lost.
  249. public void setSampleRate(
  250. int sampleRate) {
  251. allocateStreamBuffers(sampleRate, numChannels);
  252. }
  253. // Get the number of channels.
  254. public int getNumChannels() {
  255. return numChannels;
  256. }
  257. // Set the num channels of the stream. This will cause samples buffered in the stream to be lost.
  258. public void setNumChannels(
  259. int numChannels) {
  260. allocateStreamBuffers(sampleRate, numChannels);
  261. }
  262. // Enlarge the output buffer if needed.
  263. private void enlargeOutputBufferIfNeeded(
  264. int numSamples) {
  265. if (numOutputSamples + numSamples > outputBufferSize) {
  266. outputBufferSize += (outputBufferSize >> 1) + numSamples;
  267. outputBuffer = resize(outputBuffer, outputBufferSize);
  268. }
  269. }
  270. // Enlarge the input buffer if needed.
  271. private void enlargeInputBufferIfNeeded(
  272. int numSamples) {
  273. if (numInputSamples + numSamples > inputBufferSize) {
  274. inputBufferSize += (inputBufferSize >> 1) + numSamples;
  275. inputBuffer = resize(inputBuffer, inputBufferSize);
  276. }
  277. }
  278. // Add the input samples to the input buffer.
  279. private void addFloatSamplesToInputBuffer(
  280. float[] samples,
  281. int numSamples) {
  282. if (numSamples == 0) {
  283. return;
  284. }
  285. enlargeInputBufferIfNeeded(numSamples);
  286. int xBuffer = numInputSamples * numChannels;
  287. for (int xSample = 0; xSample < numSamples * numChannels; xSample++) {
  288. inputBuffer[xBuffer++] = (short) (samples[xSample] * 32767.0f);
  289. }
  290. numInputSamples += numSamples;
  291. }
  292. // Add the input samples to the input buffer.
  293. private void addShortSamplesToInputBuffer(
  294. short[] samples,
  295. int numSamples) {
  296. if (numSamples == 0) {
  297. return;
  298. }
  299. enlargeInputBufferIfNeeded(numSamples);
  300. move(inputBuffer, numInputSamples, samples, 0, numSamples);
  301. numInputSamples += numSamples;
  302. }
  303. // Add the input samples to the input buffer.
  304. private void addUnsignedByteSamplesToInputBuffer(
  305. byte[] samples,
  306. int numSamples) {
  307. short sample;
  308. enlargeInputBufferIfNeeded(numSamples);
  309. int xBuffer = numInputSamples * numChannels;
  310. for (int xSample = 0; xSample < numSamples * numChannels; xSample++) {
  311. sample = (short) ((samples[xSample] & 0xff) - 128); // Convert from unsigned to signed
  312. inputBuffer[xBuffer++] = (short) (sample << 8);
  313. }
  314. numInputSamples += numSamples;
  315. }
  316. // Add the input samples to the input buffer. They must be 16-bit little-endian encoded in a byte array.
  317. private void addBytesToInputBuffer(
  318. byte[] inBuffer,
  319. int numBytes) {
  320. int numSamples = numBytes / (2 * numChannels);
  321. short sample;
  322. enlargeInputBufferIfNeeded(numSamples);
  323. int xBuffer = numInputSamples * numChannels;
  324. for (int xByte = 0; xByte + 1 < numBytes; xByte += 2) {
  325. sample = (short) ((inBuffer[xByte] & 0xff) | (inBuffer[xByte + 1] << 8));
  326. inputBuffer[xBuffer++] = sample;
  327. }
  328. numInputSamples += numSamples;
  329. }
  330. // Remove input samples that we have already processed.
  331. private void removeInputSamples(
  332. int position) {
  333. int remainingSamples = numInputSamples - position;
  334. move(inputBuffer, 0, inputBuffer, position, remainingSamples);
  335. numInputSamples = remainingSamples;
  336. }
  337. // Just copy from the array to the output buffer
  338. private void copyToOutput(
  339. short[] samples,
  340. int position,
  341. int numSamples) {
  342. enlargeOutputBufferIfNeeded(numSamples);
  343. move(outputBuffer, numOutputSamples, samples, position, numSamples);
  344. numOutputSamples += numSamples;
  345. }
  346. // Just copy from the input buffer to the output buffer. Return num samples copied.
  347. private int copyInputToOutput(
  348. int position) {
  349. int numSamples = remainingInputToCopy;
  350. if (numSamples > maxRequired) {
  351. numSamples = maxRequired;
  352. }
  353. copyToOutput(inputBuffer, position, numSamples);
  354. remainingInputToCopy -= numSamples;
  355. return numSamples;
  356. }
  357. // Read data out of the stream. Sometimes no data will be available, and zero
  358. // is returned, which is not an error condition.
  359. public int readFloatFromStream(
  360. float[] samples,
  361. int maxSamples) {
  362. int numSamples = numOutputSamples;
  363. int remainingSamples = 0;
  364. if (numSamples == 0) {
  365. return 0;
  366. }
  367. if (numSamples > maxSamples) {
  368. remainingSamples = numSamples - maxSamples;
  369. numSamples = maxSamples;
  370. }
  371. for (int xSample = 0; xSample < numSamples * numChannels; xSample++) {
  372. samples[xSample] = (outputBuffer[xSample]) / 32767.0f;
  373. }
  374. move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
  375. numOutputSamples = remainingSamples;
  376. return numSamples;
  377. }
  378. // Read short data out of the stream. Sometimes no data will be available, and zero
  379. // is returned, which is not an error condition.
  380. public int readShortFromStream(
  381. short[] samples,
  382. int maxSamples) {
  383. int numSamples = numOutputSamples;
  384. int remainingSamples = 0;
  385. if (numSamples == 0) {
  386. return 0;
  387. }
  388. if (numSamples > maxSamples) {
  389. remainingSamples = numSamples - maxSamples;
  390. numSamples = maxSamples;
  391. }
  392. move(samples, 0, outputBuffer, 0, numSamples);
  393. move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
  394. numOutputSamples = remainingSamples;
  395. return numSamples;
  396. }
  397. // Read unsigned byte data out of the stream. Sometimes no data will be available, and zero
  398. // is returned, which is not an error condition.
  399. public int readUnsignedByteFromStream(
  400. byte[] samples,
  401. int maxSamples) {
  402. int numSamples = numOutputSamples;
  403. int remainingSamples = 0;
  404. if (numSamples == 0) {
  405. return 0;
  406. }
  407. if (numSamples > maxSamples) {
  408. remainingSamples = numSamples - maxSamples;
  409. numSamples = maxSamples;
  410. }
  411. for (int xSample = 0; xSample < numSamples * numChannels; xSample++) {
  412. samples[xSample] = (byte) ((outputBuffer[xSample] >> 8) + 128);
  413. }
  414. move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
  415. numOutputSamples = remainingSamples;
  416. return numSamples;
  417. }
  418. // Read unsigned byte data out of the stream. Sometimes no data will be available, and zero
  419. // is returned, which is not an error condition.
  420. public int readBytesFromStream(
  421. byte[] outBuffer,
  422. int maxBytes) {
  423. int maxSamples = maxBytes / (2 * numChannels);
  424. int numSamples = numOutputSamples;
  425. int remainingSamples = 0;
  426. if (numSamples == 0 || maxSamples == 0) {
  427. return 0;
  428. }
  429. if (numSamples > maxSamples) {
  430. remainingSamples = numSamples - maxSamples;
  431. numSamples = maxSamples;
  432. }
  433. for (int xSample = 0; xSample < numSamples * numChannels; xSample++) {
  434. short sample = outputBuffer[xSample];
  435. outBuffer[xSample << 1] = (byte) (sample & 0xff);
  436. outBuffer[(xSample << 1) + 1] = (byte) (sample >> 8);
  437. }
  438. move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
  439. numOutputSamples = remainingSamples;
  440. return 2 * numSamples * numChannels;
  441. }
  442. // Force the sonic stream to generate output using whatever data it currently
  443. // has. No extra delay will be added to the output, but flushing in the middle of
  444. // words could introduce distortion.
  445. public void flushStream() {
  446. int remainingSamples = numInputSamples;
  447. float s = speed / pitch;
  448. float r = rate * pitch;
  449. int expectedOutputSamples = numOutputSamples + (int) ((remainingSamples / s + numPitchSamples) / r + 0.5f);
  450. // Add enough silence to flush both input and pitch buffers.
  451. enlargeInputBufferIfNeeded(remainingSamples + 2 * maxRequired);
  452. for (int xSample = 0; xSample < 2 * maxRequired * numChannels; xSample++) {
  453. inputBuffer[remainingSamples * numChannels + xSample] = 0;
  454. }
  455. numInputSamples += 2 * maxRequired;
  456. writeShortToStream(null, 0);
  457. // Throw away any extra samples we generated due to the silence we added.
  458. if (numOutputSamples > expectedOutputSamples) {
  459. numOutputSamples = expectedOutputSamples;
  460. }
  461. // Empty input and pitch buffers.
  462. numInputSamples = 0;
  463. remainingInputToCopy = 0;
  464. numPitchSamples = 0;
  465. }
  466. // Return the number of samples in the output buffer
  467. public int samplesAvailable() {
  468. return numOutputSamples;
  469. }
  470. // If skip is greater than one, average skip samples together and write them to
  471. // the down-sample buffer. If numChannels is greater than one, mix the channels
  472. // together as we down sample.
  473. private void downSampleInput(
  474. short[] samples,
  475. int position,
  476. int skip) {
  477. int numSamples = maxRequired / skip;
  478. int samplesPerValue = numChannels * skip;
  479. int value;
  480. position *= numChannels;
  481. for (int i = 0; i < numSamples; i++) {
  482. value = 0;
  483. for (int j = 0; j < samplesPerValue; j++) {
  484. value += samples[position + i * samplesPerValue + j];
  485. }
  486. value /= samplesPerValue;
  487. downSampleBuffer[i] = (short) value;
  488. }
  489. }
  490. // Find the best frequency match in the range, and given a sample skip multiple. Every candidate period in [minPeriod, maxPeriod] is scored by the summed absolute difference between a window of that length and the window one period later; the period with the lowest difference per sample is returned.
  491. // For now, just find the pitch of the first channel. Side effect: stores the per-sample difference of the best and worst candidate in this.minDiff and this.maxDiff.
  492. private int findPitchPeriodInRange(
  493. short[] samples,
  494. int position,
  495. int minPeriod,
  496. int maxPeriod) {
  497. int bestPeriod = 0, worstPeriod = 255; // bestPeriod stays 0 only if no candidate is accepted, which would make the division below fail
  498. int minDiff = 1, maxDiff = 0; // local accumulators; they shadow the fields of the same name
  499. position *= numChannels;
  500. for (int period = minPeriod; period <= maxPeriod; period++) {
  501. int diff = 0;
  502. for (int i = 0; i < period; i++) {
  503. short sVal = samples[position + i];
  504. short pVal = samples[position + period + i];
  505. diff += sVal >= pVal ? sVal - pVal : pVal - sVal; // absolute difference
  506. }
  507. /* Note that the highest number of samples we add into diff will be less
  508. than 256, since we skip samples. Thus, diff is a 24 bit number, and
  509. we can safely multiply by numSamples without overflow */
  510. if (diff * bestPeriod < minDiff * period) { // cross-multiplied form of diff/period < minDiff/bestPeriod
  511. minDiff = diff;
  512. bestPeriod = period;
  513. }
  514. if (diff * worstPeriod > maxDiff * period) { // cross-multiplied form of diff/period > maxDiff/worstPeriod
  515. maxDiff = diff;
  516. worstPeriod = period;
  517. }
  518. }
  519. this.minDiff = minDiff / bestPeriod;
  520. this.maxDiff = maxDiff / worstPeriod;
  521. return bestPeriod;
  522. }
  523. // At abrupt ends of voiced words, we can have pitch periods that are better
  524. // approximated by the previous pitch period estimate. Try to detect this case.
  525. private boolean prevPeriodBetter(
  526. int minDiff,
  527. int maxDiff,
  528. boolean preferNewPeriod) {
  529. if (minDiff == 0 || prevPeriod == 0) {
  530. return false;
  531. }
  532. if (preferNewPeriod) {
  533. if (maxDiff > minDiff * 3) {
  534. // Got a reasonable match this period
  535. return false;
  536. }
  537. // Mismatch is not that much greater this period
  538. return minDiff * 2 > prevMinDiff * 3;
  539. } else {
  540. return minDiff > prevMinDiff;
  541. }
  542. }
  543. // Find the pitch period. This is a critical step, and we may have to try
  544. // multiple ways to get a good answer. This version uses AMDF. To improve
  545. // speed, we down sample by the integer factor sampleRate / SONIC_AMDF_FREQ (only when quality is 0), and then
  546. // do it again with a narrower frequency range without down sampling. Returns the previous period when prevPeriodBetter says so; always records the new period and minDiff for the next call.
  547. private int findPitchPeriod(
  548. short[] samples,
  549. int position,
  550. boolean preferNewPeriod) {
  551. int period, retPeriod;
  552. int skip = 1;
  553. if (sampleRate > SONIC_AMDF_FREQ && quality == 0) {
  554. skip = sampleRate / SONIC_AMDF_FREQ;
  555. }
  556. if (numChannels == 1 && skip == 1) { // mono at full resolution: search the samples directly
  557. period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod);
  558. } else {
  559. downSampleInput(samples, position, skip); // coarse pass on the averaged (and channel-mixed) signal
  560. period = findPitchPeriodInRange(downSampleBuffer, 0, minPeriod / skip,
  561. maxPeriod / skip);
  562. if (skip != 1) {
  563. period *= skip; // back to full-resolution units
  564. int minP = period - (skip << 2); // refine within +/- 4 * skip of the coarse estimate
  565. int maxP = period + (skip << 2);
  566. if (minP < minPeriod) {
  567. minP = minPeriod;
  568. }
  569. if (maxP > maxPeriod) {
  570. maxP = maxPeriod;
  571. }
  572. if (numChannels == 1) {
  573. period = findPitchPeriodInRange(samples, position, minP, maxP);
  574. } else {
  575. downSampleInput(samples, position, 1); // skip of 1: only mixes the channels together
  576. period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP);
  577. }
  578. }
  579. }
  580. if (prevPeriodBetter(minDiff, maxDiff, preferNewPeriod)) { // minDiff/maxDiff are the fields set by the last findPitchPeriodInRange call
  581. retPeriod = prevPeriod;
  582. } else {
  583. retPeriod = period;
  584. }
  585. prevMinDiff = minDiff;
  586. prevPeriod = period; // the freshly computed period is remembered even when the previous one is returned
  587. return retPeriod;
  588. }
  589. // Overlap two sound segments, ramp the volume of one down, while ramping the
  590. // other one from zero up, and add them, storing the result at the output.
  591. private void overlapAdd(
  592. int numSamples,
  593. int numChannels,
  594. short[] out,
  595. int outPos,
  596. short[] rampDown,
  597. int rampDownPos,
  598. short[] rampUp,
  599. int rampUpPos) {
  600. for (int i = 0; i < numChannels; i++) {
  601. int o = outPos * numChannels + i;
  602. int u = rampUpPos * numChannels + i;
  603. int d = rampDownPos * numChannels + i;
  604. for (int t = 0; t < numSamples; t++) {
  605. out[o] = (short) ((rampDown[d] * (numSamples - t) + rampUp[u] * t) / numSamples);
  606. o += numChannels;
  607. d += numChannels;
  608. u += numChannels;
  609. }
  610. }
  611. }
  612. // Overlap two sound segments, ramp the volume of one down, while ramping the
  613. // other one from zero up, and add them, storing the result at the output. The ramp-up segment starts `separation` frames after the ramp-down segment, so numSamples + separation frames are written per channel.
  614. private void overlapAddWithSeparation(
  615. int numSamples,
  616. int numChannels,
  617. int separation,
  618. short[] out,
  619. int outPos,
  620. short[] rampDown,
  621. int rampDownPos,
  622. short[] rampUp,
  623. int rampUpPos) {
  624. for (int i = 0; i < numChannels; i++) { // one pass per channel
  625. int o = outPos * numChannels + i;
  626. int u = rampUpPos * numChannels + i;
  627. int d = rampDownPos * numChannels + i;
  628. for (int t = 0; t < numSamples + separation; t++) {
  629. if (t < separation) { // only the ramp-down segment contributes
  630. out[o] = (short) (rampDown[d] * (numSamples - t) / numSamples);
  631. d += numChannels;
  632. } else if (t < numSamples) { // both segments overlap
  633. out[o] = (short) ((rampDown[d] * (numSamples - t) + rampUp[u] * (t - separation)) / numSamples);
  634. d += numChannels;
  635. u += numChannels;
  636. } else { // only the ramp-up segment contributes
  637. out[o] = (short) (rampUp[u] * (t - separation) / numSamples);
  638. u += numChannels;
  639. }
  640. o += numChannels;
  641. }
  642. }
  643. }
  644. // Just move the new samples in the output buffer to the pitch buffer
  645. private void moveNewSamplesToPitchBuffer(
  646. int originalNumOutputSamples) {
  647. int numSamples = numOutputSamples - originalNumOutputSamples;
  648. if (numPitchSamples + numSamples > pitchBufferSize) {
  649. pitchBufferSize += (pitchBufferSize >> 1) + numSamples;
  650. pitchBuffer = resize(pitchBuffer, pitchBufferSize);
  651. }
  652. move(pitchBuffer, numPitchSamples, outputBuffer, originalNumOutputSamples, numSamples);
  653. numOutputSamples = originalNumOutputSamples;
  654. numPitchSamples += numSamples;
  655. }
  656. // Remove processed samples from the pitch buffer.
  657. private void removePitchSamples(
  658. int numSamples) {
  659. if (numSamples == 0) {
  660. return;
  661. }
  662. move(pitchBuffer, 0, pitchBuffer, numSamples, numPitchSamples - numSamples);
  663. numPitchSamples -= numSamples;
  664. }
  665. // Change the pitch. The latency this introduces could be reduced by looking at
  666. // past samples to determine pitch, rather than future.
  667. private void adjustPitch(
  668. int originalNumOutputSamples) {
  669. int period, newPeriod, separation;
  670. int position = 0;
  671. if (numOutputSamples == originalNumOutputSamples) {
  672. return;
  673. }
  674. moveNewSamplesToPitchBuffer(originalNumOutputSamples);
  675. while (numPitchSamples - position >= maxRequired) {
  676. period = findPitchPeriod(pitchBuffer, position, false);
  677. newPeriod = (int) (period / pitch);
  678. enlargeOutputBufferIfNeeded(newPeriod);
  679. if (pitch >= 1.0f) {
  680. overlapAdd(newPeriod, numChannels, outputBuffer, numOutputSamples, pitchBuffer,
  681. position, pitchBuffer, position + period - newPeriod);
  682. } else {
  683. separation = newPeriod - period;
  684. overlapAddWithSeparation(period, numChannels, separation, outputBuffer, numOutputSamples,
  685. pitchBuffer, position, pitchBuffer, position);
  686. }
  687. numOutputSamples += newPeriod;
  688. position += period;
  689. }
  690. removePitchSamples(position);
  691. }
  692. // Aproximate the sinc function times a Hann window from the sinc table.
  693. private int findSincCoefficient(int i, int ratio, int width) {
  694. int lobePoints = (SINC_TABLE_SIZE - 1) / SINC_FILTER_POINTS;
  695. int left = i * lobePoints + (ratio * lobePoints) / width;
  696. int right = left + 1;
  697. int position = i * lobePoints * width + ratio * lobePoints - left * width;
  698. int leftVal = sincTable[left];
  699. int rightVal = sincTable[right];
  700. return ((leftVal * (width - position) + rightVal * position) << 1) / width;
  701. }
  702. // Return 1 if value >= 0, else -1. This represents the sign of value.
  703. private int getSign(int value) {
  704. return value >= 0 ? 1 : -1;
  705. }
  706. // Interpolate the new output sample.
  707. private short interpolate(
  708. short[] in,
  709. int inPos, // Index to first sample which already includes channel offset.
  710. int oldSampleRate,
  711. int newSampleRate) {
  712. // Compute N-point sinc FIR-filter here. Clip rather than overflow.
  713. int i;
  714. int total = 0;
  715. int position = newRatePosition * oldSampleRate;
  716. int leftPosition = oldRatePosition * newSampleRate;
  717. int rightPosition = (oldRatePosition + 1) * newSampleRate;
  718. int ratio = rightPosition - position - 1;
  719. int width = rightPosition - leftPosition;
  720. int weight, value;
  721. int oldSign;
  722. int overflowCount = 0;
  723. for (i = 0; i < SINC_FILTER_POINTS; i++) {
  724. weight = findSincCoefficient(i, ratio, width);
  725. /* printf("%u %f\n", i, weight); */
  726. value = in[inPos + i * numChannels] * weight;
  727. oldSign = getSign(total);
  728. total += value;
  729. if (oldSign != getSign(total) && getSign(value) == oldSign) {
  730. /* We must have overflowed. This can happen with a sinc filter. */
  731. overflowCount += oldSign;
  732. }
  733. }
  734. /* It is better to clip than to wrap if there was a overflow. */
  735. if (overflowCount > 0) {
  736. return Short.MAX_VALUE;
  737. } else if (overflowCount < 0) {
  738. return Short.MIN_VALUE;
  739. }
  740. return (short) (total >> 16);
  741. }
  742. // Change the rate.
  743. private void adjustRate(
  744. float rate,
  745. int originalNumOutputSamples) {
  746. int newSampleRate = (int) (sampleRate / rate);
  747. int oldSampleRate = sampleRate;
  748. int position;
  749. // Set these values to help with the integer math
  750. while (newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) {
  751. newSampleRate >>= 1;
  752. oldSampleRate >>= 1;
  753. }
  754. if (numOutputSamples == originalNumOutputSamples) {
  755. return;
  756. }
  757. moveNewSamplesToPitchBuffer(originalNumOutputSamples);
  758. // Leave at least one pitch sample in the buffer
  759. for (position = 0; position < numPitchSamples - 1; position++) {
  760. while ((oldRatePosition + 1) * newSampleRate > newRatePosition * oldSampleRate) {
  761. enlargeOutputBufferIfNeeded(1);
  762. for (int i = 0; i < numChannels; i++) {
  763. outputBuffer[numOutputSamples * numChannels + i] = interpolate(pitchBuffer,
  764. position * numChannels + i, oldSampleRate, newSampleRate);
  765. }
  766. newRatePosition++;
  767. numOutputSamples++;
  768. }
  769. oldRatePosition++;
  770. if (oldRatePosition == oldSampleRate) {
  771. oldRatePosition = 0;
  772. if (newRatePosition != newSampleRate) {
  773. System.out.print("Assertion failed: newRatePosition != newSampleRate\n");
  774. assert false;
  775. }
  776. newRatePosition = 0;
  777. }
  778. }
  779. removePitchSamples(position);
  780. }
  781. // Skip over a pitch period, and copy period/speed samples to the output
  782. private int skipPitchPeriod(
  783. short[] samples,
  784. int position,
  785. float speed,
  786. int period) {
  787. int newSamples;
  788. if (speed >= 2.0f) {
  789. newSamples = (int) (period / (speed - 1.0f));
  790. } else {
  791. newSamples = period;
  792. remainingInputToCopy = (int) (period * (2.0f - speed) / (speed - 1.0f));
  793. }
  794. enlargeOutputBufferIfNeeded(newSamples);
  795. overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples, samples, position,
  796. samples, position + period);
  797. numOutputSamples += newSamples;
  798. return newSamples;
  799. }
  800. // Insert a pitch period, and determine how much input to copy directly.
  801. private int insertPitchPeriod(
  802. short[] samples,
  803. int position,
  804. float speed,
  805. int period) {
  806. int newSamples;
  807. if (speed < 0.5f) {
  808. newSamples = (int) (period * speed / (1.0f - speed));
  809. } else {
  810. newSamples = period;
  811. remainingInputToCopy = (int) (period * (2.0f * speed - 1.0f) / (1.0f - speed));
  812. }
  813. enlargeOutputBufferIfNeeded(period + newSamples);
  814. move(outputBuffer, numOutputSamples, samples, position, period);
  815. overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples + period, samples,
  816. position + period, samples, position);
  817. numOutputSamples += period + newSamples;
  818. return newSamples;
  819. }
  820. // Resample as many pitch periods as we have buffered on the input. Return 0 if
  821. // we fail to resize an input or output buffer. Also scale the output by the volume.
  822. private void changeSpeed(
  823. float speed) {
  824. int numSamples = numInputSamples;
  825. int position = 0, period, newSamples;
  826. if (numInputSamples < maxRequired) {
  827. return;
  828. }
  829. do {
  830. if (remainingInputToCopy > 0) {
  831. newSamples = copyInputToOutput(position);
  832. position += newSamples;
  833. } else {
  834. period = findPitchPeriod(inputBuffer, position, true);
  835. if (speed > 1.0) {
  836. newSamples = skipPitchPeriod(inputBuffer, position, speed, period);
  837. position += period + newSamples;
  838. } else {
  839. newSamples = insertPitchPeriod(inputBuffer, position, speed, period);
  840. position += newSamples;
  841. }
  842. }
  843. } while (position + maxRequired <= numSamples);
  844. removeInputSamples(position);
  845. }
  846. // Resample as many pitch periods as we have buffered on the input. Scale the output by the volume.
  847. private void processStreamInput() {
  848. int originalNumOutputSamples = numOutputSamples;
  849. float s = speed / pitch;
  850. float r = rate;
  851. if (!useChordPitch) {
  852. r *= pitch;
  853. }
  854. if (s > 1.00001 || s < 0.99999) {
  855. changeSpeed(s);
  856. } else {
  857. copyToOutput(inputBuffer, 0, numInputSamples);
  858. numInputSamples = 0;
  859. }
  860. if (useChordPitch) {
  861. if (pitch != 1.0f) {
  862. adjustPitch(originalNumOutputSamples);
  863. }
  864. } else if (r != 1.0f) {
  865. adjustRate(r, originalNumOutputSamples);
  866. }
  867. if (volume != 1.0f) {
  868. // Adjust output volume.
  869. scaleSamples(outputBuffer, originalNumOutputSamples, numOutputSamples - originalNumOutputSamples,
  870. volume);
  871. }
  872. }
  873. // Write floating point data to the input buffer and process it.
  874. public void writeFloatToStream(
  875. float[] samples,
  876. int numSamples) {
  877. addFloatSamplesToInputBuffer(samples, numSamples);
  878. processStreamInput();
  879. }
  880. // Write the data to the input stream, and process it.
  881. public void writeShortToStream(
  882. short[] samples,
  883. int numSamples) {
  884. addShortSamplesToInputBuffer(samples, numSamples);
  885. processStreamInput();
  886. }
  887. // Simple wrapper around sonicWriteFloatToStream that does the unsigned byte to short
  888. // conversion for you.
  889. public void writeUnsignedByteToStream(
  890. byte[] samples,
  891. int numSamples) {
  892. addUnsignedByteSamplesToInputBuffer(samples, numSamples);
  893. processStreamInput();
  894. }
  895. // Simple wrapper around sonicWriteBytesToStream that does the byte to 16-bit LE conversion.
  896. public void writeBytesToStream(
  897. byte[] inBuffer,
  898. int numBytes) {
  899. addBytesToInputBuffer(inBuffer, numBytes);
  900. processStreamInput();
  901. }
  902. /* This is a non-stream oriented interface to just change the speed of a sound sample */
  903. public int sonicChangeShortSpeed(
  904. short[] samples,
  905. int numSamples,
  906. float speed,
  907. float pitch,
  908. float rate,
  909. float volume,
  910. boolean useChordPitch,
  911. int sampleRate,
  912. int numChannels) {
  913. Sound stream = new Sound(sampleRate, numChannels);
  914. stream.setSpeed(speed);
  915. stream.setPitch(pitch);
  916. stream.setRate(rate);
  917. stream.setVolume(volume);
  918. stream.setChordPitch(useChordPitch);
  919. stream.writeShortToStream(samples, numSamples);
  920. stream.flushStream();
  921. numSamples = stream.samplesAvailable();
  922. stream.readShortFromStream(samples, numSamples);
  923. return numSamples;
  924. }
  925. }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/354875
推荐阅读
相关标签
  

闽ICP备14008679号