using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using System.Text;
using System.Threading.Tasks;

namespace CrowVol
{
    /// <summary>
    /// Computes mel-frequency cepstral coefficients (MFCC) for a mono signal.
    /// The pipeline implemented by <see cref="Compute"/> is: split into 50%-overlapping
    /// frames, Hamming window + DFT, triangular mel filter bank, log, DCT.
    /// </summary>
    class MFCCHelp
    {
        private readonly int windowSize;
        private readonly int sampleRate;
        private readonly int countCoef;

        // TODO: decide whether noiseGate/normWav should run inside Compute, and whether
        // the window/FFT size should be derived from the sample rate.

        /// <summary>
        /// Creates a helper with the defaults: 10 coefficients, 16000 Hz, 256-sample window.
        /// </summary>
        public MFCCHelp() : this(10, 16000, 256)
        {
        }

        /// <summary>
        /// Creates a helper with explicit analysis parameters.
        /// </summary>
        /// <param name="countCoef">Number of mel filters, which is also the number of coefficients per frame (at least 1).</param>
        /// <param name="sampleRate">Sample rate of the input signal in Hz (greater than 0).</param>
        /// <param name="windowSize">Frame length in samples, also used as the DFT size (at least 2).</param>
        /// <exception cref="ArgumentOutOfRangeException">An argument is outside its valid range.</exception>
        public MFCCHelp(int countCoef, int sampleRate, int windowSize)
        {
            if (countCoef < 1)
            {
                throw new ArgumentOutOfRangeException("countCoef", "At least one coefficient is required.");
            }
            if (sampleRate <= 0)
            {
                throw new ArgumentOutOfRangeException("sampleRate", "Sample rate must be positive.");
            }
            if (windowSize < 2)
            {
                throw new ArgumentOutOfRangeException("windowSize", "Window must hold at least two samples.");
            }

            this.countCoef = countCoef;
            this.sampleRate = sampleRate;
            this.windowSize = windowSize;
        }

        /// <summary>
        /// Silences, in place, every consecutive 100-sample block whose RMS is below 0.005.
        /// A shorter trailing block is judged by the RMS of the samples it actually contains.
        /// </summary>
        /// <param name="wavData">Samples to gate; modified in place.</param>
        private void noiseGate(float[] wavData)
        {
            const int blockSize = 100;
            const double rmsThreshold = 0.005;

            for (int start = 0; start < wavData.Length; start += blockSize)
            {
                int end = Math.Min(start + blockSize, wavData.Length);

                double sumSquares = 0;
                for (int i = start; i < end; i++)
                {
                    sumSquares += (double)wavData[i] * wavData[i];
                }

                if (Math.Sqrt(sumSquares / (end - start)) < rmsThreshold)
                {
                    Array.Clear(wavData, start, end - start);
                }
            }
        }

        /// <summary>
        /// Scales the signal in place so that its largest absolute sample becomes 1.
        /// An empty or all-zero signal is left untouched (there is nothing to scale).
        /// </summary>
        /// <param name="wavData">Samples to normalise; modified in place.</param>
        private void normWav(float[] wavData)
        {
            float peak = 0f;
            for (int i = 0; i < wavData.Length; i++)
            {
                float magnitude = Math.Abs(wavData[i]);
                if (magnitude > peak)
                {
                    peak = magnitude;
                }
            }

            // Dividing by a zero peak would turn every sample into NaN.
            if (peak == 0f)
            {
                return;
            }

            float gain = 1f / peak;
            for (int i = 0; i < wavData.Length; i++)
            {
                wavData[i] *= gain;
            }
        }

        /// <summary>
        /// Splits the signal into frames of <paramref name="window"/> samples that overlap
        /// by half a window. Frames that run past the end of the signal are zero-padded.
        /// </summary>
        /// <param name="wavData">Input samples.</param>
        /// <param name="window">Frame length in samples.</param>
        /// <returns>
        /// A [frame, sample] array with <c>wavData.Length * 2 / window + 1</c> rows; rows that
        /// start at or beyond the end of the signal contain only zeros.
        /// </returns>
        private float[,] setFrames(float[] wavData, int window)
        {
            int half = window / 2;
            // long arithmetic: Length * 2 overflows int for very long signals.
            int frameCount = (int)((long)wavData.Length * 2 / window) + 1;
            float[,] frames = new float[frameCount, window];

            for (int frame = 0; frame < frameCount; frame++)
            {
                // Even frames start on window boundaries, odd frames half a window later.
                long start = (long)(frame / 2) * window + (frame % 2) * half;
                if (start >= wavData.Length)
                {
                    break; // this frame and all later ones stay zero
                }

                int available = (int)Math.Min(window, wavData.Length - start);
                for (int i = 0; i < available; i++)
                {
                    frames[frame, i] = wavData[start + i];
                }
            }

            return frames;
        }

        /// <summary>
        /// Returns a copy of <paramref name="signal"/> multiplied by a Hamming window
        /// (0.54 - 0.46 * cos(2*pi*n / (N - 1))).
        /// </summary>
        /// <param name="signal">Frame to window; not modified.</param>
        /// <returns>The windowed frame, same length as the input.</returns>
        private double[] Hamming(double[] signal)
        {
            int frameSize = signal.Length;
            double[] windowed = new double[frameSize];

            // N - 1 would be zero for a single sample; use a window weight of 1 instead.
            if (frameSize == 1)
            {
                windowed[0] = signal[0];
                return windowed;
            }

            for (int n = 0; n < frameSize; n++)
            {
                double weight = 0.54 - 0.46 * Math.Cos(2 * Math.PI * n / (frameSize - 1));
                windowed[n] = signal[n] * weight;
            }

            return windowed;
        }

        /// <summary>
        /// Applies a Hamming window to the frame and evaluates its discrete Fourier
        /// transform directly (O(N^2); despite the name this is not a fast transform).
        /// </summary>
        /// <param name="wFrame">Un-windowed frame samples; the window is applied here.</param>
        /// <returns>
        /// For every DFT bin, the square root of the bin's magnitude.
        /// </returns>
        private float[] frameFFT(double[] wFrame)
        {
            int frameSize = wFrame.Length;
            double[] windowed = Hamming(wFrame);

            // exp(-2*pi*i*j/N) only takes N distinct values, so compute them once.
            Complex[] twiddle = new Complex[frameSize];
            for (int j = 0; j < frameSize; j++)
            {
                double angle = -2.0 * Math.PI * j / frameSize;
                twiddle[j] = new Complex(Math.Cos(angle), Math.Sin(angle));
            }

            float[] spectrum = new float[frameSize];
            for (int k = 0; k < frameSize; k++)
            {
                Complex sum = Complex.Zero;
                for (int n = 0; n < frameSize; n++)
                {
                    int index = (int)(((long)k * n) % frameSize);
                    sum += windowed[n] * twiddle[index];
                }

                // NOTE(review): calcPower squares this value, so the mel filters end up
                // weighting the magnitude spectrum rather than the power spectrum. Kept as
                // in the original; confirm whether Sqrt was intended.
                spectrum[k] = (float)Math.Sqrt(sum.Magnitude);
            }

            return spectrum;
        }

        /// <summary>
        /// Builds a bank of triangular filters whose edges are equally spaced on the mel scale.
        /// </summary>
        /// <param name="nfft">Number of DFT bins (width of every filter row).</param>
        /// <param name="filters_cnt">Number of filters.</param>
        /// <param name="sample_rate">Sample rate in Hz.</param>
        /// <returns>A [filter, bin] array of weights in the range 0..1.</returns>
        private float[,] MelFilterBank(int nfft, int filters_cnt, int sample_rate)
        {
            // NOTE(review): the filters cover 0..sample_rate across all nfft bins, i.e. they
            // also cover the mirrored upper half of a real signal's spectrum. Many MFCC
            // implementations stop at sample_rate / 2 - confirm this is intended.
            float melMin = toMel(0);
            float melMax = toMel(sample_rate);
            float melStep = (melMax - melMin) / (filters_cnt + 1);

            // Edge/centre bin of every triangle: filters_cnt filters need filters_cnt + 2 points.
            int[] edgeBins = new int[filters_cnt + 2];
            float melPoint = melMin;
            for (int n = 0; n < edgeBins.Length; n++)
            {
                float hertz = fromMel(melPoint);
                edgeBins[n] = (int)Math.Floor(nfft * hertz / sample_rate);
                melPoint += melStep;
            }

            float[,] filterBanks = new float[filters_cnt, nfft];
            for (int m = 1; m <= filters_cnt; m++)
            {
                int left = edgeBins[m - 1];
                int center = edgeBins[m];
                int right = edgeBins[m + 1];

                for (int k = 0; k < nfft; k++)
                {
                    float weight = 0f;
                    if (k >= left && k < center)
                    {
                        weight = (float)(k - left) / (center - left);
                    }
                    else if (k == center)
                    {
                        // Peak of the triangle. Set directly so that coinciding edge bins
                        // (left == center) cannot produce 0 / 0 = NaN.
                        weight = 1f;
                    }
                    else if (k > center && k <= right)
                    {
                        weight = (float)(right - k) / (right - center);
                    }

                    filterBanks[m - 1, k] = weight;
                }
            }

            return filterBanks;
        }

        /// <summary>
        /// Computes the natural log of the filter-weighted energy in every mel band.
        /// </summary>
        /// <param name="fft">Per-bin spectrum values as returned by <c>frameFFT</c>; each is squared before weighting.</param>
        /// <param name="filters_cnt">Number of filters (rows of <paramref name="filterBank"/>) to evaluate.</param>
        /// <param name="filterBank">[filter, bin] weights; must have at least <c>fft.Length</c> columns.</param>
        /// <returns>One log-energy per filter; a band with zero energy yields 0 instead of log(0).</returns>
        private float[] calcPower(float[] fft, int filters_cnt, float[,] filterBank)
        {
            float[] logPower = new float[filters_cnt];

            for (int i = 0; i < filters_cnt; i++)
            {
                // Sum over ALL bins of the band (the band energy), not just the last bin.
                double energy = 0;
                for (int j = 0; j < fft.Length; j++)
                {
                    energy += filterBank[i, j] * Math.Pow(fft[j], 2);
                }

                if (energy > 0)
                {
                    logPower[i] = (float)Math.Log(energy);
                }
            }

            return logPower;
        }

        /// <summary>
        /// Unnormalised DCT-II: result[n] = sum over m of data[m] * cos(pi * n * (m + 1/2) / N).
        /// </summary>
        /// <param name="data">Input values (N of them).</param>
        /// <returns>N transform coefficients.</returns>
        private float[] DCT(float[] data)
        {
            int length = data.Length;
            float[] result = new float[length];

            for (int n = 0; n < length; n++)
            {
                float sum = 0f;
                for (int m = 0; m < length; m++)
                {
                    sum += (float)(data[m] * Math.Cos(Math.PI * n * (m + 0.5f) / length));
                }
                result[n] = sum;
            }

            return result;
        }

        /// <summary>
        /// Computes the MFCC matrix of a signal.
        /// </summary>
        /// <param name="wavData">Input samples; not modified.</param>
        /// <returns>
        /// A jagged array with one row per frame (<c>wavData.Length * 2 / windowSize + 1</c> rows)
        /// and one column per coefficient. Frames containing only zeros produce all-zero rows.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="wavData"/> is null.</exception>
        public double[][] Compute(float[] wavData)
        {
            if (wavData == null)
            {
                throw new ArgumentNullException("wavData");
            }

            // noiseGate(wavData) and normWav(wavData) are deliberately not applied here;
            // both would modify the caller's array in place.
            float[,] frames = setFrames(wavData, windowSize);
            int frameCount = frames.GetLength(0);
            float[,] melBank = MelFilterBank(windowSize, countCoef, sampleRate);

            double[][] resultMfcc = new double[frameCount][];
            double[] frameBuf = new double[windowSize];

            for (int i = 0; i < frameCount; i++)
            {
                for (int s = 0; s < windowSize; s++)
                {
                    frameBuf[s] = frames[i, s];
                }

                float[] spectrum = frameFFT(frameBuf);
                float[] logEnergies = calcPower(spectrum, countCoef, melBank);
                float[] cepstrum = DCT(logEnergies);

                double[] row = new double[countCoef];
                for (int c = 0; c < countCoef; c++)
                {
                    row[c] = cepstrum[c];
                }
                resultMfcc[i] = row;
            }

            return resultMfcc;
        }

        /// <summary>Converts a frequency in Hz to mels: 1125 * ln(1 + f / 700).</summary>
        private float toMel(float f)
        {
            return 1125 * (float)Math.Log(1 + (f / 700));
        }

        /// <summary>Converts mels back to Hz: 700 * (exp(m / 1125) - 1).</summary>
        private float fromMel(float m)
        {
            return 700 * ((float)Math.Exp(m / 1125) - 1);
        }
    }
}