summaryrefslogtreecommitdiff
path: root/libs/qm-dsp/dsp/mfcc/MFCC.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libs/qm-dsp/dsp/mfcc/MFCC.cpp')
-rw-r--r--libs/qm-dsp/dsp/mfcc/MFCC.cpp276
1 files changed, 276 insertions, 0 deletions
diff --git a/libs/qm-dsp/dsp/mfcc/MFCC.cpp b/libs/qm-dsp/dsp/mfcc/MFCC.cpp
new file mode 100644
index 0000000000..5f7c234051
--- /dev/null
+++ b/libs/qm-dsp/dsp/mfcc/MFCC.cpp
@@ -0,0 +1,276 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+ QM DSP Library
+
+ Centre for Digital Music, Queen Mary, University of London.
+ This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version. See the file
+ COPYING included with this distribution for more information.
+*/
+
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+
+#include "MFCC.h"
+#include "dsp/transforms/FFT.h"
+#include "base/Window.h"
+
+MFCC::MFCC(MFCCConfig config)
+{
+ int i,j;
+
+ /* Calculate at startup */
+ double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
+
+ lowestFrequency = 66.6666666;
+ linearFilters = 13;
+ linearSpacing = 66.66666666;
+ logFilters = 27;
+ logSpacing = 1.0711703;
+
+ /* FFT and analysis window sizes */
+ fftSize = config.fftsize;
+ fft = new FFTReal(fftSize);
+
+ totalFilters = linearFilters + logFilters;
+ logPower = config.logpower;
+
+ samplingRate = config.FS;
+
+ /* The number of cepstral componenents */
+ nceps = config.nceps;
+
+ /* Set if user want C0 */
+ WANT_C0 = (config.want_c0 ? 1 : 0);
+
+ /* Allocate space for feature vector */
+ if (WANT_C0 == 1) {
+ ceps = (double*)calloc(nceps+1, sizeof(double));
+ } else {
+ ceps = (double*)calloc(nceps, sizeof(double));
+ }
+
+ /* Allocate space for local vectors */
+ mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
+ for (i = 0; i < nceps+1; i++) {
+ mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
+ }
+
+ mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
+ for (i = 0; i < totalFilters; i++) {
+ mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
+ }
+
+ freqs = (double*)calloc(totalFilters+2,sizeof(double));
+
+ lower = (double*)calloc(totalFilters,sizeof(double));
+ center = (double*)calloc(totalFilters,sizeof(double));
+ upper = (double*)calloc(totalFilters,sizeof(double));
+
+ triangleHeight = (double*)calloc(totalFilters,sizeof(double));
+ fftFreqs = (double*)calloc(fftSize,sizeof(double));
+
+ for (i = 0; i < linearFilters; i++) {
+ freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
+ }
+
+ for (i = linearFilters; i < totalFilters+2; i++) {
+ freqs[i] = freqs[linearFilters-1] *
+ pow(logSpacing, (double)(i-linearFilters+1));
+ }
+
+ /* Define lower, center and upper */
+ memcpy(lower, freqs,totalFilters*sizeof(double));
+ memcpy(center, &freqs[1],totalFilters*sizeof(double));
+ memcpy(upper, &freqs[2],totalFilters*sizeof(double));
+
+ for (i=0;i<totalFilters;i++){
+ triangleHeight[i] = 2./(upper[i]-lower[i]);
+ }
+
+ for (i=0;i<fftSize;i++){
+ fftFreqs[i] = ((double) i / ((double) fftSize ) *
+ (double) samplingRate);
+ }
+
+ /* Build now the mccFilterWeight matrix */
+ for (i=0;i<totalFilters;i++){
+
+ for (j=0;j<fftSize;j++) {
+
+ if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
+
+ mfccFilterWeights[i][j] = triangleHeight[i] *
+ (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
+
+ }
+ else
+ {
+ mfccFilterWeights[i][j] = 0.0;
+ }
+
+ if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
+
+ mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
+ + triangleHeight[i] * (upper[i]-fftFreqs[j])
+ / (upper[i]-center[i]);
+ }
+ else
+ {
+ mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
+ }
+ }
+
+ }
+
+ /*
+ * We calculate now mfccDCT matrix
+ * NB: +1 because of the DC component
+ */
+
+ const double pi = 3.14159265358979323846264338327950288;
+
+ for (i = 0; i < nceps+1; i++) {
+ for (j = 0; j < totalFilters; j++) {
+ mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
+ * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
+ }
+ }
+
+ for (j = 0; j < totalFilters; j++){
+ mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
+ }
+
+ /* The analysis window */
+ window = new Window<double>(config.window, fftSize);
+
+ /* Allocate memory for the FFT */
+ realOut = (double*)calloc(fftSize, sizeof(double));
+ imagOut = (double*)calloc(fftSize, sizeof(double));
+
+ earMag = (double*)calloc(totalFilters, sizeof(double));
+ fftMag = (double*)calloc(fftSize/2, sizeof(double));
+
+ free(freqs);
+ free(lower);
+ free(center);
+ free(upper);
+ free(triangleHeight);
+ free(fftFreqs);
+}
+
+MFCC::~MFCC()
+{
+ int i;
+
+ /* Free the structure */
+ for (i = 0; i < nceps+1; i++) {
+ free(mfccDCTMatrix[i]);
+ }
+ free(mfccDCTMatrix);
+
+ for (i = 0; i < totalFilters; i++) {
+ free(mfccFilterWeights[i]);
+ }
+ free(mfccFilterWeights);
+
+ /* Free the feature vector */
+ free(ceps);
+
+ /* The analysis window */
+ delete window;
+
+ free(earMag);
+ free(fftMag);
+
+ /* Free the FFT */
+ free(realOut);
+ free(imagOut);
+
+ delete fft;
+}
+
+
+/*
+ *
+ * Extract the MFCC on the input frame
+ *
+ */
+int MFCC::process(const double *inframe, double *outceps)
+{
+ double *inputData = (double *)malloc(fftSize * sizeof(double));
+ for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
+
+ window->cut(inputData);
+
+ /* Calculate the fft on the input frame */
+ fft->process(0, inputData, realOut, imagOut);
+
+ free(inputData);
+
+ return process(realOut, imagOut, outceps);
+}
+
+int MFCC::process(const double *real, const double *imag, double *outceps)
+{
+ int i, j;
+
+ for (i = 0; i < fftSize/2; ++i) {
+ fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
+ }
+
+ for (i = 0; i < totalFilters; ++i) {
+ earMag[i] = 0.0;
+ }
+
+ /* Multiply by mfccFilterWeights */
+ for (i = 0; i < totalFilters; i++) {
+ double tmp = 0.0;
+ for (j = 0; j < fftSize/2; j++) {
+ tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
+ }
+ if (tmp > 0) earMag[i] = log10(tmp);
+ else earMag[i] = 0.0;
+
+ if (logPower != 1.0) {
+ earMag[i] = pow(earMag[i], logPower);
+ }
+ }
+
+ /*
+ *
+ * Calculate now the cepstral coefficients
+ * with or without the DC component
+ *
+ */
+
+ if (WANT_C0 == 1) {
+
+ for (i = 0; i < nceps+1; i++) {
+ double tmp = 0.;
+ for (j = 0; j < totalFilters; j++){
+ tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
+ }
+ outceps[i] = tmp;
+ }
+ }
+ else
+ {
+ for (i = 1; i < nceps+1; i++) {
+ double tmp = 0.;
+ for (j = 0; j < totalFilters; j++){
+ tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
+ }
+ outceps[i-1] = tmp;
+ }
+ }
+
+ return nceps;
+}
+