summaryrefslogtreecommitdiff
path: root/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp')
-rw-r--r--libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp398
1 files changed, 398 insertions, 0 deletions
diff --git a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp
new file mode 100644
index 0000000000..b257b6ac58
--- /dev/null
+++ b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp
@@ -0,0 +1,398 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+
+/*
+ * ClusterMeltSegmenter.cpp
+ *
+ * Created by Mark Levy on 23/03/2006.
+ * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version. See the file
+ COPYING included with this distribution for more information.
+ */
+
+#include <cfloat>
+#include <cmath>
+
+#include "ClusterMeltSegmenter.h"
+#include "cluster_segmenter.h"
+#include "segment.h"
+
+#include "dsp/transforms/FFT.h"
+#include "dsp/chromagram/ConstantQ.h"
+#include "dsp/rateconversion/Decimator.h"
+#include "dsp/mfcc/MFCC.h"
+
+ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
+ window(NULL),
+ fft(NULL),
+ constq(NULL),
+ mfcc(NULL),
+ featureType(params.featureType),
+ hopSize(params.hopSize),
+ windowSize(params.windowSize),
+ fmin(params.fmin),
+ fmax(params.fmax),
+ nbins(params.nbins),
+ ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
+ nHMMStates(params.nHMMStates),
+ nclusters(params.nclusters),
+ histogramLength(params.histogramLength),
+ neighbourhoodLimit(params.neighbourhoodLimit),
+ decimator(NULL)
+{
+}
+
+void ClusterMeltSegmenter::initialise(int fs)
+{
+ samplerate = fs;
+
+ if (featureType == FEATURE_TYPE_CONSTQ ||
+ featureType == FEATURE_TYPE_CHROMA) {
+
+ // run internal processing at 11025 or thereabouts
+ int internalRate = 11025;
+ int decimationFactor = samplerate / internalRate;
+ if (decimationFactor < 1) decimationFactor = 1;
+
+ // must be a power of two
+ while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
+
+ if (decimationFactor > Decimator::getHighestSupportedFactor()) {
+ decimationFactor = Decimator::getHighestSupportedFactor();
+ }
+
+ if (decimationFactor > 1) {
+ decimator = new Decimator(getWindowsize(), decimationFactor);
+ }
+
+ CQConfig config;
+ config.FS = samplerate / decimationFactor;
+ config.min = fmin;
+ config.max = fmax;
+ config.BPO = nbins;
+ config.CQThresh = 0.0054;
+
+ constq = new ConstantQ(config);
+ constq->sparsekernel();
+
+ ncoeff = constq->getK();
+
+ fft = new FFTReal(constq->getfftlength());
+
+ } else if (featureType == FEATURE_TYPE_MFCC) {
+
+ // run internal processing at 22050 or thereabouts
+ int internalRate = 22050;
+ int decimationFactor = samplerate / internalRate;
+ if (decimationFactor < 1) decimationFactor = 1;
+
+ // must be a power of two
+ while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
+
+ if (decimationFactor > Decimator::getHighestSupportedFactor()) {
+ decimationFactor = Decimator::getHighestSupportedFactor();
+ }
+
+ if (decimationFactor > 1) {
+ decimator = new Decimator(getWindowsize(), decimationFactor);
+ }
+
+ MFCCConfig config(samplerate / decimationFactor);
+ config.fftsize = 2048;
+ config.nceps = 19;
+ config.want_c0 = true;
+
+ mfcc = new MFCC(config);
+ ncoeff = config.nceps + 1;
+ }
+}
+
+ClusterMeltSegmenter::~ClusterMeltSegmenter()
+{
+ delete window;
+ delete constq;
+ delete decimator;
+ delete fft;
+}
+
+int
+ClusterMeltSegmenter::getWindowsize()
+{
+ return static_cast<int>(windowSize * samplerate + 0.001);
+}
+
+int
+ClusterMeltSegmenter::getHopsize()
+{
+ return static_cast<int>(hopSize * samplerate + 0.001);
+}
+
+void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
+{
+ if (featureType == FEATURE_TYPE_CONSTQ ||
+ featureType == FEATURE_TYPE_CHROMA) {
+ extractFeaturesConstQ(samples, nsamples);
+ } else if (featureType == FEATURE_TYPE_MFCC) {
+ extractFeaturesMFCC(samples, nsamples);
+ }
+}
+
+void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
+{
+ if (!constq) {
+ std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
+ << "No const-q: initialise not called?"
+ << std::endl;
+ return;
+ }
+
+ if (nsamples < getWindowsize()) {
+ std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
+ return;
+ }
+
+ int fftsize = constq->getfftlength();
+
+ if (!window || window->getSize() != fftsize) {
+ delete window;
+ window = new Window<double>(HammingWindow, fftsize);
+ }
+
+ vector<double> cq(ncoeff);
+
+ for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
+
+ const double *psource = samples;
+ int pcount = nsamples;
+
+ if (decimator) {
+ pcount = nsamples / decimator->getFactor();
+ double *decout = new double[pcount];
+ decimator->process(samples, decout);
+ psource = decout;
+ }
+
+ int origin = 0;
+
+// std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
+
+ int frames = 0;
+
+ double *frame = new double[fftsize];
+ double *real = new double[fftsize];
+ double *imag = new double[fftsize];
+ double *cqre = new double[ncoeff];
+ double *cqim = new double[ncoeff];
+
+ while (origin <= pcount) {
+
+ // always need at least one fft window per block, but after
+ // that we want to avoid having any incomplete ones
+ if (origin > 0 && origin + fftsize >= pcount) break;
+
+ for (int i = 0; i < fftsize; ++i) {
+ if (origin + i < pcount) {
+ frame[i] = psource[origin + i];
+ } else {
+ frame[i] = 0.0;
+ }
+ }
+
+ for (int i = 0; i < fftsize/2; ++i) {
+ double value = frame[i];
+ frame[i] = frame[i + fftsize/2];
+ frame[i + fftsize/2] = value;
+ }
+
+ window->cut(frame);
+
+ fft->process(false, frame, real, imag);
+
+ constq->process(real, imag, cqre, cqim);
+
+ for (int i = 0; i < ncoeff; ++i) {
+ cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
+ }
+ ++frames;
+
+ origin += fftsize/2;
+ }
+
+ delete [] cqre;
+ delete [] cqim;
+ delete [] real;
+ delete [] imag;
+ delete [] frame;
+
+ for (int i = 0; i < ncoeff; ++i) {
+ cq[i] /= frames;
+ }
+
+ if (decimator) delete[] psource;
+
+ features.push_back(cq);
+}
+
+void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
+{
+ if (!mfcc) {
+ std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
+ << "No mfcc: initialise not called?"
+ << std::endl;
+ return;
+ }
+
+ if (nsamples < getWindowsize()) {
+ std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
+ return;
+ }
+
+ int fftsize = mfcc->getfftlength();
+
+ vector<double> cc(ncoeff);
+
+ for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
+
+ const double *psource = samples;
+ int pcount = nsamples;
+
+ if (decimator) {
+ pcount = nsamples / decimator->getFactor();
+ double *decout = new double[pcount];
+ decimator->process(samples, decout);
+ psource = decout;
+ }
+
+ int origin = 0;
+ int frames = 0;
+
+ double *frame = new double[fftsize];
+ double *ccout = new double[ncoeff];
+
+ while (origin <= pcount) {
+
+ // always need at least one fft window per block, but after
+ // that we want to avoid having any incomplete ones
+ if (origin > 0 && origin + fftsize >= pcount) break;
+
+ for (int i = 0; i < fftsize; ++i) {
+ if (origin + i < pcount) {
+ frame[i] = psource[origin + i];
+ } else {
+ frame[i] = 0.0;
+ }
+ }
+
+ mfcc->process(frame, ccout);
+
+ for (int i = 0; i < ncoeff; ++i) {
+ cc[i] += ccout[i];
+ }
+ ++frames;
+
+ origin += fftsize/2;
+ }
+
+ delete [] ccout;
+ delete [] frame;
+
+ for (int i = 0; i < ncoeff; ++i) {
+ cc[i] /= frames;
+ }
+
+ if (decimator) delete[] psource;
+
+ features.push_back(cc);
+}
+
+void ClusterMeltSegmenter::segment(int m)
+{
+ nclusters = m;
+ segment();
+}
+
+void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
+{
+ features = f;
+ featureType = FEATURE_TYPE_UNKNOWN;
+}
+
+void ClusterMeltSegmenter::segment()
+{
+ delete constq;
+ constq = 0;
+ delete mfcc;
+ mfcc = 0;
+ delete decimator;
+ decimator = 0;
+
+ if (features.size() < histogramLength) return;
+/*
+ std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
+ << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
+*/
+ // copy the features to a native array and use the existing C segmenter...
+ double** arrFeatures = new double*[features.size()];
+ for (int i = 0; i < features.size(); i++)
+ {
+ if (featureType == FEATURE_TYPE_UNKNOWN) {
+ arrFeatures[i] = new double[features[0].size()];
+ for (int j = 0; j < features[0].size(); j++)
+ arrFeatures[i][j] = features[i][j];
+ } else {
+ arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
+ for (int j = 0; j < ncoeff; j++)
+ arrFeatures[i][j] = features[i][j];
+ }
+ }
+
+ q = new int[features.size()];
+
+ if (featureType == FEATURE_TYPE_UNKNOWN ||
+ featureType == FEATURE_TYPE_MFCC)
+ cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
+ nclusters, neighbourhoodLimit);
+ else
+ constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
+ nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
+
+ // convert the cluster assignment sequence to a segmentation
+ makeSegmentation(q, features.size());
+
+ // de-allocate arrays
+ delete [] q;
+ for (int i = 0; i < features.size(); i++)
+ delete [] arrFeatures[i];
+ delete [] arrFeatures;
+
+ // clear the features
+ clear();
+}
+
+void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
+{
+ segmentation.segments.clear();
+ segmentation.nsegtypes = nclusters;
+ segmentation.samplerate = samplerate;
+
+ Segment segment;
+ segment.start = 0;
+ segment.type = q[0];
+
+ for (int i = 1; i < len; i++)
+ {
+ if (q[i] != q[i-1])
+ {
+ segment.end = i * getHopsize();
+ segmentation.segments.push_back(segment);
+ segment.type = q[i];
+ segment.start = segment.end;
+ }
+ }
+ segment.end = len * getHopsize();
+ segmentation.segments.push_back(segment);
+}
+