diff options
Diffstat (limited to 'libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp')
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp new file mode 100644 index 0000000000..b257b6ac58 --- /dev/null +++ b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp @@ -0,0 +1,398 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + * ClusterMeltSegmenter.cpp + * + * Created by Mark Levy on 23/03/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. + */ + +#include <cfloat> +#include <cmath> + +#include "ClusterMeltSegmenter.h" +#include "cluster_segmenter.h" +#include "segment.h" + +#include "dsp/transforms/FFT.h" +#include "dsp/chromagram/ConstantQ.h" +#include "dsp/rateconversion/Decimator.h" +#include "dsp/mfcc/MFCC.h" + +ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : + window(NULL), + fft(NULL), + constq(NULL), + mfcc(NULL), + featureType(params.featureType), + hopSize(params.hopSize), + windowSize(params.windowSize), + fmin(params.fmin), + fmax(params.fmax), + nbins(params.nbins), + ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c + nHMMStates(params.nHMMStates), + nclusters(params.nclusters), + histogramLength(params.histogramLength), + neighbourhoodLimit(params.neighbourhoodLimit), + decimator(NULL) +{ +} + +void ClusterMeltSegmenter::initialise(int fs) +{ + samplerate = fs; + + if (featureType == FEATURE_TYPE_CONSTQ || + featureType == FEATURE_TYPE_CHROMA) { + + // run internal processing at 11025 or thereabouts + int internalRate = 11025; + int decimationFactor = samplerate / internalRate; + if (decimationFactor < 1) decimationFactor = 1; + + // must be a power of two + while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; + + if (decimationFactor > Decimator::getHighestSupportedFactor()) { + decimationFactor = Decimator::getHighestSupportedFactor(); + } + + if (decimationFactor > 1) { + decimator = new Decimator(getWindowsize(), decimationFactor); + } + + CQConfig config; + config.FS = samplerate / decimationFactor; + config.min = fmin; + config.max = fmax; + config.BPO = nbins; + config.CQThresh = 0.0054; + + constq = new ConstantQ(config); + constq->sparsekernel(); + + ncoeff = constq->getK(); + + fft = new FFTReal(constq->getfftlength()); + + } else if (featureType == FEATURE_TYPE_MFCC) { + + // run internal processing at 22050 or thereabouts + int internalRate = 22050; + int decimationFactor = samplerate / internalRate; + if (decimationFactor < 1) decimationFactor = 1; + + // must be a power of two + while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; + + if (decimationFactor > Decimator::getHighestSupportedFactor()) { + decimationFactor = Decimator::getHighestSupportedFactor(); + } + + if (decimationFactor > 1) { + decimator = new Decimator(getWindowsize(), decimationFactor); + } + + MFCCConfig config(samplerate / decimationFactor); + config.fftsize = 2048; + config.nceps = 19; + config.want_c0 = true; + + mfcc = new MFCC(config); + ncoeff = config.nceps + 1; + } +} + +ClusterMeltSegmenter::~ClusterMeltSegmenter() +{ + delete window; + delete constq; + delete decimator; + delete fft; +} + +int +ClusterMeltSegmenter::getWindowsize() +{ + return static_cast<int>(windowSize * samplerate + 0.001); +} + +int +ClusterMeltSegmenter::getHopsize() +{ + return static_cast<int>(hopSize * samplerate + 0.001); +} + +void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) +{ + if (featureType == FEATURE_TYPE_CONSTQ || + featureType == FEATURE_TYPE_CHROMA) { + extractFeaturesConstQ(samples, nsamples); + } else if (featureType == FEATURE_TYPE_MFCC) { + extractFeaturesMFCC(samples, nsamples); + } +} + +void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples) +{ + if (!constq) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: " + << "No const-q: initialise not called?" + << std::endl; + return; + } + + if (nsamples < getWindowsize()) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; + return; + } + + int fftsize = constq->getfftlength(); + + if (!window || window->getSize() != fftsize) { + delete window; + window = new Window<double>(HammingWindow, fftsize); + } + + vector<double> cq(ncoeff); + + for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0; + + const double *psource = samples; + int pcount = nsamples; + + if (decimator) { + pcount = nsamples / decimator->getFactor(); + double *decout = new double[pcount]; + decimator->process(samples, decout); + psource = decout; + } + + int origin = 0; + +// std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl; + + int frames = 0; + + double *frame = new double[fftsize]; + double *real = new double[fftsize]; + double *imag = new double[fftsize]; + double *cqre = new double[ncoeff]; + double *cqim = new double[ncoeff]; + + while (origin <= pcount) { + + // always need at least one fft window per block, but after + // that we want to avoid having any incomplete ones + if (origin > 0 && origin + fftsize >= pcount) break; + + for (int i = 0; i < fftsize; ++i) { + if (origin + i < pcount) { + frame[i] = psource[origin + i]; + } else { + frame[i] = 0.0; + } + } + + for (int i = 0; i < fftsize/2; ++i) { + double value = frame[i]; + frame[i] = frame[i + fftsize/2]; + frame[i + fftsize/2] = value; + } + + window->cut(frame); + + fft->process(false, frame, real, imag); + + constq->process(real, imag, cqre, cqim); + + for (int i = 0; i < ncoeff; ++i) { + cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); + } + ++frames; + + origin += fftsize/2; + } + + delete [] cqre; + delete [] cqim; + delete [] real; + delete [] imag; + delete [] frame; + + for (int i = 0; i < ncoeff; ++i) { + cq[i] /= frames; + } + + if (decimator) delete[] psource; + + features.push_back(cq); +} + +void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples) +{ + if (!mfcc) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: " + << "No mfcc: initialise not called?" + << std::endl; + return; + } + + if (nsamples < getWindowsize()) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; + return; + } + + int fftsize = mfcc->getfftlength(); + + vector<double> cc(ncoeff); + + for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0; + + const double *psource = samples; + int pcount = nsamples; + + if (decimator) { + pcount = nsamples / decimator->getFactor(); + double *decout = new double[pcount]; + decimator->process(samples, decout); + psource = decout; + } + + int origin = 0; + int frames = 0; + + double *frame = new double[fftsize]; + double *ccout = new double[ncoeff]; + + while (origin <= pcount) { + + // always need at least one fft window per block, but after + // that we want to avoid having any incomplete ones + if (origin > 0 && origin + fftsize >= pcount) break; + + for (int i = 0; i < fftsize; ++i) { + if (origin + i < pcount) { + frame[i] = psource[origin + i]; + } else { + frame[i] = 0.0; + } + } + + mfcc->process(frame, ccout); + + for (int i = 0; i < ncoeff; ++i) { + cc[i] += ccout[i]; + } + ++frames; + + origin += fftsize/2; + } + + delete [] ccout; + delete [] frame; + + for (int i = 0; i < ncoeff; ++i) { + cc[i] /= frames; + } + + if (decimator) delete[] psource; + + features.push_back(cc); +} + +void ClusterMeltSegmenter::segment(int m) +{ + nclusters = m; + segment(); +} + +void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f) +{ + features = f; + featureType = FEATURE_TYPE_UNKNOWN; +} + +void ClusterMeltSegmenter::segment() +{ + delete constq; + constq = 0; + delete mfcc; + mfcc = 0; + delete decimator; + decimator = 0; + + if (features.size() < histogramLength) return; +/* + std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() + << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; +*/ + // copy the features to a native array and use the existing C segmenter... + double** arrFeatures = new double*[features.size()]; + for (int i = 0; i < features.size(); i++) + { + if (featureType == FEATURE_TYPE_UNKNOWN) { + arrFeatures[i] = new double[features[0].size()]; + for (int j = 0; j < features[0].size(); j++) + arrFeatures[i][j] = features[i][j]; + } else { + arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope + for (int j = 0; j < ncoeff; j++) + arrFeatures[i][j] = features[i][j]; + } + } + + q = new int[features.size()]; + + if (featureType == FEATURE_TYPE_UNKNOWN || + featureType == FEATURE_TYPE_MFCC) + cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, + nclusters, neighbourhoodLimit); + else + constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, + nHMMStates, histogramLength, nclusters, neighbourhoodLimit); + + // convert the cluster assignment sequence to a segmentation + makeSegmentation(q, features.size()); + + // de-allocate arrays + delete [] q; + for (int i = 0; i < features.size(); i++) + delete [] arrFeatures[i]; + delete [] arrFeatures; + + // clear the features + clear(); +} + +void ClusterMeltSegmenter::makeSegmentation(int* q, int len) +{ + segmentation.segments.clear(); + segmentation.nsegtypes = nclusters; + segmentation.samplerate = samplerate; + + Segment segment; + segment.start = 0; + segment.type = q[0]; + + for (int i = 1; i < len; i++) + { + if (q[i] != q[i-1]) + { + segment.end = i * getHopsize(); + segmentation.segments.push_back(segment); + segment.type = q[i]; + segment.start = segment.end; + } + } + segment.end = len * getHopsize(); + segmentation.segments.push_back(segment); +} + |