diff options
Diffstat (limited to 'libs/qm-dsp/dsp/segmentation')
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp | 54 | ||||
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h | 22 | ||||
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/Segmenter.cpp | 6 | ||||
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/Segmenter.h | 4 | ||||
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/cluster_melt.c | 70 | ||||
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/cluster_segmenter.c | 92 | ||||
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/cluster_segmenter.h | 4 | ||||
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/segment.h | 8 |
8 files changed, 130 insertions, 130 deletions
diff --git a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp index ce5f370436..22835f7116 100644 --- a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp +++ b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp @@ -51,7 +51,7 @@ void ClusterMeltSegmenter::initialise(int fs) if (featureType == FEATURE_TYPE_CONSTQ || featureType == FEATURE_TYPE_CHROMA) { - + // run internal processing at 11025 or thereabouts int internalRate = 11025; int decimationFactor = samplerate / internalRate; @@ -77,11 +77,11 @@ void ClusterMeltSegmenter::initialise(int fs) constq = new ConstantQ(config); constq->sparsekernel(); - + ncoeff = constq->getK(); fft = new FFTReal(constq->getfftlength()); - + } else if (featureType == FEATURE_TYPE_MFCC) { // run internal processing at 22050 or thereabouts @@ -110,7 +110,7 @@ void ClusterMeltSegmenter::initialise(int fs) } } -ClusterMeltSegmenter::~ClusterMeltSegmenter() +ClusterMeltSegmenter::~ClusterMeltSegmenter() { delete window; delete constq; @@ -164,7 +164,7 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam vector<double> cq(ncoeff); for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0; - + const double *psource = samples; int pcount = nsamples; @@ -174,9 +174,9 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam decimator->process(samples, decout); psource = decout; } - + int origin = 0; - + // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl; int frames = 0; @@ -208,11 +208,11 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam } window->cut(frame); - - fft->process(false, frame, real, imag); - + + fft->forward(frame, real, imag); + constq->process(real, imag, cqre, cqim); - + for (int i = 0; i < ncoeff; ++i) { cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); } @@ -255,7 +255,7 @@ void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsampl vector<double> cc(ncoeff); for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0; - + const double *psource = samples; int pcount = nsamples; @@ -287,7 +287,7 @@ void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsampl } mfcc->process(frame, ccout); - + for (int i = 0; i < ncoeff; ++i) { cc[i] += ccout[i]; } @@ -330,44 +330,44 @@ void ClusterMeltSegmenter::segment() decimator = 0; if (features.size() < histogramLength) return; -/* +/* std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; */ // copy the features to a native array and use the existing C segmenter... - double** arrFeatures = new double*[features.size()]; + double** arrFeatures = new double*[features.size()]; for (int i = 0; i < features.size(); i++) { if (featureType == FEATURE_TYPE_UNKNOWN) { arrFeatures[i] = new double[features[0].size()]; for (int j = 0; j < features[0].size(); j++) - arrFeatures[i][j] = features[i][j]; + arrFeatures[i][j] = features[i][j]; } else { arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope for (int j = 0; j < ncoeff; j++) - arrFeatures[i][j] = features[i][j]; + arrFeatures[i][j] = features[i][j]; } } - + q = new int[features.size()]; - + if (featureType == FEATURE_TYPE_UNKNOWN || featureType == FEATURE_TYPE_MFCC) - cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, + cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, nclusters, neighbourhoodLimit); else - constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, + constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, nHMMStates, histogramLength, nclusters, neighbourhoodLimit); - + // convert the cluster assignment sequence to a segmentation - makeSegmentation(q, features.size()); - + makeSegmentation(q, features.size()); + // de-allocate arrays delete [] q; for (int i = 0; i < features.size(); i++) delete [] arrFeatures[i]; delete [] arrFeatures; - + // clear the features clear(); } @@ -377,11 +377,11 @@ void ClusterMeltSegmenter::makeSegmentation(int* q, int len) segmentation.segments.clear(); segmentation.nsegtypes = nclusters; segmentation.samplerate = samplerate; - + Segment segment; segment.start = 0; segment.type = q[0]; - + for (int i = 1; i < len; i++) { if (q[i] != q[i-1]) diff --git a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h index 8f3130871e..327a25f073 100644 --- a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h +++ b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h @@ -31,12 +31,12 @@ class ClusterMeltSegmenterParams // defaults are sensible for 11025Hz with 0.2 second hopsize { public: - ClusterMeltSegmenterParams() : + ClusterMeltSegmenterParams() : featureType(FEATURE_TYPE_CONSTQ), hopSize(0.2), windowSize(0.6), fmin(62), - fmax(16000), + fmax(16000), nbins(8), ncomponents(20), nHMMStates(40), @@ -72,34 +72,34 @@ public: protected: void makeSegmentation(int* q, int len); - + void extractFeaturesConstQ(const double *, int); void extractFeaturesMFCC(const double *, int); Window<double> *window; FFTReal *fft; - ConstantQ* constq; + ConstantQ* constq; MFCC* mfcc; model_t* model; // the HMM int* q; // the decoded HMM state sequence - vector<vector<double> > histograms; - - feature_types featureType; + vector<vector<double> > histograms; + + feature_types featureType; double hopSize; // in seconds double windowSize; // in seconds - + // constant-Q parameters int fmin; int fmax; int nbins; int ncoeff; - + // PCA parameters int ncomponents; - + // HMM parameters int nHMMStates; - + // clustering parameters int nclusters; int histogramLength; diff --git a/libs/qm-dsp/dsp/segmentation/Segmenter.cpp b/libs/qm-dsp/dsp/segmentation/Segmenter.cpp index b60fb58162..538eaacc78 100644 --- a/libs/qm-dsp/dsp/segmentation/Segmenter.cpp +++ b/libs/qm-dsp/dsp/segmentation/Segmenter.cpp @@ -19,13 +19,13 @@ ostream& operator<<(ostream& os, const Segmentation& s) { os << "structure_name : begin_time end_time\n"; - + for (int i = 0; i < s.segments.size(); i++) { Segment seg = s.segments[i]; - os << std::fixed << seg.type << ':' << '\t' << std::setprecision(6) << seg.start / static_cast<double>(s.samplerate) + os << std::fixed << seg.type << ':' << '\t' << std::setprecision(6) << seg.start / static_cast<double>(s.samplerate) << '\t' << std::setprecision(6) << seg.end / static_cast<double>(s.samplerate) << "\n"; } - + return os; } diff --git a/libs/qm-dsp/dsp/segmentation/Segmenter.h b/libs/qm-dsp/dsp/segmentation/Segmenter.h index fd2f39b850..1ac0679b72 100644 --- a/libs/qm-dsp/dsp/segmentation/Segmenter.h +++ b/libs/qm-dsp/dsp/segmentation/Segmenter.h @@ -35,7 +35,7 @@ class Segmentation public: int nsegtypes; // number of segment types, so possible types are {0,1,...,nsegtypes-1} int samplerate; - vector<Segment> segments; + vector<Segment> segments; }; ostream& operator<<(ostream& os, const Segmentation& s); @@ -52,7 +52,7 @@ public: virtual void segment() = 0; // call once all the features have been extracted virtual void segment(int m) = 0; // specify desired number of segment-types virtual void clear() { features.clear(); } - const Segmentation& getSegmentation() const { return segmentation; } + const Segmentation& getSegmentation() const { return segmentation; } protected: vector<vector<double> > features; Segmentation segmentation; diff --git a/libs/qm-dsp/dsp/segmentation/cluster_melt.c b/libs/qm-dsp/dsp/segmentation/cluster_melt.c index 092bc7f078..1441b394c2 100644 --- a/libs/qm-dsp/dsp/segmentation/cluster_melt.c +++ b/libs/qm-dsp/dsp/segmentation/cluster_melt.c @@ -25,7 +25,7 @@ double kldist(double* a, double* b, int n) { because a, b represent probability distributions */ double q, d; int i; - + d = 0; for (i = 0; i < n; i++) { @@ -38,8 +38,8 @@ double kldist(double* a, double* b, int n) { d += b[i] * log(b[i] / q); } } - return d; -} + return d; +} void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) { double lambda, sum, beta, logsumexp, maxlp; @@ -48,9 +48,9 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int** nc; /* neighbour counts for each histogram */ double** lp; /* soft assignment probs for each histogram */ int* oldc; /* previous hard assignments (to check convergence) */ - + /* NB h is passed as a 1d row major array */ - + /* parameter values */ lambda = DEFAULT_LAMBDA; if (l > 0) @@ -60,22 +60,22 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, B = 2 * limit + 1; maxiter0 = 20; /* number of iterations at initial temperature */ maxiter1 = 5; /* number of iterations at subsequent temperatures */ - - /* allocate memory */ + + /* allocate memory */ cl = (double**) malloc(k*sizeof(double*)); for (i= 0; i < k; i++) cl[i] = (double*) malloc(m*sizeof(double)); - + nc = (int**) malloc(n*sizeof(int*)); for (i= 0; i < n; i++) nc[i] = (int*) malloc(k*sizeof(int)); - + lp = (double**) malloc(n*sizeof(double*)); for (i= 0; i < n; i++) lp[i] = (double*) malloc(k*sizeof(double)); - + oldc = (int*) malloc(n * sizeof(int)); - + /* initialise */ for (i = 0; i < k; i++) { @@ -90,40 +90,40 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, { cl[i][j] /= sum; /* normalise */ } - } + } //print_array(cl, k, m); - + for (i = 0; i < n; i++) c[i] = 1; /* initially assign all histograms to cluster 1 */ - + for (a = 0; a < t; a++) { beta = Bsched[a]; - + if (a == 0) maxiter = maxiter0; else maxiter = maxiter1; - + for (it = 0; it < maxiter; it++) { //if (it == maxiter - 1) // mexPrintf("hasn't converged after %d iterations\n", maxiter); - + for (i = 0; i < n; i++) { /* save current hard assignments */ oldc[i] = c[i]; - + /* calculate soft assignment logprobs for each cluster */ sum = 0; for (j = 0; j < k; j++) { lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m); - + /* update matching neighbour counts for this histogram, based on current hard assignments */ /* old version: - nc[i][j] = 0; + nc[i][j] = 0; if (i >= limit && i <= n - 1 - limit) { for (b = i - limit; b <= i + limit; b++) @@ -144,14 +144,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, for (b = b0; b <= b1; b++) if (c[b] == j+1) nc[i][j]--; - + sum += exp(lp[i][j]); } - + /* normalise responsibilities and add duration logprior */ logsumexp = log(sum); for (j = 0; j < k; j++) - lp[i][j] -= logsumexp + lambda * nc[i][j]; + lp[i][j] -= logsumexp + lambda * nc[i][j]; } //print_array(lp, n, k); /* @@ -160,10 +160,10 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, for (j = 0; j < k; j++) mexPrintf("%d ", nc[i][j]); mexPrintf("\n"); - } + } */ - - + + /* update the assignments now that we know the duration priors based on the current assignments */ for (i = 0; i < n; i++) @@ -177,14 +177,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, c[i] = j+1; } } - + /* break if assignments haven't changed */ i = 0; while (i < n && oldc[i] == c[i]) i++; if (i == n) break; - + /* update reference histograms now we know new responsibilities */ for (j = 0; j < k; j++) { @@ -194,21 +194,21 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, for (i = 0; i < n; i++) { cl[j][b] += exp(lp[i][j]) * h[i*m+b]; - } + } } - - sum = 0; + + sum = 0; for (i = 0; i < n; i++) sum += exp(lp[i][j]); for (b = 0; b < m; b++) cl[j][b] /= sum; /* normalise */ - } - + } + //print_array(cl, k, m); //mexPrintf("\n\n"); } } - + /* free memory */ for (i = 0; i < k; i++) free(cl[i]); @@ -219,7 +219,7 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, for (i = 0; i < n; i++) free(lp[i]); free(lp); - free(oldc); + free(oldc); } diff --git a/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c b/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c index c9f115c205..2a6b196921 100644 --- a/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c +++ b/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c @@ -25,7 +25,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) int t, b, oct, ix; //double maxchroma; /* max chroma value at each time, for normalisation */ //double sum; /* for normalisation */ - + for (t = 0; t < nframes; t++) { for (b = 0; b < bins; b++) @@ -50,7 +50,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) maxchroma = chroma[t][b]; if (maxchroma > 0) for (b = 0; b < bins; b++) - chroma[t][b] /= maxchroma; + chroma[t][b] /= maxchroma; */ } } @@ -62,13 +62,13 @@ void mpeg7_constq(double** features, int nframes, int ncoeff) double ss; double env; double maxenv = 0; - + /* convert const-Q features to dB scale */ for (i = 0; i < nframes; i++) for (j = 0; j < ncoeff; j++) features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); - - /* normalise each feature vector and add the norm as an extra feature dimension */ + + /* normalise each feature vector and add the norm as an extra feature dimension */ for (i = 0; i < nframes; i++) { ss = 0; @@ -80,10 +80,10 @@ void mpeg7_constq(double** features, int nframes, int ncoeff) features[i][ncoeff] = env; if (env > maxenv) maxenv = env; - } + } /* normalise the envelopes */ for (i = 0; i < nframes; i++) - features[i][ncoeff] /= maxenv; + features[i][ncoeff] /= maxenv; } /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ @@ -94,7 +94,7 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h) int i, j, t; double norm; - for (i = 0; i < nx*m; i++) + for (i = 0; i < nx*m; i++) h[i] = 0; for (i = hlen/2; i < nx-hlen/2; i++) @@ -109,7 +109,7 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h) for (j = 0; j < m; j++) h[i*m+j] /= norm; } - + /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ for (i = 0; i < hlen/2; i++) for (j = 0; j < m; j++) @@ -120,11 +120,11 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h) } /* segment using HMM and then histogram clustering */ -void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, +void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) { int i, j; - + /*****************************/ if (0) { /* try just using the predominant bin number as a 'decoded state' */ @@ -137,60 +137,60 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len maxval = 0; for (j = 0; j < feature_length; j++) { - if (features[i][j] > maxval) + if (features[i][j] > maxval) { maxval = features[i][j]; maxbin = j; - } + } } if (maxval > chroma_thresh) q[i] = maxbin; else q[i] = feature_length; } - + } if (1) { /*****************************/ - - + + /* scale all the features to 'balance covariances' during HMM training */ double scale = 10; for (i = 0; i < frames_read; i++) for (j = 0; j < feature_length; j++) features[i][j] *= scale; - + /* train an HMM on the features */ - + /* create a model */ model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); - + /* train the model */ hmm_train(features, frames_read, model); -/* +/* printf("\n\nafter training:\n"); hmm_print(model); -*/ +*/ /* decode the hidden state sequence */ - viterbi_decode(features, frames_read, model, q); + viterbi_decode(features, frames_read, model, q); hmm_close(model); - + /*****************************/ } /*****************************/ - - + + /* fprintf(stderr, "HMM state sequence:\n"); for (i = 0; i < frames_read; i++) fprintf(stderr, "%d ", q[i]); fprintf(stderr, "\n\n"); */ - + /* create histograms of states */ double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ create_histograms(q, frames_read, nHMM_states, histogram_length, h); - + /* cluster the histograms */ int nbsched = 20; /* length of inverse temperature schedule */ double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ @@ -200,39 +200,39 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len for (i = 1; i < nbsched; i++) bsched[i] = alpha * bsched[i-1]; cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); - + /* now q holds a sequence of cluster assignments */ - - free(h); + + free(h); free(bsched); } /* segment constant-Q or chroma features */ -void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, +void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) { int feature_length; double** chroma; int i; - + if (feature_type == FEATURE_TYPE_CONSTQ) { /* fprintf(stderr, "Converting to dB and normalising...\n"); - */ + */ mpeg7_constq(features, frames_read, ncoeff); -/* +/* fprintf(stderr, "Running PCA...\n"); -*/ +*/ /* do PCA on the features (but not the envelope) */ int ncomponents = 20; pca_project(features, frames_read, ncoeff, ncomponents); - + /* copy the envelope so that it immediatly follows the chosen components */ for (i = 0; i < frames_read; i++) - features[i][ncomponents] = features[i][ncoeff]; - + features[i][ncomponents] = features[i][ncoeff]; + feature_length = ncomponents + 1; - + /************************************** //TEST // feature file name @@ -241,7 +241,7 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc strcpy(file_name, dir); strcat(file_name, trackname); strcat(file_name, "_features_c20r8h0.2f0.6.mat"); - + // get the features from Matlab from mat-file int frames_in_file; readmatarray_size(file_name, 2, &frames_in_file, &feature_length); @@ -254,27 +254,27 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i]; --missing_frames; } - + free(file_name); ******************************************/ - + cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); } - + if (feature_type == FEATURE_TYPE_CHROMA) { /* fprintf(stderr, "Converting to chroma features...\n"); -*/ +*/ /* convert constant-Q to normalised chroma features */ chroma = (double**) malloc(frames_read*sizeof(double*)); for (i = 0; i < frames_read; i++) chroma[i] = (double*) malloc(bins*sizeof(double)); cq2chroma(features, frames_read, ncoeff, bins, chroma); feature_length = bins; - + cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); - + for (i = 0; i < frames_read; i++) free(chroma[i]); free(chroma); diff --git a/libs/qm-dsp/dsp/segmentation/cluster_segmenter.h b/libs/qm-dsp/dsp/segmentation/cluster_segmenter.h index fad0585cf6..e7106d4f81 100644 --- a/libs/qm-dsp/dsp/segmentation/cluster_segmenter.h +++ b/libs/qm-dsp/dsp/segmentation/cluster_segmenter.h @@ -38,10 +38,10 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma); void create_histograms(int* x, int nx, int m, int hlen, double* h); -void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, +void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit); -void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, +void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit); #ifdef __cplusplus diff --git a/libs/qm-dsp/dsp/segmentation/segment.h b/libs/qm-dsp/dsp/segmentation/segment.h index 494317cf42..7a4eb8b2b4 100644 --- a/libs/qm-dsp/dsp/segmentation/segment.h +++ b/libs/qm-dsp/dsp/segmentation/segment.h @@ -34,10 +34,10 @@ typedef struct segmentation_t segment_t* segments; } segmentation_t; -typedef enum -{ - FEATURE_TYPE_UNKNOWN = 0, - FEATURE_TYPE_CONSTQ = 1, +typedef enum +{ + FEATURE_TYPE_UNKNOWN = 0, + FEATURE_TYPE_CONSTQ = 1, FEATURE_TYPE_CHROMA = 2, FEATURE_TYPE_MFCC = 3 } feature_types; |