Diffstat (limited to 'libs/qm-dsp/dsp/segmentation')
-rw-r--r--   libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp    26
-rw-r--r--   libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h        6
-rw-r--r--   libs/qm-dsp/dsp/segmentation/Segmenter.cpp                 4
-rw-r--r--   libs/qm-dsp/dsp/segmentation/Segmenter.h                   2
-rw-r--r--   libs/qm-dsp/dsp/segmentation/cluster_melt.c               68
-rw-r--r--   libs/qm-dsp/dsp/segmentation/cluster_segmenter.c          76
6 files changed, 91 insertions, 91 deletions
diff --git a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp
index 7643691cd3..ce5f370436 100644
--- a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp
+++ b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp
@@ -212,7 +212,7 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
fft->process(false, frame, real, imag);
constq->process(real, imag, cqre, cqim);
-
+
for (int i = 0; i < ncoeff; ++i) {
cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
}
@@ -287,7 +287,7 @@ void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsampl
}
mfcc->process(frame, ccout);
-
+
for (int i = 0; i < ncoeff; ++i) {
cc[i] += ccout[i];
}
@@ -335,22 +335,22 @@ void ClusterMeltSegmenter::segment()
<< " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
*/
// copy the features to a native array and use the existing C segmenter...
- double** arrFeatures = new double*[features.size()];
+ double** arrFeatures = new double*[features.size()];
for (int i = 0; i < features.size(); i++)
{
if (featureType == FEATURE_TYPE_UNKNOWN) {
arrFeatures[i] = new double[features[0].size()];
for (int j = 0; j < features[0].size(); j++)
- arrFeatures[i][j] = features[i][j];
+ arrFeatures[i][j] = features[i][j];
} else {
arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
for (int j = 0; j < ncoeff; j++)
- arrFeatures[i][j] = features[i][j];
+ arrFeatures[i][j] = features[i][j];
}
}
-
+
q = new int[features.size()];
-
+
if (featureType == FEATURE_TYPE_UNKNOWN ||
featureType == FEATURE_TYPE_MFCC)
cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
@@ -358,16 +358,16 @@ void ClusterMeltSegmenter::segment()
else
constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
-
+
// convert the cluster assignment sequence to a segmentation
- makeSegmentation(q, features.size());
-
+ makeSegmentation(q, features.size());
+
// de-allocate arrays
delete [] q;
for (int i = 0; i < features.size(); i++)
delete [] arrFeatures[i];
delete [] arrFeatures;
-
+
// clear the features
clear();
}
@@ -377,11 +377,11 @@ void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
segmentation.segments.clear();
segmentation.nsegtypes = nclusters;
segmentation.samplerate = samplerate;
-
+
Segment segment;
segment.start = 0;
segment.type = q[0];
-
+
for (int i = 1; i < len; i++)
{
if (q[i] != q[i-1])
diff --git a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h
index 528c09cb55..8f3130871e 100644
--- a/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h
+++ b/libs/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.h
@@ -72,7 +72,7 @@ public:
protected:
void makeSegmentation(int* q, int len);
-
+
void extractFeaturesConstQ(const double *, int);
void extractFeaturesMFCC(const double *, int);
@@ -82,9 +82,9 @@ protected:
MFCC* mfcc;
model_t* model; // the HMM
int* q; // the decoded HMM state sequence
- vector<vector<double> > histograms;
+ vector<vector<double> > histograms;
- feature_types featureType;
+ feature_types featureType;
double hopSize; // in seconds
double windowSize; // in seconds
diff --git a/libs/qm-dsp/dsp/segmentation/Segmenter.cpp b/libs/qm-dsp/dsp/segmentation/Segmenter.cpp
index 120a6617f5..b60fb58162 100644
--- a/libs/qm-dsp/dsp/segmentation/Segmenter.cpp
+++ b/libs/qm-dsp/dsp/segmentation/Segmenter.cpp
@@ -19,13 +19,13 @@
ostream& operator<<(ostream& os, const Segmentation& s)
{
os << "structure_name : begin_time end_time\n";
-
+
for (int i = 0; i < s.segments.size(); i++)
{
Segment seg = s.segments[i];
os << std::fixed << seg.type << ':' << '\t' << std::setprecision(6) << seg.start / static_cast<double>(s.samplerate)
<< '\t' << std::setprecision(6) << seg.end / static_cast<double>(s.samplerate) << "\n";
}
-
+
return os;
}
diff --git a/libs/qm-dsp/dsp/segmentation/Segmenter.h b/libs/qm-dsp/dsp/segmentation/Segmenter.h
index 9a77d70372..fd2f39b850 100644
--- a/libs/qm-dsp/dsp/segmentation/Segmenter.h
+++ b/libs/qm-dsp/dsp/segmentation/Segmenter.h
@@ -35,7 +35,7 @@ class Segmentation
public:
int nsegtypes; // number of segment types, so possible types are {0,1,...,nsegtypes-1}
int samplerate;
- vector<Segment> segments;
+ vector<Segment> segments;
};
ostream& operator<<(ostream& os, const Segmentation& s);
diff --git a/libs/qm-dsp/dsp/segmentation/cluster_melt.c b/libs/qm-dsp/dsp/segmentation/cluster_melt.c
index 0509480807..092bc7f078 100644
--- a/libs/qm-dsp/dsp/segmentation/cluster_melt.c
+++ b/libs/qm-dsp/dsp/segmentation/cluster_melt.c
@@ -25,7 +25,7 @@ double kldist(double* a, double* b, int n) {
because a, b represent probability distributions */
double q, d;
int i;
-
+
d = 0;
for (i = 0; i < n; i++)
{
@@ -38,8 +38,8 @@ double kldist(double* a, double* b, int n) {
d += b[i] * log(b[i] / q);
}
}
- return d;
-}
+ return d;
+}
void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
double lambda, sum, beta, logsumexp, maxlp;
@@ -48,9 +48,9 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
int** nc; /* neighbour counts for each histogram */
double** lp; /* soft assignment probs for each histogram */
int* oldc; /* previous hard assignments (to check convergence) */
-
+
/* NB h is passed as a 1d row major array */
-
+
/* parameter values */
lambda = DEFAULT_LAMBDA;
if (l > 0)
@@ -60,22 +60,22 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
B = 2 * limit + 1;
maxiter0 = 20; /* number of iterations at initial temperature */
maxiter1 = 5; /* number of iterations at subsequent temperatures */
-
- /* allocate memory */
+
+ /* allocate memory */
cl = (double**) malloc(k*sizeof(double*));
for (i= 0; i < k; i++)
cl[i] = (double*) malloc(m*sizeof(double));
-
+
nc = (int**) malloc(n*sizeof(int*));
for (i= 0; i < n; i++)
nc[i] = (int*) malloc(k*sizeof(int));
-
+
lp = (double**) malloc(n*sizeof(double*));
for (i= 0; i < n; i++)
lp[i] = (double*) malloc(k*sizeof(double));
-
+
oldc = (int*) malloc(n * sizeof(int));
-
+
/* initialise */
for (i = 0; i < k; i++)
{
@@ -90,40 +90,40 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
{
cl[i][j] /= sum; /* normalise */
}
- }
+ }
//print_array(cl, k, m);
-
+
for (i = 0; i < n; i++)
c[i] = 1; /* initially assign all histograms to cluster 1 */
-
+
for (a = 0; a < t; a++)
{
beta = Bsched[a];
-
+
if (a == 0)
maxiter = maxiter0;
else
maxiter = maxiter1;
-
+
for (it = 0; it < maxiter; it++)
{
//if (it == maxiter - 1)
// mexPrintf("hasn't converged after %d iterations\n", maxiter);
-
+
for (i = 0; i < n; i++)
{
/* save current hard assignments */
oldc[i] = c[i];
-
+
/* calculate soft assignment logprobs for each cluster */
sum = 0;
for (j = 0; j < k; j++)
{
lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m);
-
+
/* update matching neighbour counts for this histogram, based on current hard assignments */
/* old version:
- nc[i][j] = 0;
+ nc[i][j] = 0;
if (i >= limit && i <= n - 1 - limit)
{
for (b = i - limit; b <= i + limit; b++)
@@ -144,14 +144,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (b = b0; b <= b1; b++)
if (c[b] == j+1)
nc[i][j]--;
-
+
sum += exp(lp[i][j]);
}
-
+
/* normalise responsibilities and add duration logprior */
logsumexp = log(sum);
for (j = 0; j < k; j++)
- lp[i][j] -= logsumexp + lambda * nc[i][j];
+ lp[i][j] -= logsumexp + lambda * nc[i][j];
}
//print_array(lp, n, k);
/*
@@ -162,8 +162,8 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
mexPrintf("\n");
}
*/
-
-
+
+
/* update the assignments now that we know the duration priors
based on the current assignments */
for (i = 0; i < n; i++)
@@ -177,14 +177,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
c[i] = j+1;
}
}
-
+
/* break if assignments haven't changed */
i = 0;
while (i < n && oldc[i] == c[i])
i++;
if (i == n)
break;
-
+
/* update reference histograms now we know new responsibilities */
for (j = 0; j < k; j++)
{
@@ -194,21 +194,21 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (i = 0; i < n; i++)
{
cl[j][b] += exp(lp[i][j]) * h[i*m+b];
- }
+ }
}
-
- sum = 0;
+
+ sum = 0;
for (i = 0; i < n; i++)
sum += exp(lp[i][j]);
for (b = 0; b < m; b++)
cl[j][b] /= sum; /* normalise */
- }
-
+ }
+
//print_array(cl, k, m);
//mexPrintf("\n\n");
}
}
-
+
/* free memory */
for (i = 0; i < k; i++)
free(cl[i]);
@@ -219,7 +219,7 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (i = 0; i < n; i++)
free(lp[i]);
free(lp);
- free(oldc);
+ free(oldc);
}
diff --git a/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c b/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c
index 0d2762ee7f..c9f115c205 100644
--- a/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c
+++ b/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c
@@ -25,7 +25,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
int t, b, oct, ix;
//double maxchroma; /* max chroma value at each time, for normalisation */
//double sum; /* for normalisation */
-
+
for (t = 0; t < nframes; t++)
{
for (b = 0; b < bins; b++)
@@ -50,7 +50,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
maxchroma = chroma[t][b];
if (maxchroma > 0)
for (b = 0; b < bins; b++)
- chroma[t][b] /= maxchroma;
+ chroma[t][b] /= maxchroma;
*/
}
}
@@ -62,13 +62,13 @@ void mpeg7_constq(double** features, int nframes, int ncoeff)
double ss;
double env;
double maxenv = 0;
-
+
/* convert const-Q features to dB scale */
for (i = 0; i < nframes; i++)
for (j = 0; j < ncoeff; j++)
features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
-
- /* normalise each feature vector and add the norm as an extra feature dimension */
+
+ /* normalise each feature vector and add the norm as an extra feature dimension */
for (i = 0; i < nframes; i++)
{
ss = 0;
@@ -83,7 +83,7 @@ void mpeg7_constq(double** features, int nframes, int ncoeff)
}
/* normalise the envelopes */
for (i = 0; i < nframes; i++)
- features[i][ncoeff] /= maxenv;
+ features[i][ncoeff] /= maxenv;
}
/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
@@ -109,7 +109,7 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
for (j = 0; j < m; j++)
h[i*m+j] /= norm;
}
-
+
/* duplicate histograms at beginning and end to create one histogram for each data value supplied */
for (i = 0; i < hlen/2; i++)
for (j = 0; j < m; j++)
@@ -124,7 +124,7 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
int histogram_length, int nclusters, int neighbour_limit)
{
int i, j;
-
+
/*****************************/
if (0) {
/* try just using the predominant bin number as a 'decoded state' */
@@ -141,44 +141,44 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
{
maxval = features[i][j];
maxbin = j;
- }
+ }
}
if (maxval > chroma_thresh)
q[i] = maxbin;
else
q[i] = feature_length;
}
-
+
}
if (1) {
/*****************************/
-
-
+
+
/* scale all the features to 'balance covariances' during HMM training */
double scale = 10;
for (i = 0; i < frames_read; i++)
for (j = 0; j < feature_length; j++)
features[i][j] *= scale;
-
+
/* train an HMM on the features */
-
+
/* create a model */
model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
-
+
/* train the model */
hmm_train(features, frames_read, model);
-/*
+/*
printf("\n\nafter training:\n");
hmm_print(model);
-*/
+*/
/* decode the hidden state sequence */
viterbi_decode(features, frames_read, model, q);
hmm_close(model);
-
+
/*****************************/
}
/*****************************/
-
+
/*
fprintf(stderr, "HMM state sequence:\n");
@@ -186,11 +186,11 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
fprintf(stderr, "%d ", q[i]);
fprintf(stderr, "\n\n");
*/
-
+
/* create histograms of states */
double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
create_histograms(q, frames_read, nHMM_states, histogram_length, h);
-
+
/* cluster the histograms */
int nbsched = 20; /* length of inverse temperature schedule */
double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
@@ -200,9 +200,9 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
for (i = 1; i < nbsched; i++)
bsched[i] = alpha * bsched[i-1];
cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
-
+
/* now q holds a sequence of cluster assignments */
-
+
free(h);
free(bsched);
}
@@ -214,25 +214,25 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
int feature_length;
double** chroma;
int i;
-
+
if (feature_type == FEATURE_TYPE_CONSTQ)
{
/* fprintf(stderr, "Converting to dB and normalising...\n");
- */
+ */
mpeg7_constq(features, frames_read, ncoeff);
-/*
+/*
fprintf(stderr, "Running PCA...\n");
-*/
+*/
/* do PCA on the features (but not the envelope) */
int ncomponents = 20;
pca_project(features, frames_read, ncoeff, ncomponents);
-
+
/* copy the envelope so that it immediatly follows the chosen components */
for (i = 0; i < frames_read; i++)
- features[i][ncomponents] = features[i][ncoeff];
-
+ features[i][ncomponents] = features[i][ncoeff];
+
feature_length = ncomponents + 1;
-
+
/**************************************
//TEST
// feature file name
@@ -241,7 +241,7 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
strcpy(file_name, dir);
strcat(file_name, trackname);
strcat(file_name, "_features_c20r8h0.2f0.6.mat");
-
+
// get the features from Matlab from mat-file
int frames_in_file;
readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
@@ -254,27 +254,27 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
--missing_frames;
}
-
+
free(file_name);
******************************************/
-
+
cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
}
-
+
if (feature_type == FEATURE_TYPE_CHROMA)
{
/*
fprintf(stderr, "Converting to chroma features...\n");
-*/
+*/
/* convert constant-Q to normalised chroma features */
chroma = (double**) malloc(frames_read*sizeof(double*));
for (i = 0; i < frames_read; i++)
chroma[i] = (double*) malloc(bins*sizeof(double));
cq2chroma(features, frames_read, ncoeff, bins, chroma);
feature_length = bins;
-
+
cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
-
+
for (i = 0; i < frames_read; i++)
free(chroma[i]);
free(chroma);
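
For context, the call pattern these files implement can be sketched as a small standalone driver. This is a hypothetical, minimal sketch that assumes linkage against cluster_melt.c and cluster_segmenter.c: the prototypes and argument order are copied from the hunks above, while the numeric constants (cluster count, neighbourhood limit, histogram window, temperature schedule values) are illustrative assumptions rather than qm-dsp defaults.

/* Hypothetical driver for the histogram-clustering stage touched above:
   build per-frame HMM-state histograms, then anneal them into cluster
   assignments.  Prototypes are taken from the diff; constants are guesses. */
#include <stdlib.h>

void create_histograms(int* x, int nx, int m, int hlen, double* h);
void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c);

void toy_cluster(int *q, int nframes, int nstates)
{
    int i;
    int nclusters = 4;          /* assumed */
    int neighbour_limit = 8;    /* assumed */
    int histogram_length = 15;  /* assumed; create_histograms needs an odd window */
    int nbsched = 20;           /* schedule length, as in cluster_segment() */

    /* one histogram of state occupancy per frame, row major,
       as cluster_melt() expects */
    double *h = (double*) malloc(nframes * nstates * sizeof(double));
    create_histograms(q, nframes, nstates, histogram_length, h);

    /* geometric inverse-temperature schedule, as in cluster_segment();
       the starting value and growth factor here are assumptions */
    double *bsched = (double*) malloc(nbsched * sizeof(double));
    bsched[0] = 0.02;
    for (i = 1; i < nbsched; i++)
        bsched[i] = 1.4 * bsched[i-1];

    /* on return, q[i] holds the cluster assignment for frame i */
    cluster_melt(h, nstates, nframes, bsched, nbsched,
                 nclusters, neighbour_limit, q);

    free(h);
    free(bsched);
}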