diff options
Diffstat (limited to 'libs/qm-dsp/dsp/segmentation/cluster_segmenter.c')
-rw-r--r-- | libs/qm-dsp/dsp/segmentation/cluster_segmenter.c | 285 |
1 files changed, 0 insertions, 285 deletions
diff --git a/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c b/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c deleted file mode 100644 index 2a6b196921..0000000000 --- a/libs/qm-dsp/dsp/segmentation/cluster_segmenter.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * cluster_segmenter.c - * soundbite - * - * Created by Mark Levy on 06/04/2006. - * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. - * - */ - -#include "cluster_segmenter.h" - -extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d); -extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); - -/* converts constant-Q features to normalised chroma */ -void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) -{ - int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ - int t, b, oct, ix; - //double maxchroma; /* max chroma value at each time, for normalisation */ - //double sum; /* for normalisation */ - - for (t = 0; t < nframes; t++) - { - for (b = 0; b < bins; b++) - chroma[t][b] = 0; - for (oct = 0; oct < noct; oct++) - { - ix = oct * bins; - for (b = 0; b < bins; b++) - chroma[t][b] += fabs(cq[t][ix+b]); - } - /* normalise to unit sum - sum = 0; - for (b = 0; b < bins; b++) - sum += chroma[t][b]; - for (b = 0; b < bins; b++) - chroma[t][b] /= sum; - */ - /* normalise to unit max - NO this made results much worse! - maxchroma = 0; - for (b = 0; b < bins; b++) - if (chroma[t][b] > maxchroma) - maxchroma = chroma[t][b]; - if (maxchroma > 0) - for (b = 0; b < bins; b++) - chroma[t][b] /= maxchroma; - */ - } -} - -/* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ -void mpeg7_constq(double** features, int nframes, int ncoeff) -{ - int i, j; - double ss; - double env; - double maxenv = 0; - - /* convert const-Q features to dB scale */ - for (i = 0; i < nframes; i++) - for (j = 0; j < ncoeff; j++) - features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); - - /* normalise each feature vector and add the norm as an extra feature dimension */ - for (i = 0; i < nframes; i++) - { - ss = 0; - for (j = 0; j < ncoeff; j++) - ss += features[i][j] * features[i][j]; - env = sqrt(ss); - for (j = 0; j < ncoeff; j++) - features[i][j] /= env; - features[i][ncoeff] = env; - if (env > maxenv) - maxenv = env; - } - /* normalise the envelopes */ - for (i = 0; i < nframes; i++) - features[i][ncoeff] /= maxenv; -} - -/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ -/* NB h is a vector in row major order, as required by cluster_melt() */ -/* for historical reasons we normalise the histograms by their norm (not to sum to one) */ -void create_histograms(int* x, int nx, int m, int hlen, double* h) -{ - int i, j, t; - double norm; - - for (i = 0; i < nx*m; i++) - h[i] = 0; - - for (i = hlen/2; i < nx-hlen/2; i++) - { - for (j = 0; j < m; j++) - h[i*m+j] = 0; - for (t = i-hlen/2; t <= i+hlen/2; t++) - ++h[i*m+x[t]]; - norm = 0; - for (j = 0; j < m; j++) - norm += h[i*m+j] * h[i*m+j]; - for (j = 0; j < m; j++) - h[i*m+j] /= norm; - } - - /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ - for (i = 0; i < hlen/2; i++) - for (j = 0; j < m; j++) - h[i*m+j] = h[hlen/2*m+j]; - for (i = nx-hlen/2; i < nx; i++) - for (j = 0; j < m; j++) - h[i*m+j] = h[(nx-hlen/2-1)*m+j]; -} - -/* segment using HMM and then histogram clustering */ -void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, - int histogram_length, int nclusters, int neighbour_limit) -{ - int i, j; - - /*****************************/ - if (0) { - /* try just using the predominant bin number as a 'decoded state' */ - nHMM_states = feature_length + 1; /* allow a 'zero' state */ - double chroma_thresh = 0.05; - double maxval; - int maxbin; - for (i = 0; i < frames_read; i++) - { - maxval = 0; - for (j = 0; j < feature_length; j++) - { - if (features[i][j] > maxval) - { - maxval = features[i][j]; - maxbin = j; - } - } - if (maxval > chroma_thresh) - q[i] = maxbin; - else - q[i] = feature_length; - } - - } - if (1) { - /*****************************/ - - - /* scale all the features to 'balance covariances' during HMM training */ - double scale = 10; - for (i = 0; i < frames_read; i++) - for (j = 0; j < feature_length; j++) - features[i][j] *= scale; - - /* train an HMM on the features */ - - /* create a model */ - model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); - - /* train the model */ - hmm_train(features, frames_read, model); -/* - printf("\n\nafter training:\n"); - hmm_print(model); -*/ - /* decode the hidden state sequence */ - viterbi_decode(features, frames_read, model, q); - hmm_close(model); - - /*****************************/ - } - /*****************************/ - - -/* - fprintf(stderr, "HMM state sequence:\n"); - for (i = 0; i < frames_read; i++) - fprintf(stderr, "%d ", q[i]); - fprintf(stderr, "\n\n"); -*/ - - /* create histograms of states */ - double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ - create_histograms(q, frames_read, nHMM_states, histogram_length, h); - - /* cluster the histograms */ - int nbsched = 20; /* length of inverse temperature schedule */ - double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ - double b0 = 100; - double alpha = 0.7; - bsched[0] = b0; - for (i = 1; i < nbsched; i++) - bsched[i] = alpha * bsched[i-1]; - cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); - - /* now q holds a sequence of cluster assignments */ - - free(h); - free(bsched); -} - -/* segment constant-Q or chroma features */ -void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, - int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) -{ - int feature_length; - double** chroma; - int i; - - if (feature_type == FEATURE_TYPE_CONSTQ) - { -/* fprintf(stderr, "Converting to dB and normalising...\n"); - */ - mpeg7_constq(features, frames_read, ncoeff); -/* - fprintf(stderr, "Running PCA...\n"); -*/ - /* do PCA on the features (but not the envelope) */ - int ncomponents = 20; - pca_project(features, frames_read, ncoeff, ncomponents); - - /* copy the envelope so that it immediatly follows the chosen components */ - for (i = 0; i < frames_read; i++) - features[i][ncomponents] = features[i][ncoeff]; - - feature_length = ncomponents + 1; - - /************************************** - //TEST - // feature file name - char* dir = "/Users/mark/documents/semma/audio/"; - char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char)); - strcpy(file_name, dir); - strcat(file_name, trackname); - strcat(file_name, "_features_c20r8h0.2f0.6.mat"); - - // get the features from Matlab from mat-file - int frames_in_file; - readmatarray_size(file_name, 2, &frames_in_file, &feature_length); - readmatarray(file_name, 2, frames_in_file, feature_length, features); - // copy final frame to ensure that we get as many as we expected - int missing_frames = frames_read - frames_in_file; - while (missing_frames > 0) - { - for (i = 0; i < feature_length; i++) - features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i]; - --missing_frames; - } - - free(file_name); - ******************************************/ - - cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); - } - - if (feature_type == FEATURE_TYPE_CHROMA) - { -/* - fprintf(stderr, "Converting to chroma features...\n"); -*/ - /* convert constant-Q to normalised chroma features */ - chroma = (double**) malloc(frames_read*sizeof(double*)); - for (i = 0; i < frames_read; i++) - chroma[i] = (double*) malloc(bins*sizeof(double)); - cq2chroma(features, frames_read, ncoeff, bins, chroma); - feature_length = bins; - - cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); - - for (i = 0; i < frames_read; i++) - free(chroma[i]); - free(chroma); - } -} - - - |