From ee2a1b7bea2010a6244c5dadf2ee02c4433c1658 Mon Sep 17 00:00:00 2001 From: Robin Gareus Date: Thu, 6 Oct 2016 00:40:33 +0200 Subject: update/include Queen Mary Vamp plugin set --- libs/vamp-plugins/BarBeatTrack.cpp | 484 +++++++ libs/vamp-plugins/BarBeatTrack.h | 69 + libs/vamp-plugins/BeatTrack.cpp | 584 +++++++++ libs/vamp-plugins/BeatTrack.h | 72 ++ libs/vamp-plugins/ChromagramPlugin.cpp | 416 ++++++ libs/vamp-plugins/ChromagramPlugin.h | 72 ++ libs/vamp-plugins/KeyDetect.cpp | 407 ++++++ libs/vamp-plugins/KeyDetect.h | 69 + libs/vamp-plugins/SimilarityPlugin.cpp | 939 ++++++++++++++ libs/vamp-plugins/SimilarityPlugin.h | 123 ++ libs/vamp-plugins/TonalChangeDetect.cpp | 443 +++++++ libs/vamp-plugins/TonalChangeDetect.h | 84 ++ libs/vamp-plugins/Transcription.cpp | 2117 +++++++++++++++++++++++++++++++ libs/vamp-plugins/Transcription.h | 81 ++ libs/vamp-plugins/plugins.cpp | 63 +- libs/vamp-plugins/wscript | 7 + 16 files changed, 6010 insertions(+), 20 deletions(-) create mode 100644 libs/vamp-plugins/BarBeatTrack.cpp create mode 100644 libs/vamp-plugins/BarBeatTrack.h create mode 100644 libs/vamp-plugins/BeatTrack.cpp create mode 100644 libs/vamp-plugins/BeatTrack.h create mode 100644 libs/vamp-plugins/ChromagramPlugin.cpp create mode 100644 libs/vamp-plugins/ChromagramPlugin.h create mode 100644 libs/vamp-plugins/KeyDetect.cpp create mode 100644 libs/vamp-plugins/KeyDetect.h create mode 100644 libs/vamp-plugins/SimilarityPlugin.cpp create mode 100644 libs/vamp-plugins/SimilarityPlugin.h create mode 100644 libs/vamp-plugins/TonalChangeDetect.cpp create mode 100644 libs/vamp-plugins/TonalChangeDetect.h create mode 100644 libs/vamp-plugins/Transcription.cpp create mode 100644 libs/vamp-plugins/Transcription.h (limited to 'libs') diff --git a/libs/vamp-plugins/BarBeatTrack.cpp b/libs/vamp-plugins/BarBeatTrack.cpp new file mode 100644 index 0000000000..2f3016cc40 --- /dev/null +++ b/libs/vamp-plugins/BarBeatTrack.cpp @@ -0,0 +1,484 @@ +/* -*- c-basic-offset: 4 
indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "BarBeatTrack.h" + +#include +#include +#include +#include +#include /* NOTE(review): the targets of the bare #include lines and the template arguments of the vector declarations below appear to have been lost when this patch was extracted; restore them from the original commit */ + +using std::string; +using std::vector; +using std::cerr; +using std::endl; + +#ifndef __GNUC__ +#include +#endif + +float BarBeatTracker::m_stepSecs = 0.01161; // step duration in seconds: 512 samples at 44100 Hz + +class BarBeatTrackerData /* working state built by initialise(): detection function, downbeat tracker and the detection-function values gathered so far */ +{ +public: + BarBeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) { + df = new DetectionFunction(config); + // decimation factor aims at resampling to c. 3KHz; must be power of 2 + int factor = MathUtilities::nextPowerOfTwo(rate / 3000); +// std::cerr << "BarBeatTrackerData: factor = " << factor << std::endl; + downBeat = new DownBeat(rate, factor, config.stepSize); + } + ~BarBeatTrackerData() { + delete df; + delete downBeat; + } + void reset() { /* rebuilds df from the stored config and clears gathered output; downBeat is kept, only its audio buffer is reset */ + delete df; + df = new DetectionFunction(dfConfig); + dfOutput.clear(); + downBeat->resetAudioBuffer(); + origin = Vamp::RealTime::zeroTime; + } + + DFConfig dfConfig; /* copy of the config, kept so reset() can rebuild df */ + DetectionFunction *df; + DownBeat *downBeat; + vector dfOutput; /* one detection-function value per process() call */ + Vamp::RealTime origin; /* timestamp of the first processed block */ +}; + + +BarBeatTracker::BarBeatTracker(float inputSampleRate) : + Vamp::Plugin(inputSampleRate), + m_d(0), + m_bpb(4), + m_alpha(0.9), // changes are as per the BeatTrack.cpp + m_tightness(4.), // changes are as per the BeatTrack.cpp + m_inputtempo(120.), // changes are as per the BeatTrack.cpp + m_constraintempo(false) // changes are as per the BeatTrack.cpp +{ +} + +BarBeatTracker::~BarBeatTracker() +{ + delete m_d; +} + +string +BarBeatTracker::getIdentifier() const +{ + return 
"qm-barbeattracker"; +} + +string +BarBeatTracker::getName() const +{ + return "Bar and Beat Tracker"; +} + +string +BarBeatTracker::getDescription() const +{ + return "Estimate bar and beat locations"; +} + +string +BarBeatTracker::getMaker() const +{ + return "Queen Mary, University of London"; +} + +int +BarBeatTracker::getPluginVersion() const +{ + return 3; +} + +string +BarBeatTracker::getCopyright() const +{ + return "Plugin by Matthew Davies, Christian Landone and Chris Cannam. Copyright (c) 2006-2013 QMUL - All Rights Reserved"; +} + +BarBeatTracker::ParameterList +BarBeatTracker::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; /* reused for every parameter below: fields that are not reassigned keep the previous parameter's value */ + + desc.identifier = "bpb"; + desc.name = "Beats per Bar"; + desc.description = "The number of beats in each bar"; + desc.minValue = 2; + desc.maxValue = 16; + desc.defaultValue = 4; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + // changes are as per the BeatTrack.cpp + //Alpha Parameter of Beat Tracker + desc.identifier = "alpha"; + desc.name = "Alpha"; + desc.description = "Inertia - Flexibility Trade Off"; + desc.minValue = 0.1; + desc.maxValue = 0.99; + desc.defaultValue = 0.90; + desc.unit = ""; + desc.isQuantized = false; + list.push_back(desc); + + // We aren't exposing tightness as a parameter, it's fixed at 4 + + // changes are as per the BeatTrack.cpp + //User input tempo + desc.identifier = "inputtempo"; + desc.name = "Tempo Hint"; + desc.description = "User-defined tempo on which to centre the tempo preference function"; + desc.minValue = 50; + desc.maxValue = 250; + desc.defaultValue = 120; + desc.unit = "BPM"; + desc.isQuantized = true; /* quantizeStep is still 1, carried over from "bpb" */ + list.push_back(desc); + + // changes are as per the BeatTrack.cpp + desc.identifier = "constraintempo"; + desc.name = "Constrain Tempo"; + desc.description = "Constrain more tightly around the tempo hint, using a Gaussian weighting instead of Rayleigh"; + desc.minValue = 0; + desc.maxValue = 1; + 
desc.defaultValue = 0; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.unit = ""; + desc.valueNames.clear(); + list.push_back(desc); + + + return list; +} + +float +BarBeatTracker::getParameter(std::string name) const +{ + if (name == "bpb") { + return m_bpb; + } else if (name == "alpha") { + return m_alpha; + } else if (name == "inputtempo") { + return m_inputtempo; + } else if (name == "constraintempo") { + return m_constraintempo ? 1.0 : 0.0; + } + return 0.0; /* unrecognised parameter name */ +} + +void +BarBeatTracker::setParameter(std::string name, float value) +{ + if (name == "bpb") { + m_bpb = lrintf(value); /* NOTE(review): only stored here; the value is handed to DownBeat in initialise(), so a change made afterwards is not forwarded */ + } else if (name == "alpha") { + m_alpha = value; + } else if (name == "inputtempo") { + m_inputtempo = value; + } else if (name == "constraintempo") { + m_constraintempo = (value > 0.5); /* float value treated as an on/off switch */ + } +} + +bool +BarBeatTracker::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (m_d) { /* discard state left by an earlier initialise() */ + delete m_d; + m_d = 0; + } + + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) { + std::cerr << "BarBeatTracker::initialise: Unsupported channel count: " + << channels << std::endl; + return false; + } + + if (stepSize != getPreferredStepSize()) { + std::cerr << "ERROR: BarBeatTracker::initialise: Unsupported step size for this sample rate: " + << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl; + return false; + } + + if (blockSize != getPreferredBlockSize()) { + std::cerr << "WARNING: BarBeatTracker::initialise: Sub-optimal block size for this sample rate: " + << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl; +// return false; (disabled: a block size mismatch only produces the warning above) + } + + DFConfig dfConfig; + dfConfig.DFType = DF_COMPLEXSD; + dfConfig.stepSize = stepSize; + dfConfig.frameLength = blockSize; + dfConfig.dbRise = 3; + dfConfig.adaptiveWhitening = false; + dfConfig.whiteningRelaxCoeff = -1; + dfConfig.whiteningFloor = -1; + + m_d = new BarBeatTrackerData(m_inputSampleRate, dfConfig); + m_d->downBeat->setBeatsPerBar(m_bpb); + return true; +} + 
+void +BarBeatTracker::reset() +{ + if (m_d) m_d->reset(); +} + +size_t +BarBeatTracker::getPreferredStepSize() const +{ + size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001); /* step duration converted to whole samples at the input rate */ + if (step < 1) step = 1; /* never report a zero step */ +// std::cerr << "BarBeatTracker::getPreferredStepSize: input sample rate is " << m_inputSampleRate << ", step size is " << step << std::endl; + return step; +} + +size_t +BarBeatTracker::getPreferredBlockSize() const +{ + size_t theoretical = getPreferredStepSize() * 2; /* blocks are twice the step, so successive blocks overlap by half */ + + // I think this is not necessarily going to be a power of two, and + // the host might have a problem with that, but I'm not sure we + // can do much about it here + return theoretical; +} + +BarBeatTracker::OutputList +BarBeatTracker::getOutputDescriptors() const +{ + OutputList list; + + OutputDescriptor beat; + beat.identifier = "beats"; + beat.name = "Beats"; + beat.description = "Beat locations labelled with metrical position"; + beat.unit = ""; + beat.hasFixedBinCount = true; + beat.binCount = 0; /* features on this output carry a time and a label but no values */ + beat.sampleType = OutputDescriptor::VariableSampleRate; + beat.sampleRate = 1.0 / m_stepSecs; + + OutputDescriptor bars; + bars.identifier = "bars"; + bars.name = "Bars"; + bars.description = "Bar locations"; + bars.unit = ""; + bars.hasFixedBinCount = true; + bars.binCount = 0; /* as for beats: time and label only */ + bars.sampleType = OutputDescriptor::VariableSampleRate; + bars.sampleRate = 1.0 / m_stepSecs; + + OutputDescriptor beatcounts; + beatcounts.identifier = "beatcounts"; + beatcounts.name = "Beat Count"; + beatcounts.description = "Beat counter function"; + beatcounts.unit = ""; + beatcounts.hasFixedBinCount = true; + beatcounts.binCount = 1; + beatcounts.sampleType = OutputDescriptor::VariableSampleRate; + beatcounts.sampleRate = 1.0 / m_stepSecs; + + OutputDescriptor beatsd; + beatsd.identifier = "beatsd"; + beatsd.name = "Beat Spectral Difference"; + beatsd.description = "Beat spectral difference function used for bar-line detection"; + beatsd.unit = ""; + beatsd.hasFixedBinCount = true; + beatsd.binCount = 1; 
+ beatsd.sampleType = OutputDescriptor::VariableSampleRate; + beatsd.sampleRate = 1.0 / m_stepSecs; + + list.push_back(beat); /* push order fixes the output indices used in barBeatTrack(): 0 beats, 1 bars, 2 beatcounts, 3 beatsd */ + list.push_back(bars); + list.push_back(beatcounts); + list.push_back(beatsd); + + return list; +} + +BarBeatTracker::FeatureSet +BarBeatTracker::process(const float *const *inputBuffers, + Vamp::RealTime timestamp) +{ + if (!m_d) { + cerr << "ERROR: BarBeatTracker::process: " + << "BarBeatTracker has not been initialised" + << endl; + return FeatureSet(); + } + + // We use time domain input, because DownBeat requires it -- so we + // use the time-domain version of DetectionFunction::process which + // does its own FFT. It requires doubles as input, so we need to + // make a temporary copy + + // We only support a single input channel + + const int fl = m_d->dfConfig.frameLength; +#ifndef __GNUC__ + double *dfinput = (double *)alloca(fl * sizeof(double)); +#else + double dfinput[fl]; /* NOTE(review): runtime-sized array, a compiler extension rather than standard C++; in both branches frameLength doubles are placed on the stack */ +#endif + for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i]; + + double output = m_d->df->processTimeDomain(dfinput); + + if (m_d->dfOutput.empty()) m_d->origin = timestamp; /* remember when the first block started */ + +// std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl; + m_d->dfOutput.push_back(output); + + // Downsample and store the incoming audio block. + // We have an overlap on the incoming audio stream (step size is + // half block size) -- this function is configured to take only a + // step size's worth, so effectively ignoring the overlap. 
Note + // however that this means we omit the last blocksize - stepsize + // samples completely for the purposes of barline detection + // (hopefully not a problem) + m_d->downBeat->pushAudioBlock(inputBuffers[0]); + + return FeatureSet(); /* nothing is emitted per block; all features come from getRemainingFeatures() */ +} + +BarBeatTracker::FeatureSet +BarBeatTracker::getRemainingFeatures() +{ + if (!m_d) { + cerr << "ERROR: BarBeatTracker::getRemainingFeatures: " + << "BarBeatTracker has not been initialised" + << endl; + return FeatureSet(); + } + + return barBeatTrack(); +} + +BarBeatTracker::FeatureSet +BarBeatTracker::barBeatTrack() +{ + vector df; + vector beatPeriod; + vector tempi; + + for (size_t i = 2; i < m_d->dfOutput.size(); ++i) { // discard first two elements + df.push_back(m_d->dfOutput[i]); + beatPeriod.push_back(0.0); + } + if (df.empty()) return FeatureSet(); + + TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize); + + // changes are as per the BeatTrack.cpp - allow m_inputtempo and m_constraintempo to be set by the user + tt.calculateBeatPeriod(df, beatPeriod, tempi, m_inputtempo, m_constraintempo); + + vector beats; + // changes are as per the BeatTrack.cpp - allow m_alpha and m_tightness to be set by the user + tt.calculateBeats(df, beatPeriod, beats, m_alpha, m_tightness); + + // tt.calculateBeatPeriod(df, beatPeriod, tempi, 0., 0); // use default parameters + + // vector beats; + // tt.calculateBeats(df, beatPeriod, beats, 0.9, 4.); // use default parameters until i fix this plugin too + + vector downbeats; + size_t downLength = 0; + const float *downsampled = m_d->downBeat->getBufferedAudio(downLength); + m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats); + + vector beatsd; + m_d->downBeat->getBeatSD(beatsd); + +// std::cerr << "BarBeatTracker: found downbeats at: "; +// for (int i = 0; i < downbeats.size(); ++i) std::cerr << downbeats[i] << " " << std::endl; + + FeatureSet returnFeatures; + + char label[20]; + + int dbi = 0; /* index of the next unconsumed entry in downbeats */ + int beat = 0; /* zero-based position within the bar */ + int bar = 0; /* running bar number */ + + if (!downbeats.empty()) { + // get 
the right number for the first beat; this will be + // incremented before use (at top of the following loop) + int firstDown = downbeats[0]; + beat = m_bpb - firstDown - 1; + if (beat == m_bpb) beat = 0; /* NOTE(review): given the line above, this can only hold when firstDown is -1 */ + } + + for (size_t i = 0; i < beats.size(); ++i) { + + size_t frame = beats[i] * m_d->dfConfig.stepSize; /* presumably beats[i] counts detection-function steps, scaled here to sample frames; confirm against TempoTrackV2 */ + + if (dbi < downbeats.size() && i == downbeats[dbi]) { /* beat i is the next downbeat: start a new bar. NOTE(review): dbi is a signed int compared with size() */ + beat = 0; + ++bar; + ++dbi; + } else { + ++beat; + } + + // outputs are: + // + // 0 -> beats + // 1 -> bars + // 2 -> beat counter function (3 -> beat spectral difference, see below) + + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime + (frame, lrintf(m_inputSampleRate)); + + sprintf(label, "%d", beat + 1); /* one-based beat number within the bar */ + feature.label = label; + returnFeatures[0].push_back(feature); // labelled beats + + feature.values.push_back(beat + 1); + returnFeatures[2].push_back(feature); // beat function + + if (i > 0 && i <= beatsd.size()) { /* beatsd[i-1] is reported at the time of beat i */ + feature.values.clear(); + feature.values.push_back(beatsd[i-1]); + feature.label = ""; + returnFeatures[3].push_back(feature); // beat spectral difference + } + + if (beat == 0) { /* first beat of a bar also yields a value-less bar feature labelled with the bar number */ + feature.values.clear(); + sprintf(label, "%d", bar); + feature.label = label; + returnFeatures[1].push_back(feature); // bars + } + } + + return returnFeatures; +} + diff --git a/libs/vamp-plugins/BarBeatTrack.h b/libs/vamp-plugins/BarBeatTrack.h new file mode 100644 index 0000000000..0edc70a94b --- /dev/null +++ b/libs/vamp-plugins/BarBeatTrack.h @@ -0,0 +1,69 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. 
+*/ + +#ifndef _BAR_BEAT_TRACK_PLUGIN_H_ +#define _BAR_BEAT_TRACK_PLUGIN_H_ /* NOTE(review): names starting with an underscore followed by a capital letter are reserved to the implementation in C++ */ + +#include + +class BarBeatTrackerData; /* defined in BarBeatTrack.cpp; only used through the m_d pointer here */ + +class BarBeatTracker : public Vamp::Plugin /* Vamp plugin that takes time-domain input and reports beats, bars, a beat counter and a beat spectral difference */ +{ +public: + BarBeatTracker(float inputSampleRate); + virtual ~BarBeatTracker(); + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + InputDomain getInputDomain() const { return TimeDomain; } + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string) const; + void setParameter(std::string, float); + + size_t getPreferredStepSize() const; + size_t getPreferredBlockSize() const; + + OutputList getOutputDescriptors() const; + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + BarBeatTrackerData *m_d; /* null until initialise() succeeds; deleted in the destructor */ + static float m_stepSecs; /* step duration in seconds, shared by all instances */ + int m_bpb; /* beats per bar (the "bpb" parameter) */ + FeatureSet barBeatTrack(); /* runs the whole analysis; called from getRemainingFeatures() */ + + // MEPD new protected parameters to allow the user to control these advanced parameters of the beat tracker + // changes are as per the BeatTrack.h + double m_alpha; + double m_tightness; /* kept at its constructor default; not exposed as a parameter */ + double m_inputtempo; + bool m_constraintempo; +}; + + +#endif diff --git a/libs/vamp-plugins/BeatTrack.cpp b/libs/vamp-plugins/BeatTrack.cpp new file mode 100644 index 0000000000..00ff40d6ee --- /dev/null +++ b/libs/vamp-plugins/BeatTrack.cpp @@ -0,0 +1,584 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
See the file + COPYING included with this distribution for more information. +*/ + +#include "BeatTrack.h" + +#include +#include +#include +#include + +using std::string; +using std::vector; +using std::cerr; +using std::endl; + +float BeatTracker::m_stepSecs = 0.01161; // 512 samples at 44100 + +#define METHOD_OLD 0 +#define METHOD_NEW 1 + +class BeatTrackerData +{ +public: + BeatTrackerData(const DFConfig &config) : dfConfig(config) { + df = new DetectionFunction(config); + } + ~BeatTrackerData() { + delete df; + } + void reset() { + delete df; + df = new DetectionFunction(dfConfig); + dfOutput.clear(); + origin = Vamp::RealTime::zeroTime; + } + + DFConfig dfConfig; + DetectionFunction *df; + vector dfOutput; + Vamp::RealTime origin; +}; + + +BeatTracker::BeatTracker(float inputSampleRate) : + Vamp::Plugin(inputSampleRate), + m_d(0), + m_method(METHOD_NEW), + m_dfType(DF_COMPLEXSD), + m_whiten(false), + m_alpha(0.9), // MEPD new exposed parameter for beat tracker, default value = 0.9 (as old version) + m_tightness(4.), + m_inputtempo(120.), // MEPD new exposed parameter for beat tracker, default value = 120. (as old version) + m_constraintempo(false) // MEPD new exposed parameter for beat tracker, default value = false (as old version) + // calling the beat tracker with these default parameters will give the same output as the previous existing version + +{ +} + +BeatTracker::~BeatTracker() +{ + delete m_d; +} + +string +BeatTracker::getIdentifier() const +{ + return "qm-tempotracker"; +} + +string +BeatTracker::getName() const +{ + return "Tempo and Beat Tracker"; +} + +string +BeatTracker::getDescription() const +{ + return "Estimate beat locations and tempo"; +} + +string +BeatTracker::getMaker() const +{ + return "Queen Mary, University of London"; +} + +int +BeatTracker::getPluginVersion() const +{ + return 6; +} + +string +BeatTracker::getCopyright() const +{ + return "Plugin by Christian Landone and Matthew Davies. 
Copyright (c) 2006-2013 QMUL - All Rights Reserved"; +} + +BeatTracker::ParameterList +BeatTracker::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; + + desc.identifier = "method"; + desc.name = "Beat Tracking Method"; + desc.description = "Basic method to use "; + desc.minValue = 0; + desc.maxValue = 1; + desc.defaultValue = METHOD_NEW; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.valueNames.push_back("Old"); + desc.valueNames.push_back("New"); + list.push_back(desc); + + desc.identifier = "dftype"; + desc.name = "Onset Detection Function Type"; + desc.description = "Method used to calculate the onset detection function"; + desc.minValue = 0; + desc.maxValue = 4; + desc.defaultValue = 3; + desc.valueNames.clear(); + desc.valueNames.push_back("High-Frequency Content"); + desc.valueNames.push_back("Spectral Difference"); + desc.valueNames.push_back("Phase Deviation"); + desc.valueNames.push_back("Complex Domain"); + desc.valueNames.push_back("Broadband Energy Rise"); + list.push_back(desc); + + desc.identifier = "whiten"; + desc.name = "Adaptive Whitening"; + desc.description = "Normalize frequency bin magnitudes relative to recent peak levels"; + desc.minValue = 0; + desc.maxValue = 1; + desc.defaultValue = 0; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.unit = ""; + desc.valueNames.clear(); + list.push_back(desc); + + // MEPD new exposed parameter - used in the dynamic programming part of the beat tracker + //Alpha Parameter of Beat Tracker + desc.identifier = "alpha"; + desc.name = "Alpha"; + desc.description = "Inertia - Flexibility Trade Off"; + desc.minValue = 0.1; + desc.maxValue = 0.99; + desc.defaultValue = 0.90; + desc.unit = ""; + desc.isQuantized = false; + list.push_back(desc); + + // We aren't exposing tightness as a parameter, it's fixed at 4 + + // MEPD new exposed parameter - used in the periodicity estimation + //User input tempo + desc.identifier = "inputtempo"; + desc.name = "Tempo 
Hint"; + desc.description = "User-defined tempo on which to centre the tempo preference function"; + desc.minValue = 50; + desc.maxValue = 250; + desc.defaultValue = 120; + desc.unit = "BPM"; + desc.isQuantized = true; + list.push_back(desc); + + // MEPD new exposed parameter - used in periodicity estimation + desc.identifier = "constraintempo"; + desc.name = "Constrain Tempo"; + desc.description = "Constrain more tightly around the tempo hint, using a Gaussian weighting instead of Rayleigh"; + desc.minValue = 0; + desc.maxValue = 1; + desc.defaultValue = 0; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.unit = ""; + desc.valueNames.clear(); + list.push_back(desc); + + + + return list; +} + +float +BeatTracker::getParameter(std::string name) const +{ + if (name == "dftype") { + switch (m_dfType) { + case DF_HFC: return 0; + case DF_SPECDIFF: return 1; + case DF_PHASEDEV: return 2; + default: case DF_COMPLEXSD: return 3; + case DF_BROADBAND: return 4; + } + } else if (name == "method") { + return m_method; + } else if (name == "whiten") { + return m_whiten ? 1.0 : 0.0; + } else if (name == "alpha") { + return m_alpha; + } else if (name == "inputtempo") { + return m_inputtempo; + } else if (name == "constraintempo") { + return m_constraintempo ? 
1.0 : 0.0; + } + return 0.0; +} + +void +BeatTracker::setParameter(std::string name, float value) +{ + if (name == "dftype") { + switch (lrintf(value)) { + case 0: m_dfType = DF_HFC; break; + case 1: m_dfType = DF_SPECDIFF; break; + case 2: m_dfType = DF_PHASEDEV; break; + default: case 3: m_dfType = DF_COMPLEXSD; break; + case 4: m_dfType = DF_BROADBAND; break; + } + } else if (name == "method") { + m_method = lrintf(value); + } else if (name == "whiten") { + m_whiten = (value > 0.5); + } else if (name == "alpha") { + m_alpha = value; + } else if (name == "inputtempo") { + m_inputtempo = value; + } else if (name == "constraintempo") { + m_constraintempo = (value > 0.5); + } +} + +bool +BeatTracker::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (m_d) { + delete m_d; + m_d = 0; + } + + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) { + std::cerr << "BeatTracker::initialise: Unsupported channel count: " + << channels << std::endl; + return false; + } + + if (stepSize != getPreferredStepSize()) { + std::cerr << "ERROR: BeatTracker::initialise: Unsupported step size for this sample rate: " + << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl; + return false; + } + + if (blockSize != getPreferredBlockSize()) { + std::cerr << "WARNING: BeatTracker::initialise: Sub-optimal block size for this sample rate: " + << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl; +// return false; + } + + DFConfig dfConfig; + dfConfig.DFType = m_dfType; + dfConfig.stepSize = stepSize; + dfConfig.frameLength = blockSize; + dfConfig.dbRise = 3; + dfConfig.adaptiveWhitening = m_whiten; + dfConfig.whiteningRelaxCoeff = -1; + dfConfig.whiteningFloor = -1; + + m_d = new BeatTrackerData(dfConfig); + return true; +} + +void +BeatTracker::reset() +{ + if (m_d) m_d->reset(); +} + +size_t +BeatTracker::getPreferredStepSize() const +{ + size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001); +// 
std::cerr << "BeatTracker::getPreferredStepSize: input sample rate is " << m_inputSampleRate << ", step size is " << step << std::endl; + return step; +} + +size_t +BeatTracker::getPreferredBlockSize() const +{ + size_t theoretical = getPreferredStepSize() * 2; + + // I think this is not necessarily going to be a power of two, and + // the host might have a problem with that, but I'm not sure we + // can do much about it here + return theoretical; +} + +BeatTracker::OutputList +BeatTracker::getOutputDescriptors() const +{ + OutputList list; + + OutputDescriptor beat; + beat.identifier = "beats"; + beat.name = "Beats"; + beat.description = "Estimated metrical beat locations"; + beat.unit = ""; + beat.hasFixedBinCount = true; + beat.binCount = 0; + beat.sampleType = OutputDescriptor::VariableSampleRate; + beat.sampleRate = 1.0 / m_stepSecs; + + OutputDescriptor df; + df.identifier = "detection_fn"; + df.name = "Onset Detection Function"; + df.description = "Probability function of note onset likelihood"; + df.unit = ""; + df.hasFixedBinCount = true; + df.binCount = 1; + df.hasKnownExtents = false; + df.isQuantized = false; + df.sampleType = OutputDescriptor::OneSamplePerStep; + + OutputDescriptor tempo; + tempo.identifier = "tempo"; + tempo.name = "Tempo"; + tempo.description = "Locked tempo estimates"; + tempo.unit = "bpm"; + tempo.hasFixedBinCount = true; + tempo.binCount = 1; + tempo.hasKnownExtents = false; + tempo.isQuantized = false; + tempo.sampleType = OutputDescriptor::VariableSampleRate; + tempo.sampleRate = 1.0 / m_stepSecs; + + list.push_back(beat); + list.push_back(df); + list.push_back(tempo); + + return list; +} + +BeatTracker::FeatureSet +BeatTracker::process(const float *const *inputBuffers, + Vamp::RealTime timestamp) +{ + if (!m_d) { + cerr << "ERROR: BeatTracker::process: " + << "BeatTracker has not been initialised" + << endl; + return FeatureSet(); + } + + size_t len = m_d->dfConfig.frameLength / 2 + 1; + + double *reals = new double[len]; + 
double *imags = new double[len]; + + // We only support a single input channel + + for (size_t i = 0; i < len; ++i) { + reals[i] = inputBuffers[0][i*2]; + imags[i] = inputBuffers[0][i*2+1]; + } + + double output = m_d->df->processFrequencyDomain(reals, imags); + + delete[] reals; + delete[] imags; + + if (m_d->dfOutput.empty()) m_d->origin = timestamp; + + m_d->dfOutput.push_back(output); + + FeatureSet returnFeatures; + + Feature feature; + feature.hasTimestamp = false; + feature.values.push_back(output); + + returnFeatures[1].push_back(feature); // detection function is output 1 + return returnFeatures; +} + +BeatTracker::FeatureSet +BeatTracker::getRemainingFeatures() +{ + if (!m_d) { + cerr << "ERROR: BeatTracker::getRemainingFeatures: " + << "BeatTracker has not been initialised" + << endl; + return FeatureSet(); + } + + if (m_method == METHOD_OLD) return beatTrackOld(); + else return beatTrackNew(); +} + +BeatTracker::FeatureSet +BeatTracker::beatTrackOld() +{ + double aCoeffs[] = { 1.0000, -0.5949, 0.2348 }; + double bCoeffs[] = { 0.1600, 0.3200, 0.1600 }; + + TTParams ttParams; + ttParams.winLength = 512; + ttParams.lagLength = 128; + ttParams.LPOrd = 2; + ttParams.LPACoeffs = aCoeffs; + ttParams.LPBCoeffs = bCoeffs; + ttParams.alpha = 9; + ttParams.WinT.post = 8; + ttParams.WinT.pre = 7; + + TempoTrack tempoTracker(ttParams); + + vector tempi; + vector beats = tempoTracker.process(m_d->dfOutput, &tempi); + + FeatureSet returnFeatures; + + char label[100]; + + for (size_t i = 0; i < beats.size(); ++i) { + + size_t frame = beats[i] * m_d->dfConfig.stepSize; + + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime + (frame, lrintf(m_inputSampleRate)); + + float bpm = 0.0; + int frameIncrement = 0; + + if (i < beats.size() - 1) { + + frameIncrement = (beats[i+1] - beats[i]) * m_d->dfConfig.stepSize; + + // one beat is frameIncrement frames, so there are + // samplerate/frameIncrement bps, so + // 
60*samplerate/frameIncrement bpm + + if (frameIncrement > 0) { + bpm = (60.0 * m_inputSampleRate) / frameIncrement; + bpm = int(bpm * 100.0 + 0.5) / 100.0; + sprintf(label, "%.2f bpm", bpm); + feature.label = label; + } + } + + returnFeatures[0].push_back(feature); // beats are output 0 + } + + double prevTempo = 0.0; + + for (size_t i = 0; i < tempi.size(); ++i) { + + size_t frame = i * m_d->dfConfig.stepSize * ttParams.lagLength; + +// std::cerr << "unit " << i << ", step size " << m_d->dfConfig.stepSize << ", hop " << ttParams.lagLength << ", frame = " << frame << std::endl; + + if (tempi[i] > 1 && int(tempi[i] * 100) != int(prevTempo * 100)) { + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime + (frame, lrintf(m_inputSampleRate)); + feature.values.push_back(tempi[i]); + sprintf(label, "%.2f bpm", tempi[i]); + feature.label = label; + returnFeatures[2].push_back(feature); // tempo is output 2 + prevTempo = tempi[i]; + } + } + + return returnFeatures; +} + +BeatTracker::FeatureSet +BeatTracker::beatTrackNew() +{ + vector df; + vector beatPeriod; + vector tempi; + + size_t nonZeroCount = m_d->dfOutput.size(); + while (nonZeroCount > 0) { + if (m_d->dfOutput[nonZeroCount-1] > 0.0) { + break; + } + --nonZeroCount; + } + +// std::cerr << "Note: nonZeroCount was " << m_d->dfOutput.size() << ", is now " << nonZeroCount << std::endl; + + for (size_t i = 2; i < nonZeroCount; ++i) { // discard first two elts + df.push_back(m_d->dfOutput[i]); + beatPeriod.push_back(0.0); + } + if (df.empty()) return FeatureSet(); + + TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize); + + + // MEPD - note this function is now passed 2 new parameters, m_inputtempo and m_constraintempo + tt.calculateBeatPeriod(df, beatPeriod, tempi, m_inputtempo, m_constraintempo); + + vector beats; + + // MEPD - note this function is now passed 2 new parameters, m_alpha and m_tightness + tt.calculateBeats(df, beatPeriod, beats, 
m_alpha, m_tightness); + + FeatureSet returnFeatures; + + char label[100]; + + for (size_t i = 0; i < beats.size(); ++i) { + + size_t frame = beats[i] * m_d->dfConfig.stepSize; + + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime + (frame, lrintf(m_inputSampleRate)); + + float bpm = 0.0; + int frameIncrement = 0; + + if (i+1 < beats.size()) { + + frameIncrement = (beats[i+1] - beats[i]) * m_d->dfConfig.stepSize; + + // one beat is frameIncrement frames, so there are + // samplerate/frameIncrement bps, so + // 60*samplerate/frameIncrement bpm + + if (frameIncrement > 0) { + bpm = (60.0 * m_inputSampleRate) / frameIncrement; + bpm = int(bpm * 100.0 + 0.5) / 100.0; + sprintf(label, "%.2f bpm", bpm); + feature.label = label; + } + } + + returnFeatures[0].push_back(feature); // beats are output 0 + } + + double prevTempo = 0.0; + + for (size_t i = 0; i < tempi.size(); ++i) { + + size_t frame = i * m_d->dfConfig.stepSize; + + if (tempi[i] > 1 && int(tempi[i] * 100) != int(prevTempo * 100)) { + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime + (frame, lrintf(m_inputSampleRate)); + feature.values.push_back(tempi[i]); + sprintf(label, "%.2f bpm", tempi[i]); + feature.label = label; + returnFeatures[2].push_back(feature); // tempo is output 2 + prevTempo = tempi[i]; + } + } + + return returnFeatures; +} diff --git a/libs/vamp-plugins/BeatTrack.h b/libs/vamp-plugins/BeatTrack.h new file mode 100644 index 0000000000..f14fc2996a --- /dev/null +++ b/libs/vamp-plugins/BeatTrack.h @@ -0,0 +1,72 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _BEAT_TRACK_PLUGIN_H_ +#define _BEAT_TRACK_PLUGIN_H_ + +#include + +class BeatTrackerData; + +class BeatTracker : public Vamp::Plugin +{ +public: + BeatTracker(float inputSampleRate); + virtual ~BeatTracker(); + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + InputDomain getInputDomain() const { return FrequencyDomain; } + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string) const; + void setParameter(std::string, float); + + size_t getPreferredStepSize() const; + size_t getPreferredBlockSize() const; + + OutputList getOutputDescriptors() const; + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + BeatTrackerData *m_d; + int m_method; + int m_dfType; + + // MEPD new protected parameters to allow the user to control these advanced parameters of the beat tracker + double m_alpha; + double m_tightness; + double m_inputtempo; + bool m_constraintempo; + + bool m_whiten; + static float m_stepSecs; + FeatureSet beatTrackOld(); + FeatureSet beatTrackNew(); +}; + + +#endif diff --git a/libs/vamp-plugins/ChromagramPlugin.cpp b/libs/vamp-plugins/ChromagramPlugin.cpp new file mode 100644 index 0000000000..be21961916 --- /dev/null +++ b/libs/vamp-plugins/ChromagramPlugin.cpp @@ -0,0 +1,416 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM 
Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "ChromagramPlugin.h" + +#include +#include + +using std::string; +using std::vector; +using std::cerr; +using std::endl; + +ChromagramPlugin::ChromagramPlugin(float inputSampleRate) : + Vamp::Plugin(inputSampleRate), + m_chromagram(0), + m_step(0), + m_block(0) +{ + m_minMIDIPitch = 36; + m_maxMIDIPitch = 96; + m_tuningFrequency = 440; + m_normalise = MathUtilities::NormaliseNone; + m_bpo = 12; + + setupConfig(); +} + +void +ChromagramPlugin::setupConfig() +{ + m_config.FS = lrintf(m_inputSampleRate); + m_config.min = Pitch::getFrequencyForPitch + (m_minMIDIPitch, 0, m_tuningFrequency); + m_config.max = Pitch::getFrequencyForPitch + (m_maxMIDIPitch, 0, m_tuningFrequency); + m_config.BPO = m_bpo; + m_config.CQThresh = 0.0054; + m_config.normalise = m_normalise; + + m_step = 0; + m_block = 0; +} + +ChromagramPlugin::~ChromagramPlugin() +{ + delete m_chromagram; +} + +string +ChromagramPlugin::getIdentifier() const +{ + return "qm-chromagram"; +} + +string +ChromagramPlugin::getName() const +{ + return "Chromagram"; +} + +string +ChromagramPlugin::getDescription() const +{ + return "Extract a series of tonal chroma vectors from the audio"; +} + +string +ChromagramPlugin::getMaker() const +{ + return "Queen Mary, University of London"; +} + +int +ChromagramPlugin::getPluginVersion() const +{ + return 4; +} + +string +ChromagramPlugin::getCopyright() const +{ + return "Plugin by Chris Cannam and Christian Landone. 
Copyright (c) 2006-2009 QMUL - All Rights Reserved"; +} + +ChromagramPlugin::ParameterList +ChromagramPlugin::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; + desc.identifier = "minpitch"; + desc.name = "Minimum Pitch"; + desc.unit = "MIDI units"; + desc.description = "MIDI pitch corresponding to the lowest frequency to be included in the chromagram"; + desc.minValue = 0; + desc.maxValue = 127; + desc.defaultValue = 36; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "maxpitch"; + desc.name = "Maximum Pitch"; + desc.unit = "MIDI units"; + desc.description = "MIDI pitch corresponding to the highest frequency to be included in the chromagram"; + desc.minValue = 0; + desc.maxValue = 127; + desc.defaultValue = 96; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "tuning"; + desc.name = "Tuning Frequency"; + desc.unit = "Hz"; + desc.description = "Frequency of concert A"; + desc.minValue = 360; + desc.maxValue = 500; + desc.defaultValue = 440; + desc.isQuantized = false; + list.push_back(desc); + + desc.identifier = "bpo"; + desc.name = "Bins per Octave"; + desc.unit = "bins"; + desc.description = "Number of constant-Q transform bins per octave, and the number of bins for the chromagram outputs"; + desc.minValue = 2; + desc.maxValue = 480; + desc.defaultValue = 12; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "normalization"; + desc.name = "Normalization"; + desc.unit = ""; + desc.description = "Normalization for each chromagram output column"; + desc.minValue = 0; + desc.maxValue = 2; + desc.defaultValue = 0; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.valueNames.push_back("None"); + desc.valueNames.push_back("Unit Sum"); + desc.valueNames.push_back("Unit Maximum"); + list.push_back(desc); + + return list; +} + +float +ChromagramPlugin::getParameter(std::string 
param) const +{ + if (param == "minpitch") { + return m_minMIDIPitch; + } + if (param == "maxpitch") { + return m_maxMIDIPitch; + } + if (param == "tuning") { + return m_tuningFrequency; + } + if (param == "bpo") { + return m_bpo; + } + if (param == "normalization") { + return int(m_normalise); + } + std::cerr << "WARNING: ChromagramPlugin::getParameter: unknown parameter \"" + << param << "\"" << std::endl; + return 0.0; +} + +void +ChromagramPlugin::setParameter(std::string param, float value) +{ + if (param == "minpitch") { + m_minMIDIPitch = lrintf(value); + } else if (param == "maxpitch") { + m_maxMIDIPitch = lrintf(value); + } else if (param == "tuning") { + m_tuningFrequency = value; + } else if (param == "bpo") { + m_bpo = lrintf(value); + } else if (param == "normalization") { + m_normalise = MathUtilities::NormaliseType(int(value + 0.0001)); + } else { + std::cerr << "WARNING: ChromagramPlugin::setParameter: unknown parameter \"" + << param << "\"" << std::endl; + } + + setupConfig(); +} + + +bool +ChromagramPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (m_chromagram) { + delete m_chromagram; + m_chromagram = 0; + } + + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + + m_chromagram = new Chromagram(m_config); + m_binsums = vector(m_config.BPO); + + for (int i = 0; i < m_config.BPO; ++i) { + m_binsums[i] = 0.0; + } + + m_count = 0; + + m_step = m_chromagram->getHopSize(); + m_block = m_chromagram->getFrameSize(); + if (m_step < 1) m_step = 1; + + if (blockSize != m_block) { + std::cerr << "ChromagramPlugin::initialise: ERROR: supplied block size " << blockSize << " differs from required block size " << m_block << ", initialise failing" << std::endl; + delete m_chromagram; + m_chromagram = 0; + return false; + } + + if (stepSize != m_step) { + std::cerr << "ChromagramPlugin::initialise: NOTE: supplied step size " << stepSize << " differs from expected step size " << m_step << " (for 
block size = " << m_block << ")" << std::endl; + } + + return true; +} + +void +ChromagramPlugin::reset() +{ + if (m_chromagram) { + delete m_chromagram; + m_chromagram = new Chromagram(m_config); + for (int i = 0; i < m_config.BPO; ++i) { + m_binsums[i] = 0.0; + } + m_count = 0; + } +} + +size_t +ChromagramPlugin::getPreferredStepSize() const +{ + if (!m_step) { + Chromagram chroma(m_config); + m_step = chroma.getHopSize(); + m_block = chroma.getFrameSize(); + if (m_step < 1) m_step = 1; + } + + return m_step; +} + +size_t +ChromagramPlugin::getPreferredBlockSize() const +{ + if (!m_block) { + Chromagram chroma(m_config); + m_step = chroma.getHopSize(); + m_block = chroma.getFrameSize(); + if (m_step < 1) m_step = 1; + } + + return m_block; +} + +ChromagramPlugin::OutputList +ChromagramPlugin::getOutputDescriptors() const +{ + OutputList list; + + OutputDescriptor d; + d.identifier = "chromagram"; + d.name = "Chromagram"; + d.unit = ""; + d.description = "Output of chromagram, as a single vector per process block"; + d.hasFixedBinCount = true; + d.binCount = m_config.BPO; + + const char *names[] = + { "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" }; + + if (d.binCount % 12 == 0) { + for (int i = 0; i < 12; ++i) { + int ipc = m_minMIDIPitch % 12; + int index = (i + ipc) % 12; + d.binNames.push_back(names[index]); + for (int j = 0; j < int(d.binCount) / 12 - 1; ++j) { + d.binNames.push_back(""); + } + } + } else { + d.binNames.push_back(names[m_minMIDIPitch % 12]); + } + + d.hasKnownExtents = (m_normalise != MathUtilities::NormaliseNone); + d.minValue = 0.0; + d.maxValue = (d.hasKnownExtents ? 
1.0 : 0.0); + d.isQuantized = false; + d.sampleType = OutputDescriptor::OneSamplePerStep; + list.push_back(d); + + d.identifier = "chromameans"; + d.name = "Chroma Means"; + d.description = "Mean values of chromagram bins across the duration of the input audio"; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = 1; + list.push_back(d); + + return list; +} + +ChromagramPlugin::FeatureSet +ChromagramPlugin::process(const float *const *inputBuffers, + Vamp::RealTime timestamp) +{ + if (!m_chromagram) { + cerr << "ERROR: ChromagramPlugin::process: " + << "Chromagram has not been initialised" + << endl; + return FeatureSet(); + } + + double *real = new double[m_block]; + double *imag = new double[m_block]; + + for (size_t i = 0; i <= m_block/2; ++i) { + real[i] = inputBuffers[0][i*2]; + if (i > 0) real[m_block - i] = real[i]; + imag[i] = inputBuffers[0][i*2+1]; + if (i > 0) imag[m_block - i] = imag[i]; + } + +// cerr << "chromagram: timestamp = " << timestamp << endl; +/* + bool printThis = false; + + if (timestamp.sec == 3 && timestamp.nsec < 250000000) { + printThis = true; + } + if (printThis) { + cerr << "\n\nchromagram: timestamp " << timestamp << ": input data starts:" << endl; + for (int i = 0; i < m_block && i < 1000; ++i) { + cerr << real[i] << "," << imag[i] << " "; + } + cerr << endl << "values:" << endl; + } +*/ + double *output = m_chromagram->process(real, imag); + + delete[] real; + delete[] imag; + + Feature feature; + feature.hasTimestamp = false; + for (size_t i = 0; i < m_config.BPO; ++i) { + double value = output[i]; +/* + if (printThis) { + cerr << value << " "; + } +*/ + if (ISNAN(value)) value = 0.0; + m_binsums[i] += value; + feature.values.push_back(value); + } + feature.label = ""; + ++m_count; +/* + if (printThis) { + cerr << endl; + } +*/ + + FeatureSet returnFeatures; + returnFeatures[0].push_back(feature); + return returnFeatures; +} + +ChromagramPlugin::FeatureSet +ChromagramPlugin::getRemainingFeatures() +{ + Feature 
feature; + feature.hasTimestamp = true; + feature.timestamp = Vamp::RealTime::zeroTime; + + for (size_t i = 0; i < m_config.BPO; ++i) { + double v = m_binsums[i]; + if (m_count > 0) v /= m_count; + feature.values.push_back(v); + } + feature.label = "Chromagram bin means"; + + FeatureSet returnFeatures; + returnFeatures[1].push_back(feature); + return returnFeatures; +} + diff --git a/libs/vamp-plugins/ChromagramPlugin.h b/libs/vamp-plugins/ChromagramPlugin.h new file mode 100644 index 0000000000..c8ec9130ff --- /dev/null +++ b/libs/vamp-plugins/ChromagramPlugin.h @@ -0,0 +1,72 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. 
+*/ + +#ifndef _CHROMAGRAM_PLUGIN_H_ +#define _CHROMAGRAM_PLUGIN_H_ + +#include +#include + +class ChromagramPlugin : public Vamp::Plugin +{ +public: + ChromagramPlugin(float inputSampleRate); + virtual ~ChromagramPlugin(); + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + InputDomain getInputDomain() const { return FrequencyDomain; } + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string) const; + void setParameter(std::string, float); + + size_t getPreferredStepSize() const; + size_t getPreferredBlockSize() const; + + OutputList getOutputDescriptors() const; + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + int m_minMIDIPitch; + int m_maxMIDIPitch; + float m_tuningFrequency; + MathUtilities::NormaliseType m_normalise; + int m_bpo; + + void setupConfig(); + + ChromaConfig m_config; + Chromagram *m_chromagram; + mutable size_t m_step; + mutable size_t m_block; + + vector m_binsums; + size_t m_count; +}; + + +#endif diff --git a/libs/vamp-plugins/KeyDetect.cpp b/libs/vamp-plugins/KeyDetect.cpp new file mode 100644 index 0000000000..a339784335 --- /dev/null +++ b/libs/vamp-plugins/KeyDetect.cpp @@ -0,0 +1,407 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. 
+*/ + +#include "KeyDetect.h" + +using std::string; +using std::vector; +//using std::cerr; +using std::endl; + +#include + + +// Order for circle-of-5ths plotting +static int conversion[24] = +{ 7, 12, 5, 10, 3, 8, 1, 6, 11, 4, 9, 2, + 16, 21, 14, 19, 24, 17, 22, 15, 20, 13, 18, 23 }; + + +KeyDetector::KeyDetector(float inputSampleRate) : + Plugin(inputSampleRate), + m_stepSize(0), + m_blockSize(0), + m_tuningFrequency(440), + m_length(10), + m_getKeyMode(0), + m_inputFrame(0), + m_prevKey(-1) +{ +} + +KeyDetector::~KeyDetector() +{ + delete m_getKeyMode; + if ( m_inputFrame ) { + delete [] m_inputFrame; + } +} + +string +KeyDetector::getIdentifier() const +{ + return "qm-keydetector"; +} + +string +KeyDetector::getName() const +{ + return "Key Detector"; +} + +string +KeyDetector::getDescription() const +{ + return "Estimate the key of the music"; +} + +string +KeyDetector::getMaker() const +{ + return "Queen Mary, University of London"; +} + +int +KeyDetector::getPluginVersion() const +{ + return 4; +} + +string +KeyDetector::getCopyright() const +{ + return "Plugin by Katy Noland and Christian Landone. 
Copyright (c) 2006-2009 QMUL - All Rights Reserved"; +} + +KeyDetector::ParameterList +KeyDetector::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; + desc.identifier = "tuning"; + desc.name = "Tuning Frequency"; + desc.description = "Frequency of concert A"; + desc.unit = "Hz"; + desc.minValue = 420; + desc.maxValue = 460; + desc.defaultValue = 440; + desc.isQuantized = false; + list.push_back(desc); + + desc.identifier = "length"; + desc.name = "Window Length"; + desc.unit = "chroma frames"; + desc.description = "Number of chroma analysis frames per key estimation"; + desc.minValue = 1; + desc.maxValue = 30; + desc.defaultValue = 10; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + return list; +} + +float +KeyDetector::getParameter(std::string param) const +{ + if (param == "tuning") { + return m_tuningFrequency; + } + if (param == "length") { + return m_length; + } + std::cerr << "WARNING: KeyDetector::getParameter: unknown parameter \"" + << param << "\"" << std::endl; + return 0.0; +} + +void +KeyDetector::setParameter(std::string param, float value) +{ + if (param == "tuning") { + m_tuningFrequency = value; + } else if (param == "length") { + m_length = int(value + 0.1); + } else { + std::cerr << "WARNING: KeyDetector::setParameter: unknown parameter \"" + << param << "\"" << std::endl; + } +} + +bool +KeyDetector::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (m_getKeyMode) { + delete m_getKeyMode; + m_getKeyMode = 0; + } + + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + + m_getKeyMode = new GetKeyMode(int(m_inputSampleRate + 0.1), + m_tuningFrequency, + m_length, m_length); + + m_stepSize = m_getKeyMode->getHopSize(); + m_blockSize = m_getKeyMode->getBlockSize(); + + if (stepSize != m_stepSize || blockSize != m_blockSize) { + std::cerr << "KeyDetector::initialise: ERROR: step/block sizes " + << stepSize << "/" << 
blockSize << " differ from required " + << m_stepSize << "/" << m_blockSize << std::endl; + delete m_getKeyMode; + m_getKeyMode = 0; + return false; + } + + m_inputFrame = new double[m_blockSize]; + + m_prevKey = -1; + m_first = true; + + return true; +} + +void +KeyDetector::reset() +{ + if (m_getKeyMode) { + delete m_getKeyMode; + m_getKeyMode = new GetKeyMode(int(m_inputSampleRate + 0.1), + m_tuningFrequency, + m_length, m_length); + } + + if (m_inputFrame) { + for( unsigned int i = 0; i < m_blockSize; i++ ) { + m_inputFrame[ i ] = 0.0; + } + } + + m_prevKey = -1; + m_first = true; +} + + +KeyDetector::OutputList +KeyDetector::getOutputDescriptors() const +{ + OutputList list; + + float osr = 0.0f; + if (m_stepSize == 0) (void)getPreferredStepSize(); + osr = m_inputSampleRate / m_stepSize; + + OutputDescriptor d; + d.identifier = "tonic"; + d.name = "Tonic Pitch"; + d.unit = ""; + d.description = "Tonic of the estimated key (from C = 1 to B = 12)"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.isQuantized = true; + d.minValue = 1; + d.maxValue = 12; + d.quantizeStep = 1; + d.sampleRate = osr; + d.sampleType = OutputDescriptor::VariableSampleRate; + list.push_back(d); + + d.identifier = "mode"; + d.name = "Key Mode"; + d.unit = ""; + d.description = "Major or minor mode of the estimated key (major = 0, minor = 1)"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.isQuantized = true; + d.minValue = 0; + d.maxValue = 1; + d.quantizeStep = 1; + d.sampleRate = osr; + d.sampleType = OutputDescriptor::VariableSampleRate; + list.push_back(d); + + d.identifier = "key"; + d.name = "Key"; + d.unit = ""; + d.description = "Estimated key (from C major = 1 to B major = 12 and C minor = 13 to B minor = 24)"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.isQuantized = true; + d.minValue = 1; + d.maxValue = 24; + d.quantizeStep = 1; + d.sampleRate = osr; + d.sampleType = 
OutputDescriptor::VariableSampleRate; + list.push_back(d); + + d.identifier = "keystrength"; + d.name = "Key Strength Plot"; + d.unit = ""; + d.description = "Correlation of the chroma vector with stored key profile for each major and minor key"; + d.hasFixedBinCount = true; + d.binCount = 25; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::OneSamplePerStep; + for (int i = 0; i < 24; ++i) { + if (i == 12) d.binNames.push_back(" "); + int idx = conversion[i]; + std::string label = getKeyName(idx > 12 ? idx-12 : idx, + i >= 12, + true); + d.binNames.push_back(label); + } + list.push_back(d); + + return list; +} + +KeyDetector::FeatureSet +KeyDetector::process(const float *const *inputBuffers, + Vamp::RealTime now) +{ + if (m_stepSize == 0) { + return FeatureSet(); + } + + FeatureSet returnFeatures; + + for ( unsigned int i = 0 ; i < m_blockSize; i++ ) { + m_inputFrame[i] = (double)inputBuffers[0][i]; + } + +// int key = (m_getKeyMode->process(m_inputFrame) % 24); + int key = m_getKeyMode->process(m_inputFrame); + bool minor = m_getKeyMode->isModeMinor(key); + int tonic = key; + if (tonic > 12) tonic -= 12; + + int prevTonic = m_prevKey; + if (prevTonic > 12) prevTonic -= 12; + + if (m_first || (tonic != prevTonic)) { + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = now; +// feature.timestamp = now; + feature.values.push_back((float)tonic); + feature.label = getKeyName(tonic, minor, false); + returnFeatures[0].push_back(feature); // tonic + } + + if (m_first || (minor != (m_getKeyMode->isModeMinor(m_prevKey)))) { + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = now; + feature.values.push_back(minor ? 1.f : 0.f); + feature.label = (minor ? 
"Minor" : "Major"); + returnFeatures[1].push_back(feature); // mode + } + + if (m_first || (key != m_prevKey)) { + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = now; + feature.values.push_back((float)key); + feature.label = getKeyName(tonic, minor, true); + returnFeatures[2].push_back(feature); // key + } + + m_prevKey = key; + m_first = false; + + Feature ksf; + ksf.values.reserve(25); + double *keystrengths = m_getKeyMode->getKeyStrengths(); + for (int i = 0; i < 24; ++i) { + if (i == 12) ksf.values.push_back(-1); + ksf.values.push_back(keystrengths[conversion[i]-1]); + } + ksf.hasTimestamp = false; + returnFeatures[3].push_back(ksf); + + return returnFeatures; +} + +KeyDetector::FeatureSet +KeyDetector::getRemainingFeatures() +{ + return FeatureSet(); +} + + +size_t +KeyDetector::getPreferredStepSize() const +{ + if (!m_stepSize) { + GetKeyMode gkm(int(m_inputSampleRate + 0.1), + m_tuningFrequency, m_length, m_length); + m_stepSize = gkm.getHopSize(); + m_blockSize = gkm.getBlockSize(); + } + return m_stepSize; +} + +size_t +KeyDetector::getPreferredBlockSize() const +{ + if (!m_blockSize) { + GetKeyMode gkm(int(m_inputSampleRate + 0.1), + m_tuningFrequency, m_length, m_length); + m_stepSize = gkm.getHopSize(); + m_blockSize = gkm.getBlockSize(); + } + return m_blockSize; +} + +std::string +KeyDetector::getKeyName(int index, bool minor, bool includeMajMin) const +{ + // Keys are numbered with 1 => C, 12 => B + // This is based on chromagram base set to a C in qm-dsp's GetKeyMode.cpp + + static const char *namesMajor[] = { + "C", "Db", "D", "Eb", + "E", "F", "F# / Gb", "G", + "Ab", "A", "Bb", "B" + }; + + static const char *namesMinor[] = { + "C", "C#", "D", "Eb / D#", + "E", "F", "F#", "G", + "G#", "A", "Bb", "B" + }; + + if (index < 1 || index > 12) { + return "(unknown)"; + } + + std::string base; + + if (minor) base = namesMinor[index - 1]; + else base = namesMajor[index - 1]; + + if (!includeMajMin) return base; + + if (minor) return 
base + " minor"; + else return base + " major"; +} + diff --git a/libs/vamp-plugins/KeyDetect.h b/libs/vamp-plugins/KeyDetect.h new file mode 100644 index 0000000000..7983ac3847 --- /dev/null +++ b/libs/vamp-plugins/KeyDetect.h @@ -0,0 +1,69 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _GETMODE_PLUGIN_H_ +#define _GETMODE_PLUGIN_H_ + +#include + +#include + +class KeyDetector : public Vamp::Plugin +{ +public: + KeyDetector(float inputSampleRate); + virtual ~KeyDetector(); + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + InputDomain getInputDomain() const { return TimeDomain; } + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string) const; + void setParameter(std::string, float); + + OutputList getOutputDescriptors() const; + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + + size_t getPreferredStepSize() const; + size_t getPreferredBlockSize() const; + +protected: + mutable size_t m_stepSize; + mutable size_t m_blockSize; + float m_tuningFrequency; + int m_length; + + std::string getKeyName(int index, bool minor, bool includeMajMin) const; + + GetKeyMode* m_getKeyMode; + double* m_inputFrame; + int m_prevKey; + bool m_first; +}; + + +#endif diff --git 
a/libs/vamp-plugins/SimilarityPlugin.cpp b/libs/vamp-plugins/SimilarityPlugin.cpp new file mode 100644 index 0000000000..50e2b2f2a8 --- /dev/null +++ b/libs/vamp-plugins/SimilarityPlugin.cpp @@ -0,0 +1,939 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + * SimilarityPlugin.cpp + * + * Copyright 2009 Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. + */ + +#include +#include + +#include "SimilarityPlugin.h" +#include "base/Pitch.h" +#include "dsp/mfcc/MFCC.h" +#include "dsp/chromagram/Chromagram.h" +#include "dsp/rateconversion/Decimator.h" +#include "dsp/rhythm/BeatSpectrum.h" +#include "maths/KLDivergence.h" +#include "maths/CosineDistance.h" +#include "maths/MathUtilities.h" + +using std::string; +using std::vector; +using std::cerr; +using std::endl; +using std::ostringstream; + +const float +SimilarityPlugin::m_noRhythm = 0.009; + +const float +SimilarityPlugin::m_allRhythm = 0.991; + +SimilarityPlugin::SimilarityPlugin(float inputSampleRate) : + Plugin(inputSampleRate), + m_type(TypeMFCC), + m_mfcc(0), + m_rhythmfcc(0), + m_chromagram(0), + m_decimator(0), + m_featureColumnSize(20), + m_rhythmWeighting(0.5f), + m_rhythmClipDuration(4.f), // seconds + m_rhythmClipOrigin(40.f), // seconds + m_rhythmClipFrameSize(0), + m_rhythmClipFrames(0), + m_rhythmColumnSize(20), + m_blockSize(0), + m_channels(0), + m_processRate(0), + m_frameNo(0), + m_done(false) +{ + int rate = lrintf(m_inputSampleRate); + int internalRate = 22050; + int decimationFactor = rate / internalRate; + if (decimationFactor < 1) decimationFactor = 1; + + // must be a power of two + while (decimationFactor & (decimationFactor 
- 1)) ++decimationFactor; + + m_processRate = rate / decimationFactor; // may be 22050, 24000 etc +} + +SimilarityPlugin::~SimilarityPlugin() +{ + delete m_mfcc; + delete m_rhythmfcc; + delete m_chromagram; + delete m_decimator; +} + +string +SimilarityPlugin::getIdentifier() const +{ + return "qm-similarity"; +} + +string +SimilarityPlugin::getName() const +{ + return "Similarity"; +} + +string +SimilarityPlugin::getDescription() const +{ + return "Return a distance matrix for similarity between the input audio channels"; +} + +string +SimilarityPlugin::getMaker() const +{ + return "Queen Mary, University of London"; +} + +int +SimilarityPlugin::getPluginVersion() const +{ + return 1; +} + +string +SimilarityPlugin::getCopyright() const +{ + return "Plugin by Mark Levy, Kurt Jacobson and Chris Cannam. Copyright (c) 2009 QMUL - All Rights Reserved"; +} + +size_t +SimilarityPlugin::getMinChannelCount() const +{ + return 1; +} + +size_t +SimilarityPlugin::getMaxChannelCount() const +{ + return 1024; +} + +int +SimilarityPlugin::getDecimationFactor() const +{ + int rate = lrintf(m_inputSampleRate); + return rate / m_processRate; +} + +size_t +SimilarityPlugin::getPreferredStepSize() const +{ + if (m_blockSize == 0) calculateBlockSize(); + + // there is also an assumption to this effect in process() + // (referring to m_fftSize/2 instead of a literal post-decimation + // step size): + return m_blockSize/2; +} + +size_t +SimilarityPlugin::getPreferredBlockSize() const +{ + if (m_blockSize == 0) calculateBlockSize(); + return m_blockSize; +} + +void +SimilarityPlugin::calculateBlockSize() const +{ + if (m_blockSize != 0) return; + int decimationFactor = getDecimationFactor(); + m_blockSize = 2048 * decimationFactor; +} + +SimilarityPlugin::ParameterList SimilarityPlugin::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; + desc.identifier = "featureType"; + desc.name = "Feature Type"; + desc.description = "Audio feature used for 
similarity measure. Timbral: use the first 20 MFCCs (19 plus C0). Chromatic: use 12 bin-per-octave chroma. Rhythmic: compare beat spectra of short regions."; + desc.unit = ""; + desc.minValue = 0; + desc.maxValue = 4; + desc.defaultValue = 1; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.valueNames.push_back("Timbre"); + desc.valueNames.push_back("Timbre and Rhythm"); + desc.valueNames.push_back("Chroma"); + desc.valueNames.push_back("Chroma and Rhythm"); + desc.valueNames.push_back("Rhythm only"); + list.push_back(desc); +/* + desc.identifier = "rhythmWeighting"; + desc.name = "Influence of Rhythm"; + desc.description = "Proportion of similarity measure made up from rhythmic similarity component, from 0 (entirely timbral or chromatic) to 100 (entirely rhythmic)."; + desc.unit = "%"; + desc.minValue = 0; + desc.maxValue = 100; + desc.defaultValue = 0; + desc.isQuantized = false; + desc.valueNames.clear(); + list.push_back(desc); +*/ + return list; +} + +float +SimilarityPlugin::getParameter(std::string param) const +{ + if (param == "featureType") { + + if (m_rhythmWeighting > m_allRhythm) { + return 4; + } + + switch (m_type) { + + case TypeMFCC: + if (m_rhythmWeighting < m_noRhythm) return 0; + else return 1; + break; + + case TypeChroma: + if (m_rhythmWeighting < m_noRhythm) return 2; + else return 3; + break; + } + + return 1; + +// } else if (param == "rhythmWeighting") { +// return nearbyint(m_rhythmWeighting * 100.0); + } + + std::cerr << "WARNING: SimilarityPlugin::getParameter: unknown parameter \"" + << param << "\"" << std::endl; + return 0.0; +} + +void +SimilarityPlugin::setParameter(std::string param, float value) +{ + if (param == "featureType") { + + int v = int(value + 0.1); + + Type newType = m_type; + + switch (v) { + case 0: newType = TypeMFCC; m_rhythmWeighting = 0.0f; break; + case 1: newType = TypeMFCC; m_rhythmWeighting = 0.5f; break; + case 2: newType = TypeChroma; m_rhythmWeighting = 0.0f; break; + case 3: newType = 
TypeChroma; m_rhythmWeighting = 0.5f; break; + case 4: newType = TypeMFCC; m_rhythmWeighting = 1.f; break; + } + + if (newType != m_type) m_blockSize = 0; + + m_type = newType; + return; + +// } else if (param == "rhythmWeighting") { +// m_rhythmWeighting = value / 100; +// return; + } + + std::cerr << "WARNING: SimilarityPlugin::setParameter: unknown parameter \"" + << param << "\"" << std::endl; +} + +SimilarityPlugin::OutputList +SimilarityPlugin::getOutputDescriptors() const +{ + OutputList list; + + OutputDescriptor similarity; + similarity.identifier = "distancematrix"; + similarity.name = "Distance Matrix"; + similarity.description = "Distance matrix for similarity metric. Smaller = more similar. Should be symmetrical."; + similarity.unit = ""; + similarity.hasFixedBinCount = true; + similarity.binCount = m_channels; + similarity.hasKnownExtents = false; + similarity.isQuantized = false; + similarity.sampleType = OutputDescriptor::FixedSampleRate; + similarity.sampleRate = 1; + + m_distanceMatrixOutput = list.size(); + list.push_back(similarity); + + OutputDescriptor simvec; + simvec.identifier = "distancevector"; + simvec.name = "Distance from First Channel"; + simvec.description = "Distance vector for similarity of each channel to the first channel. Smaller = more similar."; + simvec.unit = ""; + simvec.hasFixedBinCount = true; + simvec.binCount = m_channels; + simvec.hasKnownExtents = false; + simvec.isQuantized = false; + simvec.sampleType = OutputDescriptor::FixedSampleRate; + simvec.sampleRate = 1; + + m_distanceVectorOutput = list.size(); + list.push_back(simvec); + + OutputDescriptor sortvec; + sortvec.identifier = "sorteddistancevector"; + sortvec.name = "Ordered Distances from First Channel"; + sortvec.description = "Vector of the order of other channels in similarity to the first, followed by distance vector for similarity of each to the first. 
Smaller = more similar."; + sortvec.unit = ""; + sortvec.hasFixedBinCount = true; + sortvec.binCount = m_channels; + sortvec.hasKnownExtents = false; + sortvec.isQuantized = false; + sortvec.sampleType = OutputDescriptor::FixedSampleRate; + sortvec.sampleRate = 1; + + m_sortedVectorOutput = list.size(); + list.push_back(sortvec); + + OutputDescriptor means; + means.identifier = "means"; + means.name = "Feature Means"; + means.description = "Means of the feature bins. Feature time (sec) corresponds to input channel. Number of bins depends on selected feature type."; + means.unit = ""; + means.hasFixedBinCount = true; + means.binCount = m_featureColumnSize; + means.hasKnownExtents = false; + means.isQuantized = false; + means.sampleType = OutputDescriptor::FixedSampleRate; + means.sampleRate = 1; + + m_meansOutput = list.size(); + list.push_back(means); + + OutputDescriptor variances; + variances.identifier = "variances"; + variances.name = "Feature Variances"; + variances.description = "Variances of the feature bins. Feature time (sec) corresponds to input channel. Number of bins depends on selected feature type."; + variances.unit = ""; + variances.hasFixedBinCount = true; + variances.binCount = m_featureColumnSize; + variances.hasKnownExtents = false; + variances.isQuantized = false; + variances.sampleType = OutputDescriptor::FixedSampleRate; + variances.sampleRate = 1; + + m_variancesOutput = list.size(); + list.push_back(variances); + + OutputDescriptor beatspectrum; + beatspectrum.identifier = "beatspectrum"; + beatspectrum.name = "Beat Spectra"; + beatspectrum.description = "Rhythmic self-similarity vectors (beat spectra) for the input channels. Feature time (sec) corresponds to input channel. 
Not returned if rhythm weighting is zero."; + beatspectrum.unit = ""; + if (m_rhythmClipFrames > 0) { + beatspectrum.hasFixedBinCount = true; + beatspectrum.binCount = m_rhythmClipFrames / 2; + } else { + beatspectrum.hasFixedBinCount = false; + } + beatspectrum.hasKnownExtents = false; + beatspectrum.isQuantized = false; + beatspectrum.sampleType = OutputDescriptor::FixedSampleRate; + beatspectrum.sampleRate = 1; + + m_beatSpectraOutput = list.size(); + list.push_back(beatspectrum); + + return list; +} + +bool +SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount()) return false; + + // Using more than getMaxChannelCount is not actually a problem + // for us. Using "incorrect" step and block sizes would be fine + // for timbral or chroma similarity, but will break rhythmic + // similarity, so we'd better enforce these. + + if (stepSize != getPreferredStepSize()) { + std::cerr << "SimilarityPlugin::initialise: supplied step size " + << stepSize << " differs from required step size " + << getPreferredStepSize() << std::endl; + return false; + } + + if (blockSize != getPreferredBlockSize()) { + std::cerr << "SimilarityPlugin::initialise: supplied block size " + << blockSize << " differs from required block size " + << getPreferredBlockSize() << std::endl; + return false; + } + + m_blockSize = blockSize; + m_channels = channels; + + m_lastNonEmptyFrame = std::vector(m_channels); + for (int i = 0; i < m_channels; ++i) m_lastNonEmptyFrame[i] = -1; + + m_emptyFrameCount = std::vector(m_channels); + for (int i = 0; i < m_channels; ++i) m_emptyFrameCount[i] = 0; + + m_frameNo = 0; + + int decimationFactor = getDecimationFactor(); + if (decimationFactor > 1) { + m_decimator = new Decimator(m_blockSize, decimationFactor); + } + + if (m_type == TypeMFCC) { + + m_featureColumnSize = 20; + + MFCCConfig config(m_processRate); + config.fftsize = 2048; + config.nceps = m_featureColumnSize - 1; + config.want_c0 = 
true; + config.logpower = 1; + m_mfcc = new MFCC(config); + m_fftSize = m_mfcc->getfftlength(); + m_rhythmClipFrameSize = m_fftSize / 4; + +// std::cerr << "MFCC FS = " << config.FS << ", FFT size = " << m_fftSize<< std::endl; + + } else if (m_type == TypeChroma) { + + m_featureColumnSize = 12; + + // For simplicity, aim to have the chroma fft size equal to + // 2048, the same as the mfcc fft size (so the input block + // size does not depend on the feature type and we can use the + // same processing parameters for rhythm etc). This is also + // why getPreferredBlockSize can confidently return 2048 * the + // decimation factor. + + // The fft size for a chromagram is the filterbank Q value + // times the sample rate, divided by the minimum frequency, + // rounded up to the nearest power of two. + + double q = 1.0 / (pow(2.0, (1.0 / 12.0)) - 1.0); + double fmin = (q * m_processRate) / 2048.0; + + // Round fmin up to the nearest MIDI pitch multiple of 12. + // So long as fmin is greater than 12 to start with, this + // should not change the resulting fft size. 
+ + int pmin = Pitch::getPitchForFrequency(float(fmin)); + pmin = ((pmin / 12) + 1) * 12; + fmin = Pitch::getFrequencyForPitch(pmin); + + float fmax = Pitch::getFrequencyForPitch(pmin + 36); + + ChromaConfig config; + config.FS = m_processRate; + config.min = fmin; + config.max = fmax; + config.BPO = 12; + config.CQThresh = 0.0054; + // We don't normalise the chromagram's columns individually; + // we normalise the mean at the end instead + config.normalise = MathUtilities::NormaliseNone; + m_chromagram = new Chromagram(config); + m_fftSize = m_chromagram->getFrameSize(); + + if (m_fftSize != 2048) { + std::cerr << "WARNING: SimilarityPlugin::initialise: Internal processing FFT size " << m_fftSize << " != expected size 2048 in chroma mode" << std::endl; + } + +// std::cerr << "fftsize = " << m_fftSize << std::endl; + + m_rhythmClipFrameSize = m_fftSize / 4; + +// std::cerr << "m_rhythmClipFrameSize = " << m_rhythmClipFrameSize << std::endl; +// std::cerr << "min = "<< config.min << ", max = " << config.max << std::endl; + + } else { + + std::cerr << "SimilarityPlugin::initialise: internal error: unknown type " << m_type << std::endl; + return false; + } + + if (needRhythm()) { + m_rhythmClipFrames = + int(ceil((m_rhythmClipDuration * m_processRate) + / m_rhythmClipFrameSize)); +// std::cerr << "SimilarityPlugin::initialise: rhythm clip requires " +// << m_rhythmClipFrames << " frames of size " +// << m_rhythmClipFrameSize << " at process rate " +// << m_processRate << " ( = " +// << (float(m_rhythmClipFrames * m_rhythmClipFrameSize) / m_processRate) << " sec )" +// << std::endl; + + MFCCConfig config(m_processRate); + config.fftsize = m_rhythmClipFrameSize; + config.nceps = m_rhythmColumnSize - 1; + config.want_c0 = true; + config.logpower = 1; + config.window = RectangularWindow; // because no overlap + m_rhythmfcc = new MFCC(config); + } + + for (int i = 0; i < m_channels; ++i) { + + m_values.push_back(FeatureMatrix()); + + if (needRhythm()) { + 
m_rhythmValues.push_back(FeatureColumnQueue()); + } + } + + m_done = false; + + return true; +} + +void +SimilarityPlugin::reset() +{ + for (int i = 0; i < m_values.size(); ++i) { + m_values[i].clear(); + } + + for (int i = 0; i < m_rhythmValues.size(); ++i) { + m_rhythmValues[i].clear(); + } + + for (int i = 0; i < m_lastNonEmptyFrame.size(); ++i) { + m_lastNonEmptyFrame[i] = -1; + } + + for (int i = 0; i < m_emptyFrameCount.size(); ++i) { + m_emptyFrameCount[i] = 0; + } + + m_done = false; +} + +SimilarityPlugin::FeatureSet +SimilarityPlugin::process(const float *const *inputBuffers, Vamp::RealTime /* timestamp */) +{ + if (m_done) { + return FeatureSet(); + } + + double *dblbuf = new double[m_blockSize]; + double *decbuf = dblbuf; + if (m_decimator) decbuf = new double[m_fftSize]; + + double *raw = new double[std::max(m_featureColumnSize, + m_rhythmColumnSize)]; + + float threshold = 1e-10; + + bool someRhythmFrameNeeded = false; + + for (size_t c = 0; c < m_channels; ++c) { + + bool empty = true; + + for (int i = 0; i < m_blockSize; ++i) { + float val = inputBuffers[c][i]; + if (fabs(val) > threshold) empty = false; + dblbuf[i] = val; + } + + if (empty) { + if (needRhythm() && ((m_frameNo % 2) == 0)) { + for (int i = 0; i < m_fftSize / m_rhythmClipFrameSize; ++i) { + if (m_rhythmValues[c].size() < m_rhythmClipFrames) { + FeatureColumn mf(m_rhythmColumnSize); + for (int i = 0; i < m_rhythmColumnSize; ++i) { + mf[i] = 0.0; + } + m_rhythmValues[c].push_back(mf); + } + } + } + m_emptyFrameCount[c]++; + continue; + } + + m_lastNonEmptyFrame[c] = m_frameNo; + + if (m_decimator) { + m_decimator->process(dblbuf, decbuf); + } + + if (needTimbre()) { + + FeatureColumn mf(m_featureColumnSize); + + if (m_type == TypeMFCC) { + m_mfcc->process(decbuf, raw); + for (int i = 0; i < m_featureColumnSize; ++i) { + mf[i] = raw[i]; + } + } else if (m_type == TypeChroma) { + double *chroma = m_chromagram->process(decbuf); + for (int i = 0; i < m_featureColumnSize; ++i) { + mf[i] = 
chroma[i]; + } + } + + m_values[c].push_back(mf); + } + +// std::cerr << "needRhythm = " << needRhythm() << ", frame = " << m_frameNo << std::endl; + + if (needRhythm() && ((m_frameNo % 2) == 0)) { + + // The incoming frames are overlapping; we only use every + // other one, because we don't want the overlap (it would + // screw up the rhythm) + + int frameOffset = 0; + + while (frameOffset + m_rhythmClipFrameSize <= m_fftSize) { + + bool needRhythmFrame = true; + + if (m_rhythmValues[c].size() >= m_rhythmClipFrames) { + + needRhythmFrame = false; + + // assumes hopsize = framesize/2 + float current = m_frameNo * (m_fftSize/2) + frameOffset; + current = current / m_processRate; + if (current - m_rhythmClipDuration < m_rhythmClipOrigin) { + needRhythmFrame = true; + m_rhythmValues[c].pop_front(); + } + +// if (needRhythmFrame) { +// std::cerr << "at current = " <= m_values[i].size()) sz = m_values[i].size()-1; + + count = 0; + for (int k = 0; k < sz; ++k) { + double val = m_values[i][k][j]; + if (ISNAN(val) || ISINF(val)) continue; + mean[j] += val; + ++count; + } + if (count > 0) mean[j] /= count; + + count = 0; + for (int k = 0; k < sz; ++k) { + double val = ((m_values[i][k][j] - mean[j]) * + (m_values[i][k][j] - mean[j])); + if (ISNAN(val) || ISINF(val)) continue; + variance[j] += val; + ++count; + } + if (count > 0) variance[j] /= count; + } + + m[i] = mean; + v[i] = variance; + } + + FeatureMatrix distances(m_channels); + + if (m_type == TypeMFCC) { + + // "Despite the fact that MFCCs extracted from music are + // clearly not Gaussian, [14] showed, somewhat surprisingly, + // that a similarity function comparing single Gaussians + // modelling MFCCs for each track can perform as well as + // mixture models. A great advantage of using single + // Gaussians is that a simple closed form exists for the KL + // divergence." 
-- Mark Levy, "Lightweight measures for + // timbral similarity of musical audio" + // (http://www.elec.qmul.ac.uk/easaier/papers/mlevytimbralsimilarity.pdf) + + KLDivergence kld; + + for (int i = 0; i < m_channels; ++i) { + for (int j = 0; j < m_channels; ++j) { + double d = kld.distanceGaussian(m[i], v[i], m[j], v[j]); + distances[i].push_back(d); + } + } + + } else { + + // We use the KL divergence for distributions of discrete + // variables, as chroma are histograms already. Or at least, + // they will be when we've normalised them like this: + for (int i = 0; i < m_channels; ++i) { + MathUtilities::normalise(m[i], MathUtilities::NormaliseUnitSum); + } + + KLDivergence kld; + + for (int i = 0; i < m_channels; ++i) { + for (int j = 0; j < m_channels; ++j) { + double d = kld.distanceDistribution(m[i], m[j], true); + distances[i].push_back(d); + } + } + } + + Feature feature; + feature.hasTimestamp = true; + + char labelBuffer[100]; + + for (int i = 0; i < m_channels; ++i) { + + feature.timestamp = Vamp::RealTime(i, 0); + + sprintf(labelBuffer, "Means for channel %d", i+1); + feature.label = labelBuffer; + + feature.values.clear(); + for (int k = 0; k < m_featureColumnSize; ++k) { + feature.values.push_back(m[i][k]); + } + + returnFeatures[m_meansOutput].push_back(feature); + + sprintf(labelBuffer, "Variances for channel %d", i+1); + feature.label = labelBuffer; + + feature.values.clear(); + for (int k = 0; k < m_featureColumnSize; ++k) { + feature.values.push_back(v[i][k]); + } + + returnFeatures[m_variancesOutput].push_back(feature); + } + + return distances; +} + +SimilarityPlugin::FeatureMatrix +SimilarityPlugin::calculateRhythmic(FeatureSet &returnFeatures) +{ + if (!needRhythm()) return FeatureMatrix(); + +// std::cerr << "SimilarityPlugin::initialise: rhythm clip for channel 0 contains " +// << m_rhythmValues[0].size() << " frames of size " +// << m_rhythmClipFrameSize << " at process rate " +// << m_processRate << " ( = " +// << 
(float(m_rhythmValues[0].size() * m_rhythmClipFrameSize) / m_processRate) << " sec )" +// << std::endl; + + BeatSpectrum bscalc; + CosineDistance cd; + + // Our rhythm feature matrix is a deque of vectors for practical + // reasons, but BeatSpectrum::process wants a vector of vectors + // (which is what FeatureMatrix happens to be). + + FeatureMatrixSet bsinput(m_channels); + for (int i = 0; i < m_channels; ++i) { + for (int j = 0; j < m_rhythmValues[i].size(); ++j) { + bsinput[i].push_back(m_rhythmValues[i][j]); + } + } + + FeatureMatrix bs(m_channels); + for (int i = 0; i < m_channels; ++i) { + bs[i] = bscalc.process(bsinput[i]); + } + + FeatureMatrix distances(m_channels); + for (int i = 0; i < m_channels; ++i) { + for (int j = 0; j < m_channels; ++j) { + double d = cd.distance(bs[i], bs[j]); + distances[i].push_back(d); + } + } + + Feature feature; + feature.hasTimestamp = true; + + char labelBuffer[100]; + + for (int i = 0; i < m_channels; ++i) { + + feature.timestamp = Vamp::RealTime(i, 0); + + sprintf(labelBuffer, "Beat spectrum for channel %d", i+1); + feature.label = labelBuffer; + + feature.values.clear(); + for (int j = 0; j < bs[i].size(); ++j) { + feature.values.push_back(bs[i][j]); + } + + returnFeatures[m_beatSpectraOutput].push_back(feature); + } + + return distances; +} + +double +SimilarityPlugin::getDistance(const FeatureMatrix &timbral, + const FeatureMatrix &rhythmic, + int i, int j) +{ + double distance = 1.0; + if (needTimbre()) distance *= timbral[i][j]; + if (needRhythm()) distance *= rhythmic[i][j]; + return distance; +} + +SimilarityPlugin::FeatureSet +SimilarityPlugin::getRemainingFeatures() +{ + FeatureSet returnFeatures; + + // We want to return a matrix of the distances between channels, + // but Vamp doesn't have a matrix return type so we will actually + // return a series of vectors + + FeatureMatrix timbralDistances, rhythmicDistances; + + if (needTimbre()) { + timbralDistances = calculateTimbral(returnFeatures); + } + + if 
(needRhythm()) { + rhythmicDistances = calculateRhythmic(returnFeatures); + } + + // We give all features a timestamp, otherwise hosts will tend to + // stamp them at the end of the file, which is annoying + + Feature feature; + feature.hasTimestamp = true; + + Feature distanceVectorFeature; + distanceVectorFeature.label = "Distance from first channel"; + distanceVectorFeature.hasTimestamp = true; + distanceVectorFeature.timestamp = Vamp::RealTime::zeroTime; + + std::map sorted; + + char labelBuffer[100]; + + for (int i = 0; i < m_channels; ++i) { + + feature.timestamp = Vamp::RealTime(i, 0); + + feature.values.clear(); + for (int j = 0; j < m_channels; ++j) { + double dist = getDistance(timbralDistances, rhythmicDistances, i, j); + feature.values.push_back(dist); + } + + sprintf(labelBuffer, "Distances from channel %d", i+1); + feature.label = labelBuffer; + + returnFeatures[m_distanceMatrixOutput].push_back(feature); + + double fromFirst = + getDistance(timbralDistances, rhythmicDistances, 0, i); + + distanceVectorFeature.values.push_back(fromFirst); + sorted[fromFirst] = i; + } + + returnFeatures[m_distanceVectorOutput].push_back(distanceVectorFeature); + + feature.label = "Order of channels by similarity to first channel"; + feature.values.clear(); + feature.timestamp = Vamp::RealTime(0, 0); + + for (std::map::iterator i = sorted.begin(); + i != sorted.end(); ++i) { + feature.values.push_back(i->second + 1); + } + + returnFeatures[m_sortedVectorOutput].push_back(feature); + + feature.label = "Ordered distances of channels from first channel"; + feature.values.clear(); + feature.timestamp = Vamp::RealTime(1, 0); + + for (std::map::iterator i = sorted.begin(); + i != sorted.end(); ++i) { + feature.values.push_back(i->first); + } + + returnFeatures[m_sortedVectorOutput].push_back(feature); + + return returnFeatures; +} diff --git a/libs/vamp-plugins/SimilarityPlugin.h b/libs/vamp-plugins/SimilarityPlugin.h new file mode 100644 index 0000000000..1f30aabd21 --- 
/dev/null +++ b/libs/vamp-plugins/SimilarityPlugin.h @@ -0,0 +1,123 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + * SimilarityPlugin.h + * + * Copyright 2008 Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. + */ + +#ifndef _SIMILARITY_PLUGIN_H_ +#define _SIMILARITY_PLUGIN_H_ + +#include +#include + +#include +#include + +class MFCC; +class Chromagram; +class Decimator; + +class SimilarityPlugin : public Vamp::Plugin +{ +public: + SimilarityPlugin(float inputSampleRate); + virtual ~SimilarityPlugin(); + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + size_t getPreferredStepSize() const; + size_t getPreferredBlockSize() const; + InputDomain getInputDomain() const { return TimeDomain; } + + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + SimilarityPlugin::ParameterList getParameterDescriptors() const; + float getParameter(std::string param) const; + void setParameter(std::string param, float value); + + OutputList getOutputDescriptors() const; + + FeatureSet process(const float *const *inputBuffers, Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + int getDecimationFactor() const; + + enum Type { + TypeMFCC, + TypeChroma + }; + + void calculateBlockSize() const; + bool needRhythm() const { return m_rhythmWeighting > m_noRhythm; } + bool needTimbre() const { return m_rhythmWeighting < 
m_allRhythm; } + + Type m_type; + MFCC *m_mfcc; + MFCC *m_rhythmfcc; + Chromagram *m_chromagram; + Decimator *m_decimator; + int m_featureColumnSize; + float m_rhythmWeighting; + float m_rhythmClipDuration; + float m_rhythmClipOrigin; + int m_rhythmClipFrameSize; + int m_rhythmClipFrames; + int m_rhythmColumnSize; + mutable size_t m_blockSize; // before decimation + size_t m_fftSize; // after decimation + int m_channels; + int m_processRate; + int m_frameNo; + bool m_done; + + static const float m_noRhythm; + static const float m_allRhythm; + + std::vector m_lastNonEmptyFrame; // per channel + std::vector m_emptyFrameCount; // per channel + + mutable int m_distanceMatrixOutput; + mutable int m_distanceVectorOutput; + mutable int m_sortedVectorOutput; + mutable int m_meansOutput; + mutable int m_variancesOutput; + mutable int m_beatSpectraOutput; + + typedef std::vector FeatureColumn; + typedef std::vector FeatureMatrix; + typedef std::vector FeatureMatrixSet; + + typedef std::deque FeatureColumnQueue; + typedef std::vector FeatureQueueSet; + + FeatureMatrixSet m_values; + FeatureQueueSet m_rhythmValues; + + FeatureMatrix calculateTimbral(FeatureSet &returnFeatures); + FeatureMatrix calculateRhythmic(FeatureSet &returnFeatures); + double getDistance(const FeatureMatrix &timbral, + const FeatureMatrix &rhythmic, + int i, int j); +}; + +#endif + diff --git a/libs/vamp-plugins/TonalChangeDetect.cpp b/libs/vamp-plugins/TonalChangeDetect.cpp new file mode 100644 index 0000000000..7b0cd230d1 --- /dev/null +++ b/libs/vamp-plugins/TonalChangeDetect.cpp @@ -0,0 +1,443 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "TonalChangeDetect.h" + +#include +#include +#include + +TonalChangeDetect::TonalChangeDetect(float fInputSampleRate) + : Vamp::Plugin(fInputSampleRate), + m_chromagram(0), + m_step(0), + m_block(0), + m_stepDelay(0), + m_origin(Vamp::RealTime::zeroTime), + m_haveOrigin(false) +{ + m_minMIDIPitch = 32; + m_maxMIDIPitch = 108; + m_tuningFrequency = 440; + m_iSmoothingWidth = 5; + + setupConfig(); +} + +TonalChangeDetect::~TonalChangeDetect() +{ +} + +bool TonalChangeDetect::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (m_chromagram) { + delete m_chromagram; + m_chromagram = 0; + } + + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) { + std::cerr << "TonalChangeDetect::initialise: Given channel count " << channels << " outside acceptable range (" << getMinChannelCount() << " to " << getMaxChannelCount() << ")" << std::endl; + return false; + } + + m_chromagram = new Chromagram(m_config); + m_step = m_chromagram->getHopSize(); + m_block = m_chromagram->getFrameSize(); + + if (stepSize != m_step) { + std::cerr << "TonalChangeDetect::initialise: Given step size " << stepSize << " differs from only acceptable value " << m_step << std::endl; + delete m_chromagram; + m_chromagram = 0; + return false; + } + if (blockSize != m_block) { + std::cerr << "TonalChangeDetect::initialise: Given step size " << stepSize << " differs from only acceptable value " << m_step << std::endl; + delete m_chromagram; + m_chromagram = 0; + return false; + } + + // m_stepDelay = (blockSize - stepSize) / 2; + // m_stepDelay = m_stepDelay / stepSize; + m_stepDelay = (blockSize - stepSize) / stepSize; //!!! 
why? seems about right to look at, but... + +// std::cerr << "TonalChangeDetect::initialise: step " << stepSize << ", block " +// << blockSize << ", delay " << m_stepDelay << std::endl; + + m_vaCurrentVector.resize(12, 0.0); + + return true; + +} + +std::string TonalChangeDetect::getIdentifier() const +{ + return "qm-tonalchange"; +} + +std::string TonalChangeDetect::getName() const +{ + return "Tonal Change"; +} + +std::string TonalChangeDetect::getDescription() const +{ + return "Detect and return the positions of harmonic changes such as chord boundaries"; +} + +std::string TonalChangeDetect::getMaker() const +{ + return "Queen Mary, University of London"; +} + +int TonalChangeDetect::getPluginVersion() const +{ + return 2; +} + +std::string TonalChangeDetect::getCopyright() const +{ + return "Plugin by Martin Gasser and Christopher Harte. Copyright (c) 2006-2009 QMUL - All Rights Reserved"; +} + +TonalChangeDetect::ParameterList TonalChangeDetect::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; + desc.identifier = "smoothingwidth"; + desc.name = "Gaussian smoothing"; + desc.description = "Window length for the internal smoothing operation, in chroma analysis frames"; + desc.unit = "frames"; + desc.minValue = 0; + desc.maxValue = 20; + desc.defaultValue = 5; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "minpitch"; + desc.name = "Chromagram minimum pitch"; + desc.unit = "MIDI units"; + desc.description = "Lowest pitch in MIDI units to be included in the chroma analysis"; + desc.minValue = 0; + desc.maxValue = 127; + desc.defaultValue = 32; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "maxpitch"; + desc.name = "Chromagram maximum pitch"; + desc.unit = "MIDI units"; + desc.description = "Highest pitch in MIDI units to be included in the chroma analysis"; + desc.minValue = 0; + desc.maxValue = 127; + desc.defaultValue = 
108; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "tuning"; + desc.name = "Chromagram tuning frequency"; + desc.unit = "Hz"; + desc.description = "Frequency of concert A in the music under analysis"; + desc.minValue = 420; + desc.maxValue = 460; + desc.defaultValue = 440; + desc.isQuantized = false; + list.push_back(desc); + + return list; +} +

/**
 * Return the current value of the named parameter.
 *
 * Recognised identifiers are "smoothingwidth", "minpitch", "maxpitch" and
 * "tuning". Any other name logs a warning on stderr and yields 0.
 */
float
TonalChangeDetect::getParameter(std::string param) const
{
    if (param == "smoothingwidth") {
        return m_iSmoothingWidth;
    }
    if (param == "minpitch") {
        return m_minMIDIPitch;
    }
    if (param == "maxpitch") {
        return m_maxMIDIPitch;
    }
    if (param == "tuning") {
        return m_tuningFrequency;
    }

    std::cerr << "WARNING: TonalChangeDetect::getParameter: unknown parameter \""
              << param << "\"" << std::endl;
    return 0.0;
}

/**
 * Set the named parameter and rebuild the chromagram configuration.
 *
 * Pitch parameters are rounded to the nearest integer; the smoothing width
 * is truncated. An unknown name logs a warning on stderr. setupConfig()
 * runs in every case, including the unknown-name one.
 */
void
TonalChangeDetect::setParameter(std::string param, float value)
{
    if (param == "minpitch") {
        m_minMIDIPitch = lrintf(value);
    } else if (param == "maxpitch") {
        m_maxMIDIPitch = lrintf(value);
    } else if (param == "tuning") {
        m_tuningFrequency = value;
    }
    else if (param == "smoothingwidth") {
        m_iSmoothingWidth = int(value);
    } else {
        std::cerr << "WARNING: TonalChangeDetect::setParameter: unknown parameter \""
                  << param << "\"" << std::endl;
    }

    setupConfig();
}


/**
 * Derive the chromagram configuration (m_config) from the sample rate,
 * MIDI pitch range and tuning frequency.
 *
 * Also zeroes the cached step and block sizes, so that
 * getPreferredStepSize() / getPreferredBlockSize() recompute them for the
 * new configuration.
 */
void TonalChangeDetect::setupConfig()
{
    m_config.FS = lrintf(m_inputSampleRate);
    m_config.min = Pitch::getFrequencyForPitch
        (m_minMIDIPitch, 0, m_tuningFrequency);
    m_config.max = Pitch::getFrequencyForPitch
        (m_maxMIDIPitch, 0, m_tuningFrequency);
    m_config.BPO = 12;
    m_config.CQThresh = 0.0054;
    m_config.normalise = MathUtilities::NormaliseNone;

    m_step = 0;
    m_block = 0;


}

void +TonalChangeDetect::reset() +{ + if (m_chromagram) { + delete m_chromagram; + m_chromagram = new Chromagram(m_config); + } + while (!m_pending.empty()) m_pending.pop(); + m_vaCurrentVector.clear(); + m_TCSGram.clear(); + + m_origin = 
Vamp::RealTime::zeroTime; + m_haveOrigin = false; +} + +size_t +TonalChangeDetect::getPreferredStepSize() const +{ + if (!m_step) { + Chromagram chroma(m_config); + m_step = chroma.getHopSize(); + m_block = chroma.getFrameSize(); + } + + return m_step; +} + +size_t +TonalChangeDetect::getPreferredBlockSize() const +{ + if (!m_step) { + Chromagram chroma(m_config); + m_step = chroma.getHopSize(); + m_block = chroma.getFrameSize(); + } + + return m_block; +} + +TonalChangeDetect::OutputList TonalChangeDetect::getOutputDescriptors() const +{ + OutputList list; + + OutputDescriptor hc; + hc.identifier = "tcstransform"; + hc.name = "Transform to 6D Tonal Content Space"; + hc.unit = ""; + hc.description = "Representation of content in a six-dimensional tonal space"; + hc.hasFixedBinCount = true; + hc.binCount = 6; + hc.hasKnownExtents = true; + hc.minValue = -1.0; + hc.maxValue = 1.0; + hc.isQuantized = false; + hc.sampleType = OutputDescriptor::OneSamplePerStep; + + OutputDescriptor d; + d.identifier = "tcfunction"; + d.name = "Tonal Change Detection Function"; + d.unit = ""; + d.description = "Estimate of the likelihood of a tonal change occurring within each spectral frame"; + d.minValue = 0; + d.minValue = 2; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::VariableSampleRate; + double dStepSecs = double(getPreferredStepSize()) / m_inputSampleRate; + d.sampleRate = 1.0f / dStepSecs; + + OutputDescriptor changes; + changes.identifier = "changepositions"; + changes.name = "Tonal Change Positions"; + changes.unit = ""; + changes.description = "Estimated locations of tonal changes"; + changes.hasFixedBinCount = true; + changes.binCount = 0; + changes.hasKnownExtents = false; + changes.isQuantized = false; + changes.sampleType = OutputDescriptor::VariableSampleRate; + changes.sampleRate = 1.0 / dStepSecs; + + list.push_back(hc); + list.push_back(d); + list.push_back(changes); + + return 
list; +} + +TonalChangeDetect::FeatureSet +TonalChangeDetect::process(const float *const *inputBuffers, + Vamp::RealTime timestamp) +{ + if (!m_chromagram) { + cerr << "ERROR: TonalChangeDetect::process: " + << "Chromagram has not been initialised" + << endl; + return FeatureSet(); + } + + if (!m_haveOrigin) m_origin = timestamp; + + // convert float* to double* + double *tempBuffer = new double[m_block]; + for (size_t i = 0; i < m_block; ++i) { + tempBuffer[i] = inputBuffers[0][i]; + } + + double *output = m_chromagram->process(tempBuffer); + delete[] tempBuffer; + + for (size_t i = 0; i < 12; i++) + { + m_vaCurrentVector[i] = output[i]; + } + + + FeatureSet returnFeatures; + + if (m_stepDelay == 0) { + m_vaCurrentVector.normalizeL1(); + TCSVector tcsVector = m_TonalEstimator.transform2TCS(m_vaCurrentVector); + m_TCSGram.addTCSVector(tcsVector); + + Feature feature; + feature.hasTimestamp = false; + for (int i = 0; i < 6; i++) + { feature.values.push_back(static_cast(tcsVector[i])); } + feature.label = ""; + returnFeatures[0].push_back(feature); + + return returnFeatures; + } + + if (m_pending.size() == m_stepDelay) { + + ChromaVector v = m_pending.front(); + v.normalizeL1(); + TCSVector tcsVector = m_TonalEstimator.transform2TCS(v); + m_TCSGram.addTCSVector(tcsVector); + + Feature feature; + feature.hasTimestamp = false; + for (int i = 0; i < 6; i++) + { feature.values.push_back(static_cast(tcsVector[i])); } + feature.label = ""; + returnFeatures[0].push_back(feature); + m_pending.pop(); + + } else { + returnFeatures[0].push_back(Feature()); + m_TCSGram.addTCSVector(TCSVector()); + } + + m_pending.push(m_vaCurrentVector); + + + return returnFeatures; +} + +TonalChangeDetect::FeatureSet TonalChangeDetect::getRemainingFeatures() +{ + FeatureSet returnFeatures; + + while (!m_pending.empty()) { + ChromaVector v = m_pending.front(); + v.normalizeL1(); + TCSVector tcsVector = m_TonalEstimator.transform2TCS(v); + m_TCSGram.addTCSVector(tcsVector); + + Feature feature; + 
feature.hasTimestamp = false; + for (int i = 0; i < 6; i++) + { feature.values.push_back(static_cast(tcsVector[i])); } + feature.label = ""; + returnFeatures[0].push_back(feature); + m_pending.pop(); + } + + ChangeDFConfig dfc; + dfc.smoothingWidth = double(m_iSmoothingWidth); + ChangeDetectionFunction df(dfc); + ChangeDistance d = df.process(m_TCSGram); + + + + for (int i = 0; i < d.size(); i++) + { + double dCurrent = d[i]; + double dPrevious = d[i > 0 ? i - 1 : i]; + double dNext = d[i < d.size()-1 ? i + 1 : i]; + + Feature feature; + feature.label = ""; + feature.hasTimestamp = true; + feature.timestamp = m_origin + + Vamp::RealTime::frame2RealTime(i*m_step, m_inputSampleRate); + feature.values.push_back(dCurrent); + returnFeatures[1].push_back(feature); + + + if (dCurrent > dPrevious && dCurrent > dNext) + { + Feature featurePeak; + featurePeak.label = ""; + featurePeak.hasTimestamp = true; + featurePeak.timestamp = m_origin + + Vamp::RealTime::frame2RealTime(i*m_step, m_inputSampleRate); + returnFeatures[2].push_back(featurePeak); + } + + } + + + return returnFeatures; + +} + diff --git a/libs/vamp-plugins/TonalChangeDetect.h b/libs/vamp-plugins/TonalChangeDetect.h new file mode 100644 index 0000000000..776bab36d2 --- /dev/null +++ b/libs/vamp-plugins/TonalChangeDetect.h @@ -0,0 +1,84 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. 
+*/ + +#ifndef _TONALCHANGEDETECT_ +#define _TONALCHANGEDETECT_ + +#include + +#include +#include +#include + +#include +#include +#include + +class TonalChangeDetect : public Vamp::Plugin +{ +public: + TonalChangeDetect(float fInputSampleRate); + virtual ~TonalChangeDetect(); + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + InputDomain getInputDomain() const { return TimeDomain; } + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string) const; + void setParameter(std::string, float); + + + size_t getPreferredStepSize() const; + size_t getPreferredBlockSize() const; + + OutputList getOutputDescriptors() const; + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +private: + void setupConfig(); + + ChromaConfig m_config; + Chromagram *m_chromagram; + TonalEstimator m_TonalEstimator; + mutable size_t m_step; + mutable size_t m_block; + size_t m_stepDelay; + std::queue m_pending; + ChromaVector m_vaCurrentVector; + TCSGram m_TCSGram; + + int m_iSmoothingWidth; // smoothing window size + int m_minMIDIPitch; // chromagram parameters + int m_maxMIDIPitch; + float m_tuningFrequency; + + Vamp::RealTime m_origin; + bool m_haveOrigin; +}; + + +#endif // _TONALCHANGEDETECT_ diff --git a/libs/vamp-plugins/Transcription.cpp b/libs/vamp-plugins/Transcription.cpp new file mode 100644 index 0000000000..25c8099336 --- /dev/null +++ b/libs/vamp-plugins/Transcription.cpp @@ -0,0 +1,2117 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +/*transcription vamp plugin: Ruohua Zhou, Josh Reiss, josh.reiss@elec.qmul.ac.uk */ + +#include "Transcription.h" +#include +#include +#include + +using std::string; +using std::vector; +using std::cerr; +using std::endl; + +const double CNum[8] = { + 3.025328153863e-005,0.0002117729707704,0.0006353189123113, 0.001058864853852, + 0.001058864853852,0.0006353189123113,0.0002117729707704,3.025328153863e-005 +}; + +const double CDen[8] = { + 1, -4.647847898799, 9.540784706769, -11.14354542746, + 7.967285533211, -3.477244449085, 0.8559660579522, -0.09152610255505 +}; + +const int A[10] = {0, 120, 190, 240, 279, 310, 337, 360, 380, 399}; + + + +const double EualCurve960[960] = { + 83.750025,83.532690,83.315770,83.099260,82.883159,82.667463,82.452170,82.237276,82.022779,81.808675, + 81.594963,81.381639,81.168699,80.956142,80.743964,80.532163,80.320735,80.109677,79.898987,79.688663,79.478700,79.269096,79.059848,78.850953, + 78.642408,78.434211,78.226359,78.018848,77.811676,77.604839,77.398336,77.192162,76.986316,76.780794,76.575593,76.370710,76.166143,75.961889, + 75.757945,75.554307,75.350973,75.147940,74.945205,74.742766,74.540618,74.338761,74.137189,73.935902,73.734895,73.534166,73.333712,73.133529, + 72.933616,72.733970,72.534586,72.335463,72.136598,71.937987,71.739628,71.541517,71.343653,71.146032,70.948650,70.751506,70.554597,70.357919, + 70.161469,69.965245,69.769244,69.573462,69.377898,69.182548,68.987408,68.792477,68.597752,68.403228,68.208905,68.014781,67.820873,67.627197, + 67.433772,67.240617,67.047749,66.855187,66.662949,66.471053,66.279516,66.088358,65.897597,65.707250,65.517336,65.327873,65.138879,64.950373, + 
64.762372,64.574894,64.387959,64.201583,64.015785,63.830584,63.645997,63.462043,63.278739,63.096105,62.914158,62.732915,62.552397,62.372620, + 62.193602,62.015363,61.837920,61.661291,61.485494,61.310549,61.136471,60.963274,60.790941,60.619447,60.448770,60.278885,60.109770,59.941401, + 59.773755,59.606807,59.440536,59.274916,59.109924,58.945538,58.781733,58.618486,58.455773,58.293572,58.131858,57.970608,57.809799,57.649407, + 57.489408,57.329780,57.170498,57.011539,56.852880,56.694496,56.536366,56.378464,56.220768,56.063255,55.905900,55.748680,55.591571,55.434551, + 55.277595,55.120681,54.963784,54.806886,54.649983,54.493077,54.336169,54.179261,54.022353,53.865448,53.708546,53.551650,53.394759,53.237877, + 53.081003,52.924139,52.767287,52.610448,52.453624,52.296815,52.140023,51.983250,51.826496,51.669763,51.513053,51.356366,51.199705,51.043070, + 50.886463,50.729885,50.573337,50.416821,50.260338,50.103890,49.947478,49.791103,49.634766,49.478469,49.322214,49.166001,49.009832,48.853710, + 48.697648,48.541659,48.385757,48.229958,48.074273,47.918719,47.763308,47.608055,47.452974,47.298080,47.143385,46.988904,46.834652,46.680642, + 46.526889,46.373405,46.220207,46.067307,45.914720,45.762460,45.610540,45.458976,45.307780,45.156968,45.006553,44.856549,44.706971,44.557832, + 44.409146,44.260928,44.113192,43.965951,43.819220,43.673013,43.527344,43.382227,43.237676,43.093703,42.950305,42.807478,42.665218,42.523520, + 42.382381,42.241794,42.101757,41.962264,41.823311,41.684894,41.547008,41.409648,41.272811,41.136491,41.000685,40.865387,40.730594,40.596301, + 40.462503,40.329195,40.196375,40.064036,39.932175,39.800787,39.669867,39.539412,39.409417,39.279876,39.150787,39.022143,38.893942,38.766178, + 38.638846,38.511944,38.385465,38.259405,38.133761,38.008525,37.883679,37.759203,37.635076,37.511278,37.387789,37.264588,37.141656,37.018971, + 36.896513,36.774262,36.652197,36.530298,36.408545,36.286918,36.165395,36.043957,35.922583,35.801253,35.679947,35.558643,35.437322,35.315964, 
+ 35.194547,35.073052,34.951458,34.829745,34.707892,34.585879,34.463686,34.341293,34.218678,34.095822,33.972704,33.849303,33.725600,33.601574, + 33.477205,33.352481,33.227425,33.102069,32.976445,32.850585,32.724520,32.598284,32.471906,32.345420,32.218858,32.092250,31.965629,31.839028, + 31.712477,31.586009,31.459655,31.333448,31.207419,31.081601,30.956024,30.830722,30.705725,30.581067,30.456777,30.332890,30.209436,30.086447, + 29.963955,29.841993,29.720591,29.599783,29.479599,29.360071,29.241233,29.123114,29.005748,28.889166,28.773400,28.658474,28.544378,28.431095, + 28.318607,28.206897,28.095947,27.985740,27.876257,27.767481,27.659396,27.551982,27.445224,27.339102,27.233599,27.128699,27.024383,26.920633, + 26.817433,26.714764,26.612609,26.510951,26.409772,26.309053,26.208779,26.108930,26.009491,25.910442,25.811766,25.713446,25.615465,25.517804, + 25.420446,25.323374,25.226570,25.130016,25.033695,24.937589,24.841681,24.745955,24.650409,24.555043,24.459856,24.364847,24.270016,24.175363, + 24.080887,23.986588,23.892466,23.798520,23.704751,23.611156,23.517737,23.424492,23.331422,23.238526,23.145803,23.053253,22.960877,22.868672, + 22.776640,22.684779,22.593090,22.501572,22.410224,22.319046,22.228038,22.137200,22.046530,21.956029,21.865697,21.775532,21.685535,21.595704, + 21.506041,21.416544,21.327213,21.238047,21.149047,21.060211,20.971540,20.883034,20.794691,20.706512,20.618496,20.530642,20.442952,20.355423, + 20.268057,20.180852,20.093808,20.006925,19.920202,19.833640,19.747237,19.660994,19.574910,19.488985,19.403218,19.317610,19.232159,19.146866, + 19.061729,18.976750,18.891927,18.807260,18.722749,18.638393,18.554193,18.470147,18.386255,18.302518,18.218934,18.135504,18.052227,17.969105, + 17.886151,17.803379,17.720805,17.638444,17.556310,17.474419,17.392786,17.311425,17.230351,17.149581,17.069127,16.989007,16.909233,16.829822, + 
16.750789,16.672148,16.593914,16.516103,16.438729,16.361808,16.285354,16.209382,16.133907,16.058945,15.984510,15.910617,15.837282,15.764518, + 15.692342,15.620768,15.549811,15.479486,15.409809,15.340793,15.272455,15.204808,15.137869,15.071646,15.006129,14.941300,14.877144,14.813643, + 14.750781,14.688540,14.626906,14.565860,14.505386,14.445467,14.386088,14.327231,14.268879,14.211016,14.153626,14.096691,14.040195,13.984121, + 13.928453,13.873174,13.818267,13.763716,13.709504,13.655615,13.602031,13.548736,13.495714,13.442948,13.390420,13.338115,13.286016,13.234107, + 13.182369,13.130788,13.079346,13.028026,12.976813,12.925693,12.874671,12.823756,12.772958,12.722285,12.671746,12.621351,12.571107,12.521025, + 12.471113,12.421380,12.371835,12.322488,12.273346,12.224419,12.175717,12.127248,12.079020,12.031044,11.983328,11.935880,11.888711,11.841828, + 11.795242,11.748960,11.702993,11.657348,11.612035,11.567063,11.522441,11.478178,11.434282,11.390764,11.347631,11.304893,11.262558,11.220637, + 11.179137,11.138068,11.097437,11.057252,11.017521,10.978252,10.939452,10.901129,10.863290,10.825944,10.789098,10.752760,10.716937,10.681638, + 10.646869,10.612640,10.578956,10.545827,10.513259,10.481261,10.449840,10.419004,10.388760,10.359117,10.330082,10.301663,10.273867,10.246702, + 10.220176,10.194296,10.169071,10.144508,10.120615,10.097399,10.074868,10.053030,10.031892,10.011463,9.991749,9.972762,9.954523,9.937056,9.920385, + 9.904534,9.889527,9.875389,9.862144,9.849815,9.838428,9.828005,9.818572,9.810152,9.802770,9.796449,9.791214,9.787089,9.784099,9.782266,9.781616, + 9.782172,9.783959,9.787001,9.791322,9.796946,9.803897,9.812200,9.821878,9.832956,9.845457,9.859407,9.874829,9.891747,9.910185,9.930168,9.951720, + 9.974864,9.999625,10.026008,10.053933,10.083304,10.114023,10.145991,10.179112,10.213287,10.248419,10.284410,10.321161,10.358576,10.396556,10.435004, + 
10.473821,10.512911,10.552175,10.591516,10.630835,10.670035,10.709018,10.747686,10.785942,10.823688,10.860826,10.897258,10.932886,10.967613,11.001341, + 11.033972,11.065408,11.095552,11.124305,11.151570,11.177249,11.201245,11.223459,11.243793,11.262151,11.278450,11.292676,11.304827,11.314906,11.322913, + 11.328848,11.332713,11.334508,11.334233,11.331889,11.327477,11.320998,11.312453,11.301841,11.289164,11.274422,11.257616,11.238747,11.217816,11.194822, + 11.169767,11.142652,11.113476,11.082241,11.048948,11.013597,10.976189,10.936724,10.895203,10.851627,10.805996,10.758312,10.708574,10.656784,10.602942, + 10.547049,10.489106,10.429113,10.367082,10.303073,10.237155,10.169399,10.099876,10.028655,9.955807,9.881403,9.805512,9.728206,9.649554,9.569627,9.488495, + 9.406228,9.322897,9.238573,9.153325,9.067225,8.980341,8.892745,8.804508,8.715698,8.626388,8.536646,8.446544,8.356152,8.265539,8.174778,8.083937,7.993087, + 7.902299,7.811643,7.721190,7.631008,7.541170,7.451746,7.362804,7.274417,7.186644,7.099504,7.013003,6.927151,6.841956,6.757424,6.673565,6.590385,6.507894, + 6.426099,6.345008,6.264629,6.184970,6.106039,6.027843,5.950392,5.873692,5.797752,5.722579,5.648183,5.574570,5.501748,5.429727,5.358512,5.288114,5.218538, + 5.149794,5.081890,5.014832,4.948630,4.883292,4.818824,4.755236,4.692535,4.630729,4.569826,4.509834,4.450761,4.392616,4.335415,4.279172,4.223905,4.169630, + 4.116362,4.064118,4.012914,3.962766,3.913691,3.865703,3.818820,3.773058,3.728432,3.684960,3.642656,3.601538,3.561621,3.522921,3.485455,3.449239,3.414289, + 3.380620,3.348250,3.317194,3.287469,3.259090,3.232074,3.206437,3.182194,3.159363,3.137959,3.117999,3.099498,3.082473,3.066939,3.052914,3.040413,3.029451, + 3.020039,3.012186,3.005904,3.001201,2.998087,2.996571,2.996665,2.998377,3.001718,3.006696,3.013323,3.021607,3.031559,3.043187,3.056503,3.071516,3.088235, + 
3.106671,3.126833,3.148731,3.172374,3.197773,3.224938,3.253877,3.284601,3.317120,3.351444,3.387581,3.425543,3.465339,3.506978,3.550470,3.595826,3.643054, + 3.692166,3.743169,3.796075,3.850896,3.907655,3.966377,4.027088,4.089815,4.154581,4.221415,4.290340,4.361382,4.434569,4.509924,4.587474,4.667245,4.749261, + 4.833550,4.920136,5.009046,5.100305,5.193938,5.289972,5.388432,5.489343,5.592732,5.698625,5.807046,5.918022,6.031578,6.147741,6.266535,6.387986,6.512121, + 6.638964,6.768542,6.900880,7.036004,7.173939,7.314712,7.458348,7.604856,7.754175,7.906227,8.060936,8.218223,8.378012,8.540225,8.704784,8.871612,9.040631, + 9.211765,9.384934,9.560063,9.737073,9.915888,10.096429,10.278619,10.462380,10.647636,10.834309,11.022321,11.211594,11.402052,11.593616,11.786210,11.979755, + 12.174175,12.369392,12.565329,12.761907,12.959049,13.156679,13.354718,13.553089,13.751715,13.950518,14.149420,14.348345,14.547211,14.745925,14.944391, + 15.142512,15.340191,15.537333,15.733840,15.929615,16.124564 +}; +void Transcribe(int Len,int inputLen,double *SoundIn,double *out,double *outArray2,double *outArray3,double SampleRate); + +Transcription::Transcription(float inputSampleRate) : + Plugin(inputSampleRate), + m_stepSize(0) +{ + m_SoundIn=0; + m_SampleN=0; + m_AllocN = 0; + m_Excess = false; +} + +Transcription::~Transcription() +{ + free(m_SoundIn); +} + +string +Transcription::getIdentifier() const +{ + return "qm-transcription"; +} + +string +Transcription::getName() const +{ + return "Polyphonic Transcription"; +} + +string +Transcription::getDescription() const +{ + return "Transcribe the input audio to estimated notes"; +} + +string +Transcription::getMaker() const +{ + return "Queen Mary, University of London"; +} + +int +Transcription::getPluginVersion() const +{ + return 1; +} + +string +Transcription::getCopyright() const +{ + return "Plugin by Dr. Ruohua Zhou. 
Copyright (c) 2008-2009 QMUL - All Rights Reserved"; +} + +size_t +Transcription::getPreferredStepSize() const +{ + return 441; +} + +size_t +Transcription::getPreferredBlockSize() const +{ + return 441; +} + +bool +Transcription::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + + if (m_inputSampleRate < 4410 || m_inputSampleRate > 441000) return false; + + m_stepSize = std::min(stepSize, blockSize); + m_blockSize=blockSize; + + m_SampleN = 0; + + return true; +} + +void +Transcription::reset() +{ + free(m_SoundIn); + m_SoundIn = 0; + m_SampleN = 0; + m_AllocN = 0; + m_Excess = false; + m_Base = Vamp::RealTime(); +} + +Transcription::OutputList +Transcription::getOutputDescriptors() const +{ + OutputList list; + + OutputDescriptor zc; + zc.identifier = "transcription"; + zc.name = "Transcription"; + zc.description = "Estimated note pitch (MIDI note number from 0 to 127)"; + zc.unit = "MIDI units"; + zc.hasFixedBinCount = true; + zc.binCount = 1; + zc.hasKnownExtents = true; + zc.minValue = 0; + zc.maxValue = 127; + zc.isQuantized = true; + zc.quantizeStep = 1.0; + zc.hasDuration = true; + zc.sampleType = OutputDescriptor::VariableSampleRate; + +/* no -- this is the result of a confusion between bin indices and values + { + + zc.binNames.push_back("A0");// MIDI Note 21 + zc.binNames.push_back("A0#"); + zc.binNames.push_back("B0"); + zc.binNames.push_back("C1"); + zc.binNames.push_back("C1#"); + zc.binNames.push_back("D1"); + zc.binNames.push_back("D1#"); + zc.binNames.push_back("E1"); + zc.binNames.push_back("F1"); + zc.binNames.push_back("F1#"); + zc.binNames.push_back("G1"); + zc.binNames.push_back("G1#"); + + zc.binNames.push_back("A1");// MIDI Note 33 + zc.binNames.push_back("A1#"); + zc.binNames.push_back("B1"); + zc.binNames.push_back("C2"); + zc.binNames.push_back("C2#"); + zc.binNames.push_back("D2"); + zc.binNames.push_back("D2#"); + 
zc.binNames.push_back("E2"); + zc.binNames.push_back("F2"); + zc.binNames.push_back("F2#"); + zc.binNames.push_back("G2"); + zc.binNames.push_back("G2#"); + + zc.binNames.push_back("A2");// MIDI Note 45 + zc.binNames.push_back("A2#"); + zc.binNames.push_back("B3"); + zc.binNames.push_back("C3"); + zc.binNames.push_back("C3#"); + zc.binNames.push_back("D3"); + zc.binNames.push_back("D3#"); + zc.binNames.push_back("E3"); + zc.binNames.push_back("F3"); + zc.binNames.push_back("F3#"); + zc.binNames.push_back("G3"); + zc.binNames.push_back("G3#"); + + zc.binNames.push_back("A3");// MIDI Note 57 + zc.binNames.push_back("A3#"); + zc.binNames.push_back("B0"); + zc.binNames.push_back("C4"); + zc.binNames.push_back("C4#"); + zc.binNames.push_back("D4"); + zc.binNames.push_back("D4#"); + zc.binNames.push_back("E4"); + zc.binNames.push_back("F4"); + zc.binNames.push_back("F4#"); + zc.binNames.push_back("G4"); + zc.binNames.push_back("G4#"); + + zc.binNames.push_back("A4");// MIDI Note 69 + zc.binNames.push_back("A4#"); + zc.binNames.push_back("B0"); + zc.binNames.push_back("C5"); + zc.binNames.push_back("C5#"); + zc.binNames.push_back("D5"); + zc.binNames.push_back("D5#"); + zc.binNames.push_back("E5"); + zc.binNames.push_back("F5"); + zc.binNames.push_back("F5#"); + zc.binNames.push_back("G5"); + zc.binNames.push_back("G5#"); + + zc.binNames.push_back("A5");// MIDI Note 85 + zc.binNames.push_back("A5#"); + zc.binNames.push_back("B0"); + zc.binNames.push_back("C6"); + zc.binNames.push_back("C6#"); + zc.binNames.push_back("D6"); + zc.binNames.push_back("D6#"); + zc.binNames.push_back("E6"); + zc.binNames.push_back("F6"); + zc.binNames.push_back("F6#"); + zc.binNames.push_back("G6"); + zc.binNames.push_back("G6#"); + + zc.binNames.push_back("A6");// MIDI Note 93 + zc.binNames.push_back("A6#"); + zc.binNames.push_back("B0"); + zc.binNames.push_back("C7"); + zc.binNames.push_back("C7#"); + zc.binNames.push_back("D7"); + zc.binNames.push_back("D7#"); + zc.binNames.push_back("E7"); 
+ zc.binNames.push_back("F7"); + zc.binNames.push_back("F7#"); + zc.binNames.push_back("G7"); + zc.binNames.push_back("G7#"); + + zc.binNames.push_back("A7");// MIDI Note 105 + zc.binNames.push_back("A7#"); + zc.binNames.push_back("B0"); + zc.binNames.push_back("C8"); + } +*/ + + list.push_back(zc); + +/* zc.identifier = "Transcriptions"; + zc.name = "Polyphonic Transcription"; + zc.description = "Polyphonic Music Transcription"; + zc.unit = ""; + zc.hasFixedBinCount = true; + zc.binCount = 0; + zc.sampleType = OutputDescriptor::VariableSampleRate; + zc.sampleRate = m_inputSampleRate; + list.push_back(zc);*/ + + return list; +} + +Transcription::FeatureSet +Transcription::process(const float *const *inputBuffers, + Vamp::RealTime timestamp) +{ + if (m_stepSize == 0) { + cerr << "ERROR: Transcription::process: " + << "Transcription has not been initialised" + << endl; + return FeatureSet(); + } + + if (m_SampleN == 0) { + m_Base = timestamp; + } + + if (m_Excess) return FeatureSet(); + + for (size_t i = 0; i < m_blockSize;i++) { + + if (m_SampleN >= m_AllocN) { + size_t newsize = m_AllocN * 2; + if (newsize < 10000) newsize = 10000; + double *newbuf = (double *)realloc(m_SoundIn, newsize * sizeof(double)); + if (!newbuf) { + m_Excess = true; + break; + } + m_SoundIn = newbuf; + m_AllocN = newsize; + } + + m_SoundIn[m_SampleN]=inputBuffers[0][i]; + m_SampleN=m_SampleN+1; + } + + return FeatureSet(); +} + +Transcription::FeatureSet +Transcription::getRemainingFeatures() +{ + FeatureSet returnFeatures; + + double * OutArray; + double *OutArray2; + double *hello1; + double *hello2; + int Msec; + size_t i; + size_t j; + size_t n; + size_t count; + + Msec=(int)(100*m_SampleN/m_inputSampleRate); + + if (Msec < 100) return returnFeatures; + + OutArray=(double *)malloc(3*3000*sizeof(double)); + OutArray2=(double *)malloc(88*Msec*sizeof(double)); + hello1=(double *)malloc(112*Msec*sizeof(double)); + hello2=(double *)malloc(112*Msec*sizeof(double)); + + for (j = 0; j 
0)&&(OutArray[3*i]<88)) + { + start=OutArray[3*i+1];endd=OutArray[3*i+2]; + for(j=start;j<(start+0.05);j=j+0.01) + { + + Feature feature; + Vamp::RealTime ts; + + feature.hasTimestamp = true; + + feature.timestamp =ts.fromSeconds(j); + feature.values.push_back(OutArray[3*i]+21); + returnFeatures[0].push_back(feature); + + } + + + + } + else + { + + break; + } + + + } + + */ + + + for (i = 0; i < 3000; i++) { + + if((OutArray[3*i]>0)&&(OutArray[3*i]<88)) + { + start=100*OutArray[3*i+1]; + endd=100*OutArray[3*i+2]-5; + for(j=start;j0) + { + + if (starts[n] < 0.) + { + starts[n] = j * 0.01; + } + } + else + { + if (starts[n] > 0.) + { + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = m_Base + Vamp::RealTime::fromSeconds(starts[n]); + feature.hasDuration = true; + feature.duration = Vamp::RealTime::fromSeconds(j * 0.01 - starts[n]); + feature.values.push_back(n+20); + returnFeatures[0].push_back(feature); + + starts[n] = -1.0; + } + } + } + } + + + for(n=0;n<88;n++) + { + if (starts[n] > 0.) 
+ { + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = m_Base + Vamp::RealTime::fromSeconds(starts[n]); + feature.hasDuration = true; + feature.duration = Vamp::RealTime::fromSeconds(j * 0.01 - starts[n]); + feature.values.push_back(n+20); + returnFeatures[0].push_back(feature); + + starts[n] = -1.0; + } + } + + free(OutArray2); + free(OutArray); + + free(hello1); + free(hello2); + + return returnFeatures; + +} + + + + + +void sofacomplexMex(double *y, double *z, int ncols,double StartNote,double NoteInterval1,double NoteNum,double C,double D,double SR) +{ + int mseconds,i,j,el,count,count2; + double Snote,NoteInterval,NoteN, BasicR; + double *signs; + double *rwork,*buffer; + double freq,R,gain,gainI,gainII,coefI,coefM; + double output,input,outputI,outputM; + double *x; + double *sum,*sum2; + double power; + double temp; + + + //SR=44100; + Snote=StartNote; + NoteInterval=NoteInterval1; + NoteN=NoteNum; + + signs=(double*)malloc((int)NoteN*5*sizeof(double)); + + for (i = 0; i MaxValue) + { + MaxValue=In2[j]; + MaxIndex=j; + } + } + InputArray[MaxIndex]=In2[MaxIndex]; + In2[MaxIndex]=0; + } + + free(In2); +} + +double SumF(double *InputArray,int Start, int End) +{ + double Value; + int i; + Value=0; + for (i=Start;i<(End+1);i++) + { + Value=Value+InputArray[i]; + } + + return Value; + +} + +int round10(int x) +{ + int I,I2; + I=((int)(x/10)); + I2=x-I*10; + + if(I2>5) + return (I+1); + else + return I; + +} + + +void ConToPitch1250(double *In, int InLen) +{ + int i,j,k, nn,col; + double *Out; + const int A[12]={0, 120, 190, 240, 279, 310, 337, 360, 380, 399, 415, 430}; + Out=(double*)malloc(InLen*sizeof(double)); + + + col=InLen; + + for (i=0;i(col-1)) + { + k=k+1; + Out[i]=Out[i]+In[col-1]; + } + } + if(k>0) + { + Out[i]=Out[i]/k; + } + } + for (i=0;iMaxValue) + MaxValue=In[i]; + } + + for (i=0;i-1) + { + sum=sum+In[i-j]; + count=count+1; + } + } + + for (j=1;j<(n+1);j++) + { + if ((i+j)(db2+In[i-6]))||(In[i]>(db2+In[i+6])) + 
||(In[i]>(db3+In[i+20]))||(In[i]>(db3+In[i-20]))) + /*&&(In[i]>db)*/&&(In[i]>In[i+3])&&(In[i]>In[i-3]) + &&(In[i]>In[i+2])&&(In[i]>In[i-2]) + &&(In[i]>In[i+1])&&(In[i]>In[i-1])) + { + Out1[i]=In[i]; + Out2[i]=1; + } + + } + + lastout=1; + for(i=0;iOut1[lastout]) + { + Out2[lastout]=0; + Out1[lastout]=0; + lastout=i; + } + else + { + Out2[i]=0; + Out1[i]=0; + } + + } + else + { + lastout=i; + } + } + + } + +} + + +void ConFrom1050To960(double *In, double *out, int InputLen) +{ + int i,j; + + for(i=0;i<960;i++) + { + for (j=0;j-1)&&((i+m)MaxVal) + { + MaxVal=InputArray[i+j*InputVLen]; + } + + } + OutArray[j]=MaxVal; + } + +} + +void MaxV2( double *InputArray, int InputHLen, int InputVLen, double *OutArray) +{ + int i; + int j; + double MaxVal; + for (i=0;iMaxVal) + { + MaxVal=InputArray[i+j*InputVLen]; + } + + } + OutArray[i]=MaxVal; + } + +} + + + +void MinArray( double *InputArray, int InputHLen, int InputVLen, double MinValue) +{ + int i; + int j; + + for (i=0;iMaxValue) + InputArray[i+j*InputVLen]=MaxValue; + + } + + } + +} + +double GetMaxValue( double *InputArray, int InputHLen, int InputVLen) +{ + int i; + int j; + + double MaxValue; + MaxValue=InputArray[0]; + for (i=0;iMaxValue) + MaxValue=InputArray[i*InputHLen+j]; + + } + + } + + return MaxValue; +} + +void RemoveNoise( double *InputArray, int InputHLen, int InputVLen) +{ + int i; + int j; + + for (i=0;iIn[i+2])&&(In[i]>In[i-2]) + &&(In[i]>In[i+1])&&(In[i]>In[i-1])) + { + Out1[i]=In[i]; + } + + } + + for(i=0;i0) + { + Edetect(Input,mseconds,960, a,b,OutOne); + } + + + free(Input); + +} + +void PitchEstimation(double *In, int InLen, double *OutArray,double *OutArray2) +{ + double *xx,*x,*y,*y1,*PeakPitch1, *PeakPitch2,*PeakInput1, *PeakInput2; + double *out,*outValue; + double *output,*output1; + double notefloat,hh0,hh1,hh28; + double outM12[12]; + int *outc; + int *yI; + double temp; + int i,j,sumI; + int Len; + int NN,NN2; + int count; + double Th; + + Len=1050; + xx=(double*)malloc(Len*sizeof(double)); 
+ x=(double*)malloc(Len*sizeof(double)); + y=(double*)malloc(Len*sizeof(double)); + y1=(double*)malloc(Len*sizeof(double)); + PeakPitch1=(double*)malloc(Len*sizeof(double)); + PeakPitch2=(double*)malloc(Len*sizeof(double)); + PeakInput1=(double*)malloc(Len*sizeof(double)); + PeakInput2=(double*)malloc(Len*sizeof(double)); + out=(double*)malloc(Len*sizeof(double)); + outValue=(double*)malloc(Len*sizeof(double)); + output=(double*)malloc(112*sizeof(double)); + output1=(double*)malloc(112*sizeof(double)); + outc=(int*)malloc(112*sizeof(int)); +// yI=(double*)malloc(12*sizeof(double)); + + + for (i=0;i12) + { + FindMaxN(PeakPitch1,Len,12); + + for (i=0;i0)&&(SumF(PeakInput2,i+120-4,i+120+4)>0)) + ||((SumF(PeakInput2,i-4,i+4)>0)&&(SumF(PeakInput2,i+190-4,i+190+4)>0)) + ||((SumF(PeakInput2,i+190-4,i+190+4)>0)&&(SumF(PeakInput2,i+120-4,i+120+4)>0)) + ) + { + out[i]=1; + outValue[i]=y1[i]; + + } + } + } + + for (i=0;i<112;i++) + { + output[i]=0; + outc[i]=0; + } + + + + for (i=0;i0)&&(SumF(PeakInput2,outc[i]-5,outc[i]+5)==0)) + { + output1[i]=0; + } + } + + + for (i=0;i<112;i++) + { + OutArray[i]=0; + OutArray2[i]=0; + + } + + Th=30; + for(i=20;i<105;i++) + { + if(output1[i]==1) + { + OutArray[i]=outc[i]+200+2; + OutArray2[i]=y[outc[i]]; + + } + + } + + free(xx); // xx=(double*)malloc(Len*sizeof(double)); + free(x); // x=(double*)malloc(Len*sizeof(double)); + free(y); // y=(double*)malloc(Len*sizeof(double)); + free(y1); // y1=(double*)malloc(Len*sizeof(double)); + free(PeakPitch1); //=(double*)malloc(Len*sizeof(double)); + free(PeakPitch2); //=(double*)malloc(Len*sizeof(double)); + free(PeakInput1); //=(double*)malloc(Len*sizeof(double)); + free(PeakInput2); //=(double*)malloc(Len*sizeof(double)); + free(out); //=(double*)malloc(Len*sizeof(double)); + free(outValue); //=(double*)malloc(Len*sizeof(double)); + free(output); //=(double*)malloc(112*sizeof(double)); + free(output1); //=(double*)malloc(112*sizeof(double)); + free(outc); //=(double*)malloc(112*sizeof(int)); 
+//free(yI); //=(double*)malloc(12*sizeof(int)); +// printf(" end free \n"); +} + +void DoMultiPitch(double *In, int RLen,int CLen, double *Out1, double *Out2) +{ + + int i, j; + double *sum1,*mean1; + double MaxV; + double *OutArray1, *OutArray2,*tempArray; + + OutArray1=(double *)malloc(112*sizeof(double)); + OutArray2=(double *)malloc(112*sizeof(double)); + tempArray=(double *)malloc(RLen*sizeof(double)); + + sum1=(double*)malloc(CLen*sizeof(double)); + mean1=(double*)malloc(CLen*sizeof(double)); + + for (j=0;jMaxV) + { + MaxV=mean1[j]; + } + } + + for (j=0;jMaxV) + MaxV=tempArray[i]; + } + + if(mean1[j]>-55) + { + + PitchEstimation(tempArray,RLen,OutArray1,OutArray2); + + for(i=0;i<112;i++) + { + if(OutArray1[i]>0) + { + if((MaxV-tempArray[(int)OutArray1[i]-201-1])>40) + { + OutArray1[i]=0; + OutArray2[i]=0; + } + + } + } + + } + + for (i=0;i<112;i++) + { + + Out1[j*112+i]=OutArray1[i]; + Out2[j*112+i]=OutArray2[i]; + + } + + } + + free(OutArray1); + free(OutArray2); + free(tempArray); + free(sum1); + free(mean1); +} + + +int OnsetToArray(double *In, int Len, double *OutStart,double *OutEnd) +{ + int count,i; + + count=0; + + for (i=0;i0) + { + OutStart[count]=i+1; + if(count>0) + { + OutEnd[count-1]=i+1; + } + count=count+1; + } + } + if (count>0) + { + OutEnd[count-1]=Len; + } + return count; + +} +void dbfunction( double *InputArray, int InputHLen, int InputVLen,double *OutArray) +{ + int i; + int j; + double temp; + + for (i=0;i0) + { + OnsetN=OnsetN+1; + count=count+1; + } + } + Len2=count; + out2=(double *)malloc(112*Len2*sizeof(double)); + A6A=(double *)malloc(112*Len2*sizeof(double)); + OnsetToArray(In,Len,OutStart,OutEnd); + DoMultiPitch(dbs,1050,Len, PitchOut1, PitchOut2); + + + for (i=0;i1) + PitchOut3[i*112+j]=1; + } + + } + + + for (i=0;i0) + { + startb=(int)OutStart[i-1]; + } + + for (j=0;j<112;j++) + { + sum=0; + count=0; + for (k=(start-1);kmaxVal) + { + maxVal=PitchOut2[k*112+j]; + } + + } + + A3[j]=maxVal; + + } + + for (j=0;j<112;j++) + { + 
sum=0; + count=0; + for (k=(start-1);k0) + { + sum=sum+PitchOut2[k*112+j]; + count=count+1; + } + } + if(count>0) + A4[j]=sum/count; + else + A4[j]=0; + } + + + for (j=0;j<112;j++) + { + sum=0; + count=0; + for (k=(start-1);k0) + { + sum=sum+PitchOut1[k*112+j]; + count=count+1; + } + } + if(count>0) + A5[j]=sum/count; + else + A5[j]=0; + } + + maxV=A3[0]; + for (j=0;j<112;j++) + { + if(A3[j]>maxV) + maxV=A3[j]; + } + + for (j=0;j<112;j++) + { + + if(A1[j]>0) + { + D[j]=A1[j];D2[j]=A1[j]; + } + + else + { + D[j]=A1[j];D2[j]=A1[j]; + } + } + + for (j=0;j<112;j++) + { + if(A1[j]<8) + { + D[j]=0;D2[j]=0; + } + + } + + for(j=0;j<112;j++) + { + + if ((j>12)&&(D[j]>0)&&(D[j-12]>0)) + { + D[j]=0; D2[j]=0; + if((A3[j]>45)&&(A3[j]>(A3[j-12]+3))) + { + D[j]=1; + } + } + + + if ((j>19)&&(D[j]>0)&&(D[j-19]>0)) + { + + D[j]=0; D2[j]=0; + if((A3[j]>50)) + { + D[j]=1; + } + } + + if ((j>24)&&(D[j]>0)&&(D[j-24]>0)) + { + + D[j]=0; D2[j]=0; + if((A3[j]>50)) + { + D[j]=1; + } + } + + if ((j>28)&&(D[j]>0)&&(D[j-28]>0)) + { + + D[j]=0; D2[j]=0; + if((A3[j]>50)) + { + D[j]=1; + } + } + + if ((j>34)&&(abs(A5[j]-337.0-A5[j-34])<3.0)&&(D[j]>0)&&(D[j-34]>0)) + { + + D[j]=0; D2[j]=0; + if((A4[j]>25)&&(A3[j]>40)&&(A3[j]>(A3[j-34]-3))&&((A1[j]>8)||(A6[j]>0.8))) + { + D[j]=1; + } + } + + if((j>48)&&(j<59)&&(A3[j]<20)) + { + D[j]=0; + } + + if((j>58)&&(j<69)&&(A3[j]<28)) + { + D[j]=0; + } + + + if((j>68)&&(j<79)&&(A3[j]<40)) + { + D[j]=0; + } + + if((j>78)&&(A3[j]<50)) + { + D[j]=0; + } + + if((j>85)&&(A3[j]<55)) + { + D[j]=0; + } + + if((D2[j]>0)&&(A1[j]>15)) + { + D[j]=1; + } + if(i>1) + { + + for (k=(startb-1);k20)&&(temp>3)) + { + D[j]=0; + } + + } + + } + + for(j=0;j<112;j++) + { + out[j+i*112]=D[j]; + out2[j+i*112]=D[j]; + } + } + + for (i=1;i0)&&(out[j+i*112]>0)) + { + out[j+i*112]=0; + sum=0; + for(k=(start2-1);k0)&&(i<(OnsetN-1))&&(start2>5)) + { + + M1=dbs[index+(start2-1)*1050]; + for (k=(start2-1);k<(start2+10);k++) + { + if(dbs[index+k*1050]>M1) + M1=dbs[index+k*1050]; + + } + + 
M2=dbs[index+(start2-5-1)*1050]; + for (k=(start2-5-1);k10) + { + out[j+i*112]=1; + } + } + } + } + } + + count=0; + for (i=0;i0) + { + outArray3[count*3+0]=j+1-21;//exp((log(2.0))*(j+1-69)/12)*440; + outArray3[count*3+1]=start*0.01; + + if(i==(OnsetN-1)) + { + outArray3[count*3+2]=0.01*OutEnd[i]; + } + else + { + + for(k=(i+1);k0) + { + outArray3[count*3+2]=0.01*OutStart[k]; + break; + } + + if(A6A[k*112+j]<0.5) + { + outArray3[count*3+2]=0.01*OutStart[k]; + break; + + } + + } + + } + + count=count+1; + } + + } + + } + outArray3[count*3+0]=0; + outArray3[count*3+1]=0; + outArray3[count*3+2]=0; + + free(tempArray); + free(OutStart); + free(OutEnd); + free(A1); + free(A2); + free(A3); + free(A4); + free(A5); + free(A6); + free(A6A); + free(D); + free(D2); + free(out2); + free(PitchOut1); + free(PitchOut2); + free(PitchOut3); + free(In); + free(dbs); + free(dbs1); + free(ss); + free(jj); +} + diff --git a/libs/vamp-plugins/Transcription.h b/libs/vamp-plugins/Transcription.h new file mode 100644 index 0000000000..3796cb31b4 --- /dev/null +++ b/libs/vamp-plugins/Transcription.h @@ -0,0 +1,81 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _TRANSCRIPTION_PLUGIN_H_ +#define _TRSNSCRIPTION_PLUGIN_H_ + +#include + +class Transcription : public Vamp::Plugin +{ +public: + Transcription(float inputSampleRate); + virtual ~Transcription(); + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + InputDomain getInputDomain() const { return TimeDomain; } + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + size_t getPreferredStepSize() const; + size_t getPreferredBlockSize() const; + + OutputList getOutputDescriptors() const; + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet 
getRemainingFeatures(); + +protected: + size_t m_stepSize; + size_t m_blockSize; + double * m_SoundIn; + size_t m_SampleN; + size_t m_AllocN; + bool m_Excess; + Vamp::RealTime m_Base; +/* + void sofacomplexMex(double *y, double *z, int ncols,double StartNote,double NoteInterval1,double NoteNum,double C,double D); + void FindMaxN( double *InputArray, int InputLen,int MaxOrder); + double SumF(double *InputArray,int Start, int End); + int round10(int x) ; + void ConToPitch1250(double *In, int InLen); + void Norm1(double *In, int InLen); + void Smooth(double *In, int InLen,int smoothLen); + void FindPeaks(double *In, int InLen,double *Out1,double *Out2, int db, int db2, int db3); + void ConFrom1050To960(double *In, double *out, int InputLen); + void Move( double *InputArray, int InputLen,int m); + double SumArray( double *InputArray, int InputHLen, int InputVLen); + double Sum( double *InputArray, int InputHLen); + void MeanV2( double *InputArray, int InputHLen, int InputVLen, double *OutArray); + void SumV( double *InputArray, int InputHLen, int InputVLen, double *OutArray); + void SumV2( double *InputArray, int InputHLen, int InputVLen, double *OutArray); + void MaxV( double *InputArray, int InputHLen, int InputVLen, double *OutArray); + void MaxV2( double *InputArray, int InputHLen, int InputVLen, double *OutArray); + void MinArray( double *InputArray, int InputHLen, int InputVLen, double MinValue); + void MaxArray( double *InputArray, int InputHLen, int InputVLen, double MaxValue); + double GetMaxValue( double *InputArray, int InputHLen, int InputVLen); + void RemoveNoise( double *InputArray, int InputHLen, int InputVLen); + double MeanArray( double *InputArray, int InputHLen, int InputVLen); + void Mydiff( double *InputArray, int InputHLen, int InputVLen,int n); + void PeakDetect(double *In, int InLen); + void MeanV( double *InputArray, int InputHLen, int InputVLen, double *OutArray); +void Edetect(double *InputArray, int InputHLen, int InputVLen, double MinT, 
double db1,double *OutOne); +void OnsetDetection2(double *In,int InputLen,double *OutOne,double a,double b); +void PitchEstimation(double *In, int InLen, double *OutArray,double *OutArray2); +void DoMultiPitch(double *In, int RLen,int CLen, double *Out1, double *Out2); +int OnsetToArray(double *In, int Len, double *OutStart,double *OutEnd); +void dbfunction( double *InputArray, int InputHLen, int InputVLen,double *OutArray); + +void Transcribe(int Len,int inputLen,double *SoundIn,double *out,double *outArray2,double *outArray3);*/ + +}; + + +#endif diff --git a/libs/vamp-plugins/plugins.cpp b/libs/vamp-plugins/plugins.cpp index 732da908fa..3093c4c312 100644 --- a/libs/vamp-plugins/plugins.cpp +++ b/libs/vamp-plugins/plugins.cpp @@ -41,26 +41,42 @@ #include #include -#include "ZeroCrossing.h" -#include "SpectralCentroid.h" -#include "PercussionOnsetDetector.h" #include "AmplitudeFollower.h" -#include "OnsetDetect.h" +#include "BarBeatTrack.h" +#include "BeatTrack.h" +#include "ChromagramPlugin.h" #include "EBUr128.h" +#include "KeyDetect.h" +#include "OnsetDetect.h" +#include "PercussionOnsetDetector.h" +#include "SimilarityPlugin.h" +#include "SpectralCentroid.h" +#include "TonalChangeDetect.h" +#include "Transcription.h" #include "TruePeak.h" +#include "ZeroCrossing.h" + #ifdef HAVE_AUBIO #include "Onset.h" #endif -static Vamp::PluginAdapter zeroCrossingAdapter; -static Vamp::PluginAdapter spectralCentroidAdapter; -static Vamp::PluginAdapter percussionOnsetAdapter; -static Vamp::PluginAdapter amplitudeAdapter; -static Vamp::PluginAdapter onsetDetectorAdapter; -static Vamp::PluginAdapter VampEBUr128Adapter; -static Vamp::PluginAdapter VampTruePeakAdapter; +static Vamp::PluginAdapter AmplitudeFollowerAdapter; +static Vamp::PluginAdapter BarBeatTrackerAdapter; +static Vamp::PluginAdapter BeatTrackerAdapter; +static Vamp::PluginAdapter ChromagramPluginAdapter; +static Vamp::PluginAdapter EBUr128Adapter; +static Vamp::PluginAdapter KeyDetectorAdapter; +static 
Vamp::PluginAdapter OnsetDetectorAdapter; +static Vamp::PluginAdapter PercussionOnsetDetectorAdapter; +static Vamp::PluginAdapter SimilarityPluginAdapter; +static Vamp::PluginAdapter SpectralCentroidAdapter; +static Vamp::PluginAdapter TonalChangeDetectAdapter; +static Vamp::PluginAdapter TranscriptionAdapter; +static Vamp::PluginAdapter TruePeakAdapter; +static Vamp::PluginAdapter ZeroCrossingAdapter; + #ifdef HAVE_AUBIO -static Vamp::PluginAdapter onsetAdapter; +static Vamp::PluginAdapter OnsetAdapter; #endif const VampPluginDescriptor *vampGetPluginDescriptor(unsigned int version, @@ -69,15 +85,22 @@ const VampPluginDescriptor *vampGetPluginDescriptor(unsigned int version, if (version < 1) return 0; switch (index) { - case 0: return zeroCrossingAdapter.getDescriptor(); - case 1: return spectralCentroidAdapter.getDescriptor(); - case 2: return percussionOnsetAdapter.getDescriptor(); - case 3: return amplitudeAdapter.getDescriptor(); - case 4: return onsetDetectorAdapter.getDescriptor(); - case 5: return VampEBUr128Adapter.getDescriptor(); - case 6: return VampTruePeakAdapter.getDescriptor(); + case 0: return AmplitudeFollowerAdapter.getDescriptor(); + case 1: return BarBeatTrackerAdapter.getDescriptor(); + case 2: return BeatTrackerAdapter.getDescriptor(); + case 3: return ChromagramPluginAdapter.getDescriptor(); + case 4: return EBUr128Adapter.getDescriptor(); + case 5: return KeyDetectorAdapter.getDescriptor(); + case 6: return OnsetDetectorAdapter.getDescriptor(); + case 7: return PercussionOnsetDetectorAdapter.getDescriptor(); + case 8: return SimilarityPluginAdapter.getDescriptor(); + case 9: return SpectralCentroidAdapter.getDescriptor(); + case 10: return TonalChangeDetectAdapter.getDescriptor(); + case 11: return TranscriptionAdapter.getDescriptor(); + case 12: return TruePeakAdapter.getDescriptor(); + case 13: return ZeroCrossingAdapter.getDescriptor(); #ifdef HAVE_AUBIO - case 7: return onsetAdapter.getDescriptor(); + case 14: return 
OnsetAdapter.getDescriptor(); #endif default: return 0; } diff --git a/libs/vamp-plugins/wscript b/libs/vamp-plugins/wscript index e2dd0f2e68..f3e0024ee9 100644 --- a/libs/vamp-plugins/wscript +++ b/libs/vamp-plugins/wscript @@ -38,11 +38,18 @@ def build(bld): obj.source = ''' plugins.cpp AmplitudeFollower.cpp + BarBeatTrack.cpp + BeatTrack.cpp + ChromagramPlugin.cpp EBUr128.cpp ebu_r128_proc.cc + KeyDetect.cpp OnsetDetect.cpp PercussionOnsetDetector.cpp + SimilarityPlugin.cpp SpectralCentroid.cpp + TonalChangeDetect.cpp + Transcription.cpp TruePeak.cpp ZeroCrossing.cpp ''' -- cgit v1.2.3