From 63994f3b820c8f0754ff59d0d09585405d87ae0e Mon Sep 17 00:00:00 2001 From: Robin Gareus Date: Mon, 2 Sep 2019 03:12:22 +0200 Subject: Include vamp-pyin In preparation for captainMorgan's pitch analysis script. --- libs/vamp-pyin/LocalCandidatePYIN.cpp | 499 ++++++++++++++++++++++++++++ libs/vamp-pyin/LocalCandidatePYIN.h | 75 +++++ libs/vamp-pyin/MeanFilter.h | 63 ++++ libs/vamp-pyin/MonoNote.cpp | 61 ++++ libs/vamp-pyin/MonoNote.h | 46 +++ libs/vamp-pyin/MonoNoteHMM.cpp | 208 ++++++++++++ libs/vamp-pyin/MonoNoteHMM.h | 39 +++ libs/vamp-pyin/MonoNoteParameters.cpp | 44 +++ libs/vamp-pyin/MonoNoteParameters.h | 61 ++++ libs/vamp-pyin/MonoPitch.cpp | 81 +++++ libs/vamp-pyin/MonoPitch.h | 37 +++ libs/vamp-pyin/MonoPitchHMM.cpp | 153 +++++++++ libs/vamp-pyin/MonoPitchHMM.h | 43 +++ libs/vamp-pyin/PYinVamp.cpp | 608 ++++++++++++++++++++++++++++++++++ libs/vamp-pyin/PYinVamp.h | 84 +++++ libs/vamp-pyin/SparseHMM.cpp | 145 ++++++++ libs/vamp-pyin/SparseHMM.h | 35 ++ libs/vamp-pyin/Yin.cpp | 153 +++++++++ libs/vamp-pyin/Yin.h | 71 ++++ libs/vamp-pyin/YinUtil.cpp | 363 ++++++++++++++++++++ libs/vamp-pyin/YinUtil.h | 42 +++ libs/vamp-pyin/YinVamp.cpp | 367 ++++++++++++++++++++ libs/vamp-pyin/YinVamp.h | 75 +++++ libs/vamp-pyin/libmain.cpp | 38 +++ libs/vamp-pyin/wscript | 53 +++ 25 files changed, 3444 insertions(+) create mode 100644 libs/vamp-pyin/LocalCandidatePYIN.cpp create mode 100644 libs/vamp-pyin/LocalCandidatePYIN.h create mode 100644 libs/vamp-pyin/MeanFilter.h create mode 100644 libs/vamp-pyin/MonoNote.cpp create mode 100644 libs/vamp-pyin/MonoNote.h create mode 100644 libs/vamp-pyin/MonoNoteHMM.cpp create mode 100644 libs/vamp-pyin/MonoNoteHMM.h create mode 100644 libs/vamp-pyin/MonoNoteParameters.cpp create mode 100644 libs/vamp-pyin/MonoNoteParameters.h create mode 100644 libs/vamp-pyin/MonoPitch.cpp create mode 100644 libs/vamp-pyin/MonoPitch.h create mode 100644 libs/vamp-pyin/MonoPitchHMM.cpp create mode 100644 libs/vamp-pyin/MonoPitchHMM.h create mode 100644 libs/vamp-pyin/PYinVamp.cpp create mode 100644 libs/vamp-pyin/PYinVamp.h create mode 100644 libs/vamp-pyin/SparseHMM.cpp create mode 100644 libs/vamp-pyin/SparseHMM.h create mode 100644 libs/vamp-pyin/Yin.cpp create mode 100644 libs/vamp-pyin/Yin.h create mode 100644 libs/vamp-pyin/YinUtil.cpp create mode 100644 libs/vamp-pyin/YinUtil.h create mode 100644 libs/vamp-pyin/YinVamp.cpp create mode 100644 libs/vamp-pyin/YinVamp.h create mode 100644 libs/vamp-pyin/libmain.cpp create mode 100644 libs/vamp-pyin/wscript (limited to 'libs/vamp-pyin') diff --git a/libs/vamp-pyin/LocalCandidatePYIN.cpp b/libs/vamp-pyin/LocalCandidatePYIN.cpp new file mode 100644 index 0000000000..3d33a969fa --- /dev/null +++ b/libs/vamp-pyin/LocalCandidatePYIN.cpp @@ -0,0 +1,499 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "LocalCandidatePYIN.h" +#include "MonoPitch.h" +#include "YinUtil.h" + +#include "vamp-sdk/FFT.h" + +#include +#include + +#include +#include +// #include +#include +#include +#include + +#include + +using std::string; +using std::vector; +using std::map; +using Vamp::RealTime; + + +LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(700), + m_oPitchTrackCandidates(0), + m_threshDistr(2.0f), + m_outputUnvoiced(0.0f), + m_preciseTime(0.0f), + m_pitchProb(0), + m_timestamp(0), + m_nCandidate(13) +{ +} + +LocalCandidatePYIN::~LocalCandidatePYIN() +{ +} + +string +LocalCandidatePYIN::getIdentifier() const +{ + return "localcandidatepyin"; +} + +string +LocalCandidatePYIN::getName() const +{ + return "Local Candidate PYIN"; +} + +string +LocalCandidatePYIN::getDescription() const +{ + return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; +} + +string +LocalCandidatePYIN::getMaker() const +{ + return "Matthias Mauch"; +} + +int +LocalCandidatePYIN::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 2; +} + +string +LocalCandidatePYIN::getCopyright() const +{ + return "GPL"; +} + +LocalCandidatePYIN::InputDomain +LocalCandidatePYIN::getInputDomain() const +{ + return TimeDomain; +} + +size_t +LocalCandidatePYIN::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +LocalCandidatePYIN::getPreferredStepSize() const +{ + return 256; +} + +size_t +LocalCandidatePYIN::getMinChannelCount() const +{ + return 1; +} + +size_t +LocalCandidatePYIN::getMaxChannelCount() const +{ + return 1; +} + +LocalCandidatePYIN::ParameterList +LocalCandidatePYIN::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + + d.identifier = "threshdistr"; + d.name = "Yin threshold distribution"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 7.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("Uniform"); + d.valueNames.push_back("Beta (mean 0.10)"); + d.valueNames.push_back("Beta (mean 0.15)"); + d.valueNames.push_back("Beta (mean 0.20)"); + d.valueNames.push_back("Beta (mean 0.30)"); + d.valueNames.push_back("Single Value 0.10"); + d.valueNames.push_back("Single Value 0.15"); + d.valueNames.push_back("Single Value 0.20"); + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + d.identifier = "precisetime"; + d.valueNames.clear(); + d.name = "Use non-standard precise YIN timing (slow)."; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 1.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + list.push_back(d); + + return list; +} + +float +LocalCandidatePYIN::getParameter(string identifier) const +{ + if (identifier == "threshdistr") { + return m_threshDistr; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + if (identifier == "precisetime") { + return m_preciseTime; + } + return 0.f; +} + +void +LocalCandidatePYIN::setParameter(string identifier, float value) +{ + if (identifier == "threshdistr") + { + m_threshDistr = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } + if (identifier == "precisetime") + { + m_preciseTime = value; + } +} + +LocalCandidatePYIN::ProgramList +LocalCandidatePYIN::getPrograms() const +{ + ProgramList list; + return list; +} + +string +LocalCandidatePYIN::getCurrentProgram() const +{ + return ""; // no programs +} + +void +LocalCandidatePYIN::selectProgram(string name) +{ +} + +LocalCandidatePYIN::OutputList +LocalCandidatePYIN::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + d.identifier = "pitchtrackcandidates"; + d.name = "Pitch track candidates"; + d.description = "Multiple candidate pitch tracks."; + d.unit = "Hz"; + d.hasFixedBinCount = false; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; //!!!??? + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + + return outputs; +} + +bool +LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +LocalCandidatePYIN::reset() +{ + m_pitchProb.clear(); + m_timestamp.clear(); +/* + std::cerr << "LocalCandidatePYIN::reset" + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +LocalCandidatePYIN::FeatureSet +LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp) +{ + int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; + timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate)); + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; + + size_t yinBufferSize = m_blockSize/2; + double* yinBuffer = new double[yinBufferSize]; + if (!m_preciseTime) YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize); + else YinUtil::slowDifference(dInputBuffers, yinBuffer, yinBufferSize); + + delete [] dInputBuffers; + + YinUtil::cumulativeDifference(yinBuffer, yinBufferSize); + + float minFrequency = 60; + float maxFrequency = 900; + vector peakProbability = YinUtil::yinProb(yinBuffer, + m_threshDistr, + yinBufferSize, + m_inputSampleRate/maxFrequency, + m_inputSampleRate/minFrequency); + + vector > tempPitchProb; + for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf) + { + if (peakProbability[iBuf] > 0) + { + double currentF0 = + m_inputSampleRate * (1.0 / + YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize)); + double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69; + tempPitchProb.push_back(pair(tempPitch, peakProbability[iBuf])); + } + } + m_pitchProb.push_back(tempPitchProb); + m_timestamp.push_back(timestamp); + + delete[] yinBuffer; + + return FeatureSet(); +} + +LocalCandidatePYIN::FeatureSet +LocalCandidatePYIN::getRemainingFeatures() +{ + // timestamp -> candidate number -> value + map > featureValues; + + // std::cerr << "in remaining features" << std::endl; + + if (m_pitchProb.empty()) { + return FeatureSet(); + } + + // MONO-PITCH STUFF + MonoPitch mp; + size_t nFrame = m_timestamp.size(); + vector > pitchTracks; + vector freqSum = vector(m_nCandidate); + vector freqNumber = vector(m_nCandidate); + vector freqMean = vector(m_nCandidate); + + boost::math::normal normalDist(0, 8); // semitones sd + float maxNormalDist = boost::math::pdf(normalDist, 0); + + // Viterbi-decode multiple times with different frequencies emphasised + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + { + pitchTracks.push_back(vector(nFrame)); + vector > > tempPitchProb; + float centrePitch = 45 + 3 * iCandidate; + + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) { + tempPitchProb.push_back(vector >()); + float sumProb = 0; + float pitch = 0; + float prob = 0; + for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) + { + pitch = m_pitchProb[iFrame][iProb].first; + prob = m_pitchProb[iFrame][iProb].second * + boost::math::pdf(normalDist, pitch-centrePitch) / + maxNormalDist * 2; + sumProb += prob; + tempPitchProb[iFrame].push_back( + pair(pitch,prob)); + } + for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) + { + tempPitchProb[iFrame][iProb].second /= sumProb; + } + } + + vector mpOut = mp.process(tempPitchProb); + float prevFreq = 0; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if (mpOut[iFrame] > 0) { + + pitchTracks[iCandidate][iFrame] = mpOut[iFrame]; + freqSum[iCandidate] += mpOut[iFrame]; + freqNumber[iCandidate]++; + prevFreq = mpOut[iFrame]; + + } + } + freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate]; + } + + // find near duplicate pitch tracks + vector duplicates; + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) { + for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) { + size_t countEqual = 0; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) || + fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01) + countEqual++; + } + // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl; + if (countEqual * 1.0 / nFrame > 0.8) { + if (freqNumber[iCandidate] > freqNumber[jCandidate]) { + duplicates.push_back(jCandidate); + } else if (iCandidate < jCandidate) { + duplicates.push_back(iCandidate); + } + } + } + } + + // now find non-duplicate pitch tracks + map candidateActuals; + map candidateLabels; + + vector > outputFrequencies; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector()); + + int actualCandidateNumber = 0; + for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) + { + bool isDuplicate = false; + for (size_t i = 0; i < duplicates.size(); ++i) { + + if (duplicates[i] == iCandidate) { + isDuplicate = true; + break; + } + } + if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame) + { + std::ostringstream convert; + convert << actualCandidateNumber++; + candidateLabels[iCandidate] = convert.str(); + candidateActuals[iCandidate] = actualCandidateNumber; + // std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl; + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + if (pitchTracks[iCandidate][iFrame] > 0) + { + // featureValues[m_timestamp[iFrame]][iCandidate] = + // pitchTracks[iCandidate][iFrame]; + outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]); + } else { + outputFrequencies[iFrame].push_back(0); + } + } + } + // fs[m_oPitchTrackCandidates].push_back(f); + } + + // adapt our features so as to return a stack of candidate values + // per frame + + FeatureSet fs; + + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){ + Feature f; + f.hasTimestamp = true; + f.timestamp = m_timestamp[iFrame]; + f.values = outputFrequencies[iFrame]; + fs[0].push_back(f); + } + + // I stopped using Chris's map stuff below because I couldn't get my head around it + // + // for (map >::const_iterator i = + // featureValues.begin(); i != featureValues.end(); ++i) { + // Feature f; + // f.hasTimestamp = true; + // f.timestamp = i->first; + // int nextCandidate = candidateActuals.begin()->second; + // for (map::const_iterator j = + // i->second.begin(); j != i->second.end(); ++j) { + // while (candidateActuals[j->first] > nextCandidate) { + // f.values.push_back(0); + // ++nextCandidate; + // } + // f.values.push_back(j->second); + // nextCandidate = j->first + 1; + // } + // //!!! can't use labels? + // fs[0].push_back(f); + // } + + return fs; +} diff --git a/libs/vamp-pyin/LocalCandidatePYIN.h b/libs/vamp-pyin/LocalCandidatePYIN.h new file mode 100644 index 0000000000..3648a3acdd --- /dev/null +++ b/libs/vamp-pyin/LocalCandidatePYIN.h @@ -0,0 +1,75 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COLocalCandidatePYING included with this distribution for more information. +*/ + +#ifndef _LOCALCANDIDATEPYIN_H_ +#define _LOCALCANDIDATEPYIN_H_ + +#include + +#include "Yin.h" + +class LocalCandidatePYIN : public Vamp::Plugin +{ +public: + LocalCandidatePYIN(float inputSampleRate); + virtual ~LocalCandidatePYIN(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + + mutable int m_oPitchTrackCandidates; + + float m_threshDistr; + float m_outputUnvoiced; + float m_preciseTime; + vector > > m_pitchProb; + vector m_timestamp; + size_t m_nCandidate; +}; + +#endif diff --git a/libs/vamp-pyin/MeanFilter.h b/libs/vamp-pyin/MeanFilter.h new file mode 100644 index 0000000000..dc6f8a03b0 --- /dev/null +++ b/libs/vamp-pyin/MeanFilter.h @@ -0,0 +1,63 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MEAN_FILTER_H_ +#define _MEAN_FILTER_H_ + +class MeanFilter +{ +public: + /** + * Construct a non-causal mean filter with filter length flen, + * that replaces each sample N with the mean of samples + * [N-floor(F/2) .. N+floor(F/2)] where F is the filter length. + * Only odd F are supported. + */ + MeanFilter(int flen) : m_flen(flen) { } + ~MeanFilter() { } + + /** + * Filter the n samples in "in" and place the results in "out" + */ + void filter(const double *in, double *out, const int n) { + filterSubsequence(in, out, n, n, 0); + } + + /** + * Filter the n samples starting at the given offset in the + * m-element array "in" and place the results in the n-element + * array "out" + */ + void filterSubsequence(const double *in, double *out, + const int m, const int n, + const int offset) { + int half = m_flen/2; + for (int i = 0; i < n; ++i) { + double v = 0; + int n = 0; + for (int j = -half; j <= half; ++j) { + int ix = i + j + offset; + if (ix >= 0 && ix < m) { + v += in[ix]; + ++n; + } + } + out[i] = v / n; + } + } + +private: + int m_flen; +}; + +#endif diff --git a/libs/vamp-pyin/MonoNote.cpp b/libs/vamp-pyin/MonoNote.cpp new file mode 100644 index 0000000000..82d94d2054 --- /dev/null +++ b/libs/vamp-pyin/MonoNote.cpp @@ -0,0 +1,61 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "MonoNote.h" +#include + +#include +#include +#include + +using std::vector; +using std::pair; + +MonoNote::MonoNote() : + hmm() +{ +} + +MonoNote::~MonoNote() +{ +} + +const vector +MonoNote::process(const vector > > pitchProb) +{ + vector > obsProb; + for (size_t iFrame = 0; iFrame < pitchProb.size(); ++iFrame) + { + obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame])); + } + + vector *scale = new vector(pitchProb.size()); + + vector out; + + vector path = hmm.decodeViterbi(obsProb, scale); + + for (size_t iFrame = 0; iFrame < path.size(); ++iFrame) + { + double currPitch = -1; + int stateKind = 0; + + currPitch = hmm.par.minPitch + (path[iFrame]/hmm.par.nSPP) * 1.0/hmm.par.nPPS; + stateKind = (path[iFrame]) % hmm.par.nSPP + 1; + + out.push_back(FrameOutput(iFrame, currPitch, stateKind)); + // std::cerr << path[iFrame] << " -- "<< pitchProb[iFrame][0].first << " -- "<< currPitch << " -- " << stateKind << std::endl; + } + delete scale; + return(out); +} diff --git a/libs/vamp-pyin/MonoNote.h b/libs/vamp-pyin/MonoNote.h new file mode 100644 index 0000000000..554d515421 --- /dev/null +++ b/libs/vamp-pyin/MonoNote.h @@ -0,0 +1,46 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MONONOTE_H_ +#define _MONONOTE_H_ + +#include "MonoNoteHMM.h" +#include "MonoNoteParameters.h" + +#include +#include +#include + +using std::vector; +using std::pair; + +class MonoNote { +public: + MonoNote(); + virtual ~MonoNote(); + + struct FrameOutput { + size_t frameNumber; + double pitch; + size_t noteState; // unvoiced, attack, stable, release, inter + FrameOutput() : frameNumber(0), pitch(-1.0), noteState(0) { } + FrameOutput(size_t _frameNumber, double _pitch, size_t _noteState) : + frameNumber(_frameNumber), pitch(_pitch), noteState(_noteState) { } + }; + // pitchProb is a frame-wise vector carrying a vector of pitch-probability pairs + const vector process(const vector > > pitchProb); +private: + MonoNoteHMM hmm; +}; + +#endif diff --git a/libs/vamp-pyin/MonoNoteHMM.cpp b/libs/vamp-pyin/MonoNoteHMM.cpp new file mode 100644 index 0000000000..9abd92af11 --- /dev/null +++ b/libs/vamp-pyin/MonoNoteHMM.cpp @@ -0,0 +1,208 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "MonoNoteHMM.h" + +#include + +#include +#include + +using std::vector; +using std::pair; + +MonoNoteHMM::MonoNoteHMM() : + par() +{ + build(); +} + +const vector +MonoNoteHMM::calculateObsProb(const vector > pitchProb) +{ + // pitchProb is a list of pairs (pitches and their probabilities) + + size_t nCandidate = pitchProb.size(); + + // what is the probability of pitched + double pIsPitched = 0; + for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate) + { + // pIsPitched = pitchProb[iCandidate].second > pIsPitched ? pitchProb[iCandidate].second : pIsPitched; + pIsPitched += pitchProb[iCandidate].second; + } + + // pIsPitched = std::pow(pIsPitched, (1-par.priorWeight)) * std::pow(par.priorPitchedProb, par.priorWeight); + pIsPitched = pIsPitched * (1-par.priorWeight) + par.priorPitchedProb * par.priorWeight; + + vector out = vector(par.n); + double tempProbSum = 0; + for (size_t i = 0; i < par.n; ++i) + { + if (i % par.nSPP != 2) + { + // std::cerr << getMidiPitch(i) << std::endl; + double tempProb = 0; + if (nCandidate > 0) + { + double minDist = 10000.0; + double minDistProb = 0; + size_t minDistCandidate = 0; + for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate) + { + double currDist = std::abs(getMidiPitch(i)-pitchProb[iCandidate].first); + if (currDist < minDist) + { + minDist = currDist; + minDistProb = pitchProb[iCandidate].second; + minDistCandidate = iCandidate; + } + } + tempProb = std::pow(minDistProb, par.yinTrust) * + boost::math::pdf(pitchDistr[i], + pitchProb[minDistCandidate].first); + } else { + tempProb = 1; + } + tempProbSum += tempProb; + out[i] = tempProb; + } + } + + for (size_t i = 0; i < par.n; ++i) + { + if (i % par.nSPP != 2) + { + if (tempProbSum > 0) + { + out[i] = out[i] / tempProbSum * pIsPitched; + } + } else { + out[i] = (1-pIsPitched) / (par.nPPS * par.nS); + } + } + + return(out); +} + +void +MonoNoteHMM::build() +{ + // the states are organised as follows: + // 0-2. lowest pitch + // 0. attack state + // 1. stable state + // 2. silent state + // 3-5. second-lowest pitch + // 3. attack state + // ... + + // observation distributions + for (size_t iState = 0; iState < par.n; ++iState) + { + pitchDistr.push_back(boost::math::normal(0,1)); + if (iState % par.nSPP == 2) + { + // silent state starts tracking + init.push_back(1.0/(par.nS * par.nPPS)); + } else { + init.push_back(0.0); + } + } + + for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch) + { + size_t index = iPitch * par.nSPP; + double mu = par.minPitch + iPitch * 1.0/par.nPPS; + pitchDistr[index] = boost::math::normal(mu, par.sigmaYinPitchAttack); + pitchDistr[index+1] = boost::math::normal(mu, par.sigmaYinPitchStable); + pitchDistr[index+2] = boost::math::normal(mu, 1.0); // dummy + } + + boost::math::normal noteDistanceDistr(0, par.sigma2Note); + + for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch) + { + // loop through all notes and set sparse transition probabilities + size_t index = iPitch * par.nSPP; + + // transitions from attack state + from.push_back(index); + to.push_back(index); + transProb.push_back(par.pAttackSelftrans); + + from.push_back(index); + to.push_back(index+1); + transProb.push_back(1-par.pAttackSelftrans); + + // transitions from stable state + from.push_back(index+1); + to.push_back(index+1); // to itself + transProb.push_back(par.pStableSelftrans); + + from.push_back(index+1); + to.push_back(index+2); // to silent + transProb.push_back(par.pStable2Silent); + + // the "easy" transitions from silent state + from.push_back(index+2); + to.push_back(index+2); + transProb.push_back(par.pSilentSelftrans); + + + // the more complicated transitions from the silent + double probSumSilent = 0; + + vector tempTransProbSilent; + for (size_t jPitch = 0; jPitch < (par.nS * par.nPPS); ++jPitch) + { + int fromPitch = iPitch; + int toPitch = jPitch; + double semitoneDistance = + std::abs(fromPitch - toPitch) * 1.0 / par.nPPS; + + // if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance) + if (semitoneDistance == 0 || + (semitoneDistance > par.minSemitoneDistance + && semitoneDistance < par.maxJump)) + { + size_t toIndex = jPitch * par.nSPP; // note attack index + + double tempWeightSilent = boost::math::pdf(noteDistanceDistr, + semitoneDistance); + probSumSilent += tempWeightSilent; + + tempTransProbSilent.push_back(tempWeightSilent); + + from.push_back(index+2); + to.push_back(toIndex); + } + } + for (size_t i = 0; i < tempTransProbSilent.size(); ++i) + { + transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent); + } + } +} + +double +MonoNoteHMM::getMidiPitch(size_t index) +{ + return pitchDistr[index].mean(); +} + +double +MonoNoteHMM::getFrequency(size_t index) +{ + return 440 * pow(2, (pitchDistr[index].mean()-69)/12); +} diff --git a/libs/vamp-pyin/MonoNoteHMM.h b/libs/vamp-pyin/MonoNoteHMM.h new file mode 100644 index 0000000000..dff4697e7a --- /dev/null +++ b/libs/vamp-pyin/MonoNoteHMM.h @@ -0,0 +1,39 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MONONOTEHMM_H_ +#define _MONONOTEHMM_H_ + +#include "MonoNoteParameters.h" +#include "SparseHMM.h" + +#include + +#include +#include + +using std::vector; + +class MonoNoteHMM : public SparseHMM +{ +public: + MonoNoteHMM(); + const std::vector calculateObsProb(const vector >); + double getMidiPitch(size_t index); + double getFrequency(size_t index); + void build(); + MonoNoteParameters par; + vector pitchDistr; +}; + +#endif diff --git a/libs/vamp-pyin/MonoNoteParameters.cpp b/libs/vamp-pyin/MonoNoteParameters.cpp new file mode 100644 index 0000000000..8d9c9508fa --- /dev/null +++ b/libs/vamp-pyin/MonoNoteParameters.cpp @@ -0,0 +1,44 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "MonoNoteParameters.h" + +MonoNoteParameters::MonoNoteParameters() : + minPitch(35), + nPPS(3), + nS(69), + nSPP(3), // states per pitch + n(0), + initPi(0), + pAttackSelftrans(0.9), + pStableSelftrans(0.99), + pStable2Silent(0.01), + pSilentSelftrans(0.9999), + sigma2Note(0.7), + maxJump(13), + pInterSelftrans(0.0), + priorPitchedProb(.7), + priorWeight(0.5), + minSemitoneDistance(.5), + sigmaYinPitchAttack(5), + sigmaYinPitchStable(0.8), + sigmaYinPitchInter(.1), + yinTrust(0.1) +{ + // just in case someone put in a silly value for pRelease2Unvoiced + n = nPPS * nS * nSPP; +} + +MonoNoteParameters::~MonoNoteParameters() +{ +} diff --git a/libs/vamp-pyin/MonoNoteParameters.h b/libs/vamp-pyin/MonoNoteParameters.h new file mode 100644 index 0000000000..21db7f6102 --- /dev/null +++ b/libs/vamp-pyin/MonoNoteParameters.h @@ -0,0 +1,61 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MONONOTEPARAMETERS_H_ +#define _MONONOTEPARAMETERS_H_ + +#include +#include +#include + +using std::vector; + +class MonoNoteParameters +{ +public: + MonoNoteParameters(); + virtual ~MonoNoteParameters(); + + // model architecture parameters + size_t minPitch; // lowest pitch in MIDI notes + size_t nPPS; // number of pitches per semitone + size_t nS; // number of semitones + size_t nSPP; // number of states per pitch + size_t n; // number of states (will be calcualted from other parameters) + + // initial state probabilities + vector initPi; + + // transition parameters + double pAttackSelftrans; + double pStableSelftrans; + double pStable2Silent; + double pSilentSelftrans; + double sigma2Note; // standard deviation of next note Gaussian distribution + double maxJump; + double pInterSelftrans; + + double priorPitchedProb; + double priorWeight; + + double minSemitoneDistance; // minimum distance for a transition + + double sigmaYinPitchAttack; + double sigmaYinPitchStable; + double sigmaYinPitchInter; + + double yinTrust; + +}; + +#endif diff --git a/libs/vamp-pyin/MonoPitch.cpp b/libs/vamp-pyin/MonoPitch.cpp new file mode 100644 index 0000000000..01830f5419 --- /dev/null +++ b/libs/vamp-pyin/MonoPitch.cpp @@ -0,0 +1,81 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "MonoPitch.h" +#include "MonoPitchHMM.h" +#include + +#include +#include +#include + +using std::vector; +using std::pair; + +MonoPitch::MonoPitch() : + hmm() +{ +} + +MonoPitch::~MonoPitch() +{ +} + +const vector +MonoPitch::process(const vector > > pitchProb) +{ + // std::cerr << "before observation prob calculation" << std::endl; + vector > obsProb; + for (size_t iFrame = 0; iFrame < pitchProb.size(); ++iFrame) + { + obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame])); + } + + vector *scale = new vector(0); + + vector out; + + // std::cerr << "before Viterbi decoding" << obsProb.size() << "ng" << obsProb[1].size() << std::endl; + vector path = hmm.decodeViterbi(obsProb, scale); + // std::cerr << "after Viterbi decoding" << std::endl; + + for (size_t iFrame = 0; iFrame < path.size(); ++iFrame) + { + // std::cerr << path[iFrame] << " " << hmm.m_freqs[path[iFrame]] << std::endl; + float hmmFreq = hmm.m_freqs[path[iFrame]]; + float bestFreq = 0; + float leastDist = 10000; + if (hmmFreq > 0) + { + // This was a Yin estimate, so try to get original pitch estimate back + // ... a bit hacky, since we could have direclty saved the frequency + // that was assigned to the HMM bin in hmm.calculateObsProb -- but would + // have had to rethink the interface of that method. + for (size_t iPitch = 0; iPitch < pitchProb[iFrame].size(); ++iPitch) + { + float freq = 440. * std::pow(2, (pitchProb[iFrame][iPitch].first - 69)/12); + float dist = std::abs(hmmFreq-freq); + if (dist < leastDist) + { + leastDist = dist; + bestFreq = freq; + } + } + } else { + bestFreq = hmmFreq; + } + out.push_back(bestFreq); + } + delete scale; + return(out); +} diff --git a/libs/vamp-pyin/MonoPitch.h b/libs/vamp-pyin/MonoPitch.h new file mode 100644 index 0000000000..6e466d95e1 --- /dev/null +++ b/libs/vamp-pyin/MonoPitch.h @@ -0,0 +1,37 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MONOPITCH_H_ +#define _MONOPITCH_H_ + +#include "MonoPitchHMM.h" + +#include +#include +#include + +using std::vector; +using std::pair; + +class MonoPitch { +public: + MonoPitch(); + virtual ~MonoPitch(); + + // pitchProb is a frame-wise vector carrying a vector of pitch-probability pairs + const vector process(const vector > > pitchProb); +private: + MonoPitchHMM hmm; +}; + +#endif diff --git a/libs/vamp-pyin/MonoPitchHMM.cpp b/libs/vamp-pyin/MonoPitchHMM.cpp new file mode 100644 index 0000000000..c52b64e635 --- /dev/null +++ b/libs/vamp-pyin/MonoPitchHMM.cpp @@ -0,0 +1,153 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "MonoPitchHMM.h" + +#include + +#include +#include + +using std::vector; +using std::pair; + +MonoPitchHMM::MonoPitchHMM() : +m_minFreq(61.735), +m_nBPS(5), +m_nPitch(0), +m_transitionWidth(0), +m_selfTrans(0.99), +m_yinTrust(.5), +m_freqs(0) +{ + m_transitionWidth = 5*(m_nBPS/2) + 1; + m_nPitch = 69 * m_nBPS; + m_freqs = vector(2*m_nPitch); + for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch) + { + m_freqs[iPitch] = m_minFreq * std::pow(2, iPitch * 1.0 / (12 * m_nBPS)); + m_freqs[iPitch+m_nPitch] = -m_freqs[iPitch]; + } + build(); +} + +const vector +MonoPitchHMM::calculateObsProb(const vector > pitchProb) +{ + vector out = vector(2*m_nPitch+1); + double probYinPitched = 0; + // BIN THE PITCHES + for (size_t iPair = 0; iPair < pitchProb.size(); ++iPair) + { + double freq = 440. * std::pow(2, (pitchProb[iPair].first - 69)/12); + if (freq <= m_minFreq) continue; + double d = 0; + double oldd = 1000; + for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch) + { + d = std::abs(freq-m_freqs[iPitch]); + if (oldd < d && iPitch > 0) + { + // previous bin must have been the closest + out[iPitch-1] = pitchProb[iPair].second; + probYinPitched += out[iPitch-1]; + break; + } + oldd = d; + } + } + + double probReallyPitched = m_yinTrust * probYinPitched; + // std::cerr << probReallyPitched << " " << probYinPitched << std::endl; + // damn, I forget what this is all about... + for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch) + { + if (probYinPitched > 0) out[iPitch] *= (probReallyPitched/probYinPitched) ; + out[iPitch+m_nPitch] = (1 - probReallyPitched) / m_nPitch; + } + // out[2*m_nPitch] = m_yinTrust * (1 - probYinPitched); + return(out); +} + +void +MonoPitchHMM::build() +{ + // INITIAL VECTOR + init = vector(2*m_nPitch, 1.0 / 2*m_nPitch); + + // TRANSITIONS + for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch) + { + int theoreticalMinNextPitch = static_cast(iPitch)-static_cast(m_transitionWidth/2); + int minNextPitch = iPitch>m_transitionWidth/2 ? iPitch-m_transitionWidth/2 : 0; + int maxNextPitch = iPitch weights; + for (size_t i = minNextPitch; i <= maxNextPitch; ++i) + { + if (i <= iPitch) + { + weights.push_back(i-theoreticalMinNextPitch+1); + // weights.push_back(i-theoreticalMinNextPitch+1+m_transitionWidth/2); + } else { + weights.push_back(iPitch-theoreticalMinNextPitch+1-(i-iPitch)); + // weights.push_back(iPitch-theoreticalMinNextPitch+1-(i-iPitch)+m_transitionWidth/2); + } + weightSum += weights[weights.size()-1]; + } + + // std::cerr << minNextPitch << " " << maxNextPitch << std::endl; + // TRANSITIONS TO CLOSE PITCH + for (size_t i = minNextPitch; i <= maxNextPitch; ++i) + { + from.push_back(iPitch); + to.push_back(i); + transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans); + + from.push_back(iPitch); + to.push_back(i+m_nPitch); + transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans)); + + from.push_back(iPitch+m_nPitch); + to.push_back(i+m_nPitch); + transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans); + // transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5); + + from.push_back(iPitch+m_nPitch); + to.push_back(i); + transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans)); + // transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5); + } + + // TRANSITION TO UNVOICED + // from.push_back(iPitch+m_nPitch); + // to.push_back(2*m_nPitch); + // transProb.push_back(1-m_selfTrans); + + // TRANSITION FROM UNVOICED TO PITCH + // from.push_back(2*m_nPitch); + // to.push_back(iPitch+m_nPitch); + // transProb.push_back(1.0/m_nPitch); + } + // UNVOICED SELFTRANSITION + // from.push_back(2*m_nPitch); + // to.push_back(2*m_nPitch); + // transProb.push_back(m_selfTrans); + + // for (size_t i = 0; i < from.size(); ++i) { + // std::cerr << "P(["<< from[i] << " --> " << to[i] << "]) = " << transProb[i] << std::endl; + // } + +} diff --git a/libs/vamp-pyin/MonoPitchHMM.h b/libs/vamp-pyin/MonoPitchHMM.h new file mode 100644 index 0000000000..3202676a8d --- /dev/null +++ b/libs/vamp-pyin/MonoPitchHMM.h @@ -0,0 +1,43 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MONOPITCHHMM_H_ +#define _MONOPITCHHMM_H_ + +#include "SparseHMM.h" + +#include + +#include +#include + +using std::vector; + +class MonoPitchHMM : public SparseHMM +{ +public: + MonoPitchHMM(); + const std::vector calculateObsProb(const vector >); + // double getMidiPitch(size_t index); + // double getFrequency(size_t index); + void build(); + double m_minFreq; // 82.40689f/2 + size_t m_nBPS; + size_t m_nPitch; + size_t m_transitionWidth; + double m_selfTrans; + double m_yinTrust; + vector m_freqs; +}; + +#endif diff --git a/libs/vamp-pyin/PYinVamp.cpp b/libs/vamp-pyin/PYinVamp.cpp new file mode 100644 index 0000000000..ebfa6a2472 --- /dev/null +++ b/libs/vamp-pyin/PYinVamp.cpp @@ -0,0 +1,608 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "PYinVamp.h" +#include "MonoNote.h" +#include "MonoPitch.h" + +#include "vamp-sdk/FFT.h" + +#include +#include + +#include +#include +#include + +using std::string; +using std::vector; +using Vamp::RealTime; + + +PYinVamp::PYinVamp(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(1600), + m_yin(2048, inputSampleRate, 0.0), + m_oF0Candidates(0), + m_oF0Probs(0), + m_oVoicedProb(0), + m_oCandidateSalience(0), + m_oSmoothedPitchTrack(0), + m_oNotes(0), + m_threshDistr(2.0f), + m_outputUnvoiced(0.0f), + m_preciseTime(0.0f), + m_lowAmp(0.1f), + m_onsetSensitivity(0.7f), + m_pruneThresh(0.1f), + m_pitchProb(0), + m_timestamp(0), + m_level(0) +{ +} + +PYinVamp::~PYinVamp() +{ +} + +string +PYinVamp::getIdentifier() const +{ + return "pyin"; +} + +string +PYinVamp::getName() const +{ + return "pYin"; +} + +string +PYinVamp::getDescription() const +{ + return "Monophonic pitch and note tracking based on a probabilistic Yin extension."; +} + +string +PYinVamp::getMaker() const +{ + return "Matthias Mauch"; +} + +int +PYinVamp::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 2; +} + +string +PYinVamp::getCopyright() const +{ + return "GPL"; +} + +PYinVamp::InputDomain +PYinVamp::getInputDomain() const +{ + return TimeDomain; +} + +size_t +PYinVamp::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +PYinVamp::getPreferredStepSize() const +{ + return 256; +} + +size_t +PYinVamp::getMinChannelCount() const +{ + return 1; +} + +size_t +PYinVamp::getMaxChannelCount() const +{ + return 1; +} + +PYinVamp::ParameterList +PYinVamp::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + + d.identifier = "threshdistr"; + d.name = "Yin threshold distribution"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 7.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("Uniform"); + d.valueNames.push_back("Beta (mean 0.10)"); + d.valueNames.push_back("Beta (mean 0.15)"); + d.valueNames.push_back("Beta (mean 0.20)"); + d.valueNames.push_back("Beta (mean 0.30)"); + d.valueNames.push_back("Single Value 0.10"); + d.valueNames.push_back("Single Value 0.15"); + d.valueNames.push_back("Single Value 0.20"); + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + d.identifier = "precisetime"; + d.valueNames.clear(); + d.name = "Use non-standard precise YIN timing (slow)."; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 1.0f; + d.defaultValue = 0.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + list.push_back(d); + + d.identifier = "lowampsuppression"; + d.valueNames.clear(); + d.name = "Suppress low amplitude pitch estimates."; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 1.0f; + d.defaultValue = 0.1f; + d.isQuantized = false; + list.push_back(d); + + d.identifier = "onsetsensitivity"; + d.valueNames.clear(); + d.name = "Onset sensitivity"; + d.description = "Adds additional note onsets when RMS increases."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 1.0f; + d.defaultValue = 0.7f; + d.isQuantized = false; + list.push_back(d); + + d.identifier = "prunethresh"; + d.valueNames.clear(); + d.name = "Duration pruning threshold."; + d.description = "Prune notes that are shorter than this value."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 0.2f; + d.defaultValue = 0.1f; + d.isQuantized = false; + list.push_back(d); + + return list; +} + +float +PYinVamp::getParameter(string identifier) const +{ + if (identifier == "threshdistr") { + return m_threshDistr; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + if (identifier == "precisetime") { + return m_preciseTime; + } + if (identifier == "lowampsuppression") { + return m_lowAmp; + } + if (identifier == "onsetsensitivity") { + return m_onsetSensitivity; + } + if (identifier == "prunethresh") { + return m_pruneThresh; + } + return 0.f; +} + +void +PYinVamp::setParameter(string identifier, float value) +{ + if (identifier == "threshdistr") + { + m_threshDistr = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } + if (identifier == "precisetime") + { + m_preciseTime = value; + } + if (identifier == "lowampsuppression") + { + m_lowAmp = value; + } + if (identifier == "onsetsensitivity") + { + m_onsetSensitivity = value; + } + if (identifier == "prunethresh") + { + m_pruneThresh = value; + } +} + +PYinVamp::ProgramList +PYinVamp::getPrograms() const +{ + ProgramList list; + return list; +} + +string +PYinVamp::getCurrentProgram() const +{ + return ""; // no programs +} + +void +PYinVamp::selectProgram(string name) +{ +} + +PYinVamp::OutputList +PYinVamp::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "f0candidates"; + d.name = "F0 Candidates"; + d.description = "Estimated fundamental frequency candidates."; + d.unit = "Hz"; + d.hasFixedBinCount = false; + // d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oF0Candidates = outputNumber++; + + d.identifier = "f0probs"; + d.name = "Candidate Probabilities"; + d.description = "Probabilities of estimated fundamental frequency candidates."; + d.unit = ""; + d.hasFixedBinCount = false; + // d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oF0Probs = outputNumber++; + + d.identifier = "voicedprob"; + d.name = "Voiced Probability"; + d.description = "Probability that the signal is voiced according to Probabilistic Yin."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oVoicedProb = outputNumber++; + + d.identifier = "candidatesalience"; + d.name = "Candidate Salience"; + d.description = "Candidate Salience"; + d.hasFixedBinCount = true; + d.binCount = m_blockSize / 2; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oCandidateSalience = outputNumber++; + + d.identifier = "smoothedpitchtrack"; + d.name = "Smoothed Pitch Track"; + d.description = "."; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = false; + // d.minValue = 0; + // d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_oSmoothedPitchTrack = outputNumber++; + + d.identifier = "notes"; + d.name = "Notes"; + d.description = "Derived fixed-pitch note frequencies"; + // d.unit = "MIDI unit"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::VariableSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = true; + outputs.push_back(d); + m_oNotes = outputNumber++; + + return outputs; +} + +bool +PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "PYinVamp::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +PYinVamp::reset() +{ + m_yin.setThresholdDistr(m_threshDistr); + m_yin.setFrameSize(m_blockSize); + m_yin.setFast(!m_preciseTime); + + m_pitchProb.clear(); + m_timestamp.clear(); + m_level.clear(); +/* + std::cerr << "PYinVamp::reset" + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +PYinVamp::FeatureSet +PYinVamp::process(const float *const *inputBuffers, RealTime timestamp) +{ + int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4; + timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate)); + + FeatureSet fs; + + float rms = 0; + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) { + dInputBuffers[i] = inputBuffers[0][i]; + rms += inputBuffers[0][i] * inputBuffers[0][i]; + } + rms /= m_blockSize; + rms = sqrt(rms); + + bool isLowAmplitude = (rms < m_lowAmp); + + Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers); + delete [] dInputBuffers; + + m_level.push_back(yo.rms); + + // First, get the things out of the way that we don't want to output + // immediately, but instead save for later. + vector > tempPitchProb; + for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate) + { + double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69; + if (!isLowAmplitude) + { + tempPitchProb.push_back(pair + (tempPitch, yo.freqProb[iCandidate].second)); + } else { + float factor = ((rms+0.01*m_lowAmp)/(1.01*m_lowAmp)); + tempPitchProb.push_back(pair + (tempPitch, yo.freqProb[iCandidate].second*factor)); + } + } + m_pitchProb.push_back(tempPitchProb); + m_timestamp.push_back(timestamp); + + // F0 CANDIDATES + Feature f; + f.hasTimestamp = true; + f.timestamp = timestamp; + for (size_t i = 0; i < yo.freqProb.size(); ++i) + { + f.values.push_back(yo.freqProb[i].first); + } + fs[m_oF0Candidates].push_back(f); + + // VOICEDPROB + f.values.clear(); + float voicedProb = 0; + for (size_t i = 0; i < yo.freqProb.size(); ++i) + { + f.values.push_back(yo.freqProb[i].second); + voicedProb += yo.freqProb[i].second; + } + fs[m_oF0Probs].push_back(f); + + f.values.push_back(voicedProb); + fs[m_oVoicedProb].push_back(f); + + // SALIENCE -- maybe this should eventually disappear + f.values.clear(); + float salienceSum = 0; + for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) + { + f.values.push_back(yo.salience[iBin]); + salienceSum += yo.salience[iBin]; + } + fs[m_oCandidateSalience].push_back(f); + + return fs; +} + +PYinVamp::FeatureSet +PYinVamp::getRemainingFeatures() +{ + FeatureSet fs; + Feature f; + f.hasTimestamp = true; + f.hasDuration = false; + + if (m_pitchProb.empty()) { + return fs; + } + + // MONO-PITCH STUFF + MonoPitch mp; + vector mpOut = mp.process(m_pitchProb); + for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) + { + if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue; + f.timestamp = m_timestamp[iFrame]; + f.values.clear(); + if (m_outputUnvoiced == 1) + { + f.values.push_back(fabs(mpOut[iFrame])); + } else { + f.values.push_back(mpOut[iFrame]); + } + + fs[m_oSmoothedPitchTrack].push_back(f); + } + + // MONO-NOTE STUFF +// std::cerr << "Mono Note Stuff" << std::endl; + MonoNote mn; + std::vector > > smoothedPitch; + for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) { + std::vector > temp; + if (mpOut[iFrame] > 0) + { + double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69; + temp.push_back(std::pair(tempPitch, .9)); + } + smoothedPitch.push_back(temp); + } + // vector mnOut = mn.process(m_pitchProb); + vector mnOut = mn.process(smoothedPitch); + + // turning feature into a note feature + f.hasTimestamp = true; + f.hasDuration = true; + f.values.clear(); + + int onsetFrame = 0; + bool isVoiced = 0; + bool oldIsVoiced = 0; + size_t nFrame = m_pitchProb.size(); + + float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize; + + std::vector notePitchTrack; // collects pitches for one note at a time + for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) + { + isVoiced = mnOut[iFrame].noteState < 3 + && smoothedPitch[iFrame].size() > 0 + && (iFrame >= nFrame-2 + || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity)); + // std::cerr << m_level[iFrame]/m_level[iFrame-1] << " " << isVoiced << std::endl; + if (isVoiced && iFrame != nFrame-1) + { + if (oldIsVoiced == 0) // beginning of a note + { + onsetFrame = iFrame; + } + float pitch = smoothedPitch[iFrame][0].first; + notePitchTrack.push_back(pitch); // add to the note's pitch track + } else { // not currently voiced + if (oldIsVoiced == 1) // end of note + { + // std::cerr << notePitchTrack.size() << " " << minNoteFrames << std::endl; + if (notePitchTrack.size() >= minNoteFrames) + { + std::sort(notePitchTrack.begin(), notePitchTrack.end()); + float medianPitch = notePitchTrack[notePitchTrack.size()/2]; + float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440; + f.values.clear(); + f.values.push_back(medianFreq); + f.timestamp = m_timestamp[onsetFrame]; + f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame]; + fs[m_oNotes].push_back(f); + } + notePitchTrack.clear(); + } + } + oldIsVoiced = isVoiced; + } + return fs; +} diff --git a/libs/vamp-pyin/PYinVamp.h b/libs/vamp-pyin/PYinVamp.h new file mode 100644 index 0000000000..79c55012e2 --- /dev/null +++ b/libs/vamp-pyin/PYinVamp.h @@ -0,0 +1,84 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _PYINVAMP_H_ +#define _PYINVAMP_H_ + +#include + +#include "Yin.h" + +class PYinVamp : public Vamp::Plugin +{ +public: + PYinVamp(float inputSampleRate); + virtual ~PYinVamp(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + mutable int m_oF0Candidates; + mutable int m_oF0Probs; + mutable int m_oVoicedProb; + mutable int m_oCandidateSalience; + mutable int m_oSmoothedPitchTrack; + mutable int m_oNotes; + + float m_threshDistr; + float m_outputUnvoiced; + float m_preciseTime; + float m_lowAmp; + float m_onsetSensitivity; + float m_pruneThresh; + vector > > m_pitchProb; + vector m_timestamp; + vector m_level; +}; + +#endif diff --git a/libs/vamp-pyin/SparseHMM.cpp b/libs/vamp-pyin/SparseHMM.cpp new file mode 100644 index 0000000000..737bcf3d13 --- /dev/null +++ b/libs/vamp-pyin/SparseHMM.cpp @@ -0,0 +1,145 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "SparseHMM.h" +#include +#include +#include + +using std::vector; +using std::pair; + +const vector +SparseHMM::calculateObsProb(const vector > data) +{ + // dummy (virtual?) implementation to be overloaded + return(vector()); +} + +const std::vector +SparseHMM::decodeViterbi(std::vector > obsProb, + vector *scale) +{ + if (obsProb.size() < 1) { + return vector(); + } + + size_t nState = init.size(); + size_t nFrame = obsProb.size(); + + // check for consistency + size_t nTrans = transProb.size(); + + // declaring variables + std::vector delta = std::vector(nState); + std::vector oldDelta = std::vector(nState); + vector > psi; // "matrix" of remembered indices of the best transitions + vector path = vector(nFrame, nState-1); // the final output path (current assignment arbitrary, makes sense only for Chordino, where nChord-1 is the "no chord" label) + + double deltasum = 0; + + // initialise first frame + for (size_t iState = 0; iState < nState; ++iState) + { + oldDelta[iState] = init[iState] * obsProb[0][iState]; + // std::cerr << iState << " ----- " << init[iState] << std::endl; + deltasum += oldDelta[iState]; + } + + for (size_t iState = 0; iState < nState; ++iState) + { + oldDelta[iState] /= deltasum; // normalise (scale) + // std::cerr << oldDelta[iState] << std::endl; + } + + scale->push_back(1.0/deltasum); + psi.push_back(vector(nState,0)); + + // rest of forward step + for (size_t iFrame = 1; iFrame < nFrame; ++iFrame) + { + deltasum = 0; + psi.push_back(vector(nState,0)); + + // calculate best previous state for every current state + size_t fromState; + size_t toState; + double currentTransProb; + double currentValue; + + // this is the "sparse" loop + for (size_t iTrans = 0; iTrans < nTrans; ++iTrans) + { + fromState = from[iTrans]; + toState = to[iTrans]; + currentTransProb = transProb[iTrans]; + + currentValue = oldDelta[fromState] * currentTransProb; + if (currentValue > delta[toState]) + { + delta[toState] = currentValue; // will be multiplied by the right obs later! + psi[iFrame][toState] = fromState; + } + } + + for (size_t jState = 0; jState < nState; ++jState) + { + delta[jState] *= obsProb[iFrame][jState]; + deltasum += delta[jState]; + } + + if (deltasum > 0) + { + for (size_t iState = 0; iState < nState; ++iState) + { + oldDelta[iState] = delta[iState] / deltasum; // normalise (scale) + delta[iState] = 0; + } + scale->push_back(1.0/deltasum); + } else + { + std::cerr << "WARNING: Viterbi has been fed some zero probabilities, at least they become zero at frame " << iFrame << " in combination with the model." << std::endl; + for (size_t iState = 0; iState < nState; ++iState) + { + oldDelta[iState] = 1.0/nState; + delta[iState] = 0; + } + scale->push_back(1.0); + } + } + + // initialise backward step + double bestValue = 0; + for (size_t iState = 0; iState < nState; ++iState) + { + double currentValue = oldDelta[iState]; + if (currentValue > bestValue) + { + bestValue = currentValue; + path[nFrame-1] = iState; + } + } + + // rest of backward step + for (int iFrame = nFrame-2; iFrame != -1; --iFrame) + { + path[iFrame] = psi[iFrame+1][path[iFrame+1]]; + } + + // for (size_t iState = 0; iState < nState; ++iState) + // { + // // std::cerr << psi[2][iState] << std::endl; + // } + + return path; +} diff --git a/libs/vamp-pyin/SparseHMM.h b/libs/vamp-pyin/SparseHMM.h new file mode 100644 index 0000000000..d3f02dd5a0 --- /dev/null +++ b/libs/vamp-pyin/SparseHMM.h @@ -0,0 +1,35 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _SPARSEHMM_H_ +#define _SPARSEHMM_H_ + +#include +#include + +using std::vector; +using std::pair; + +class SparseHMM +{ +public: + virtual const std::vector calculateObsProb(const vector >); + const std::vector decodeViterbi(std::vector > obs, + vector *scale); + vector init; + vector from; + vector to; + vector transProb; +}; + +#endif diff --git a/libs/vamp-pyin/Yin.cpp b/libs/vamp-pyin/Yin.cpp new file mode 100644 index 0000000000..024a124c4f --- /dev/null +++ b/libs/vamp-pyin/Yin.cpp @@ -0,0 +1,153 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "Yin.h" + +#include "vamp-sdk/FFT.h" +#include "MeanFilter.h" +#include "YinUtil.h" + +#include +#include +#include +#include +#include + +using std::vector; + +Yin::Yin(size_t frameSize, size_t inputSampleRate, double thresh, bool fast) : + m_frameSize(frameSize), + m_inputSampleRate(inputSampleRate), + m_thresh(thresh), + m_threshDistr(2), + m_yinBufferSize(frameSize/2), + m_fast(fast) +{ + if (frameSize & (frameSize-1)) { + // throw "N must be a power of two"; + } +} + +Yin::~Yin() +{ +} + +Yin::YinOutput +Yin::process(const double *in) const { + + double* yinBuffer = new double[m_yinBufferSize]; + + // calculate aperiodicity function for all periods + if (m_fast) YinUtil::fastDifference(in, yinBuffer, m_yinBufferSize); + else YinUtil::slowDifference(in, yinBuffer, m_yinBufferSize); + + YinUtil::cumulativeDifference(yinBuffer, m_yinBufferSize); + + int tau = 0; + tau = YinUtil::absoluteThreshold(yinBuffer, m_yinBufferSize, m_thresh); + + double interpolatedTau; + double aperiodicity; + double f0; + + if (tau!=0) + { + interpolatedTau = YinUtil::parabolicInterpolation(yinBuffer, abs(tau), m_yinBufferSize); + f0 = m_inputSampleRate * (1.0 / interpolatedTau); + } else { + interpolatedTau = 0; + f0 = 0; + } + double rms = std::sqrt(YinUtil::sumSquare(in, 0, m_yinBufferSize)/m_yinBufferSize); + aperiodicity = yinBuffer[abs(tau)]; + // std::cerr << aperiodicity << std::endl; + if (tau < 0) f0 = -f0; + + Yin::YinOutput yo(f0, 1-aperiodicity, rms); + for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf) + { + yo.salience.push_back(yinBuffer[iBuf] < 1 ? 1-yinBuffer[iBuf] : 0); // why are the values sometimes < 0 if I don't check? + } + + delete [] yinBuffer; + return yo; +} + +Yin::YinOutput +Yin::processProbabilisticYin(const double *in) const { + + double* yinBuffer = new double[m_yinBufferSize]; + + // calculate aperiodicity function for all periods + if (m_fast) YinUtil::fastDifference(in, yinBuffer, m_yinBufferSize); + else YinUtil::slowDifference(in, yinBuffer, m_yinBufferSize); + + YinUtil::cumulativeDifference(yinBuffer, m_yinBufferSize); + + vector peakProbability = YinUtil::yinProb(yinBuffer, m_threshDistr, m_yinBufferSize); + + // calculate overall "probability" from peak probability + double probSum = 0; + for (size_t iBin = 0; iBin < m_yinBufferSize; ++iBin) + { + probSum += peakProbability[iBin]; + } + double rms = std::sqrt(YinUtil::sumSquare(in, 0, m_yinBufferSize)/m_yinBufferSize); + Yin::YinOutput yo(0,0,rms); + for (size_t iBuf = 0; iBuf < m_yinBufferSize; ++iBuf) + { + yo.salience.push_back(peakProbability[iBuf]); + if (peakProbability[iBuf] > 0) + { + double currentF0 = + m_inputSampleRate * (1.0 / + YinUtil::parabolicInterpolation(yinBuffer, iBuf, m_yinBufferSize)); + yo.freqProb.push_back(pair(currentF0, peakProbability[iBuf])); + } + } + + // std::cerr << yo.freqProb.size() << std::endl; + + delete [] yinBuffer; + return yo; +} + + +int +Yin::setThreshold(double parameter) +{ + m_thresh = static_cast(parameter); + return 0; +} + +int +Yin::setThresholdDistr(float parameter) +{ + m_threshDistr = static_cast(parameter); + return 0; +} + +int +Yin::setFrameSize(size_t parameter) +{ + m_frameSize = parameter; + m_yinBufferSize = m_frameSize/2; + return 0; +} + +int +Yin::setFast(bool parameter) +{ + m_fast = parameter; + return 0; +} diff --git a/libs/vamp-pyin/Yin.h b/libs/vamp-pyin/Yin.h new file mode 100644 index 0000000000..0fb151a6a5 --- /dev/null +++ b/libs/vamp-pyin/Yin.h @@ -0,0 +1,71 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _YIN_H_ +#define _YIN_H_ + +#include "vamp-sdk/FFT.h" +#include "MeanFilter.h" + +#include + +#include +#include +#include + +using std::vector; +using std::pair; + + + +class Yin +{ +public: + Yin(size_t frameSize, size_t inputSampleRate, double thresh = 0.2, bool fast = true); + virtual ~Yin(); + + struct YinOutput { + double f0; + double periodicity; + double rms; + vector salience; + vector > freqProb; + YinOutput() : f0(0), periodicity(0), rms(0), + salience(vector(0)), freqProb(vector >(0)) { } + YinOutput(double _f, double _p, double _r) : + f0(_f), periodicity(_p), rms(_r), + salience(vector(0)), freqProb(vector >(0)) { } + YinOutput(double _f, double _p, double _r, vector _salience) : + f0(_f), periodicity(_p), rms(_r), salience(_salience), + freqProb(vector >(0)) { } + }; + + int setThreshold(double parameter); + int setThresholdDistr(float parameter); + int setFrameSize(size_t frameSize); + int setFast(bool fast); + // int setRemoveUnvoiced(bool frameSize); + YinOutput process(const double *in) const; + YinOutput processProbabilisticYin(const double *in) const; + +private: + mutable size_t m_frameSize; + mutable size_t m_inputSampleRate; + mutable double m_thresh; + mutable size_t m_threshDistr; + mutable size_t m_yinBufferSize; + mutable bool m_fast; + // mutable bool m_removeUnvoiced; +}; + +#endif diff --git a/libs/vamp-pyin/YinUtil.cpp b/libs/vamp-pyin/YinUtil.cpp new file mode 100644 index 0000000000..b93409374c --- /dev/null +++ b/libs/vamp-pyin/YinUtil.cpp @@ -0,0 +1,363 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "YinUtil.h" + +#include + +#include +#include +#include + +#include + +void +YinUtil::slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize) +{ + yinBuffer[0] = 0; + double delta ; + int startPoint = 0; + int endPoint = 0; + for (int i = 1; i < yinBufferSize; ++i) { + yinBuffer[i] = 0; + startPoint = yinBufferSize/2 - i/2; + endPoint = startPoint + yinBufferSize; + for (int j = startPoint; j < endPoint; ++j) { + delta = in[i+j] - in[j]; + yinBuffer[i] += delta * delta; + } + } +} + +void +YinUtil::fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize) +{ + + // DECLARE AND INITIALISE + // initialisation of most of the arrays here was done in a separate function, + // with all the arrays as members of the class... moved them back here. + + size_t frameSize = 2 * yinBufferSize; + + double *audioTransformedReal = new double[frameSize]; + double *audioTransformedImag = new double[frameSize]; + double *nullImag = new double[frameSize]; + double *kernel = new double[frameSize]; + double *kernelTransformedReal = new double[frameSize]; + double *kernelTransformedImag = new double[frameSize]; + double *yinStyleACFReal = new double[frameSize]; + double *yinStyleACFImag = new double[frameSize]; + double *powerTerms = new double[yinBufferSize]; + + for (size_t j = 0; j < yinBufferSize; ++j) + { + yinBuffer[j] = 0.; // set to zero + powerTerms[j] = 0.; // set to zero + } + + for (size_t j = 0; j < frameSize; ++j) + { + nullImag[j] = 0.; + audioTransformedReal[j] = 0.; + audioTransformedImag[j] = 0.; + kernel[j] = 0.; + kernelTransformedReal[j] = 0.; + kernelTransformedImag[j] = 0.; + yinStyleACFReal[j] = 0.; + yinStyleACFImag[j] = 0.; + } + + // POWER TERM CALCULATION + // ... for the power terms in equation (7) in the Yin paper + powerTerms[0] = 0.0; + for (size_t j = 0; j < yinBufferSize; ++j) { + powerTerms[0] += in[j] * in[j]; + } + + // now iteratively calculate all others (saves a few multiplications) + for (size_t tau = 1; tau < yinBufferSize; ++tau) { + powerTerms[tau] = powerTerms[tau-1] - in[tau-1] * in[tau-1] + in[tau+yinBufferSize] * in[tau+yinBufferSize]; + } + + // YIN-STYLE AUTOCORRELATION via FFT + // 1. data + Vamp::FFT::forward(frameSize, in, nullImag, audioTransformedReal, audioTransformedImag); + + // 2. half of the data, disguised as a convolution kernel + for (size_t j = 0; j < yinBufferSize; ++j) { + kernel[j] = in[yinBufferSize-1-j]; + } + Vamp::FFT::forward(frameSize, kernel, nullImag, kernelTransformedReal, kernelTransformedImag); + + // 3. convolution via complex multiplication -- written into + for (size_t j = 0; j < frameSize; ++j) { + yinStyleACFReal[j] = audioTransformedReal[j]*kernelTransformedReal[j] - audioTransformedImag[j]*kernelTransformedImag[j]; // real + yinStyleACFImag[j] = audioTransformedReal[j]*kernelTransformedImag[j] + audioTransformedImag[j]*kernelTransformedReal[j]; // imaginary + } + Vamp::FFT::inverse(frameSize, yinStyleACFReal, yinStyleACFImag, audioTransformedReal, audioTransformedImag); + + // CALCULATION OF difference function + // ... according to (7) in the Yin paper. + for (size_t j = 0; j < yinBufferSize; ++j) { + // taking only the real part + yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * audioTransformedReal[j+yinBufferSize-1]; + } + delete [] audioTransformedReal; + delete [] audioTransformedImag; + delete [] nullImag; + delete [] kernel; + delete [] kernelTransformedReal; + delete [] kernelTransformedImag; + delete [] yinStyleACFReal; + delete [] yinStyleACFImag; + delete [] powerTerms; +} + +void +YinUtil::cumulativeDifference(double *yinBuffer, const size_t yinBufferSize) +{ + size_t tau; + + yinBuffer[0] = 1; + + double runningSum = 0; + + for (tau = 1; tau < yinBufferSize; ++tau) { + runningSum += yinBuffer[tau]; + if (runningSum == 0) + { + yinBuffer[tau] = 1; + } else { + yinBuffer[tau] *= tau / runningSum; + } + } +} + +int +YinUtil::absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh) +{ + size_t tau; + size_t minTau = 0; + double minVal = 1000.; + + // using Joren Six's "loop construct" from TarsosDSP + tau = 2; + while (tau < yinBufferSize) + { + if (yinBuffer[tau] < thresh) + { + while (tau+1 < yinBufferSize && yinBuffer[tau+1] < yinBuffer[tau]) + { + ++tau; + } + return tau; + } else { + if (yinBuffer[tau] < minVal) + { + minVal = yinBuffer[tau]; + minTau = tau; + } + } + ++tau; + } + if (minTau > 0) + { + return -minTau; + } + return 0; +} + +static float uniformDist[100] = {0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000}; +static float betaDist1[100] = {0.028911,0.048656,0.061306,0.068539,0.071703,0.071877,0.069915,0.066489,0.062117,0.057199,0.052034,0.046844,0.041786,0.036971,0.032470,0.028323,0.024549,0.021153,0.018124,0.015446,0.013096,0.011048,0.009275,0.007750,0.006445,0.005336,0.004397,0.003606,0.002945,0.002394,0.001937,0.001560,0.001250,0.000998,0.000792,0.000626,0.000492,0.000385,0.000300,0.000232,0.000179,0.000137,0.000104,0.000079,0.000060,0.000045,0.000033,0.000024,0.000018,0.000013,0.000009,0.000007,0.000005,0.000003,0.000002,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; +static float betaDist2[100] = {0.012614,0.022715,0.030646,0.036712,0.041184,0.044301,0.046277,0.047298,0.047528,0.047110,0.046171,0.044817,0.043144,0.041231,0.039147,0.036950,0.034690,0.032406,0.030133,0.027898,0.025722,0.023624,0.021614,0.019704,0.017900,0.016205,0.014621,0.013148,0.011785,0.010530,0.009377,0.008324,0.007366,0.006497,0.005712,0.005005,0.004372,0.003806,0.003302,0.002855,0.002460,0.002112,0.001806,0.001539,0.001307,0.001105,0.000931,0.000781,0.000652,0.000542,0.000449,0.000370,0.000303,0.000247,0.000201,0.000162,0.000130,0.000104,0.000082,0.000065,0.000051,0.000039,0.000030,0.000023,0.000018,0.000013,0.000010,0.000007,0.000005,0.000004,0.000003,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; +static float betaDist3[100] = {0.006715,0.012509,0.017463,0.021655,0.025155,0.028031,0.030344,0.032151,0.033506,0.034458,0.035052,0.035331,0.035332,0.035092,0.034643,0.034015,0.033234,0.032327,0.031314,0.030217,0.029054,0.027841,0.026592,0.025322,0.024042,0.022761,0.021489,0.020234,0.019002,0.017799,0.016630,0.015499,0.014409,0.013362,0.012361,0.011407,0.010500,0.009641,0.008830,0.008067,0.007351,0.006681,0.006056,0.005475,0.004936,0.004437,0.003978,0.003555,0.003168,0.002814,0.002492,0.002199,0.001934,0.001695,0.001481,0.001288,0.001116,0.000963,0.000828,0.000708,0.000603,0.000511,0.000431,0.000361,0.000301,0.000250,0.000206,0.000168,0.000137,0.000110,0.000088,0.000070,0.000055,0.000043,0.000033,0.000025,0.000019,0.000014,0.000010,0.000007,0.000005,0.000004,0.000002,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; +static float betaDist4[100] = {0.003996,0.007596,0.010824,0.013703,0.016255,0.018501,0.020460,0.022153,0.023597,0.024809,0.025807,0.026607,0.027223,0.027671,0.027963,0.028114,0.028135,0.028038,0.027834,0.027535,0.027149,0.026687,0.026157,0.025567,0.024926,0.024240,0.023517,0.022763,0.021983,0.021184,0.020371,0.019548,0.018719,0.017890,0.017062,0.016241,0.015428,0.014627,0.013839,0.013068,0.012315,0.011582,0.010870,0.010181,0.009515,0.008874,0.008258,0.007668,0.007103,0.006565,0.006053,0.005567,0.005107,0.004673,0.004264,0.003880,0.003521,0.003185,0.002872,0.002581,0.002312,0.002064,0.001835,0.001626,0.001434,0.001260,0.001102,0.000959,0.000830,0.000715,0.000612,0.000521,0.000440,0.000369,0.000308,0.000254,0.000208,0.000169,0.000136,0.000108,0.000084,0.000065,0.000050,0.000037,0.000027,0.000019,0.000014,0.000009,0.000006,0.000004,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000}; +static float single10[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; +static float single15[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; +static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; + +std::vector +YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0) +{ + size_t minTau = 2; + size_t maxTau = yinBufferSize; + + // adapt period range, if necessary + if (minTau0 > 0 && minTau0 < maxTau0) minTau = minTau0; + if (maxTau0 > 0 && maxTau0 < yinBufferSize && maxTau0 > minTau) maxTau = maxTau0; + + double minWeight = 0.01; + size_t tau; + std::vector thresholds; + std::vector distribution; + std::vector peakProb = std::vector(yinBufferSize); + + size_t nThreshold = 100; + int nThresholdInt = nThreshold; + + for (int i = 0; i < nThresholdInt; ++i) + { + switch (prior) { + case 0: + distribution.push_back(uniformDist[i]); + break; + case 1: + distribution.push_back(betaDist1[i]); + break; + case 2: + distribution.push_back(betaDist2[i]); + break; + case 3: + distribution.push_back(betaDist3[i]); + break; + case 4: + distribution.push_back(betaDist4[i]); + break; + case 5: + distribution.push_back(single10[i]); + break; + case 6: + distribution.push_back(single15[i]); + break; + case 7: + distribution.push_back(single20[i]); + break; + default: + distribution.push_back(uniformDist[i]); + } + thresholds.push_back(0.01 + i*0.01); + } + + + int currThreshInd = nThreshold-1; + tau = minTau; + + // double factor = 1.0 / (0.25 * (nThresholdInt+1) * (nThresholdInt + 1)); // factor to scale down triangular weight + size_t minInd = 0; + float minVal = 42.f; + // while (currThreshInd != -1 && tau < maxTau) + // { + // if (yinBuffer[tau] < thresholds[currThreshInd]) + // { + // while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau]) + // { + // tau++; + // } + // // tau is now local minimum + // // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl; + // if (yinBuffer[tau] < minVal && tau > 2){ + // minVal = yinBuffer[tau]; + // minInd = tau; + // } + // peakProb[tau] += distribution[currThreshInd]; + // currThreshInd--; + // } else { + // tau++; + // } + // } + // double nonPeakProb = 1; + // for (size_t i = minTau; i < maxTau; ++i) + // { + // nonPeakProb -= peakProb[i]; + // } + // + // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl; + float sumProb = 0; + while (tau+1 < maxTau) + { + if (yinBuffer[tau] < thresholds[thresholds.size()-1] && yinBuffer[tau+1] < yinBuffer[tau]) + { + while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau]) + { + tau++; + } + // tau is now local minimum + // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl; + if (yinBuffer[tau] < minVal && tau > 2){ + minVal = yinBuffer[tau]; + minInd = tau; + } + currThreshInd = nThresholdInt-1; + while (thresholds[currThreshInd] > yinBuffer[tau] && currThreshInd > -1) { + // std::cerr << distribution[currThreshInd] << std::endl; + peakProb[tau] += distribution[currThreshInd]; + currThreshInd--; + } + // peakProb[tau] = 1 - yinBuffer[tau]; + sumProb += peakProb[tau]; + tau++; + } else { + tau++; + } + } + + if (peakProb[minInd] > 1) { + std::cerr << "WARNING: yin has prob > 1 ??? I'm returning all zeros instead." << std::endl; + return(std::vector(yinBufferSize)); + } + + double nonPeakProb = 1; + if (sumProb > 0) { + for (size_t i = minTau; i < maxTau; ++i) + { + peakProb[i] = peakProb[i] / sumProb * peakProb[minInd]; + nonPeakProb -= peakProb[i]; + } + } + if (minInd > 0) + { + // std::cerr << "min set " << minVal << " " << minInd << " " << nonPeakProb << std::endl; + peakProb[minInd] += nonPeakProb * minWeight; + } + + return peakProb; +} + +double +YinUtil::parabolicInterpolation(const double *yinBuffer, const size_t tau, const size_t yinBufferSize) +{ + // this is taken almost literally from Joren Six's Java implementation + if (tau == yinBufferSize) // not valid anyway. + { + return static_cast(tau); + } + + double betterTau = 0.0; + if (tau > 0 && tau < yinBufferSize-1) { + float s0, s1, s2; + s0 = yinBuffer[tau-1]; + s1 = yinBuffer[tau]; + s2 = yinBuffer[tau+1]; + + double adjustment = (s2 - s0) / (2 * (2 * s1 - s2 - s0)); + + if (abs(adjustment)>1) adjustment = 0; + + betterTau = tau + adjustment; + } else { + // std::cerr << "WARNING: can't do interpolation at the edge (tau = " << tau << "), will return un-interpolated value.\n"; + betterTau = tau; + } + return betterTau; +} + +double +YinUtil::sumSquare(const double *in, const size_t start, const size_t end) +{ + double out = 0; + for (size_t i = start; i < end; ++i) + { + out += in[i] * in[i]; + } + return out; +} diff --git a/libs/vamp-pyin/YinUtil.h b/libs/vamp-pyin/YinUtil.h new file mode 100644 index 0000000000..ea2d49c9fe --- /dev/null +++ b/libs/vamp-pyin/YinUtil.h @@ -0,0 +1,42 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _YINUTIL_H_ +#define _YINUTIL_H_ + +#include "vamp-sdk/FFT.h" +#include "MeanFilter.h" + +#include + +#include +#include +#include + +using std::vector; + +class YinUtil +{ +public: + static double sumSquare(const double *in, const size_t startInd, const size_t endInd); + static void difference(const double *in, double *yinBuffer, const size_t yinBufferSize); + static void fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize); + static void slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize); + static void cumulativeDifference(double *yinBuffer, const size_t yinBufferSize); + static int absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh); + static vector yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, size_t minTau = 0, size_t maxTau = 0); + static double parabolicInterpolation(const double *yinBuffer, const size_t tau, + const size_t yinBufferSize); +}; + +#endif diff --git a/libs/vamp-pyin/YinVamp.cpp b/libs/vamp-pyin/YinVamp.cpp new file mode 100644 index 0000000000..bc1e010e26 --- /dev/null +++ b/libs/vamp-pyin/YinVamp.cpp @@ -0,0 +1,367 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "YinVamp.h" +#include "MonoNote.h" + +#include "vamp-sdk/FFT.h" + +#include +#include + +#include +#include +#include + +using std::string; +using std::vector; +using Vamp::RealTime; + + +YinVamp::YinVamp(float inputSampleRate) : + Plugin(inputSampleRate), + m_channels(0), + m_stepSize(256), + m_blockSize(2048), + m_fmin(40), + m_fmax(1600), + m_yin(2048, inputSampleRate, 0.0), + m_outNoF0(0), + m_outNoPeriodicity(0), + m_outNoRms(0), + m_outNoSalience(0), + m_yinParameter(0.15f), + m_outputUnvoiced(2.0f) +{ +} + +YinVamp::~YinVamp() +{ +} + +string +YinVamp::getIdentifier() const +{ + return "yin"; +} + +string +YinVamp::getName() const +{ + return "Yin"; +} + +string +YinVamp::getDescription() const +{ + return "A vamp implementation of the Yin algorithm for monophonic frequency estimation."; +} + +string +YinVamp::getMaker() const +{ + return "Matthias Mauch"; +} + +int +YinVamp::getPluginVersion() const +{ + // Increment this each time you release a version that behaves + // differently from the previous one + return 2; +} + +string +YinVamp::getCopyright() const +{ + return "GPL"; +} + +YinVamp::InputDomain +YinVamp::getInputDomain() const +{ + return TimeDomain; +} + +size_t +YinVamp::getPreferredBlockSize() const +{ + return 2048; +} + +size_t +YinVamp::getPreferredStepSize() const +{ + return 256; +} + +size_t +YinVamp::getMinChannelCount() const +{ + return 1; +} + +size_t +YinVamp::getMaxChannelCount() const +{ + return 1; +} + +YinVamp::ParameterList +YinVamp::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor d; + d.identifier = "yinThreshold"; + d.name = "Yin threshold"; + d.description = "The greedy Yin search for a low value difference function is done once a dip lower than this threshold is reached."; + d.unit = ""; + d.minValue = 0.025f; + d.maxValue = 1.0f; + d.defaultValue = 0.15f; + d.isQuantized = true; + d.quantizeStep = 0.025f; + + list.push_back(d); + + d.identifier = "outputunvoiced"; + d.valueNames.clear(); + d.name = "Output estimates classified as unvoiced?"; + d.description = "."; + d.unit = ""; + d.minValue = 0.0f; + d.maxValue = 2.0f; + d.defaultValue = 2.0f; + d.isQuantized = true; + d.quantizeStep = 1.0f; + d.valueNames.push_back("No"); + d.valueNames.push_back("Yes"); + d.valueNames.push_back("Yes, as negative frequencies"); + list.push_back(d); + + return list; +} + +float +YinVamp::getParameter(string identifier) const +{ + if (identifier == "yinThreshold") { + return m_yinParameter; + } + if (identifier == "outputunvoiced") { + return m_outputUnvoiced; + } + return 0.f; +} + +void +YinVamp::setParameter(string identifier, float value) +{ + if (identifier == "yinThreshold") + { + m_yinParameter = value; + } + if (identifier == "outputunvoiced") + { + m_outputUnvoiced = value; + } +} + +YinVamp::ProgramList +YinVamp::getPrograms() const +{ + ProgramList list; + return list; +} + +string +YinVamp::getCurrentProgram() const +{ + return ""; // no programs +} + +void +YinVamp::selectProgram(string name) +{ +} + +YinVamp::OutputList +YinVamp::getOutputDescriptors() const +{ + OutputList outputs; + + OutputDescriptor d; + + int outputNumber = 0; + + d.identifier = "f0"; + d.name = "Estimated f0"; + d.description = "Estimated fundamental frequency"; + d.unit = "Hz"; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = m_fmin; + d.maxValue = 500; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoF0 = outputNumber++; + + d.identifier = "periodicity"; + d.name = "Periodicity"; + d.description = "by-product of Yin f0 estimation"; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoPeriodicity = outputNumber++; + + d.identifier = "rms"; + d.name = "Root mean square"; + d.description = "Root mean square of the waveform."; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = 1; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoRms = outputNumber++; + + d.identifier = "salience"; + d.name = "Salience"; + d.description = "Yin Salience"; + d.hasFixedBinCount = true; + d.binCount = m_blockSize / 2; + d.hasKnownExtents = true; + d.minValue = 0; + d.maxValue = 1; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = (m_inputSampleRate / m_stepSize); + d.hasDuration = false; + outputs.push_back(d); + m_outNoSalience = outputNumber++; + + return outputs; +} + +bool +YinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + +/* + std::cerr << "YinVamp::initialise: channels = " << channels + << ", stepSize = " << stepSize << ", blockSize = " << blockSize + << std::endl; +*/ + m_channels = channels; + m_stepSize = stepSize; + m_blockSize = blockSize; + + reset(); + + return true; +} + +void +YinVamp::reset() +{ + m_yin.setThreshold(m_yinParameter); + m_yin.setFrameSize(m_blockSize); +/* + std::cerr << "YinVamp::reset: yin threshold set to " << (m_yinParameter) + << ", blockSize = " << m_blockSize + << std::endl; +*/ +} + +YinVamp::FeatureSet +YinVamp::process(const float *const *inputBuffers, RealTime timestamp) +{ + timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/2, lrintf(m_inputSampleRate)); + FeatureSet fs; + + double *dInputBuffers = new double[m_blockSize]; + for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i]; + + Yin::YinOutput yo = m_yin.process(dInputBuffers); + // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl; + Feature f; + f.hasTimestamp = true; + f.timestamp = timestamp; + if (m_outputUnvoiced == 0.0f) + { + // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl; + if (yo.f0 > 0 && yo.f0 < m_fmax && yo.f0 > m_fmin) { + f.values.push_back(yo.f0); + fs[m_outNoF0].push_back(f); + } + } else if (m_outputUnvoiced == 1.0f) + { + if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { + f.values.push_back(fabs(yo.f0)); + fs[m_outNoF0].push_back(f); + } + } else + { + if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) { + f.values.push_back(yo.f0); + fs[m_outNoF0].push_back(f); + } + } + + f.values.clear(); + f.values.push_back(yo.rms); + fs[m_outNoRms].push_back(f); + + f.values.clear(); + for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin) + { + f.values.push_back(yo.salience[iBin]); + } + fs[m_outNoSalience].push_back(f); + + f.values.clear(); + // f.values[0] = yo.periodicity; + f.values.push_back(yo.periodicity); + fs[m_outNoPeriodicity].push_back(f); + + delete [] dInputBuffers; + + return fs; +} + +YinVamp::FeatureSet +YinVamp::getRemainingFeatures() +{ + FeatureSet fs; + return fs; +} diff --git a/libs/vamp-pyin/YinVamp.h b/libs/vamp-pyin/YinVamp.h new file mode 100644 index 0000000000..c82a8abc19 --- /dev/null +++ b/libs/vamp-pyin/YinVamp.h @@ -0,0 +1,75 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _YINVAMP_H_ +#define _YINVAMP_H_ + +#include + +#include "Yin.h" + +class YinVamp : public Vamp::Plugin +{ +public: + YinVamp(float inputSampleRate); + virtual ~YinVamp(); + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + InputDomain getInputDomain() const; + size_t getPreferredBlockSize() const; + size_t getPreferredStepSize() const; + size_t getMinChannelCount() const; + size_t getMaxChannelCount() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string identifier) const; + void setParameter(std::string identifier, float value); + + ProgramList getPrograms() const; + std::string getCurrentProgram() const; + void selectProgram(std::string name); + + OutputList getOutputDescriptors() const; + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + size_t m_channels; + size_t m_stepSize; + size_t m_blockSize; + float m_fmin; + float m_fmax; + Yin m_yin; + + mutable int m_outNoF0; + mutable int m_outNoPeriodicity; + mutable int m_outNoRms; + mutable int m_outNoSalience; + + float m_yinParameter; + float m_outputUnvoiced; +}; + +#endif diff --git a/libs/vamp-pyin/libmain.cpp b/libs/vamp-pyin/libmain.cpp new file mode 100644 index 0000000000..350b3ba4dc --- /dev/null +++ b/libs/vamp-pyin/libmain.cpp @@ -0,0 +1,38 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + pYIN - A fundamental frequency estimator for monophonic audio + Centre for Digital Music, Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include +#include + +#include "PYinVamp.h" +#include "YinVamp.h" +#include "LocalCandidatePYIN.h" + +static Vamp::PluginAdapter pyinvampPluginAdapter; +static Vamp::PluginAdapter yinvampPluginAdapter; +static Vamp::PluginAdapter localCandidatePYINPluginAdapter; + +const VampPluginDescriptor * +vampGetPluginDescriptor(unsigned int version, unsigned int index) +{ + if (version < 1) return 0; + + switch (index) { + case 0: return pyinvampPluginAdapter.getDescriptor(); + case 1: return yinvampPluginAdapter.getDescriptor(); + case 2: return localCandidatePYINPluginAdapter.getDescriptor(); + default: return 0; + } +} + + diff --git a/libs/vamp-pyin/wscript b/libs/vamp-pyin/wscript new file mode 100644 index 0000000000..e559856b4f --- /dev/null +++ b/libs/vamp-pyin/wscript @@ -0,0 +1,53 @@ +#!/usr/bin/env python +from waflib.extras import autowaf as autowaf +import os + +# Library version (UNIX style major, minor, micro) +# major increment <=> incompatible changes +# minor increment <=> compatible changes (additions) +# micro increment <=> no interface changes +VAMP_PYIN_LIB_VERSION = '0.0.0' + +# Variables for 'waf dist' +APPNAME = 'libardourvamppyin' +VERSION = '1.1' + +# Mandatory variables +top = '.' +out = 'build' + +def options(opt): + autowaf.set_options(opt) + +def configure(conf): + conf.load('compiler_cxx') + autowaf.configure(conf) + +def build(bld): + # Library + obj = bld(features = 'cxx cxxshlib') + obj.source = ''' + libmain.cpp + PYinVamp.cpp + YinVamp.cpp + LocalCandidatePYIN.cpp + Yin.cpp + YinUtil.cpp + MonoNote.cpp + MonoPitch.cpp + MonoNoteParameters.cpp + SparseHMM.cpp + MonoNoteHMM.cpp + MonoPitchHMM.cpp + ''' + obj.export_includes = ['.'] + obj.includes = ['.'] + obj.name = 'libardourvamppyin' + obj.target = 'ardourvamppyin' + obj.use = 'libvampplugin' + autowaf.ensure_visible_symbols (obj, True) + obj.vnum = VAMP_PYIN_LIB_VERSION + obj.install_path = os.path.join(bld.env['LIBDIR'], 'vamp') + +def shutdown(): + autowaf.shutdown() -- cgit v1.2.3