summaryrefslogtreecommitdiff
path: root/libs/qm-dsp/dsp/tempotracking/DownBeat.h
blob: 3ef0d181273610e36cf08d10fa72ab7b2ecf75ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    QM DSP Library

    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2008-2009 Matthew Davies and QMUL.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#ifndef DOWNBEAT_H
#define DOWNBEAT_H

#include <vector>

#include "dsp/rateconversion/Decimator.h"

using std::vector;

class FFTReal;

/**
 * This class takes an input audio signal and a sequence of beat
 * locations (calculated e.g. by TempoTrackV2) and estimates which of
 * the beat locations are downbeats (first beat of the bar).
 *
 * The input audio signal is expected to have been downsampled to a
 * very low sampling rate (e.g. 2700Hz).  A utility function for
 * downsampling and buffering incoming block-by-block audio is
 * provided.
 */
class DownBeat
{
public:
    /**
     * Construct a downbeat locator that will operate on audio
     * downsampled by the given decimation factor from the given
     * original sample rate, plus beats extracted from the same audio
     * at the given original sample rate with the given frame
     * increment.
     *
     * decimationFactor must be a power of two no greater than 64, and
     * dfIncrement must be a multiple of decimationFactor.
     */
    DownBeat(float originalSampleRate,
             size_t decimationFactor,
             size_t dfIncrement);
    ~DownBeat();

    /**
     * Set the number of beats per bar (bpb).
     *
     * NOTE(review): presumably this is consulted by findDownBeats
     * when deciding which beats are downbeats, so it should be set
     * before calling that function -- confirm against the
     * implementation.
     */
    void setBeatsPerBar(int bpb);

    /**
     * Estimate which beats are down-beats.
     *
     * audio contains the input audio stream after downsampling, and
     * audioLength contains the number of samples in this downsampled
     * stream.
     *
     * beats contains a series of beat positions expressed in
     * multiples of the df increment at the audio's original sample
     * rate, as described to the constructor.
     *
     * The returned downbeat array contains a series of indices to the
     * beats array.
     */
    void findDownBeats(const float *audio, // downsampled
                       size_t audioLength, // after downsampling
                       const vector<double> &beats,
                       vector<int> &downbeats);

    /**
     * Return the beat spectral difference function.  This is
     * calculated during findDownBeats, so this function can only be
     * meaningfully called after that has completed.  The returned
     * vector contains one value for each of the beat times passed in
     * to findDownBeats, less one.  Each value contains the spectral
     * difference between the region prior to the beat's nominal
     * position and the region following it.
     */
    void getBeatSD(vector<double> &beatsd) const;

    /**
     * For your downsampling convenience: call this function
     * repeatedly with input audio blocks containing dfIncrement
     * samples at the original sample rate, to decimate them to the
     * downsampled rate and buffer them within the DownBeat class.
     *
     * Call getBufferedAudio() to retrieve the results after all
     * blocks have been processed.
     */
    void pushAudioBlock(const float *audio);

    /**
     * Retrieve the accumulated audio produced by pushAudioBlock calls.
     *
     * length is an output parameter; presumably it receives the
     * number of buffered (downsampled) samples -- confirm against the
     * implementation.  The returned pointer refers to storage held by
     * this object, so it should not be freed by the caller.
     */
    const float *getBufferedAudio(size_t &length) const;

    /**
     * Clear any buffered downsampled audio data.
     */
    void resetAudioBuffer();

private:
    // Copying is disabled.  This class declares a destructor and keeps
    // raw pointers to its working buffers and helper objects; an
    // implicit member-wise copy would leave two objects sharing those
    // pointers.  Both operations are declared private and deliberately
    // left undefined (the pre-C++11 non-copyable idiom, in keeping
    // with the rest of this header), so any attempted copy fails at
    // compile or link time.
    DownBeat(const DownBeat &);
    DownBeat &operator=(const DownBeat &);

    // Shorthand for the integer / double vector and matrix types.
    typedef vector<int> i_vec_t;
    typedef vector<vector<int> > i_mat_t;
    typedef vector<double> d_vec_t;
    typedef vector<vector<double> > d_mat_t;

    void makeDecimators();

    // NOTE(review): both spectra are taken by value, so each call
    // copies two vectors.  The signature is kept as-is because it must
    // match the out-of-line definition; consider const references if
    // the implementation does not modify its arguments.
    double measureSpecDiff(d_vec_t oldspec, d_vec_t newspec);

    int m_bpb;                // beats per bar (see setBeatsPerBar)
    float m_rate;             // presumably the original sample rate -- confirm
    size_t m_factor;          // presumably the decimation factor -- confirm
    size_t m_increment;       // presumably the df increment -- confirm
    Decimator *m_decimator1;  // decimation stage(s); see makeDecimators
    Decimator *m_decimator2;
    float *m_buffer;          // presumably the buffered downsampled audio -- confirm
    float *m_decbuf;          // presumably scratch space for decimation -- confirm
    size_t m_bufsiz;          // presumably the allocated size of m_buffer -- confirm
    size_t m_buffill;         // presumably the number of samples held in m_buffer -- confirm
    size_t m_beatframesize;   // presumably the length of m_beatframe -- confirm
    double *m_beatframe;
    FFTReal *m_fft;
    double *m_fftRealOut;
    double *m_fftImagOut;
    d_vec_t m_beatsd;         // beat spectral difference values (see getBeatSD)
};

#endif