diff options
-rw-r--r-- | libs/ardour/ardour/mix.h | 12 | ||||
-rw-r--r-- | libs/ardour/ardour/runtime_functions.h | 6 | ||||
-rw-r--r-- | libs/ardour/globals.cc | 20 | ||||
-rw-r--r-- | libs/ardour/mix.cc | 6 | ||||
-rw-r--r-- | libs/ardour/wscript | 8 | ||||
-rw-r--r-- | libs/backends/wavesaudio/waves_audiobackend.cc | 5 | ||||
-rw-r--r-- | libs/backends/wavesaudio/waves_audioport.cc | 34 | ||||
-rw-r--r-- | libs/pbd/pbd/fpu.h | 4 | ||||
-rw-r--r-- | libs/pbd/wscript | 1 | ||||
-rw-r--r-- | wscript | 4 |
10 files changed, 75 insertions, 25 deletions
diff --git a/libs/ardour/ardour/mix.h b/libs/ardour/ardour/mix.h index 3cd9a3e60f..2db444d02b 100644 --- a/libs/ardour/ardour/mix.h +++ b/libs/ardour/ardour/mix.h @@ -33,7 +33,17 @@ extern "C" { LIBARDOUR_API void x86_sse_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes); } +extern "C" { +/* AVX functions */ + LIBARDOUR_API float x86_sse_avx_compute_peak (const ARDOUR::Sample * buf, ARDOUR::pframes_t nsamples, float current); + LIBARDOUR_API void x86_sse_avx_apply_gain_to_buffer (ARDOUR::Sample * buf, ARDOUR::pframes_t nframes, float gain); + LIBARDOUR_API void x86_sse_avx_mix_buffers_with_gain(ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes, float gain); + LIBARDOUR_API void x86_sse_avx_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes); + LIBARDOUR_API void x86_sse_avx_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes); +} + LIBARDOUR_API void x86_sse_find_peaks (const ARDOUR::Sample * buf, ARDOUR::pframes_t nsamples, float *min, float *max); +LIBARDOUR_API void x86_sse_avx_find_peaks (const ARDOUR::Sample * buf, ARDOUR::pframes_t nsamples, float *min, float *max); /* debug wrappers for SSE functions */ @@ -41,6 +51,7 @@ LIBARDOUR_API float debug_compute_peak (const ARDOUR::Sample * buf LIBARDOUR_API void debug_apply_gain_to_buffer (ARDOUR::Sample * buf, ARDOUR::pframes_t nframes, float gain); LIBARDOUR_API void debug_mix_buffers_with_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes, float gain); LIBARDOUR_API void debug_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes); +LIBARDOUR_API void debug_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes); #endif @@ -61,5 +72,6 @@ LIBARDOUR_API void default_find_peaks (const ARDOUR::Sample * bu LIBARDOUR_API void default_apply_gain_to_buffer (ARDOUR::Sample * buf, ARDOUR::pframes_t nframes, float gain); LIBARDOUR_API void default_mix_buffers_with_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes, float gain); LIBARDOUR_API void default_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes); +LIBARDOUR_API void default_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes); #endif /* __ardour_mix_h__ */ diff --git a/libs/ardour/ardour/runtime_functions.h b/libs/ardour/ardour/runtime_functions.h index e1d6b99f61..45d6ec7015 100644 --- a/libs/ardour/ardour/runtime_functions.h +++ b/libs/ardour/ardour/runtime_functions.h @@ -25,17 +25,19 @@ namespace ARDOUR { - typedef float (*compute_peak_t) (const ARDOUR::Sample *, pframes_t, float); - typedef void (*find_peaks_t) (const ARDOUR::Sample *, pframes_t, float *, float*); + typedef float (*compute_peak_t) (const ARDOUR::Sample *, pframes_t, float); + typedef void (*find_peaks_t) (const ARDOUR::Sample *, pframes_t, float *, float*); typedef void (*apply_gain_to_buffer_t) (ARDOUR::Sample *, pframes_t, float); typedef void (*mix_buffers_with_gain_t) (ARDOUR::Sample *, const ARDOUR::Sample *, pframes_t, float); typedef void (*mix_buffers_no_gain_t) (ARDOUR::Sample *, const ARDOUR::Sample *, pframes_t); + typedef void (*copy_vector_t) (ARDOUR::Sample *, const ARDOUR::Sample *, pframes_t); LIBARDOUR_API extern compute_peak_t compute_peak; LIBARDOUR_API extern find_peaks_t find_peaks; LIBARDOUR_API extern apply_gain_to_buffer_t apply_gain_to_buffer; LIBARDOUR_API extern mix_buffers_with_gain_t mix_buffers_with_gain; LIBARDOUR_API extern mix_buffers_no_gain_t mix_buffers_no_gain; + LIBARDOUR_API extern copy_vector_t copy_vector; } #endif /* __ardour_runtime_functions_h__ */ diff --git a/libs/ardour/globals.cc b/libs/ardour/globals.cc index 288e69dc9e..fa6f833d94 100644 --- a/libs/ardour/globals.cc +++ b/libs/ardour/globals.cc @@ -131,6 +131,7 @@ find_peaks_t ARDOUR::find_peaks = 0; apply_gain_to_buffer_t ARDOUR::apply_gain_to_buffer = 0; mix_buffers_with_gain_t ARDOUR::mix_buffers_with_gain = 0; mix_buffers_no_gain_t ARDOUR::mix_buffers_no_gain = 0; +copy_vector_t ARDOUR::copy_vector = 0; PBD::Signal1<void,std::string> ARDOUR::BootMessage; PBD::Signal3<void,std::string,std::string,bool> ARDOUR::PluginScanMessage; @@ -160,7 +161,21 @@ setup_hardware_optimization (bool try_optimization) #if defined (ARCH_X86) && defined (BUILD_SSE_OPTIMIZATIONS) - if (fpu.has_sse()) { + if (fpu.has_avx()) { + + info << "Using AVX optimized routines" << endmsg; + + // AVX SET + compute_peak = x86_sse_avx_compute_peak; + find_peaks = x86_sse_avx_find_peaks; + apply_gain_to_buffer = x86_sse_avx_apply_gain_to_buffer; + mix_buffers_with_gain = x86_sse_avx_mix_buffers_with_gain; + mix_buffers_no_gain = x86_sse_avx_mix_buffers_no_gain; + copy_vector = x86_sse_avx_copy_vector; + + generic_mix_functions = false; + + } else if (fpu.has_sse()) { info << "Using SSE optimized routines" << endmsg; @@ -170,6 +185,7 @@ setup_hardware_optimization (bool try_optimization) apply_gain_to_buffer = x86_sse_apply_gain_to_buffer; mix_buffers_with_gain = x86_sse_mix_buffers_with_gain; mix_buffers_no_gain = x86_sse_mix_buffers_no_gain; + copy_vector = default_copy_vector; generic_mix_functions = false; @@ -187,6 +203,7 @@ setup_hardware_optimization (bool try_optimization) apply_gain_to_buffer = veclib_apply_gain_to_buffer; mix_buffers_with_gain = veclib_mix_buffers_with_gain; mix_buffers_no_gain = veclib_mix_buffers_no_gain; + copy_vector = default_copy_vector; generic_mix_functions = false; @@ -206,6 +223,7 @@ setup_hardware_optimization (bool try_optimization) apply_gain_to_buffer = default_apply_gain_to_buffer; mix_buffers_with_gain = default_mix_buffers_with_gain; mix_buffers_no_gain = default_mix_buffers_no_gain; + copy_vector = default_copy_vector; info << "No H/W specific optimizations in use" << endmsg; } diff --git a/libs/ardour/mix.cc b/libs/ardour/mix.cc index adae68ae7f..96ae624487 100644 --- a/libs/ardour/mix.cc +++ b/libs/ardour/mix.cc @@ -136,6 +136,12 @@ default_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, p } } +void +default_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, pframes_t nframes) +{ + memcpy(dst, src, nframes*sizeof(ARDOUR::Sample)); +} + #if defined (__APPLE__) && defined (BUILD_VECLIB_OPTIMIZATIONS) #include <Accelerate/Accelerate.h> diff --git a/libs/ardour/wscript b/libs/ardour/wscript index 115e12cbec..04b99785e5 100644 --- a/libs/ardour/wscript +++ b/libs/ardour/wscript @@ -417,8 +417,12 @@ def build(bld): # not the build host, which in turn can only be inferred from the name # of the compiler. if re.search ('/^x86_64/', str(bld.env['CC'])): - obj.source += [ 'sse_functions_xmm.cc', 'sse_functions_64bit_win.s' ] - + obj.source += [ 'sse_functions_xmm.cc', + 'sse_functions_avx.cc', + 'sse_functions_64bit_win.s', + 'sse_avx_functions_64bit_win.s', + ] + # i18n if bld.is_defined('ENABLE_NLS'): mo_files = bld.path.ant_glob('po/*.mo') diff --git a/libs/backends/wavesaudio/waves_audiobackend.cc b/libs/backends/wavesaudio/waves_audiobackend.cc index 5a8fac0a6a..7fd6da2f39 100644 --- a/libs/backends/wavesaudio/waves_audiobackend.cc +++ b/libs/backends/wavesaudio/waves_audiobackend.cc @@ -21,6 +21,8 @@ #include "waves_audioport.h" #include "waves_midiport.h" +#include "ardour/runtime_functions.h" + using namespace ARDOUR; #if defined __MINGW64__ || defined __MINGW32__ @@ -1170,13 +1172,12 @@ WavesAudioBackend::_read_audio_data_from_device (const float* input_buffer, pfra { #if defined(PLATFORM_WINDOWS) const float **buffer = (const float**)input_buffer; - size_t copied_bytes = nframes*sizeof(float); for(std::vector<WavesAudioPort*>::iterator it = _physical_audio_inputs.begin (); it != _physical_audio_inputs.end(); ++it) { - memcpy((*it)->buffer(), *buffer, copied_bytes); + ARDOUR::copy_vector ((*it)->buffer(), *buffer, nframes); ++buffer; } #else diff --git a/libs/backends/wavesaudio/waves_audioport.cc b/libs/backends/wavesaudio/waves_audioport.cc index 4ded37d906..1249f4d31e 100644 --- a/libs/backends/wavesaudio/waves_audioport.cc +++ b/libs/backends/wavesaudio/waves_audioport.cc @@ -35,20 +35,24 @@ void* WavesAudioPort::get_buffer (pframes_t nframes) std::vector<WavesDataPort*>::const_iterator it = get_connections ().begin (); if (it != get_connections ().end ()) { - /* In fact, the static casting to (const WavesAudioPort*) is not that safe. - * However, mixing the buffers is assumed in the time critical conditions. - * Base class WavesDataPort takes is supposed to provide enough consistentcy - * of the connections. - */ - for (memcpy (_buffer, ((const WavesAudioPort*)*it)->const_buffer (), nframes * sizeof (Sample)), ++it; - it != get_connections ().end (); - ++it) { - Sample* tgt = buffer (); - const Sample* src = ((const WavesAudioPort*)*it)->const_buffer (); - for (uint32_t frame = 0; frame < nframes; ++frame, ++tgt, ++src) { - *tgt += *src; - } - } + /* In fact, the static casting to (const WavesAudioPort*) is not that safe. + * However, mixing the buffers is assumed in the time critical conditions. + * Base class WavesDataPort takes is supposed to provide enough consistentcy + * of the connections. + */ + // get first buffer data + // use optimized function to fill the buffer intialy + ARDOUR::copy_vector (_buffer, ((const WavesAudioPort*)*it)->const_buffer (), nframes); + ++it; + + // mix the rest + for (; it != get_connections ().end (); ++it) { + Sample* tgt = buffer (); + const Sample* src = ((const WavesAudioPort*)*it)->const_buffer (); + for (uint32_t frame = 0; frame < nframes; ++frame, ++tgt, ++src) { + *tgt += *src; + } + } } } return _buffer; @@ -59,4 +63,4 @@ void WavesAudioPort::_wipe_buffer() { memset (_buffer, 0, sizeof (_buffer)); -}
\ No newline at end of file +} diff --git a/libs/pbd/pbd/fpu.h b/libs/pbd/pbd/fpu.h index 6627951e9f..260cf4db85 100644 --- a/libs/pbd/pbd/fpu.h +++ b/libs/pbd/pbd/fpu.h @@ -30,7 +30,8 @@ class LIBPBD_API FPU { HasFlushToZero = 0x1, HasDenormalsAreZero = 0x2, HasSSE = 0x4, - HasSSE2 = 0x8 + HasSSE2 = 0x8, + HasAVX = 0x10 }; public: @@ -41,6 +42,7 @@ class LIBPBD_API FPU { bool has_denormals_are_zero () const { return _flags & HasDenormalsAreZero; } bool has_sse () const { return _flags & HasSSE; } bool has_sse2 () const { return _flags & HasSSE2; } + bool has_avx () const { return _flags & HasAVX; } private: Flags _flags; diff --git a/libs/pbd/wscript b/libs/pbd/wscript index 8f947fbb26..27617adfa9 100644 --- a/libs/pbd/wscript +++ b/libs/pbd/wscript @@ -145,6 +145,7 @@ def build(bld): if bld.env['build_target'] == 'x86_64': obj.defines += [ 'USE_X86_64_ASM' ] if bld.env['build_target'] == 'mingw': + obj.defines += [ 'NO_POSIX_MEMALIGN' ] obj.source += [ 'windows_special_dirs.cc' ] obj.uselib += ' OLE' @@ -417,12 +417,12 @@ int main() { return 0; }''', if (re.search ("(x86_64|AMD64)", cpu) != None): # on Windows sse is supported by 64 bit platforms only build_host_supports_sse = True - + # mingw GCC compiler to uses at&t (Unix specific) assembler dialect by default # compiler_flags.append (["--mmnemonic=att", "msyntax=att") compiler_flags.extend ([ flags_dict['sse'], flags_dict['fpmath-sse'], flags_dict['xmmintrinsics'], flags_dict['attasm'] ]) - + # end of processor-specific section # optimization section |