summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libs/ardour/ardour/mix.h12
-rw-r--r--libs/ardour/ardour/runtime_functions.h6
-rw-r--r--libs/ardour/globals.cc20
-rw-r--r--libs/ardour/mix.cc6
-rw-r--r--libs/ardour/wscript8
-rw-r--r--libs/backends/wavesaudio/waves_audiobackend.cc5
-rw-r--r--libs/backends/wavesaudio/waves_audioport.cc34
-rw-r--r--libs/pbd/pbd/fpu.h4
-rw-r--r--libs/pbd/wscript1
-rw-r--r--wscript4
10 files changed, 75 insertions, 25 deletions
diff --git a/libs/ardour/ardour/mix.h b/libs/ardour/ardour/mix.h
index 3cd9a3e60f..2db444d02b 100644
--- a/libs/ardour/ardour/mix.h
+++ b/libs/ardour/ardour/mix.h
@@ -33,7 +33,17 @@ extern "C" {
LIBARDOUR_API void x86_sse_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
}
+extern "C" {
+/* AVX functions */
+ LIBARDOUR_API float x86_sse_avx_compute_peak (const ARDOUR::Sample * buf, ARDOUR::pframes_t nsamples, float current);
+ LIBARDOUR_API void x86_sse_avx_apply_gain_to_buffer (ARDOUR::Sample * buf, ARDOUR::pframes_t nframes, float gain);
+ LIBARDOUR_API void x86_sse_avx_mix_buffers_with_gain(ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes, float gain);
+ LIBARDOUR_API void x86_sse_avx_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
+ LIBARDOUR_API void x86_sse_avx_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
+}
+
LIBARDOUR_API void x86_sse_find_peaks (const ARDOUR::Sample * buf, ARDOUR::pframes_t nsamples, float *min, float *max);
+LIBARDOUR_API void x86_sse_avx_find_peaks (const ARDOUR::Sample * buf, ARDOUR::pframes_t nsamples, float *min, float *max);
/* debug wrappers for SSE functions */
@@ -41,6 +51,7 @@ LIBARDOUR_API float debug_compute_peak (const ARDOUR::Sample * buf
LIBARDOUR_API void debug_apply_gain_to_buffer (ARDOUR::Sample * buf, ARDOUR::pframes_t nframes, float gain);
LIBARDOUR_API void debug_mix_buffers_with_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes, float gain);
LIBARDOUR_API void debug_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
+LIBARDOUR_API void debug_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
#endif
@@ -61,5 +72,6 @@ LIBARDOUR_API void default_find_peaks (const ARDOUR::Sample * bu
LIBARDOUR_API void default_apply_gain_to_buffer (ARDOUR::Sample * buf, ARDOUR::pframes_t nframes, float gain);
LIBARDOUR_API void default_mix_buffers_with_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes, float gain);
LIBARDOUR_API void default_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
+LIBARDOUR_API void default_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
#endif /* __ardour_mix_h__ */
diff --git a/libs/ardour/ardour/runtime_functions.h b/libs/ardour/ardour/runtime_functions.h
index e1d6b99f61..45d6ec7015 100644
--- a/libs/ardour/ardour/runtime_functions.h
+++ b/libs/ardour/ardour/runtime_functions.h
@@ -25,17 +25,19 @@
namespace ARDOUR {
- typedef float (*compute_peak_t) (const ARDOUR::Sample *, pframes_t, float);
- typedef void (*find_peaks_t) (const ARDOUR::Sample *, pframes_t, float *, float*);
+ typedef float (*compute_peak_t) (const ARDOUR::Sample *, pframes_t, float);
+ typedef void (*find_peaks_t) (const ARDOUR::Sample *, pframes_t, float *, float*);
typedef void (*apply_gain_to_buffer_t) (ARDOUR::Sample *, pframes_t, float);
typedef void (*mix_buffers_with_gain_t) (ARDOUR::Sample *, const ARDOUR::Sample *, pframes_t, float);
typedef void (*mix_buffers_no_gain_t) (ARDOUR::Sample *, const ARDOUR::Sample *, pframes_t);
+ typedef void (*copy_vector_t) (ARDOUR::Sample *, const ARDOUR::Sample *, pframes_t);
LIBARDOUR_API extern compute_peak_t compute_peak;
LIBARDOUR_API extern find_peaks_t find_peaks;
LIBARDOUR_API extern apply_gain_to_buffer_t apply_gain_to_buffer;
LIBARDOUR_API extern mix_buffers_with_gain_t mix_buffers_with_gain;
LIBARDOUR_API extern mix_buffers_no_gain_t mix_buffers_no_gain;
+ LIBARDOUR_API extern copy_vector_t copy_vector;
}
#endif /* __ardour_runtime_functions_h__ */
diff --git a/libs/ardour/globals.cc b/libs/ardour/globals.cc
index 288e69dc9e..fa6f833d94 100644
--- a/libs/ardour/globals.cc
+++ b/libs/ardour/globals.cc
@@ -131,6 +131,7 @@ find_peaks_t ARDOUR::find_peaks = 0;
apply_gain_to_buffer_t ARDOUR::apply_gain_to_buffer = 0;
mix_buffers_with_gain_t ARDOUR::mix_buffers_with_gain = 0;
mix_buffers_no_gain_t ARDOUR::mix_buffers_no_gain = 0;
+copy_vector_t ARDOUR::copy_vector = 0;
PBD::Signal1<void,std::string> ARDOUR::BootMessage;
PBD::Signal3<void,std::string,std::string,bool> ARDOUR::PluginScanMessage;
@@ -160,7 +161,21 @@ setup_hardware_optimization (bool try_optimization)
#if defined (ARCH_X86) && defined (BUILD_SSE_OPTIMIZATIONS)
- if (fpu.has_sse()) {
+ if (fpu.has_avx()) {
+
+ info << "Using AVX optimized routines" << endmsg;
+
+ // AVX SET
+ compute_peak = x86_sse_avx_compute_peak;
+ find_peaks = x86_sse_avx_find_peaks;
+ apply_gain_to_buffer = x86_sse_avx_apply_gain_to_buffer;
+ mix_buffers_with_gain = x86_sse_avx_mix_buffers_with_gain;
+ mix_buffers_no_gain = x86_sse_avx_mix_buffers_no_gain;
+ copy_vector = x86_sse_avx_copy_vector;
+
+ generic_mix_functions = false;
+
+ } else if (fpu.has_sse()) {
info << "Using SSE optimized routines" << endmsg;
@@ -170,6 +185,7 @@ setup_hardware_optimization (bool try_optimization)
apply_gain_to_buffer = x86_sse_apply_gain_to_buffer;
mix_buffers_with_gain = x86_sse_mix_buffers_with_gain;
mix_buffers_no_gain = x86_sse_mix_buffers_no_gain;
+ copy_vector = default_copy_vector;
generic_mix_functions = false;
@@ -187,6 +203,7 @@ setup_hardware_optimization (bool try_optimization)
apply_gain_to_buffer = veclib_apply_gain_to_buffer;
mix_buffers_with_gain = veclib_mix_buffers_with_gain;
mix_buffers_no_gain = veclib_mix_buffers_no_gain;
+ copy_vector = default_copy_vector;
generic_mix_functions = false;
@@ -206,6 +223,7 @@ setup_hardware_optimization (bool try_optimization)
apply_gain_to_buffer = default_apply_gain_to_buffer;
mix_buffers_with_gain = default_mix_buffers_with_gain;
mix_buffers_no_gain = default_mix_buffers_no_gain;
+ copy_vector = default_copy_vector;
info << "No H/W specific optimizations in use" << endmsg;
}
diff --git a/libs/ardour/mix.cc b/libs/ardour/mix.cc
index adae68ae7f..96ae624487 100644
--- a/libs/ardour/mix.cc
+++ b/libs/ardour/mix.cc
@@ -136,6 +136,12 @@ default_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, p
}
}
+void
+default_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, pframes_t nframes)
+{
+ memcpy(dst, src, nframes*sizeof(ARDOUR::Sample));
+}
+
#if defined (__APPLE__) && defined (BUILD_VECLIB_OPTIMIZATIONS)
#include <Accelerate/Accelerate.h>
diff --git a/libs/ardour/wscript b/libs/ardour/wscript
index 115e12cbec..04b99785e5 100644
--- a/libs/ardour/wscript
+++ b/libs/ardour/wscript
@@ -417,8 +417,12 @@ def build(bld):
# not the build host, which in turn can only be inferred from the name
# of the compiler.
if re.search ('/^x86_64/', str(bld.env['CC'])):
- obj.source += [ 'sse_functions_xmm.cc', 'sse_functions_64bit_win.s' ]
-
+ obj.source += [ 'sse_functions_xmm.cc',
+ 'sse_functions_avx.cc',
+ 'sse_functions_64bit_win.s',
+ 'sse_avx_functions_64bit_win.s',
+ ]
+
# i18n
if bld.is_defined('ENABLE_NLS'):
mo_files = bld.path.ant_glob('po/*.mo')
diff --git a/libs/backends/wavesaudio/waves_audiobackend.cc b/libs/backends/wavesaudio/waves_audiobackend.cc
index 5a8fac0a6a..7fd6da2f39 100644
--- a/libs/backends/wavesaudio/waves_audiobackend.cc
+++ b/libs/backends/wavesaudio/waves_audiobackend.cc
@@ -21,6 +21,8 @@
#include "waves_audioport.h"
#include "waves_midiport.h"
+#include "ardour/runtime_functions.h"
+
using namespace ARDOUR;
#if defined __MINGW64__ || defined __MINGW32__
@@ -1170,13 +1172,12 @@ WavesAudioBackend::_read_audio_data_from_device (const float* input_buffer, pfra
{
#if defined(PLATFORM_WINDOWS)
const float **buffer = (const float**)input_buffer;
- size_t copied_bytes = nframes*sizeof(float);
for(std::vector<WavesAudioPort*>::iterator it = _physical_audio_inputs.begin ();
it != _physical_audio_inputs.end();
++it)
{
- memcpy((*it)->buffer(), *buffer, copied_bytes);
+ ARDOUR::copy_vector ((*it)->buffer(), *buffer, nframes);
++buffer;
}
#else
diff --git a/libs/backends/wavesaudio/waves_audioport.cc b/libs/backends/wavesaudio/waves_audioport.cc
index 4ded37d906..1249f4d31e 100644
--- a/libs/backends/wavesaudio/waves_audioport.cc
+++ b/libs/backends/wavesaudio/waves_audioport.cc
@@ -35,20 +35,24 @@ void* WavesAudioPort::get_buffer (pframes_t nframes)
std::vector<WavesDataPort*>::const_iterator it = get_connections ().begin ();
if (it != get_connections ().end ()) {
- /* In fact, the static casting to (const WavesAudioPort*) is not that safe.
- * However, mixing the buffers is assumed in the time critical conditions.
- * Base class WavesDataPort takes is supposed to provide enough consistentcy
- * of the connections.
- */
- for (memcpy (_buffer, ((const WavesAudioPort*)*it)->const_buffer (), nframes * sizeof (Sample)), ++it;
- it != get_connections ().end ();
- ++it) {
- Sample* tgt = buffer ();
- const Sample* src = ((const WavesAudioPort*)*it)->const_buffer ();
- for (uint32_t frame = 0; frame < nframes; ++frame, ++tgt, ++src) {
- *tgt += *src;
- }
- }
+ /* In fact, the static casting to (const WavesAudioPort*) is not that safe.
+ * However, mixing the buffers is assumed in the time critical conditions.
+ * Base class WavesDataPort takes is supposed to provide enough consistentcy
+ * of the connections.
+ */
+ // get first buffer data
+ // use optimized function to fill the buffer intialy
+ ARDOUR::copy_vector (_buffer, ((const WavesAudioPort*)*it)->const_buffer (), nframes);
+ ++it;
+
+ // mix the rest
+ for (; it != get_connections ().end (); ++it) {
+ Sample* tgt = buffer ();
+ const Sample* src = ((const WavesAudioPort*)*it)->const_buffer ();
+ for (uint32_t frame = 0; frame < nframes; ++frame, ++tgt, ++src) {
+ *tgt += *src;
+ }
+ }
}
}
return _buffer;
@@ -59,4 +63,4 @@ void
WavesAudioPort::_wipe_buffer()
{
memset (_buffer, 0, sizeof (_buffer));
-} \ No newline at end of file
+}
diff --git a/libs/pbd/pbd/fpu.h b/libs/pbd/pbd/fpu.h
index 6627951e9f..260cf4db85 100644
--- a/libs/pbd/pbd/fpu.h
+++ b/libs/pbd/pbd/fpu.h
@@ -30,7 +30,8 @@ class LIBPBD_API FPU {
HasFlushToZero = 0x1,
HasDenormalsAreZero = 0x2,
HasSSE = 0x4,
- HasSSE2 = 0x8
+ HasSSE2 = 0x8,
+ HasAVX = 0x10
};
public:
@@ -41,6 +42,7 @@ class LIBPBD_API FPU {
bool has_denormals_are_zero () const { return _flags & HasDenormalsAreZero; }
bool has_sse () const { return _flags & HasSSE; }
bool has_sse2 () const { return _flags & HasSSE2; }
+ bool has_avx () const { return _flags & HasAVX; }
private:
Flags _flags;
diff --git a/libs/pbd/wscript b/libs/pbd/wscript
index 8f947fbb26..27617adfa9 100644
--- a/libs/pbd/wscript
+++ b/libs/pbd/wscript
@@ -145,6 +145,7 @@ def build(bld):
if bld.env['build_target'] == 'x86_64':
obj.defines += [ 'USE_X86_64_ASM' ]
if bld.env['build_target'] == 'mingw':
+ obj.defines += [ 'NO_POSIX_MEMALIGN' ]
obj.source += [ 'windows_special_dirs.cc' ]
obj.uselib += ' OLE'
diff --git a/wscript b/wscript
index 84a8fb3e6c..e9c1da8f06 100644
--- a/wscript
+++ b/wscript
@@ -417,12 +417,12 @@ int main() { return 0; }''',
if (re.search ("(x86_64|AMD64)", cpu) != None):
# on Windows sse is supported by 64 bit platforms only
build_host_supports_sse = True
-
+
# mingw GCC compiler to uses at&t (Unix specific) assembler dialect by default
# compiler_flags.append (["--mmnemonic=att", "msyntax=att")
compiler_flags.extend ([ flags_dict['sse'], flags_dict['fpmath-sse'], flags_dict['xmmintrinsics'], flags_dict['attasm'] ])
-
+
# end of processor-specific section
# optimization section