summary refs log tree commit diff
path: root/libs
diff options
context:
space:
mode:
author Paul Davis <paul@linuxaudiosystems.com> 2015-05-12 21:07:09 -0400
committer Paul Davis <paul@linuxaudiosystems.com> 2015-06-29 14:18:13 -0400
commit 6410aa896f974002f8539ee3ca2f70bf66c0a0af (patch)
tree 7ef2a62226a6ac5927c563efe21ea6d1ecc0da72 /libs
parent e2a76746e65c42fd10a892ffd82300f1cf776ac6 (diff)
Added optimized AVX function for sample processing
Added AVX versions of the 5 existing SSE functions. Added a 6th AVX function to copy vectors, which is 1.5 times faster than memcpy. Data consistency and validity were fully tested after processing with the new AVX functions on aligned and non-aligned buffers.
Diffstat (limited to 'libs')
-rw-r--r-- libs/ardour/ardour/mix.h 12
-rw-r--r-- libs/ardour/ardour/runtime_functions.h 6
-rw-r--r-- libs/ardour/globals.cc 20
-rw-r--r-- libs/ardour/mix.cc 6
-rw-r--r-- libs/ardour/wscript 8
-rw-r--r-- libs/backends/wavesaudio/waves_audiobackend.cc 5
-rw-r--r-- libs/backends/wavesaudio/waves_audioport.cc 34
-rw-r--r-- libs/pbd/pbd/fpu.h 4
-rw-r--r-- libs/pbd/wscript 1
9 files changed, 73 insertions, 23 deletions
diff --git a/libs/ardour/ardour/mix.h b/libs/ardour/ardour/mix.h
index 3cd9a3e60f..2db444d02b 100644
--- a/libs/ardour/ardour/mix.h
+++ b/libs/ardour/ardour/mix.h
@@ -33,7 +33,17 @@ extern "C" {
LIBARDOUR_API void x86_sse_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
}
+extern "C" {
+/* AVX functions */
+ LIBARDOUR_API float x86_sse_avx_compute_peak (const ARDOUR::Sample * buf, ARDOUR::pframes_t nsamples, float current);
+ LIBARDOUR_API void x86_sse_avx_apply_gain_to_buffer (ARDOUR::Sample * buf, ARDOUR::pframes_t nframes, float gain);
+ LIBARDOUR_API void x86_sse_avx_mix_buffers_with_gain(ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes, float gain);
+ LIBARDOUR_API void x86_sse_avx_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
+ LIBARDOUR_API void x86_sse_avx_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
+}
+
LIBARDOUR_API void x86_sse_find_peaks (const ARDOUR::Sample * buf, ARDOUR::pframes_t nsamples, float *min, float *max);
+LIBARDOUR_API void x86_sse_avx_find_peaks (const ARDOUR::Sample * buf, ARDOUR::pframes_t nsamples, float *min, float *max);
/* debug wrappers for SSE functions */
@@ -41,6 +51,7 @@ LIBARDOUR_API float debug_compute_peak (const ARDOUR::Sample * buf
LIBARDOUR_API void debug_apply_gain_to_buffer (ARDOUR::Sample * buf, ARDOUR::pframes_t nframes, float gain);
LIBARDOUR_API void debug_mix_buffers_with_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes, float gain);
LIBARDOUR_API void debug_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
+LIBARDOUR_API void debug_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
#endif
@@ -61,5 +72,6 @@ LIBARDOUR_API void default_find_peaks (const ARDOUR::Sample * bu
LIBARDOUR_API void default_apply_gain_to_buffer (ARDOUR::Sample * buf, ARDOUR::pframes_t nframes, float gain);
LIBARDOUR_API void default_mix_buffers_with_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes, float gain);
LIBARDOUR_API void default_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
+LIBARDOUR_API void default_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, ARDOUR::pframes_t nframes);
#endif /* __ardour_mix_h__ */
diff --git a/libs/ardour/ardour/runtime_functions.h b/libs/ardour/ardour/runtime_functions.h
index e1d6b99f61..45d6ec7015 100644
--- a/libs/ardour/ardour/runtime_functions.h
+++ b/libs/ardour/ardour/runtime_functions.h
@@ -25,17 +25,19 @@
namespace ARDOUR {
- typedef float (*compute_peak_t) (const ARDOUR::Sample *, pframes_t, float);
- typedef void (*find_peaks_t) (const ARDOUR::Sample *, pframes_t, float *, float*);
+ typedef float (*compute_peak_t) (const ARDOUR::Sample *, pframes_t, float);
+ typedef void (*find_peaks_t) (const ARDOUR::Sample *, pframes_t, float *, float*);
typedef void (*apply_gain_to_buffer_t) (ARDOUR::Sample *, pframes_t, float);
typedef void (*mix_buffers_with_gain_t) (ARDOUR::Sample *, const ARDOUR::Sample *, pframes_t, float);
typedef void (*mix_buffers_no_gain_t) (ARDOUR::Sample *, const ARDOUR::Sample *, pframes_t);
+ typedef void (*copy_vector_t) (ARDOUR::Sample *, const ARDOUR::Sample *, pframes_t);
LIBARDOUR_API extern compute_peak_t compute_peak;
LIBARDOUR_API extern find_peaks_t find_peaks;
LIBARDOUR_API extern apply_gain_to_buffer_t apply_gain_to_buffer;
LIBARDOUR_API extern mix_buffers_with_gain_t mix_buffers_with_gain;
LIBARDOUR_API extern mix_buffers_no_gain_t mix_buffers_no_gain;
+ LIBARDOUR_API extern copy_vector_t copy_vector;
}
#endif /* __ardour_runtime_functions_h__ */
diff --git a/libs/ardour/globals.cc b/libs/ardour/globals.cc
index 288e69dc9e..fa6f833d94 100644
--- a/libs/ardour/globals.cc
+++ b/libs/ardour/globals.cc
@@ -131,6 +131,7 @@ find_peaks_t ARDOUR::find_peaks = 0;
apply_gain_to_buffer_t ARDOUR::apply_gain_to_buffer = 0;
mix_buffers_with_gain_t ARDOUR::mix_buffers_with_gain = 0;
mix_buffers_no_gain_t ARDOUR::mix_buffers_no_gain = 0;
+copy_vector_t ARDOUR::copy_vector = 0;
PBD::Signal1<void,std::string> ARDOUR::BootMessage;
PBD::Signal3<void,std::string,std::string,bool> ARDOUR::PluginScanMessage;
@@ -160,7 +161,21 @@ setup_hardware_optimization (bool try_optimization)
#if defined (ARCH_X86) && defined (BUILD_SSE_OPTIMIZATIONS)
- if (fpu.has_sse()) {
+ if (fpu.has_avx()) {
+
+ info << "Using AVX optimized routines" << endmsg;
+
+ // AVX SET
+ compute_peak = x86_sse_avx_compute_peak;
+ find_peaks = x86_sse_avx_find_peaks;
+ apply_gain_to_buffer = x86_sse_avx_apply_gain_to_buffer;
+ mix_buffers_with_gain = x86_sse_avx_mix_buffers_with_gain;
+ mix_buffers_no_gain = x86_sse_avx_mix_buffers_no_gain;
+ copy_vector = x86_sse_avx_copy_vector;
+
+ generic_mix_functions = false;
+
+ } else if (fpu.has_sse()) {
info << "Using SSE optimized routines" << endmsg;
@@ -170,6 +185,7 @@ setup_hardware_optimization (bool try_optimization)
apply_gain_to_buffer = x86_sse_apply_gain_to_buffer;
mix_buffers_with_gain = x86_sse_mix_buffers_with_gain;
mix_buffers_no_gain = x86_sse_mix_buffers_no_gain;
+ copy_vector = default_copy_vector;
generic_mix_functions = false;
@@ -187,6 +203,7 @@ setup_hardware_optimization (bool try_optimization)
apply_gain_to_buffer = veclib_apply_gain_to_buffer;
mix_buffers_with_gain = veclib_mix_buffers_with_gain;
mix_buffers_no_gain = veclib_mix_buffers_no_gain;
+ copy_vector = default_copy_vector;
generic_mix_functions = false;
@@ -206,6 +223,7 @@ setup_hardware_optimization (bool try_optimization)
apply_gain_to_buffer = default_apply_gain_to_buffer;
mix_buffers_with_gain = default_mix_buffers_with_gain;
mix_buffers_no_gain = default_mix_buffers_no_gain;
+ copy_vector = default_copy_vector;
info << "No H/W specific optimizations in use" << endmsg;
}
diff --git a/libs/ardour/mix.cc b/libs/ardour/mix.cc
index adae68ae7f..96ae624487 100644
--- a/libs/ardour/mix.cc
+++ b/libs/ardour/mix.cc
@@ -136,6 +136,12 @@ default_mix_buffers_no_gain (ARDOUR::Sample * dst, const ARDOUR::Sample * src, p
}
}
+void
+default_copy_vector (ARDOUR::Sample * dst, const ARDOUR::Sample * src, pframes_t nframes)
+{
+ memcpy(dst, src, nframes*sizeof(ARDOUR::Sample));
+}
+
#if defined (__APPLE__) && defined (BUILD_VECLIB_OPTIMIZATIONS)
#include <Accelerate/Accelerate.h>
diff --git a/libs/ardour/wscript b/libs/ardour/wscript
index 115e12cbec..04b99785e5 100644
--- a/libs/ardour/wscript
+++ b/libs/ardour/wscript
@@ -417,8 +417,12 @@ def build(bld):
# not the build host, which in turn can only be inferred from the name
# of the compiler.
if re.search ('/^x86_64/', str(bld.env['CC'])):
- obj.source += [ 'sse_functions_xmm.cc', 'sse_functions_64bit_win.s' ]
-
+ obj.source += [ 'sse_functions_xmm.cc',
+ 'sse_functions_avx.cc',
+ 'sse_functions_64bit_win.s',
+ 'sse_avx_functions_64bit_win.s',
+ ]
+
# i18n
if bld.is_defined('ENABLE_NLS'):
mo_files = bld.path.ant_glob('po/*.mo')
diff --git a/libs/backends/wavesaudio/waves_audiobackend.cc b/libs/backends/wavesaudio/waves_audiobackend.cc
index 5a8fac0a6a..7fd6da2f39 100644
--- a/libs/backends/wavesaudio/waves_audiobackend.cc
+++ b/libs/backends/wavesaudio/waves_audiobackend.cc
@@ -21,6 +21,8 @@
#include "waves_audioport.h"
#include "waves_midiport.h"
+#include "ardour/runtime_functions.h"
+
using namespace ARDOUR;
#if defined __MINGW64__ || defined __MINGW32__
@@ -1170,13 +1172,12 @@ WavesAudioBackend::_read_audio_data_from_device (const float* input_buffer, pfra
{
#if defined(PLATFORM_WINDOWS)
const float **buffer = (const float**)input_buffer;
- size_t copied_bytes = nframes*sizeof(float);
for(std::vector<WavesAudioPort*>::iterator it = _physical_audio_inputs.begin ();
it != _physical_audio_inputs.end();
++it)
{
- memcpy((*it)->buffer(), *buffer, copied_bytes);
+ ARDOUR::copy_vector ((*it)->buffer(), *buffer, nframes);
++buffer;
}
#else
diff --git a/libs/backends/wavesaudio/waves_audioport.cc b/libs/backends/wavesaudio/waves_audioport.cc
index 4ded37d906..1249f4d31e 100644
--- a/libs/backends/wavesaudio/waves_audioport.cc
+++ b/libs/backends/wavesaudio/waves_audioport.cc
@@ -35,20 +35,24 @@ void* WavesAudioPort::get_buffer (pframes_t nframes)
std::vector<WavesDataPort*>::const_iterator it = get_connections ().begin ();
if (it != get_connections ().end ()) {
- /* In fact, the static casting to (const WavesAudioPort*) is not that safe.
- * However, mixing the buffers is assumed in the time critical conditions.
- * Base class WavesDataPort takes is supposed to provide enough consistentcy
- * of the connections.
- */
- for (memcpy (_buffer, ((const WavesAudioPort*)*it)->const_buffer (), nframes * sizeof (Sample)), ++it;
- it != get_connections ().end ();
- ++it) {
- Sample* tgt = buffer ();
- const Sample* src = ((const WavesAudioPort*)*it)->const_buffer ();
- for (uint32_t frame = 0; frame < nframes; ++frame, ++tgt, ++src) {
- *tgt += *src;
- }
- }
+ /* In fact, the static casting to (const WavesAudioPort*) is not that safe.
+ * However, mixing the buffers is assumed in the time critical conditions.
+ * Base class WavesDataPort is supposed to provide enough consistency
+ * of the connections.
+ */
+ // get first buffer data
+ // use optimized function to fill the buffer initially
+ ARDOUR::copy_vector (_buffer, ((const WavesAudioPort*)*it)->const_buffer (), nframes);
+ ++it;
+
+ // mix the rest
+ for (; it != get_connections ().end (); ++it) {
+ Sample* tgt = buffer ();
+ const Sample* src = ((const WavesAudioPort*)*it)->const_buffer ();
+ for (uint32_t frame = 0; frame < nframes; ++frame, ++tgt, ++src) {
+ *tgt += *src;
+ }
+ }
}
}
return _buffer;
@@ -59,4 +63,4 @@ void
WavesAudioPort::_wipe_buffer()
{
memset (_buffer, 0, sizeof (_buffer));
-} \ No newline at end of file
+}
diff --git a/libs/pbd/pbd/fpu.h b/libs/pbd/pbd/fpu.h
index 6627951e9f..260cf4db85 100644
--- a/libs/pbd/pbd/fpu.h
+++ b/libs/pbd/pbd/fpu.h
@@ -30,7 +30,8 @@ class LIBPBD_API FPU {
HasFlushToZero = 0x1,
HasDenormalsAreZero = 0x2,
HasSSE = 0x4,
- HasSSE2 = 0x8
+ HasSSE2 = 0x8,
+ HasAVX = 0x10
};
public:
@@ -41,6 +42,7 @@ class LIBPBD_API FPU {
bool has_denormals_are_zero () const { return _flags & HasDenormalsAreZero; }
bool has_sse () const { return _flags & HasSSE; }
bool has_sse2 () const { return _flags & HasSSE2; }
+ bool has_avx () const { return _flags & HasAVX; }
private:
Flags _flags;
diff --git a/libs/pbd/wscript b/libs/pbd/wscript
index 8f947fbb26..27617adfa9 100644
--- a/libs/pbd/wscript
+++ b/libs/pbd/wscript
@@ -145,6 +145,7 @@ def build(bld):
if bld.env['build_target'] == 'x86_64':
obj.defines += [ 'USE_X86_64_ASM' ]
if bld.env['build_target'] == 'mingw':
+ obj.defines += [ 'NO_POSIX_MEMALIGN' ]
obj.source += [ 'windows_special_dirs.cc' ]
obj.uselib += ' OLE'