summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSampo Savolainen <v2@iki.fi>2007-03-13 22:42:34 +0000
committerSampo Savolainen <v2@iki.fi>2007-03-13 22:42:34 +0000
commit75d2f51193f6fd25881a9c766db9078f3b68d80e (patch)
tree2fd7df7152970a134e291bf307f69158ac79bdf8
parent29f4d8b52c937b984dfba80cd912a20ee6b526ce (diff)
Added an xmmintrin.h-based SSE function find_peaks(). Needs polishing as
this commit breaks the build system for i386 builds with dynamic SSE enabled. git-svn-id: svn://localhost/ardour2/trunk@1586 d708f5d6-7413-0410-9779-e7cbd77b26cf
-rw-r--r--libs/ardour/SConscript3
-rw-r--r--libs/ardour/ardour/mix.h18
-rw-r--r--libs/ardour/ardour/session.h6
-rw-r--r--libs/ardour/globals.cc5
-rw-r--r--libs/ardour/mix.cc39
-rw-r--r--libs/ardour/session.cc3
-rw-r--r--libs/ardour/sse_functions_xmm.cc93
7 files changed, 156 insertions, 11 deletions
diff --git a/libs/ardour/SConscript b/libs/ardour/SConscript
index 3772246f11..47015f4d07 100644
--- a/libs/ardour/SConscript
+++ b/libs/ardour/SConscript
@@ -287,10 +287,13 @@ env['BUILDERS']['SharedAsmObject'] = Builder (action = '$CXX -c -fPIC $SOURCE -o
if env['FPU_OPTIMIZATION']:
if env['DIST_TARGET'] == "i386":
arch_specific_objects = env.SharedAsmObject('sse_functions.os', 'sse_functions.s')
+ ardour_files += ['sse_functions_xmm.cc']
if env['DIST_TARGET'] == "i686":
arch_specific_objects = env.SharedAsmObject('sse_functions.os', 'sse_functions.s')
+ ardour_files += ['sse_functions_xmm.cc']
if env['DIST_TARGET'] == "x86_64":
arch_specific_objects = env.SharedAsmObject('sse_functions_64bit.os', 'sse_functions_64bit.s')
+ ardour_files += ['sse_functions_xmm.cc']
libardour = ardour.SharedLibrary('ardour', ardour_files + extra_sources + arch_specific_objects)
diff --git a/libs/ardour/ardour/mix.h b/libs/ardour/ardour/mix.h
index 495ea74132..7515401a2a 100644
--- a/libs/ardour/ardour/mix.h
+++ b/libs/ardour/ardour/mix.h
@@ -27,7 +27,7 @@
extern "C" {
/* SSE functions */
- float x86_sse_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
+ float x86_sse_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
void x86_sse_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
@@ -36,9 +36,11 @@ extern "C" {
void x86_sse_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes);
}
+float x86_sse_find_peaks (ARDOUR::Sample *buf, nframes_t nsamples, float *min, float *max);
+
/* debug wrappers for SSE functions */
-float debug_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
+float debug_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
void debug_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
@@ -52,6 +54,8 @@ void debug_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nfra
float veclib_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
+float veclib_find_peaks (ARDOUR::Sample *buf, nframes_t nsamples, float *min, float *max);
+
void veclib_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
void veclib_mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain);
@@ -62,12 +66,14 @@ void veclib_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src
/* non-optimized functions */
-float compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
+float compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
+
+float find_peaks (ARDOUR::Sample *buf, nframes_t nsamples, float *min, float *max);
-void apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
+void apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
-void mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain);
+void mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain);
-void mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes);
+void mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes);
#endif /* __ardour_mix_h__ */
diff --git a/libs/ardour/ardour/session.h b/libs/ardour/ardour/session.h
index 3380fc03e0..7794dc95c8 100644
--- a/libs/ardour/ardour/session.h
+++ b/libs/ardour/ardour/session.h
@@ -905,12 +905,14 @@ class Session : public PBD::StatefulDestructible
void* ptr,
float opt);
- typedef float (*compute_peak_t) (Sample *, nframes_t, float);
+ typedef float (*compute_peak_t) (Sample *, nframes_t, float);
+ typedef float (*find_peaks_t) (Sample *, nframes_t, float *, float*);
typedef void (*apply_gain_to_buffer_t) (Sample *, nframes_t, float);
typedef void (*mix_buffers_with_gain_t) (Sample *, Sample *, nframes_t, float);
typedef void (*mix_buffers_no_gain_t) (Sample *, Sample *, nframes_t);
- static compute_peak_t compute_peak;
+ static compute_peak_t compute_peak;
+ static find_peaks_t find_peaks;
static apply_gain_to_buffer_t apply_gain_to_buffer;
static mix_buffers_with_gain_t mix_buffers_with_gain;
static mix_buffers_no_gain_t mix_buffers_no_gain;
diff --git a/libs/ardour/globals.cc b/libs/ardour/globals.cc
index 05b7f3e8f0..406f21832c 100644
--- a/libs/ardour/globals.cc
+++ b/libs/ardour/globals.cc
@@ -233,6 +233,7 @@ setup_hardware_optimization (bool try_optimization)
// SSE SET
Session::compute_peak = x86_sse_compute_peak;
+ Session::find_peaks = x86_sse_find_peaks;
Session::apply_gain_to_buffer = x86_sse_apply_gain_to_buffer;
Session::mix_buffers_with_gain = x86_sse_mix_buffers_with_gain;
Session::mix_buffers_no_gain = x86_sse_mix_buffers_no_gain;
@@ -249,6 +250,7 @@ setup_hardware_optimization (bool try_optimization)
if (sysVersion >= 0x00001040) { // Tiger at least
Session::compute_peak = veclib_compute_peak;
+ Session::find_peaks = veclib_find_peaks;
Session::apply_gain_to_buffer = veclib_apply_gain_to_buffer;
Session::mix_buffers_with_gain = veclib_mix_buffers_with_gain;
Session::mix_buffers_no_gain = veclib_mix_buffers_no_gain;
@@ -262,7 +264,8 @@ setup_hardware_optimization (bool try_optimization)
if (generic_mix_functions) {
- Session::compute_peak = compute_peak;
+ Session::compute_peak = compute_peak;
+ Session::find_peaks = find_peaks;
Session::apply_gain_to_buffer = apply_gain_to_buffer;
Session::mix_buffers_with_gain = mix_buffers_with_gain;
Session::mix_buffers_no_gain = mix_buffers_no_gain;
diff --git a/libs/ardour/mix.cc b/libs/ardour/mix.cc
index 63ccc8b7ea..e2096178dd 100644
--- a/libs/ardour/mix.cc
+++ b/libs/ardour/mix.cc
@@ -24,7 +24,6 @@
#include <stdint.h>
#if defined (ARCH_X86) && defined (BUILD_SSE_OPTIMIZATIONS)
-
// Debug wrappers
float
@@ -90,6 +89,25 @@ compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current)
return current;
}
+/**
+ * Generic (non-optimized) scan of a sample buffer for its extreme values.
+ *
+ * *min and *max are read as the starting extremes and are overwritten with
+ * the extremes of { *min, *max, buf[0] .. buf[nframes-1] }, so the same
+ * pointers can be passed again to accumulate over several buffers.
+ *
+ * @param buf      samples to scan
+ * @param nframes  number of samples in buf; with 0 the values of *min and
+ *                 *max are left as they were
+ * @param min      in/out: running minimum
+ * @param max      in/out: running maximum
+ * @return the updated maximum (the value also stored in *max). The function
+ *         is declared to return float (see Session::find_peaks_t), so it
+ *         must return something: flowing off the end of a non-void function
+ *         is undefined behaviour.
+ */
+float
+find_peaks (ARDOUR::Sample *buf, nframes_t nframes, float *min, float *max)
+{
+	float a = *max;
+	float b = *min;
+
+	// The counter has the same type as the bound, so the comparison never
+	// mixes differently-typed integers.
+	for (nframes_t i = 0; i < nframes; i++) {
+		a = fmax (buf[i], a);
+		b = fmin (buf[i], b);
+	}
+
+	*max = a;
+	*min = b;
+
+	return a;
+}
+
void
apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain)
{
@@ -124,6 +142,25 @@ veclib_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current)
return f_max(current, tmpmax);
}
+/**
+ * veclib slot for find_peaks(). Currently a plain scalar loop, not an
+ * accelerated implementation.
+ *
+ * *min and *max are read as the starting extremes and are overwritten with
+ * the extremes of { *min, *max, buf[0] .. buf[nframes-1] }.
+ *
+ * @param buf      samples to scan
+ * @param nframes  number of samples in buf; with 0 the values of *min and
+ *                 *max are left as they were
+ * @param min      in/out: running minimum
+ * @param max      in/out: running maximum
+ * @return the updated maximum (the value also stored in *max). The function
+ *         is declared to return float, so it must return something: flowing
+ *         off the end of a non-void function is undefined behaviour.
+ */
+float
+veclib_find_peaks (ARDOUR::Sample *buf, nframes_t nframes, float *min, float *max)
+{
+	// TODO: someone with veclib skills needs to write this one
+	float a = *max;
+	float b = *min;
+
+	// The counter has the same type as the bound, so the comparison never
+	// mixes differently-typed integers.
+	for (nframes_t i = 0; i < nframes; i++) {
+		a = fmax (buf[i], a);
+		b = fmin (buf[i], b);
+	}
+
+	*max = a;
+	*min = b;
+
+	return a;
+}
void
veclib_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain)
{
diff --git a/libs/ardour/session.cc b/libs/ardour/session.cc
index 8feee3e16b..d1531e4c96 100644
--- a/libs/ardour/session.cc
+++ b/libs/ardour/session.cc
@@ -88,7 +88,8 @@ const char* Session::dead_sound_dir_name = X_("dead_sounds");
const char* Session::interchange_dir_name = X_("interchange");
const char* Session::export_dir_name = X_("export");
-Session::compute_peak_t Session::compute_peak = 0;
+Session::compute_peak_t Session::compute_peak = 0;
+Session::find_peaks_t Session::find_peaks = 0;
Session::apply_gain_to_buffer_t Session::apply_gain_to_buffer = 0;
Session::mix_buffers_with_gain_t Session::mix_buffers_with_gain = 0;
Session::mix_buffers_no_gain_t Session::mix_buffers_no_gain = 0;
diff --git a/libs/ardour/sse_functions_xmm.cc b/libs/ardour/sse_functions_xmm.cc
new file mode 100644
index 0000000000..7b5ea143ec
--- /dev/null
+++ b/libs/ardour/sse_functions_xmm.cc
@@ -0,0 +1,93 @@
+/*
+ Copyright (C) 2007 Paul Davis
+ Written by Sampo Savolainen
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include <stdint.h>
+#include <xmmintrin.h>
+#include <ardour/types.h>
+
+/**
+ * SSE (xmmintrin.h) implementation of find_peaks().
+ *
+ * *min and *max are read as the starting extremes and are overwritten with
+ * the extremes of { *min, *max, buf[0] .. buf[nframes-1] }.
+ *
+ * The buffer is processed in three phases: single samples until buf is
+ * 16-byte aligned, then four samples at a time with aligned loads, then
+ * the remaining (< 4) samples one at a time.
+ *
+ * @param buf      samples to scan; any alignment is accepted
+ * @param nframes  number of samples in buf
+ * @param min      in/out: running minimum
+ * @param max      in/out: running maximum
+ * @return the updated maximum (the value also stored in *max). mix.h
+ *         declares this function as returning float and it is installed
+ *         as a Session::find_peaks_t, so the definition has to return
+ *         float as well rather than void.
+ */
+float
+x86_sse_find_peaks(float *buf, nframes_t nframes, float *min, float *max)
+{
+	__m128 current_max, current_min, work;
+
+	// Load max and min values into all four slots of the XMM registers
+	current_min = _mm_set1_ps(*min);
+	current_max = _mm_set1_ps(*max);
+
+	// Work input until "buf" reaches 16 byte alignment.
+	// uintptr_t is wide enough for a pointer on every target, which
+	// "unsigned long" is not (e.g. LLP64).
+	while (reinterpret_cast<uintptr_t>(buf) % 16 != 0 && nframes > 0) {
+
+		// Broadcast the next float into all four slots of the work register
+		work = _mm_set1_ps(*buf);
+
+		current_min = _mm_min_ps(current_min, work);
+		current_max = _mm_max_ps(current_max, work);
+
+		buf++;
+		nframes--;
+	}
+
+	// Work through aligned blocks of four samples; _mm_load_ps needs the
+	// 16 byte alignment established above.
+	while (nframes >= 4) {
+
+		work = _mm_load_ps(buf);
+
+		current_min = _mm_min_ps(current_min, work);
+		current_max = _mm_max_ps(current_max, work);
+
+		buf += 4;
+		nframes -= 4;
+	}
+
+	// Work through the rest (< 4 samples)
+	while (nframes > 0) {
+
+		// Broadcast the next float into all four slots of the work register
+		work = _mm_set1_ps(*buf);
+
+		current_min = _mm_min_ps(current_min, work);
+		current_max = _mm_max_ps(current_max, work);
+
+		buf++;
+		nframes--;
+	}
+
+	// Reduce the four slots of current_min to a single minimum, and the
+	// four slots of current_max to a single maximum, through shuffles:
+	// first combine neighbouring slots (0<->1, 2<->3), then combine the
+	// two halves (0,1 <-> 2,3). Afterwards every slot holds the result.
+
+	work = current_min;
+	work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(2, 3, 0, 1));
+	work = _mm_min_ps (work, current_min);
+	current_min = work;
+	work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
+	work = _mm_min_ps (work, current_min);
+
+	_mm_store_ss(min, work);
+
+	work = current_max;
+	work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(2, 3, 0, 1));
+	work = _mm_max_ps (work, current_max);
+	current_max = work;
+	work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
+	work = _mm_max_ps (work, current_max);
+
+	_mm_store_ss(max, work);
+
+	return *max;
+}
+
+
+