From 52c8e00eddc6b943e03838ddeafa7d516bdc5205 Mon Sep 17 00:00:00 2001 From: Sampo Savolainen Date: Thu, 15 Mar 2007 19:54:18 +0000 Subject: Add Mike's quad-quad + prefetch optimization. git-svn-id: svn://localhost/ardour2/trunk@1597 d708f5d6-7413-0410-9779-e7cbd77b26cf --- libs/ardour/sse_functions_xmm.cc | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'libs/ardour/sse_functions_xmm.cc') diff --git a/libs/ardour/sse_functions_xmm.cc b/libs/ardour/sse_functions_xmm.cc index d4330eb37f..5554462132 100644 --- a/libs/ardour/sse_functions_xmm.cc +++ b/libs/ardour/sse_functions_xmm.cc @@ -43,6 +43,29 @@ x86_sse_find_peaks(float *buf, nframes_t nframes, float *min, float *max) nframes--; } + // use 64 byte prefetch for quadruple quads + while (nframes >= 16) { + __builtin_prefetch(buf+64,0,0); + + work = _mm_load_ps(buf); + current_min = _mm_min_ps(current_min, work); + current_max = _mm_max_ps(current_max, work); + buf+=4; + work = _mm_load_ps(buf); + current_min = _mm_min_ps(current_min, work); + current_max = _mm_max_ps(current_max, work); + buf+=4; + work = _mm_load_ps(buf); + current_min = _mm_min_ps(current_min, work); + current_max = _mm_max_ps(current_max, work); + buf+=4; + work = _mm_load_ps(buf); + current_min = _mm_min_ps(current_min, work); + current_max = _mm_max_ps(current_max, work); + buf+=4; + nframes-=16; + } + // work through aligned buffers while (nframes >= 4) { -- cgit v1.2.3