From 9c0d7d72d70082a54f823cd44c0ccda5da64bb6f Mon Sep 17 00:00:00 2001
From: Doug McLain
Date: Mon, 2 Jun 2008 05:02:28 +0000
Subject: remove empty sigc++2 directory

git-svn-id: svn://localhost/ardour2/branches/3.0@3432 d708f5d6-7413-0410-9779-e7cbd77b26cf
---
 libs/ardour/sse_functions_64bit.s | 609 --------------------------------------
 1 file changed, 609 deletions(-)
 delete mode 100644 libs/ardour/sse_functions_64bit.s

(limited to 'libs/ardour/sse_functions_64bit.s')

diff --git a/libs/ardour/sse_functions_64bit.s b/libs/ardour/sse_functions_64bit.s
deleted file mode 100644
index 0242db3e77..0000000000
--- a/libs/ardour/sse_functions_64bit.s
+++ /dev/null
@@ -1,609 +0,0 @@
-/*
-    Copyright (C) 2005-2006 Paul Davis, John Rigg
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-    Author: Sampo Savolainen
-    64-bit conversion: John Rigg
-
-    $Id$
-*/
-
-
-#; void x86_sse_mix_buffers_with_gain (float *dst, float *src, unsigned int nframes, float gain);
-
-.globl x86_sse_mix_buffers_with_gain
-        .type x86_sse_mix_buffers_with_gain,@function
-
-x86_sse_mix_buffers_with_gain:
-
-#; %rdi float *dst
-#; %rsi float *src
-#; %rdx unsigned int nframes
-#; %xmm0 float gain
-
-        pushq %rbp
-        movq %rsp, %rbp
-
-        #; save the registers
-        pushq %rbx
-        pushq %rdi
-        pushq %rsi
-
-        #; if nframes == 0, go to end
-        cmp $0, %rdx
-        je .MBWG_END
-
-        #; Check for alignment
-
-        movq %rdi, %rax
-        andq $12, %rax #; mask alignment offset
-
-        movq %rsi, %rbx
-        andq $12, %rbx #; mask alignment offset
-
-        cmp %rax, %rbx
-        jne .MBWG_NONALIGN #; if not aligned, calculate manually
-
-        #; if we are aligned
-        cmp $0, %rbx
-        jz .MBWG_SSE
-
-        #; Pre-loop, we need to run 1-3 frames "manually" without
-        #; SSE instructions
-
-.MBWG_PRELOOP:
-
-        #; gain is already in %xmm0
-        movss (%rsi), %xmm1
-        mulss %xmm0, %xmm1
-        addss (%rdi), %xmm1
-        movss %xmm1, (%rdi)
-
-        addq $4, %rdi #; dst++
-        addq $4, %rsi #; src++
-        decq %rdx #; nframes--
-        jz .MBWG_END
-
-        addq $4, %rbx
-
-        cmp $16, %rbx #; test if we've reached 16 byte alignment
-        jne .MBWG_PRELOOP
-
-
-.MBWG_SSE:
-
-        cmp $4, %rdx #; we know it's not zero, but if it's not >=4, then
-        jnge .MBWG_NONALIGN #; we jump straight to the "normal" code
-
-        #; gain is already in %xmm0
-        shufps $0x00, %xmm0, %xmm0
-
-
-.MBWG_SSELOOP:
-
-        movaps (%rsi), %xmm1 #; source => xmm1
-        mulps %xmm0, %xmm1 #; apply gain to source
-        addps (%rdi), %xmm1 #; mix with destination
-        movaps %xmm1, (%rdi) #; copy result to destination
-
-        addq $16, %rdi #; dst+=4
-        addq $16, %rsi #; src+=4
-
-        subq $4, %rdx #; nframes-=4
-        cmp $4, %rdx
-        jge .MBWG_SSELOOP
-
-        cmp $0, %rdx
-        je .MBWG_END
-
-        #; if there are remaining frames, the nonalign code will do nicely
-        #; for the remaining 1-3 frames.
-
-.MBWG_NONALIGN:
-        #; not aligned!
-
-        #; gain is already in %xmm0
-
-.MBWG_NONALIGNLOOP:
-
-        movss (%rsi), %xmm1
-        mulss %xmm0, %xmm1
-        addss (%rdi), %xmm1
-        movss %xmm1, (%rdi)
-
-        addq $4, %rdi
-        addq $4, %rsi
-
-        decq %rdx
-        jnz .MBWG_NONALIGNLOOP
-
-.MBWG_END:
-
-        popq %rsi
-        popq %rdi
-        popq %rbx
-
-        #; return
-        leave
-        ret
-
-.size x86_sse_mix_buffers_with_gain, .-x86_sse_mix_buffers_with_gain
-
-
-#; void x86_sse_mix_buffers_no_gain (float *dst, float *src, unsigned int nframes);
-
-.globl x86_sse_mix_buffers_no_gain
-        .type x86_sse_mix_buffers_no_gain,@function
-
-x86_sse_mix_buffers_no_gain:
-
-#; %rdi float *dst
-#; %rsi float *src
-#; %rdx unsigned int nframes
-
-        pushq %rbp
-        movq %rsp, %rbp
-
-        #; save the registers
-        pushq %rbx
-        pushq %rdi
-        pushq %rsi
-
-        #; the real function
-
-        #; if nframes == 0, go to end
-        cmp $0, %rdx
-        je .MBNG_END
-
-        #; Check for alignment
-
-        movq %rdi, %rax
-        andq $12, %rax #; mask alignment offset
-
-        movq %rsi, %rbx
-        andq $12, %rbx #; mask alignment offset
-
-        cmp %rax, %rbx
-        jne .MBNG_NONALIGN #; if not aligned, calculate manually
-
-        cmp $0, %rbx
-        je .MBNG_SSE
-
-        #; Pre-loop, we need to run 1-3 frames "manually" without
-        #; SSE instructions
-
-.MBNG_PRELOOP:
-
-        movss (%rsi), %xmm0
-        addss (%rdi), %xmm0
-        movss %xmm0, (%rdi)
-
-        addq $4, %rdi #; dst++
-        addq $4, %rsi #; src++
-        decq %rdx #; nframes--
-        jz .MBNG_END
-        addq $4, %rbx
-
-        cmp $16, %rbx #; test if we've reached 16 byte alignment
-        jne .MBNG_PRELOOP
-
-.MBNG_SSE:
-
-        cmp $4, %rdx #; if there are frames left, but less than 4
-        jnge .MBNG_NONALIGN #; we can't run SSE
-
-.MBNG_SSELOOP:
-
-        movaps (%rsi), %xmm0 #; source => xmm0
-        addps (%rdi), %xmm0 #; mix with destination
-        movaps %xmm0, (%rdi) #; copy result to destination
-
-        addq $16, %rdi #; dst+=4
-        addq $16, %rsi #; src+=4
-
-        subq $4, %rdx #; nframes-=4
-        cmp $4, %rdx
-        jge .MBNG_SSELOOP
-
-        cmp $0, %rdx
-        je .MBNG_END
-
-        #; if there are remaining frames, the nonalign code will do nicely
-        #; for the remaining 1-3 frames.
-
-.MBNG_NONALIGN:
-        #; not aligned!
-
-        movss (%rsi), %xmm0 #; src => xmm0
-        addss (%rdi), %xmm0 #; xmm0 += dst
-        movss %xmm0, (%rdi) #; xmm0 => dst
-
-        addq $4, %rdi
-        addq $4, %rsi
-
-        decq %rdx
-        jnz .MBNG_NONALIGN
-
-.MBNG_END:
-
-        popq %rsi
-        popq %rdi
-        popq %rbx
-
-        #; return
-        leave
-        ret
-
-.size x86_sse_mix_buffers_no_gain, .-x86_sse_mix_buffers_no_gain
-
-
-#; void x86_sse_apply_gain_to_buffer (float *buf, unsigned int nframes, float gain);
-
-.globl x86_sse_apply_gain_to_buffer
-        .type x86_sse_apply_gain_to_buffer,@function
-
-x86_sse_apply_gain_to_buffer:
-
-#; %rdi float *buf
-#; %rsi unsigned int nframes
-#; %xmm0 float gain
-#; %xmm1 float buf[0]
-
-        pushq %rbp
-        movq %rsp, %rbp
-
-        #; save %rdi
-        pushq %rdi
-
-        #; the real function
-
-        #; if nframes == 0, go to end
-        movq %rsi, %rcx #; nframes
-        cmp $0, %rcx
-        je .AG_END
-
-        #; set up the gain buffer (gain is already in %xmm0)
-        shufps $0x00, %xmm0, %xmm0
-
-        #; Check for alignment
-
-        movq %rdi, %rdx #; buf => %rdx
-        andq $12, %rdx #; mask bits 1 & 2, result = 0, 4, 8 or 12
-        jz .AG_SSE #; if buffer IS aligned
-
-        #; PRE-LOOP
-        #; we iterate 1-3 times, doing normal x87 float comparison
-        #; so we reach a 16 byte aligned "buf" (=%rdi) value
-
-.AGLP_START:
-
-        #; Load next value from the buffer into %xmm1
-        movss (%rdi), %xmm1
-        mulss %xmm0, %xmm1
-        movss %xmm1, (%rdi)
-
-        #; increment buffer, decrement counter
-        addq $4, %rdi #; buf++;
-
-        decq %rcx #; nframes--
-        jz .AG_END #; if we run out of frames, we go to the end
-
-        addq $4, %rdx #; one non-aligned byte less
-        cmp $16, %rdx
-        jne .AGLP_START #; if more non-aligned frames exist, we do a do-over
-
-.AG_SSE:
-
-        #; We have reached the 16 byte aligned "buf" ("rdi") value
-
-        #; Figure out how many loops we should do
-        movq %rcx, %rax #; copy remaining nframes to %rax for division
-        movq $0, %rdx #; zero the %rdx register
-
-
-        pushq %rdi
-        movq $4, %rdi
-        divq %rdi #; %rdx = remainder == 0
-        popq %rdi
-
-        #; %rax = SSE iterations
-        cmp $0, %rax
-        je .AGPOST_START
-
-
-.AGLP_SSE:
-
-        movaps (%rdi), %xmm1
-        mulps %xmm0, %xmm1
-        movaps %xmm1, (%rdi)
-
-        addq $16, %rdi
-        subq $4, %rcx #; nframes-=4
-
-        decq %rax
-        jnz .AGLP_SSE
-
-        #; Next we need to post-process all remaining frames
-        #; the remaining frame count is in %rcx
-
-        #; if no remaining frames, jump to the end
-        cmp $0, %rcx
-        andq $3, %rcx #; nframes % 4
-        je .AG_END
-
-.AGPOST_START:
-
-        movss (%rdi), %xmm1
-        mulss %xmm0, %xmm1
-        movss %xmm1, (%rdi)
-
-        #; increment buffer, decrement counter
-        addq $4, %rdi #; buf++;
-
-        decq %rcx #; nframes--
-        jnz .AGPOST_START #; if we run out of frames, we go to the end
-
-.AG_END:
-
-
-        popq %rdi
-
-        #; return
-        leave
-        ret
-
-.size x86_sse_apply_gain_to_buffer, .-x86_sse_apply_gain_to_buffer
-#; end proc
-
-
-#; x86_sse_apply_gain_vector(float *buf, float *gain_vector, unsigned int nframes)
-
-.globl x86_sse_apply_gain_vector
-        .type x86_sse_apply_gain_vector,@function
-
-x86_sse_apply_gain_vector:
-
-#; %rdi float *buf
-#; %rsi float *gain_vector
-#; %rdx unsigned int nframes
-
-        pushq %rbp
-        movq %rsp, %rbp
-
-        #; Save registers
-        pushq %rdi
-        pushq %rsi
-        pushq %rbx
-
-        #; if nframes == 0 go to end
-        cmp $0, %rdx
-        je .AGA_END
-
-        #; Check alignment
-        movq %rdi, %rax
-        andq $12, %rax
-
-        movq %rsi, %rbx
-        andq $12, %rbx
-
-        cmp %rax,%rbx
-        jne .AGA_ENDLOOP
-
-        cmp $0, %rax
-        jz .AGA_SSE #; if buffers are aligned, jump to the SSE loop
-
-#; Buffers aren't 16 byte aligned, but they are unaligned by the same amount
-.AGA_ALIGNLOOP:
-
-        movss (%rdi), %xmm0 #; buf => xmm0
-        movss (%rsi), %xmm1 #; gain value => xmm1
-        mulss %xmm1, %xmm0 #; xmm1 * xmm0 => xmm0
-        movss %xmm0, (%rdi) #; signal with gain => buf
-
-        decq %rdx
-        jz .AGA_END
-
-        addq $4, %rdi #; buf++
-        addq $4, %rsi #; gain++
-
-        addq $4, %rax
-        cmp $16, %rax
-        jne .AGA_ALIGNLOOP
-
-#; There are frames left for sure, as that is checked in the beginning
-#; and within the previous loop. BUT, there might be less than 4 frames
-#; to process
-
-.AGA_SSE:
-        movq %rdx, %rax #; nframes => %rax
-        shr $2, %rax #; unsigned divide by 4
-
-        cmp $0, %rax #; if it works without this, all the better
-        je .AGA_ENDLOOP
-
-.AGA_SSELOOP:
-        movaps (%rdi), %xmm0
-        movaps (%rsi), %xmm1
-        mulps %xmm1, %xmm0
-        movaps %xmm0, (%rdi)
-
-        addq $16, %rdi
-        addq $16, %rsi
-
-        decq %rax
-        jnz .AGA_SSELOOP
-
-        andq $3, %rdx #; Remaining frames are nframes & 3
-        jz .AGA_END
-
-
-#; Inside this loop, we know there are frames left to process
-#; but because either there are < 4 frames left, or the buffers
-#; are not aligned, we can't use the parallel SSE ops
-.AGA_ENDLOOP:
-        movss (%rdi), %xmm0 #; buf => xmm0
-        movss (%rsi), %xmm1 #; gain value => xmm1
-        mulss %xmm1, %xmm0 #; xmm1 * xmm0 => xmm0
-        movss %xmm0, (%rdi) #; signal with gain => buf
-
-        addq $4,%rdi
-        addq $4,%rsi
-        decq %rdx #; nframes--
-        jnz .AGA_ENDLOOP
-
-.AGA_END:
-
-        popq %rbx
-        popq %rsi
-        popq %rdi
-
-        leave
-        ret
-
-.size x86_sse_apply_gain_vector, .-x86_sse_apply_gain_vector
-#; end proc
-
-
-#; float x86_sse_compute_peak(float *buf, long nframes, float current);
-
-.globl x86_sse_compute_peak
-        .type x86_sse_compute_peak,@function
-
-
-x86_sse_compute_peak:
-
-#; %rdi float *buf
-#; %rsi unsigned int nframes
-#; %xmm0 float current
-#; %xmm1 float buf[0]
-
-        pushq %rbp
-        movq %rsp, %rbp
-
-        #; save %rdi
-        pushq %rdi
-
-        #; if nframes == 0, go to end
-        movq %rsi, %rcx #; nframes
-        cmp $0, %rcx
-        je .CP_END
-
-        #; create the "abs" mask in %xmm2
-        pushq $2147483647
-        movss (%rsp), %xmm2
-        addq $8, %rsp
-        shufps $0x00, %xmm2, %xmm2
-
-        #; Check for alignment
-
-        #;movq 8(%rbp), %rdi #; buf
-        movq %rdi, %rdx #; buf => %rdx
-        andq $12, %rdx #; mask bits 1 & 2, result = 0, 4, 8 or 12
-        jz .CP_SSE #; if buffer IS aligned
-
-        #; PRE-LOOP
-        #; we iterate 1-3 times, doing normal x87 float comparison
-        #; so we reach a 16 byte aligned "buf" (=%rdi) value
-
-.LP_START:
-
-        #; Load next value from the buffer
-        movss (%rdi), %xmm1
-        andps %xmm2, %xmm1
-        maxss %xmm1, %xmm0
-
-        #; increment buffer, decrement counter
-        addq $4, %rdi #; buf++;
-
-        decq %rcx #; nframes--
-        jz .CP_END #; if we run out of frames, we go to the end
-
-        addq $4, %rdx #; one non-aligned byte less
-        cmp $16, %rdx
-        jne .LP_START #; if more non-aligned frames exist, we do a do-over
-
-.CP_SSE:
-
-        #; We have reached the 16 byte aligned "buf" ("rdi") value
-
-        #; Figure out how many loops we should do
-        movq %rcx, %rax #; copy remaining nframes to %rax for division
-
-        shr $2,%rax #; unsigned divide by 4
-        jz .POST_START
-
-        #; %rax = SSE iterations
-
-        #; current maximum is at %xmm0, but we need to ..
-        shufps $0x00, %xmm0, %xmm0 #; shuffle "current" to all 4 FP's
-
-        #;prefetcht0 16(%rdi)
-
-.LP_SSE:
-
-        movaps (%rdi), %xmm1
-        andps %xmm2, %xmm1
-        maxps %xmm1, %xmm0
-
-        addq $16, %rdi
-
-        decq %rax
-        jnz .LP_SSE
-
-        #; Calculate the maximum value contained in the 4 FP's in %xmm0
-        movaps %xmm0, %xmm1
-        shufps $0x4e, %xmm1, %xmm1 #; shuffle left & right pairs (1234 => 3412)
-        maxps %xmm1, %xmm0 #; maximums of the two pairs
-        movaps %xmm0, %xmm1
-        shufps $0xb1, %xmm1, %xmm1 #; shuffle the floats inside the two pairs (1234 => 2143)
-        maxps %xmm1, %xmm0
-
-        #; now every float in %xmm0 is the same value, current maximum value
-
-        #; Next we need to post-process all remaining frames
-        #; the remaining frame count is in %rcx
-
-        #; if no remaining frames, jump to the end
-
-        andq $3, %rcx #; nframes % 4
-        jz .CP_END
-
-.POST_START:
-
-        movss (%rdi), %xmm1
-        andps %xmm2, %xmm1
-        maxss %xmm1, %xmm0
-
-        addq $4, %rdi #; buf++;
-
-        decq %rcx #; nframes--;
-        jnz .POST_START
-
-.CP_END:
-
-        popq %rdi
-
-        #; return
-        leave
-        ret
-
-.size x86_sse_compute_peak, .-x86_sse_compute_peak
-#; end proc
-
-#ifdef __ELF__
-.section .note.GNU-stack,"",%progbits
-#endif
-
--
cgit v1.2.3
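For reference, the deleted file implements five C-callable helpers whose prototypes appear in its comments. The scalar C sketch below describes what each SSE routine computes; the function names and signatures are taken from the file, but the bodies are written here purely for illustration and are not Ardour's actual non-SSE fallback code.

#include <math.h>

/* Scalar reference semantics of the deleted SSE routines (illustrative only). */

void x86_sse_mix_buffers_with_gain(float *dst, float *src, unsigned int nframes, float gain)
{
        for (unsigned int i = 0; i < nframes; i++)
                dst[i] += src[i] * gain;        /* mix src into dst with gain */
}

void x86_sse_mix_buffers_no_gain(float *dst, float *src, unsigned int nframes)
{
        for (unsigned int i = 0; i < nframes; i++)
                dst[i] += src[i];               /* plain mix */
}

void x86_sse_apply_gain_to_buffer(float *buf, unsigned int nframes, float gain)
{
        for (unsigned int i = 0; i < nframes; i++)
                buf[i] *= gain;                 /* in-place constant gain */
}

void x86_sse_apply_gain_vector(float *buf, float *gain_vector, unsigned int nframes)
{
        for (unsigned int i = 0; i < nframes; i++)
                buf[i] *= gain_vector[i];       /* per-sample gain */
}

float x86_sse_compute_peak(float *buf, long nframes, float current)
{
        for (long i = 0; i < nframes; i++) {
                float a = fabsf(buf[i]);        /* andps with the 0x7fffffff mask = fabsf */
                if (a > current)
                        current = a;            /* running peak, like maxss/maxps */
        }
        return current;
}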
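The routines above all share one structure: a scalar pre-loop of 1-3 frames until the pointers reach 16-byte alignment, an aligned movaps loop that processes 4 floats per iteration, and a scalar tail for whatever remains. The sketch below shows that structure for the mix-with-gain case using standard SSE intrinsics; the function name is hypothetical and the code is a sketch of the technique, not the code that replaced this file.

#include <stdint.h>
#include <xmmintrin.h>

/* Illustrative pre-loop / aligned SSE loop / tail structure, as used by
 * x86_sse_mix_buffers_with_gain in the deleted assembly. */
static void mix_buffers_with_gain_sketch(float *dst, const float *src,
                                         unsigned int nframes, float gain)
{
        /* If the buffers are misaligned by different amounts they can never both
         * become 16-byte aligned at once, so do everything scalar (the assembly
         * makes the same check and jumps to its NONALIGN path). */
        if (((uintptr_t)dst & 15) != ((uintptr_t)src & 15)) {
                while (nframes-- > 0)
                        *dst++ += *src++ * gain;
                return;
        }

        /* Scalar pre-loop: 1-3 frames until dst (and src) are 16-byte aligned. */
        while (nframes > 0 && ((uintptr_t)dst & 15) != 0) {
                *dst++ += *src++ * gain;
                nframes--;
        }

        __m128 g = _mm_set1_ps(gain);   /* like shufps $0x00: gain in all 4 lanes */

        /* Aligned SSE loop, 4 frames per iteration. */
        while (nframes >= 4) {
                __m128 s = _mm_load_ps(src);    /* movaps load */
                __m128 d = _mm_load_ps(dst);
                _mm_store_ps(dst, _mm_add_ps(d, _mm_mul_ps(s, g)));
                dst += 4;
                src += 4;
                nframes -= 4;
        }

        /* Scalar tail for the remaining 1-3 frames. */
        while (nframes-- > 0)
                *dst++ += *src++ * gain;
}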