summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'dev-libs/DirectFB/files/DirectFB-0.9.20-simd-amd64.diff')
-rw-r--r-- dev-libs/DirectFB/files/DirectFB-0.9.20-simd-amd64.diff | 183
1 files changed, 183 insertions, 0 deletions
diff --git a/dev-libs/DirectFB/files/DirectFB-0.9.20-simd-amd64.diff b/dev-libs/DirectFB/files/DirectFB-0.9.20-simd-amd64.diff
new file mode 100644
index 000000000000..8755daab0c8e
--- /dev/null
+++ b/dev-libs/DirectFB/files/DirectFB-0.9.20-simd-amd64.diff
@@ -0,0 +1,183 @@
+diff -urN DirectFB-0.9.20.orig/src/gfx/generic/generic_mmx.h DirectFB-0.9.20/src/gfx/generic/generic_mmx.h
+--- DirectFB-0.9.20.orig/src/gfx/generic/generic_mmx.h 2003-06-30 23:26:04.000000000 +0200
++++ DirectFB-0.9.20/src/gfx/generic/generic_mmx.h 2004-01-08 23:46:40.000000000 +0100
+@@ -172,15 +172,19 @@
+ "movq %%mm1, (%1)\n\t"
+ "dec %2\n\t"
+ "jz 3f\n\t"
+- "addl $8, %1\n\t"
+- "addl %4, %0\n\t"
+- "testl $0xFFFF0000, %0\n\t"
++ "add $8, %1\n\t"
++ "add %4, %0\n\t"
++ "test $0xFFFF0000, %0\n\t"
+ "jz 2b\n\t"
+- "movl %0, %%ebx\n\t"
+- "andl $0xFFFF0000, %%ebx\n\t"
+- "shrl $14, %%ebx\n\t"
++ "mov %0, %%ebx\n\t"
++ "and $0xFFFF0000, %%ebx\n\t"
++ "shr $14, %%ebx\n\t"
++#ifdef __x86_64__
++ "add %%rbx, %3\n\t"
++#else
+ "add %%ebx, %3\n\t"
+- "andl $0xFFFF, %0\n\t"
++#endif
++ "and $0xFFFF, %0\n\t"
+ "jmp 1b\n"
+ "3:\n\t"
+ "emms"
+@@ -201,8 +205,8 @@
+ "movd (%2), %%mm1\n\t"
+ "punpcklbw %%mm0, %%mm1\n\t"
+ "movq %%mm1, (%0)\n\t"
+- "addl $4, %2\n\t"
+- "addl $8, %0\n\t"
++ "add $4, %2\n\t"
++ "add $8, %0\n\t"
+ "dec %1\n\t"
+ "jnz 1b\n\t"
+ "emms"
+@@ -238,7 +242,7 @@
+ "dec %1\n\t"
+ "jz 2f\n\t"
+ "psrlq $16, %%mm0\n\t"
+- "addl $8, %0\n\t"
++ "add $8, %0\n\t"
+ /* 2. Konvertierung nach 24 bit interleaved */
+ "movq %%mm0, %%mm3\n\t"
+ "punpcklwd %%mm3, %%mm3\n\t"
+@@ -252,7 +256,7 @@
+ "dec %1\n\t"
+ "jz 2f\n\t"
+ "psrlq $16, %%mm0\n\t"
+- "addl $8, %0\n\t"
++ "add $8, %0\n\t"
+ /* 3. Konvertierung nach 24 bit interleaved */
+ "movq %%mm0, %%mm3\n\t"
+ "punpcklwd %%mm3, %%mm3\n\t"
+@@ -266,7 +270,7 @@
+ "dec %1\n\t"
+ "jz 2f\n\t"
+ "psrlq $16, %%mm0\n\t"
+- "addl $8, %0\n\t"
++ "add $8, %0\n\t"
+ /* 4. Konvertierung nach 24 bit interleaved */
+ "movq %%mm0, %%mm3\n\t"
+ "punpcklwd %%mm3, %%mm3\n\t"
+@@ -279,8 +283,8 @@
+ "movq %%mm3, (%0)\n\t"
+ "dec %1\n\t"
+ "jz 2f\n\t"
+- "addl $8, %0\n\t"
+- "addl $8, %2\n\t"
++ "add $8, %0\n\t"
++ "add $8, %2\n\t"
+ "jmp 1b\n"
+ "2:\n\t"
+ "emms"
+@@ -304,8 +308,8 @@
+ "punpcklbw %%mm6, %%mm0\n\t"
+ "por %%mm7, %%mm0\n\t"
+ "movq %%mm0, (%0)\n\t"
+- "addl $4, %2\n\t"
+- "addl $8, %0\n\t"
++ "add $4, %2\n\t"
++ "add $8, %0\n\t"
+ "dec %1\n\t"
+ "jnz 1b\n\t"
+ "emms"
+@@ -322,7 +326,7 @@
+
+ __asm__ __volatile__ (
+ "movq %3, %%mm7\n\t"
+- "cmpl $0, %2\n\t"
++ "cmp $0, %2\n\t"
+ "jne 3f\n\t"
+ "movq %4, %%mm6\n\t"
+ "movd %5, %%mm0\n\t"
+@@ -341,7 +345,7 @@
+ "psrlw $8, %%mm0\n\t"
+ "movq %%mm0, (%0)\n"
+ "1:\n\t"
+- "addl $8, %0\n\t"
++ "add $8, %0\n\t"
+ "dec %1\n\t"
+ "jnz 4b\n\t"
+ "jmp 2f\n\t"
+@@ -360,8 +364,8 @@
+ "psrlw $8, %%mm0\n\t"
+ "movq %%mm0, (%0)\n"
+ "1:\n\t"
+- "addl $8, %2\n\t"
+- "addl $8, %0\n\t"
++ "add $8, %2\n\t"
++ "add $8, %0\n\t"
+ "dec %1\n\t"
+ "jnz 3b\n\t"
+ "2:\n\t"
+@@ -379,7 +383,7 @@
+
+ __asm__ __volatile__ (
+ "movq %3, %%mm7\n\t"
+- "cmpl $0, %2\n\t"
++ "cmp $0, %2\n\t"
+ "jne 3f\n\t"
+ "movq %4, %%mm6\n\t"
+ "movd %5, %%mm0\n\t"
+@@ -397,7 +401,7 @@
+ "psrlw $8, %%mm1\n\t"
+ "movq %%mm1, (%0)\n"
+ "1:\n\t"
+- "addl $8, %0\n\t"
++ "add $8, %0\n\t"
+ "dec %1\n\t"
+ "jnz 4b\n\t"
+ "jmp 2f\n\t"
+@@ -415,8 +419,8 @@
+ "psrlw $8, %%mm1\n\t"
+ "movq %%mm1, (%0)\n"
+ "1:\n\t"
+- "addl $8, %2\n\t"
+- "addl $8, %0\n\t"
++ "add $8, %2\n\t"
++ "add $8, %0\n\t"
+ "dec %1\n\t"
+ "jnz 3b\n\t"
+ "2:\n\t"
+diff -urN DirectFB-0.9.20.orig/src/misc/cpu_accel.c DirectFB-0.9.20/src/misc/cpu_accel.c
+--- DirectFB-0.9.20.orig/src/misc/cpu_accel.c 2003-08-15 13:32:45.000000000 +0200
++++ DirectFB-0.9.20/src/misc/cpu_accel.c 2004-01-08 23:48:05.000000000 +0100
+@@ -57,6 +57,13 @@
+
+ #include "cpu_accel.h"
+
++#ifdef __x86_64__ /* x86-64 build: report a fixed feature mask instead of probing CPUID */
++static __u32 arch_accel (void)
++{
++ return MM_ACCEL_X86_MMX | MM_ACCEL_X86_SSE | MM_ACCEL_X86_MMXEXT | MM_ACCEL_X86_SSE2; /* baseline x86-64 features only; 3DNow! is not guaranteed (absent on Intel CPUs), so it must not be claimed without a CPUID check */
++}
++#endif /* __x86_64__ */
++
+ #ifdef ARCH_X86
+ static __u32 arch_accel (void)
+ {
+@@ -175,7 +182,7 @@
+
+ __u32 dfb_mm_accel (void)
+ {
+-#if defined (ARCH_X86) || (defined (ARCH_PPC) && defined (ENABLE_ALTIVEC))
++#if defined(__x86_64__) || defined (ARCH_X86) || (defined (ARCH_PPC) && defined (ENABLE_ALTIVEC))
+ static __u32 accel = ~0U;
+
+ if (accel != ~0U)
+@@ -183,7 +190,7 @@
+
+ accel = arch_accel ();
+
+-#ifdef USE_SSE
++#if defined(USE_SSE) && !defined(__x86_64__)
+
+ /* test OS support for SSE */
+ if (accel & MM_ACCEL_X86_SSE) {