diff options
Diffstat (limited to 'media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch')
-rw-r--r-- | media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch | 293 |
1 files changed, 0 insertions, 293 deletions
diff --git a/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch b/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch deleted file mode 100644 index 6737811..0000000 --- a/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch +++ /dev/null @@ -1,293 +0,0 @@ -From 537d185b9e9b25f7dacb5e5c4dab47bb8524da34 Mon Sep 17 00:00:00 2001 -From: Rob Clark <rob@ti.com> -Date: Thu, 8 Apr 2010 00:30:25 -0500 -Subject: [PATCH 11/24] add some neon - ---- - configure.ac | 1 + - gst/stride/Makefile.am | 1 + - gst/stride/armv7.s | 119 ++++++++++++++++++++++++++++++++++++++++++++++++ - gst/stride/convert.c | 76 ++++++++++++++++-------------- - 4 files changed, 162 insertions(+), 35 deletions(-) - create mode 100644 gst/stride/armv7.s - -diff --git a/configure.ac b/configure.ac -index af6cd52..8e7ba18 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -58,6 +58,7 @@ dnl AS_LIBTOOL_TAGS - - AC_LIBTOOL_WIN32_DLL - AM_PROG_LIBTOOL -+AM_PROG_AS - - dnl *** required versions of GStreamer stuff *** - GST_REQ=0.10.32 -diff --git a/gst/stride/Makefile.am b/gst/stride/Makefile.am -index 0b61d55..3b466de 100644 ---- a/gst/stride/Makefile.am -+++ b/gst/stride/Makefile.am -@@ -3,6 +3,7 @@ plugin_LTLIBRARIES = libgststridetransform.la - libgststridetransform_la_SOURCES = \ - gststridetransform.c \ - convert.c \ -+ armv7.s \ - plugin.c - - libgststridetransform_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) -diff --git a/gst/stride/armv7.s b/gst/stride/armv7.s -new file mode 100644 -index 0000000..ed636f7 ---- /dev/null -+++ b/gst/stride/armv7.s -@@ -0,0 +1,119 @@ -+@ GStreamer -+@ -+@ Copyright (C) 2009 Texas Instruments, Inc - http://www.ti.com/ -+@ -+@ Description: NEON/VFP accelerated functions for armv7 architecture -+@ Created on: Nov 27, 2009 -+@ Author: Rob Clark <rob@ti.com> -+@ -+@ This library is free software; you can redistribute it and/or -+@ modify it under the terms of the GNU Library General Public -+@ License as published by the Free Software Foundation; either -+@ version 2 of the License, or (at your option) any later version. -+@ -+@ This library is distributed in the hope that it will be useful, -+@ but WITHOUT ANY WARRANTY; without even the implied warranty of -+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+@ Library General Public License for more details. -+@ -+@ You should have received a copy of the GNU Library General Public -+@ License along with this library; if not, write to the -+@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, -+@ Boston, MA 02111-1307, USA. -+ -+ .fpu neon -+ .text -+ -+ .align -+ .global stride_copy_zip2 -+ .type stride_copy_zip2, %function -+@void -+@stride_copy_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint sz) -+@{ -+@@@@ note: r0-r3, q0-3, and q8-q15 do not need to be preserved -+stride_copy_zip2: -+@ interleave remaining >= 16 bytes: -+ pld [r1, #64] -+ pld [r2, #64] -+ cmp r3, #16 -+ blt stride_copy_zip2_2 -+stride_copy_zip2_1: -+ vld1.8 {q8}, [r1]! -+ vld1.8 {q9}, [r2]! -+ -+ vzip.8 q8, q9 -+ -+ pld [r1, #64] -+ vst1.8 {q8,q9}, [r0]! -+ pld [r2, #64] -+ sub r3, r3, #16 -+ -+ cmp r3, #16 -+ bge stride_copy_zip2_1 -+@ interleave remaining >= 8 bytes: -+stride_copy_zip2_2: -+ cmp r3, #8 -+ blt stride_copy_zip2_3 -+ -+ vld1.8 {d16}, [r1]! -+ vld1.8 {d17}, [r2]! -+ -+ vzip.8 d16, d17 -+ -+ vst1.8 {d16,d17}, [r0]! -+ sub r3, r3, #8 -+ -+@ interleave remaining < 8 bytes: -+stride_copy_zip2_3: -+@XXX -+ bx lr -+@} -+ -+ .align -+ .global stride_copy -+ .type stride_copy, %function -+@void -+@stride_copy (guchar *new_buf, guchar *orig_buf, gint sz) -+@{ -+@@@@ note: r0-r3, q0-3, and q8-q15 do not need to be preserved -+stride_copy: -+@ copy remaining >= 64 bytes: -+ pld [r1, #64] -+ cmp r2, #64 -+ blt stride_copy_2 -+stride_copy_1: -+ vld1.8 {q8-q9}, [r1]! -+ sub r2, r2, #64 -+ vld1.8 {q10-q11},[r1]! -+ vst1.8 {q8-q9}, [r0]! -+ pld [r1, #64] -+ cmp r2, #64 -+ vst1.8 {q10-q11},[r0]! -+ bge stride_copy_1 -+@ copy remaining >= 32 bytes: -+stride_copy_2: -+ cmp r2, #32 -+ blt stride_copy_3 -+ vld1.8 {q8-q9}, [r1]! -+ sub r2, r2, #32 -+ vst1.8 {q8-q9}, [r0]! -+@ copy remaining >= 16 bytes: -+stride_copy_3: -+ cmp r2, #16 -+ blt stride_copy_4 -+ vld1.8 {q8}, [r1]! -+ sub r2, r2, #16 -+ vst1.8 {q8}, [r0]! -+@ copy remaining >= 8 bytes: -+stride_copy_4: -+ cmp r2, #8 -+ blt stride_copy_5 -+ vld1.8 {d16}, [r1]! -+ sub r2, r2, #8 -+ vst1.8 {d16}, [r0]! -+@ copy remaining < 8 bytes: -+stride_copy_5: -+@XXX -+ bx lr -+@} -+ -diff --git a/gst/stride/convert.c b/gst/stride/convert.c -index 860f16c..a15063b 100644 ---- a/gst/stride/convert.c -+++ b/gst/stride/convert.c -@@ -37,38 +37,43 @@ GST_DEBUG_CATEGORY_EXTERN (stridetransform_debug); - #define GST_CAT_DEFAULT stridetransform_debug - - -+/* note: some parts of code support in-place transform.. some do not.. I'm -+ * not sure if zip/interleave functions could really support in-place copy.. -+ * I need to think about this after having some sleep ;-) -+ */ -+ -+#define WEAK __attribute__((weak)) -+ - /* - * Conversion utilities: - */ - --static void --memmove_demux (guchar *new_buf, guchar *orig_buf, gint sz, gint pxstride) -+WEAK void -+stride_copy_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint sz) - { -- if (new_buf > orig_buf) { -- /* copy backwards */ -- new_buf += ((sz - 1) * pxstride); -- orig_buf += sz - 1; -- while(sz--) { -- *new_buf = *orig_buf; -- new_buf -= pxstride; -- orig_buf--; -- } -- } else { -- while(sz--) { -- *new_buf = *orig_buf; -- new_buf += pxstride; -- orig_buf++; -- } -+ while (sz--) { -+ *new_buf++ = *orig_buf1++; -+ *new_buf++ = *orig_buf2++; - } - } - -+WEAK void -+stride_copy (guchar *new_buf, guchar *orig_buf, gint sz) -+{ -+ memcpy (new_buf, orig_buf, sz); -+} -+ -+ -+/** -+ * move to strided buffer, interleaving two planes of identical dimensions -+ */ - static void --stridemove_demux (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_width, gint height, gint pxstride) -+stridemove_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint new_width, gint orig_width, gint height) - { - int row; - -- GST_DEBUG ("new_buf=%p, orig_buf=%p, new_width=%d, orig_width=%d, height=%d", -- new_buf, orig_buf, new_width, orig_width, height); -+ GST_DEBUG ("new_buf=%p, orig_buf1=%p, orig_buf2=%p, new_width=%d, orig_width=%d, height=%d", -+ new_buf, orig_buf1, orig_buf2, new_width, orig_width, height); - - /* if increasing the stride, work from bottom-up to avoid overwriting data - * that has not been moved yet.. otherwise, work in the opposite order, -@@ -76,11 +81,19 @@ stridemove_demux (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_w - */ - if (new_width > orig_width) { - for (row=height-1; row>=0; row--) { -- memmove_demux (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width, pxstride); -+ stride_copy_zip2 ( -+ new_buf+(new_width*row), -+ orig_buf1+(orig_width*row), -+ orig_buf2+(orig_width*row), -+ orig_width); - } - } else { - for (row=0; row<height; row++) { -- memmove_demux (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width, pxstride); -+ stride_copy_zip2 ( -+ new_buf+(new_width*row), -+ orig_buf1+(orig_width*row), -+ orig_buf2+(orig_width*row), -+ new_width); - } - } - } -@@ -106,11 +119,11 @@ stridemove (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_width, - */ - if (new_width > orig_width) { - for (row=height-1; row>=0; row--) { -- memmove (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width); -+ stride_copy (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width); - } - } else { - for (row=0; row<height; row++) { -- memmove (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width); -+ stride_copy (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width); - } - } - } -@@ -234,19 +247,12 @@ stridify_i420_nv12 (GstStrideTransform *self, guchar *strided, guchar *unstrided - - g_return_val_if_fail (stride >= width, GST_FLOW_ERROR); - -- /* note: if not an in-place conversion, then doing the U&V in one pass -- * would be more efficient... but if it is an in-place conversion, I'd -- * need to think about whether it is potential for the new UV plane to -- * corrupt the V plane before it is done copying.. -- */ -- stridemove_demux ( -- strided + (height*stride) + 1, -- unstrided + (int)(height*width*1.25), -- stride, width/2, height/2, 2); /* move V */ -- stridemove_demux ( -+ /* XXX widths/heights/strides that are not multiple of four??: */ -+ stridemove_zip2 ( - strided + (height*stride), - unstrided + (height*width), -- stride, width/2, height/2, 2); /* move U */ -+ unstrided + (int)(height*width*1.25), -+ stride, width/2, height/2); /* interleave U&V */ - stridemove (strided, unstrided, stride, width, height); /* move Y */ - - return GST_FLOW_OK; --- -1.7.1 - |