@*****************************************************************************
@ i420_yuyv.S : ARM NEONv1 I420 to YUYV chroma conversion
@*****************************************************************************
@ Copyright (C) 2009-2011 Rémi Denis-Courmont
@
@ This program is free software; you can redistribute it and/or modify
@ it under the terms of the GNU Lesser General Public License as published by
@ the Free Software Foundation; either version 2.1 of the License, or
@ (at your option) any later version.
@
@ This program is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ GNU Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public License
@ along with this program; if not, write to the Free Software Foundation,
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/

#include "asm.S"

	.syntax unified
#if HAVE_AS_FPU_DIRECTIVE
	.fpu neon
#endif
	.text

@-----------------------------------------------------------------------------
@ Register roles, shared by both functions in this file.
@
@ r0 and r1 arrive as pointers to parameter records and are overwritten with
@ the working output pointers once the records have been loaded.
@ r9 is deliberately left untouched.
@-----------------------------------------------------------------------------
#define O1	r0	/* output pointer, upper row of the current row pair */
#define O2	r1	/* output pointer, lower row of the current row pair */
#define WIDTH	r2	/* pixels per row (rounded up to 16 on entry)        */
#define HEIGHT	r3	/* rows still to convert                             */
#define Y1	r4	/* luma pointer, upper row                           */
#define Y2	r5	/* luma pointer, lower row                           */
#define U	r6	/* first chroma plane pointer                        */
#define V	r7	/* second chroma plane pointer                       */
#define YPITCH	r8	/* luma pitch in bytes                               */
#define OPAD	r10	/* output bytes from end of lower row to next pair   */
#define YPAD	r11	/* luma bytes from end of lower row to next pair     */
#define COUNT	ip	/* pixels left in the current row                    */
#define OPITCH	lr	/* output pitch in bytes                             */

@-----------------------------------------------------------------------------
@ i420_yuyv_neon
@
@ Packs three planes (luma plus two half-resolution chroma planes) into a
@ single interleaved buffer, 16 pixels at a time and two rows per pass.
@ Both rows of a pair reuse the same 8+8 chroma bytes.
@ Output byte order for each pixel pair: Y0 U Y1 V.
@
@ In:	r0 = address of two words:  { output base, output pitch }
@	r1 = address of four words: { luma base, U base, V base, luma pitch }
@	r2 = width in pixels
@	r3 = height in rows
@ Out:	none; returns immediately when width <= 0 or height <= 0
@ Clobbers: r0-r3, ip, flags, q0-q3.  r4-r8, r10, r11 are saved and restored.
@
@ Requirements visible from the code:
@  - luma and output row addresses carry a 128-bit alignment hint, chroma
@    row addresses a 64-bit one, so bases and pitches must keep them aligned;
@  - chroma rows are stepped by half the luma step, i.e. chroma pitch is
@    taken to be half the luma pitch;
@  - rows are converted in whole groups of 16 pixels, so up to 15 pixels past
@    the requested width are read and written on every row;
@  - rows are converted in pairs, so an odd height touches one extra row.
@ NOTE(review): presumably called from C as (out, in, int width, int height);
@ confirm against the header that declares it.
@-----------------------------------------------------------------------------
	.align 2
function i420_yuyv_neon
	push		{r4-r8,r10-r11,lr}
	ldmia		r0,	{O1, OPITCH}		@ r0 -> { base, pitch }
	ldmia		r1,	{Y1, U, V, YPITCH}	@ r1 -> { y, u, v, pitch }
	@ The inner loop always advances by whole groups of 16 pixels.  Round
	@ the width up the same way so that the paddings computed below bring
	@ the pointers exactly to the start of the next row pair; with the raw
	@ width they would drift whenever width is not a multiple of 16.
	@ A width <= 0 still rounds to a value <= 0 and takes the early exit.
	add		WIDTH,	WIDTH,	#15
	bic		WIDTH,	WIDTH,	#15
	cmp		HEIGHT,	#0			@ flags consumed at 1: below
	sub		OPAD,	OPITCH,	WIDTH, lsl #1	@ 2 output bytes per pixel
	sub		YPAD,	YPITCH,	WIDTH
1:
	@ Row-pair loop.  Flags hold (HEIGHT > 0); if so they are replaced by
	@ the sign/zero test of WIDTH, so the pop below returns when either
	@ dimension is exhausted or not positive.
	it		gt
	movsgt		COUNT,	WIDTH
	add		O2,	O1,	OPITCH
	add		Y2,	Y1,	YPITCH
	it		le
	pople		{r4-r8,r10-r11,pc}
2:
	@ 16 pixels of two rows per iteration.
	pld		[U, #64]
	vld1.u8		{d2},		[U,:64]!	@ 8 U samples
	pld		[V, #64]
	vld1.u8		{d3},		[V,:64]!	@ 8 V samples
	pld		[Y1, #64]
	vzip.u8		d2,	d3			@ q1 = U0 V0 U1 V1 ...
	subs		COUNT,	COUNT,	#16		@ flags live until bgt
	vld1.u8		{q0},		[Y1,:128]!	@ 16 luma, upper row
	pld		[Y2, #64]
	vmov		q3,	q1			@ chroma copy for lower row
	vzip.u8		q0,	q1			@ q0:q1 = Y0 U0 Y1 V0 ...
	vld1.u8		{q2},		[Y2,:128]!	@ 16 luma, lower row
	vzip.u8		q2,	q3
	vst1.u8		{q0-q1},	[O1,:128]!
	vst1.u8		{q2-q3},	[O2,:128]!
	bgt		2b

	subs		HEIGHT,	#2			@ flags consumed at 1:
	add		O1,	O2,	OPAD		@ skip to next row pair
	add		Y1,	Y2,	YPAD
	add		U,	U,	YPAD, lsr #1	@ chroma padding is half
	add		V,	V,	YPAD, lsr #1
	b		1b

@-----------------------------------------------------------------------------
@ i420_uyvy_neon
@
@ Same conversion as i420_yuyv_neon with chroma first in the output.
@ Output byte order for each pixel pair: U Y0 V Y1.
@
@ In:	r0 = address of two words:  { output base, output pitch }
@	r1 = address of four words: { luma base, U base, V base, luma pitch }
@	r2 = width in pixels
@	r3 = height in rows
@ Out:	none; returns immediately when width <= 0 or height <= 0
@ Clobbers: r0-r3, ip, flags, q0-q3.  r4-r8, r10, r11 are saved and restored.
@
@ Requirements visible from the code:
@  - luma and output row addresses carry a 128-bit alignment hint, chroma
@    row addresses a 64-bit one, so bases and pitches must keep them aligned;
@  - chroma pitch is taken to be half the luma pitch;
@  - up to 15 pixels past the requested width are read and written per row;
@  - rows are converted in pairs, so an odd height touches one extra row.
@-----------------------------------------------------------------------------
function i420_uyvy_neon
	push		{r4-r8,r10-r11,lr}
	ldmia		r0,	{O1, OPITCH}		@ r0 -> { base, pitch }
	ldmia		r1,	{Y1, U, V, YPITCH}	@ r1 -> { y, u, v, pitch }
	@ Round the width up to the 16-pixel granularity of the inner loop so
	@ the row paddings below match what the loop really consumed.
	@ A width <= 0 still rounds to a value <= 0 and takes the early exit.
	add		WIDTH,	WIDTH,	#15
	bic		WIDTH,	WIDTH,	#15
	cmp		HEIGHT,	#0			@ flags consumed at 1: below
	sub		OPAD,	OPITCH,	WIDTH, lsl #1	@ 2 output bytes per pixel
	sub		YPAD,	YPITCH,	WIDTH
1:
	@ Row-pair loop; returns when HEIGHT or WIDTH is not positive.
	it		gt
	movsgt		COUNT,	WIDTH
	add		O2,	O1,	OPITCH
	add		Y2,	Y1,	YPITCH
	it		le
	pople		{r4-r8,r10-r11,pc}
2:
	@ 16 pixels of two rows per iteration.
	pld		[U, #64]
	vld1.u8		{d0},		[U,:64]!	@ 8 U samples
	pld		[V, #64]
	vld1.u8		{d1},		[V,:64]!	@ 8 V samples
	pld		[Y1, #64]
	vzip.u8		d0,	d1			@ q0 = U0 V0 U1 V1 ...
	subs		COUNT,	COUNT,	#16		@ flags live until bgt
	vld1.u8		{q1},		[Y1,:128]!	@ 16 luma, upper row
	pld		[Y2, #64]
	vmov		q2,	q0			@ chroma copy for lower row
	vzip.u8		q0,	q1			@ q0:q1 = U0 Y0 V0 Y1 ...
	vld1.u8		{q3},		[Y2,:128]!	@ 16 luma, lower row
	vzip.u8		q2,	q3
	vst1.u8		{q0-q1},	[O1,:128]!
	vst1.u8		{q2-q3},	[O2,:128]!
	bgt		2b

	subs		HEIGHT,	#2			@ flags consumed at 1:
	add		O1,	O2,	OPAD		@ skip to next row pair
	add		Y1,	Y2,	YPAD
	add		U,	U,	YPAD, lsr #1	@ chroma padding is half
	add		V,	V,	YPAD, lsr #1
	b		1b