/*****************************************************************************
 * Copyright (C) 2020-2021 MulticoreWare, Inc
 *
 * Authors: Hongbin Liu
 *          Sebastian Pop
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

// Shared AArch64 assembly support header: symbol naming helpers plus the
// function / const / movrel macros and a few small arithmetic and
// transpose helpers.  The file is run through the C preprocessor before it
// reaches the assembler.

#ifndef ASM_S_ // #include guards
#define ASM_S_

.arch           armv8-a

// PFX(name) pastes the namespace and the name together as
// <X265_NS>_<name>.  X265_NS is not defined in this file and is expected
// to come from the build.
#define PFX3(prefix, name) prefix ## _ ## name
#define PFX2(prefix, name) PFX3(prefix, name)
#define PFX(name)          PFX2(X265_NS, name)

#ifdef __APPLE__
#define PREFIX 1
#endif

// EXTERN_ASM   : text glued in front of exported symbol names ("_" when
//                PREFIX is already defined at this point, empty otherwise).
// HAVE_AS_FUNC : 1 only in the final fallback branch, i.e. when neither
//                PREFIX nor __clang__ is defined; it gates the FUNC lines.
#ifdef PREFIX
#define EXTERN_ASM _
#define HAVE_AS_FUNC 0
#elif defined __clang__
#define EXTERN_ASM
#define HAVE_AS_FUNC 0
#define PREFIX 1
#else
#define EXTERN_ASM
#define HAVE_AS_FUNC 1
#endif

// ELF and FUNC are line prefixes.  Each expands to nothing when the
// matching feature is enabled, and to a single "#" or "@" character
// otherwise.  NOTE(review): that character is presumably meant to make the
// assembler treat the rest of the line as a comment -- confirm for each
// supported toolchain.
#ifdef __ELF__
#define ELF
#else
#ifdef PREFIX
#define ELF #
#else
#define ELF @
#endif
#endif

#if HAVE_AS_FUNC
#define FUNC
#else
#ifdef PREFIX
#define FUNC #
#else
#define FUNC @
#endif
#endif

#define GLUE(a, b) a ## b
#define JOIN(a, b) GLUE(a, b)

// PFX_C(name) builds <EXTERN_ASM><X265_NS>_<name> by token pasting.
#define PFX_C(name) JOIN(JOIN(JOIN(EXTERN_ASM, X265_NS), _), name)

// Alignment of stack arguments of size less than 8 bytes.
#ifdef __APPLE__
#define STACK_ARG_ALIGNMENT 4
#else
#define STACK_ARG_ALIGNMENT 8
#endif

// Get offset from SP of stack argument at index `idx`.
// `idx` is parenthesized so that an expression argument such as (n + 1)
// is multiplied as a whole instead of being split by operator precedence.
#define STACK_ARG_OFFSET(idx) ((idx) * STACK_ARG_ALIGNMENT)

// On Apple targets endfunc is one fixed macro.  ELF and FUNC both expand
// to "#" in that configuration, so each body line starts with "#".
#ifdef __APPLE__
.macro endfunc
ELF .size \name, . - \name
FUNC .endfunc
.endm
#endif

// function name, export=1
//   Opens a function: emits ".align 2", the symbol directives and the
//   entry label.
//   export == 1 : the label is EXTERN_ASM\name and is declared .global;
//                 the ELF-prefixed lines add .hidden and .type.
//   otherwise   : the label is \name with the ELF-prefixed .hidden and
//                 .type lines and no .global.
//   On non-Apple targets this also defines a one-shot endfunc macro that
//   emits the ELF .size and FUNC .endfunc lines for this function and then
//   purges itself, so every function must be closed with endfunc before
//   the next one is opened.
.macro function name, export=1
#ifdef __APPLE__
    .global \name
    endfunc
#else
    .macro endfunc
ELF     .size   \name, . - \name
FUNC    .endfunc
        .purgem endfunc
    .endm
#endif
        .align  2
.if \export == 1
        .global EXTERN_ASM\name
ELF     .hidden EXTERN_ASM\name
ELF     .type   EXTERN_ASM\name, %function
FUNC    .func   EXTERN_ASM\name
EXTERN_ASM\name:
.else
ELF     .hidden \name
ELF     .type   \name, %function
FUNC    .func   \name
\name:
.endif
.endm

// const name, align=2
//   Opens a constant data object: switches to .const_data when __MACH__ is
//   defined and to .section .rodata otherwise, applies ".align \align" and
//   emits the label.  It also defines a one-shot endconst macro that emits
//   the ELF .size line and then purges itself.
.macro  const   name, align=2
    .macro endconst
ELF     .size   \name, . - \name
        .purgem endconst
    .endm
#ifdef __MACH__
    .const_data
#else
    .section .rodata
#endif
    .align          \align
\name:
.endm

// movrel rd, val, offset=0
//   Loads the address of symbol \val plus \offset into \rd with an
//   adrp/add pair.  On Apple, and on Windows when PIC is defined, a
//   negative offset is not folded into the symbol expression: the plain
//   symbol address is formed first and the magnitude of the offset is then
//   subtracted with a separate sub.
//   NOTE(review): presumably those object formats cannot encode a negative
//   addend in the relocation -- confirm.
.macro  movrel rd, val, offset=0
#if defined(__APPLE__)
  .if \offset < 0
    adrp            \rd, \val@PAGE
    add             \rd, \rd, \val@PAGEOFF
    sub             \rd, \rd, -(\offset)
  .else
    adrp            \rd, \val+(\offset)@PAGE
    add             \rd, \rd, \val+(\offset)@PAGEOFF
  .endif
#elif defined(PIC) && defined(_WIN32)
  .if \offset < 0
    adrp            \rd, \val
    add             \rd, \rd, :lo12:\val
    sub             \rd, \rd, -(\offset)
  .else
    adrp            \rd, \val+(\offset)
    add             \rd, \rd, :lo12:\val+(\offset)
  .endif
#else
    adrp            \rd, \val+(\offset)
    add             \rd, \rd, :lo12:\val+(\offset)
#endif
.endm

// NOTE(review): presumably the row strides of the encoder source and
// reconstruction buffers -- confirm against the code that uses them.
#define FENC_STRIDE 64
#define FDEC_STRIDE 32

// sum = a + b, diff = a - b
.macro SUMSUB_AB sum, diff, a, b
    add             \sum,  \a,  \b
    sub             \diff, \a,  \b
.endm

// s1 = a + b, d1 = a - b, s2 = c + d, d2 = c - d
.macro SUMSUB_ABCD s1, d1, s2, d2, a, b, c, d
    SUMSUB_AB       \s1, \d1, \a, \b
    SUMSUB_AB       \s2, \d2, \c, \d
.endm

// Two sum/difference stages over r1..r4 using t1..t4 as temporaries:
//   stage 1: t1 = r1 + r2, t2 = r1 - r2, t3 = r3 + r4, t4 = r3 - r4
//   stage 2: r1 = t1 + t3, r3 = t1 - t3, r2 = t2 + t4, r4 = t2 - t4
// Results are left in r1..r4; t1..t4 are overwritten.
.macro HADAMARD4_V r1, r2, r3, r4, t1, t2, t3, t4
    SUMSUB_ABCD     \t1, \t2, \t3, \t4, \r1, \r2, \r3, \r4
    SUMSUB_ABCD     \r1, \r3, \r2, \r4, \t1, \t3, \t2, \t4
.endm

// In-place abs of two operands.
.macro ABS2 a, b
    abs             \a, \a
    abs             \b, \b
.endm

// In-place abs of eight operands.
.macro ABS8 v0, v1, v2, v3, v4, v5, v6, v7
    ABS2            \v0, \v1
    ABS2            \v2, \v3
    ABS2            \v4, \v5
    ABS2            \v6, \v7
.endm

// t1 = trn1(s1, s2), t2 = trn2(s1, s2)
.macro vtrn t1, t2, s1, s2
    trn1            \t1, \s1, \s2
    trn2            \t2, \s1, \s2
.endm

// vtrn applied to the pairs (s1, s2) and (s3, s4).
.macro trn4 t1, t2, t3, t4, s1, s2, s3, s4
    vtrn            \t1, \t2, \s1, \s2
    vtrn            \t3, \t4, \s3, \s4
.endm

// Saves d8-d15 (the callee-saved low halves of v8-v15 under AAPCS64) with
// four 16-byte pre-decrement pushes; sp drops by 64 bytes in total.
.macro push_vec_regs
    stp             d8, d9, [sp,#-16]!
    stp             d10, d11, [sp,#-16]!
    stp             d12, d13, [sp,#-16]!
    stp             d14, d15, [sp,#-16]!
.endm

// Restores d8-d15 in the reverse order of push_vec_regs; sp rises by 64
// bytes in total.
.macro pop_vec_regs
    ldp             d14, d15, [sp], #16
    ldp             d12, d13, [sp], #16
    ldp             d10, d11, [sp], #16
    ldp             d8, d9, [sp], #16
.endm

#endif