/* * Copyright © 2018, VideoLAN and dav1d authors * Copyright © 2018, Janne Grunau * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include "common/attributes.h" #include "src/cpu.h" #include "src/arm/cpu.h" #if HAVE_GETAUXVAL || HAVE_ELF_AUX_INFO #include #if ARCH_AARCH64 #define HWCAP_AARCH64_ASIMDDP (1 << 20) #define HWCAP_AARCH64_SVE (1 << 22) #define HWCAP2_AARCH64_SVE2 (1 << 1) #define HWCAP2_AARCH64_I8MM (1 << 13) COLD unsigned dav1d_get_cpu_flags_arm(void) { #if HAVE_GETAUXVAL unsigned long hw_cap = getauxval(AT_HWCAP); unsigned long hw_cap2 = getauxval(AT_HWCAP2); #else unsigned long hw_cap = 0; unsigned long hw_cap2 = 0; elf_aux_info(AT_HWCAP, &hw_cap, sizeof(hw_cap)); elf_aux_info(AT_HWCAP2, &hw_cap2, sizeof(hw_cap2)); #endif unsigned flags = dav1d_get_default_cpu_flags(); flags |= (hw_cap & HWCAP_AARCH64_ASIMDDP) ? DAV1D_ARM_CPU_FLAG_DOTPROD : 0; flags |= (hw_cap2 & HWCAP2_AARCH64_I8MM) ? DAV1D_ARM_CPU_FLAG_I8MM : 0; flags |= (hw_cap & HWCAP_AARCH64_SVE) ? DAV1D_ARM_CPU_FLAG_SVE : 0; flags |= (hw_cap2 & HWCAP2_AARCH64_SVE2) ? DAV1D_ARM_CPU_FLAG_SVE2 : 0; return flags; } #else /* !ARCH_AARCH64 */ #ifndef HWCAP_ARM_NEON #define HWCAP_ARM_NEON (1 << 12) #endif #define HWCAP_ARM_ASIMDDP (1 << 24) #define HWCAP_ARM_I8MM (1 << 27) COLD unsigned dav1d_get_cpu_flags_arm(void) { #if HAVE_GETAUXVAL unsigned long hw_cap = getauxval(AT_HWCAP); #else unsigned long hw_cap = 0; elf_aux_info(AT_HWCAP, &hw_cap, sizeof(hw_cap)); #endif unsigned flags = dav1d_get_default_cpu_flags(); flags |= (hw_cap & HWCAP_ARM_NEON) ? DAV1D_ARM_CPU_FLAG_NEON : 0; flags |= (hw_cap & HWCAP_ARM_ASIMDDP) ? DAV1D_ARM_CPU_FLAG_DOTPROD : 0; flags |= (hw_cap & HWCAP_ARM_I8MM) ? DAV1D_ARM_CPU_FLAG_I8MM : 0; return flags; } #endif /* ARCH_AARCH64 */ #elif defined(__APPLE__) #include static int have_feature(const char *feature) { int supported = 0; size_t size = sizeof(supported); if (sysctlbyname(feature, &supported, &size, NULL, 0) != 0) { return 0; } return supported; } COLD unsigned dav1d_get_cpu_flags_arm(void) { unsigned flags = dav1d_get_default_cpu_flags(); if (have_feature("hw.optional.arm.FEAT_DotProd")) flags |= DAV1D_ARM_CPU_FLAG_DOTPROD; if (have_feature("hw.optional.arm.FEAT_I8MM")) flags |= DAV1D_ARM_CPU_FLAG_I8MM; /* No SVE and SVE2 feature detection available on Apple platforms. */ return flags; } #elif defined(__OpenBSD__) && ARCH_AARCH64 #include #include #include #include COLD unsigned dav1d_get_cpu_flags_arm(void) { unsigned flags = dav1d_get_default_cpu_flags(); #ifdef CPU_ID_AA64ISAR0 int mib[2]; uint64_t isar0; uint64_t isar1; size_t len; mib[0] = CTL_MACHDEP; mib[1] = CPU_ID_AA64ISAR0; len = sizeof(isar0); if (sysctl(mib, 2, &isar0, &len, NULL, 0) != -1) { if (ID_AA64ISAR0_DP(isar0) >= ID_AA64ISAR0_DP_IMPL) flags |= DAV1D_ARM_CPU_FLAG_DOTPROD; } mib[0] = CTL_MACHDEP; mib[1] = CPU_ID_AA64ISAR1; len = sizeof(isar1); if (sysctl(mib, 2, &isar1, &len, NULL, 0) != -1) { #ifdef ID_AA64ISAR1_I8MM_IMPL if (ID_AA64ISAR1_I8MM(isar1) >= ID_AA64ISAR1_I8MM_IMPL) flags |= DAV1D_ARM_CPU_FLAG_I8MM; #endif } #endif return flags; } #elif defined(_WIN32) #include COLD unsigned dav1d_get_cpu_flags_arm(void) { unsigned flags = dav1d_get_default_cpu_flags(); #ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) flags |= DAV1D_ARM_CPU_FLAG_DOTPROD; #endif #ifdef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) flags |= DAV1D_ARM_CPU_FLAG_SVE; #endif #ifdef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) flags |= DAV1D_ARM_CPU_FLAG_SVE2; #endif #ifdef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE /* There's no PF_* flag that indicates whether plain I8MM is available * or not. But if SVE_I8MM is available, that also implies that * regular I8MM is available. */ if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)) flags |= DAV1D_ARM_CPU_FLAG_I8MM; #endif return flags; } #elif defined(__ANDROID__) #include #include #include static unsigned parse_proc_cpuinfo(const char *flag) { FILE *file = fopen("/proc/cpuinfo", "r"); if (!file) return 0; char line_buffer[120]; const char *line; size_t flaglen = strlen(flag); while ((line = fgets(line_buffer, sizeof(line_buffer), file))) { // check all occurances as whole words const char *found = line; while ((found = strstr(found, flag))) { if ((found == line_buffer || !isgraph(found[-1])) && (isspace(found[flaglen]) || feof(file))) { fclose(file); return 1; } found += flaglen; } // if line is incomplete seek back to avoid splitting the search // string into two buffers if (!strchr(line, '\n') && strlen(line) > flaglen) { // use fseek since the 64 bit fseeko is only available since // Android API level 24 and meson defines _FILE_OFFSET_BITS // by default 64 if (fseek(file, -flaglen, SEEK_CUR)) break; } } fclose(file); return 0; } COLD unsigned dav1d_get_cpu_flags_arm(void) { unsigned flags = dav1d_get_default_cpu_flags(); flags |= parse_proc_cpuinfo("neon") ? DAV1D_ARM_CPU_FLAG_NEON : 0; flags |= parse_proc_cpuinfo("asimd") ? DAV1D_ARM_CPU_FLAG_NEON : 0; flags |= parse_proc_cpuinfo("asimddp") ? DAV1D_ARM_CPU_FLAG_DOTPROD : 0; flags |= parse_proc_cpuinfo("i8mm") ? DAV1D_ARM_CPU_FLAG_I8MM : 0; #if ARCH_AARCH64 flags |= parse_proc_cpuinfo("sve") ? DAV1D_ARM_CPU_FLAG_SVE : 0; flags |= parse_proc_cpuinfo("sve2") ? DAV1D_ARM_CPU_FLAG_SVE2 : 0; #endif /* ARCH_AARCH64 */ return flags; } #else /* Unsupported OS */ COLD unsigned dav1d_get_cpu_flags_arm(void) { return dav1d_get_default_cpu_flags(); } #endif