/* * FAAC - Freeware Advanced Audio Coder * Copyright (C) 2026 Nils Schimmelmann * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "faac_real.h" #include "quantize.h" void quantize_sse2(const faac_real * __restrict xr, int * __restrict xi, int n, faac_real sfacfix) { const __m128 zero = _mm_setzero_ps(); const __m128 sfac = _mm_set1_ps(sfacfix); const __m128 magic = _mm_set1_ps(MAGIC_NUMBER); // Mask to strip the sign bit (0x7FFFFFFF) const __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)); int cnt = 0; // Process 4 elements per iteration for (; cnt <= n - 4; cnt += 4) { #ifdef FAAC_PRECISION_SINGLE __m128 x_orig = _mm_loadu_ps((const float*)&xr[cnt]); #else // Convert 4 doubles to 4 floats via two 128-bit loads __m128 low = _mm_cvtpd_ps(_mm_loadu_pd(&xr[cnt])); __m128 high = _mm_cvtpd_ps(_mm_loadu_pd(&xr[cnt + 2])); __m128 x_orig = _mm_movelh_ps(low, high); #endif // Capture sign and Absolute value __m128 sign_mask = _mm_cmplt_ps(x_orig, zero); __m128 x = _mm_and_ps(x_orig, abs_mask); // Math: (x * sfac)^0.75 + magic // Logic: sqrt( (x*sfac) * sqrt(x*sfac) ) x = _mm_mul_ps(x, sfac); x = _mm_mul_ps(x, _mm_sqrt_ps(x)); x = _mm_sqrt_ps(x); x = _mm_add_ps(x, magic); // Convert to integer __m128i xi_vec = _mm_cvttps_epi32(x); // Bitwise Sign Fix: (val ^ mask) - mask __m128i m_int = _mm_castps_si128(sign_mask); xi_vec = _mm_sub_epi32(_mm_xor_si128(xi_vec, m_int), m_int); _mm_storeu_si128((__m128i*)&xi[cnt], xi_vec); } // Safe scalar remainder loop for widths not multiple of 4 for (; cnt < n; cnt++) { faac_real val = xr[cnt]; faac_real tmp = FAAC_FABS(val); tmp *= sfacfix; tmp = FAAC_SQRT(tmp * FAAC_SQRT(tmp)); int q = (int)(tmp + (faac_real)MAGIC_NUMBER); xi[cnt] = (val < 0) ? -q : q; } }