/*****************************************************************************
 * Copyright (C) 2013-2020 MulticoreWare, Inc
 *
 * Authors: Steve Borho
 *          Min Chen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "pixelharness.h"
#include "primitives.h"
#include "entropy.h"

using namespace X265_NS;

/* Fills the harness test buffers.
 *
 * Each *_test_buff has three slots: [0] pseudo-random values, [1] the minimum
 * value and [2] the maximum value used for that buffer type.  The remaining
 * pbuf / sbuf / ibuf / psbuf buffers receive pseudo-random data.
 *
 * Every statement sits on its own line on purpose: the trailing `//` remarks
 * below must not swallow the assignment that follows them. */
PixelHarness::PixelHarness()
{
    /* [0] --- Random values
     * [1] --- Minimum
     * [2] --- Maximum */
    for (int i = 0; i < BUFFSIZE; i++)
    {
        pixel_test_buff[0][i] = rand() % PIXEL_MAX;
        short_test_buff[0][i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; // max(SHORT_MIN, min(rand(), SMAX));
        short_test_buff1[0][i] = rand() & PIXEL_MAX; // For block copy only
        short_test_buff2[0][i] = rand() % 16383; // for addAvg
        int_test_buff[0][i] = rand() % SHORT_MAX;
        ushort_test_buff[0][i] = rand() % ((1 << 16) - 1);
        uchar_test_buff[0][i] = rand() % ((1 << 8) - 1);
        residual_test_buff[0][i] = (rand() % (2 * RMAX + 1)) - RMAX - 1;// For sse_ss only
        double_test_buff[0][i] = (double)(short_test_buff[0][i]) / 256.0;

        pixel_test_buff[1][i] = PIXEL_MIN;
        short_test_buff[1][i] = (int16_t)SMIN;
        short_test_buff1[1][i] = PIXEL_MIN;
        short_test_buff2[1][i] =
-16384; int_test_buff[1][i] = SHORT_MIN; ushort_test_buff[1][i] = PIXEL_MIN; uchar_test_buff[1][i] = PIXEL_MIN; residual_test_buff[1][i] = RMIN; double_test_buff[1][i] = (double)(short_test_buff[1][i]) / 256.0; pixel_test_buff[2][i] = PIXEL_MAX; short_test_buff[2][i] = SMAX; short_test_buff1[2][i] = PIXEL_MAX; short_test_buff2[2][i] = 16383; int_test_buff[2][i] = SHORT_MAX; ushort_test_buff[2][i] = ((1 << 16) - 1); uchar_test_buff[2][i] = 255; residual_test_buff[2][i] = RMAX; double_test_buff[2][i] = (double)(short_test_buff[2][i]) / 256.0; pbuf1[i] = rand() & PIXEL_MAX; pbuf2[i] = rand() & PIXEL_MAX; pbuf3[i] = rand() & PIXEL_MAX; pbuf4[i] = rand() & PIXEL_MAX; sbuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX)); sbuf2[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX)); ibuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; psbuf1[i] = psbuf4[i] = (rand() % 65) - 32; // range is between -32 to 32 psbuf2[i] = psbuf5[i] = (rand() % 3) - 1; // possible values {-1,0,1} psbuf3[i] = (rand() % 129) - 128; sbuf3[i] = rand() % PIXEL_MAX; // for blockcopy only } } bool PixelHarness::check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt) { int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; int vres = (int)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride); int cres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride); if (vres != cres) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_pixel_sse(pixel_sse_t ref, pixel_sse_t opt) { int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; sse_t vres = (sse_t)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride); sse_t cres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride); if 
(vres != cres) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_pixel_sse_ss(pixel_sse_ss_t ref, pixel_sse_ss_t opt) { int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; sse_t vres = (sse_t)checked(opt, residual_test_buff[index1], stride, residual_test_buff[index2] + j, stride); sse_t cres = ref(residual_test_buff[index1], stride, residual_test_buff[index2] + j, stride); if (vres != cres) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_pixelcmp_x3(pixelcmp_x3_t ref, pixelcmp_x3_t opt) { ALIGN_VAR_16(int, cres[16]); ALIGN_VAR_16(int, vres[16]); int j = 0; intptr_t stride = FENC_STRIDE - 5; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; checked(opt, pixel_test_buff[index1], pixel_test_buff[index2] + j, pixel_test_buff[index2] + j + 1, pixel_test_buff[index2] + j + 2, stride, &vres[0]); ref(pixel_test_buff[index1], pixel_test_buff[index2] + j, pixel_test_buff[index2] + j + 1, pixel_test_buff[index2] + j + 2, stride, &cres[0]); if ((vres[0] != cres[0]) || ((vres[1] != cres[1])) || ((vres[2] != cres[2]))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_pixelcmp_x4(pixelcmp_x4_t ref, pixelcmp_x4_t opt) { ALIGN_VAR_16(int, cres[16]); ALIGN_VAR_16(int, vres[16]); int j = 0; intptr_t stride = FENC_STRIDE - 5; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; checked(opt, pixel_test_buff[index1], pixel_test_buff[index2] + j, pixel_test_buff[index2] + j + 1, pixel_test_buff[index2] + j + 2, pixel_test_buff[index2] + j + 3, stride, &vres[0]); ref(pixel_test_buff[index1], pixel_test_buff[index2] + j, pixel_test_buff[index2] + j + 1, pixel_test_buff[index2] + j + 2, pixel_test_buff[index2] + j + 3, stride, &cres[0]); if ((vres[0] != cres[0]) || ((vres[1] != cres[1])) || ((vres[2] != 
cres[2])) || ((vres[3] != cres[3]))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_calresidual(calcresidual_t ref, calcresidual_t opt) { ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0, 64 * 64 * sizeof(int16_t)); memset(opt_dest, 0, 64 * 64 * sizeof(int16_t)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, pbuf1 + j, pixel_test_buff[index] + j, opt_dest, stride); ref(pbuf1 + j, pixel_test_buff[index] + j, ref_dest, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_calresidual_aligned(calcresidual_t ref, calcresidual_t opt) { ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0, 64 * 64 * sizeof(int16_t)); memset(opt_dest, 0, 64 * 64 * sizeof(int16_t)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, pbuf1 + j, pixel_test_buff[index] + j, opt_dest, stride); ref(pbuf1 + j, pixel_test_buff[index] + j, ref_dest, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_ssd_s(pixel_ssd_s_t ref, pixel_ssd_s_t opt) { int j = 0; for (int i = 0; i < ITERS; i++) { // NOTE: stride must be multiple of 16, because minimum block is 4x4 int stride = (STRIDE + (rand() % STRIDE)) & ~15; sse_t cres = ref(sbuf1 + j, stride); sse_t vres = (sse_t)checked(opt, sbuf1 + j, (intptr_t)stride); if (cres != vres) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_ssd_s_aligned(pixel_ssd_s_t ref, pixel_ssd_s_t opt) { int j = 0; for (int i = 0; i < ITERS; i++) { // NOTE: stride must be multiple of 16, because minimum block is 4x4 int stride = STRIDE; sse_t cres = ref(sbuf1 + j, stride); sse_t vres = 
(sse_t)checked(opt, sbuf1 + j, (intptr_t)stride); if (cres != vres) return false; reportfail(); j += INCR+32; } return true; } bool PixelHarness::check_weightp(weightp_sp_t ref, weightp_sp_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * (64 + 1)]); ALIGN_VAR_16(pixel, opt_dest[64 * (64 + 1)]); memset(ref_dest, 0, 64 * 64 * sizeof(pixel)); memset(opt_dest, 0, 64 * 64 * sizeof(pixel)); int j = 0; int width = 2 * (rand() % 32 + 1); int height = 8; int w0 = rand() % 128; int shift = rand() % 8; // maximum is 7, see setFromWeightAndOffset() int round = shift ? (1 << (shift - 1)) : 0; int offset = (rand() % 256) - 128; intptr_t stride = 64; const int correction = (IF_INTERNAL_PREC - X265_DEPTH); for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, short_test_buff[index] + j, opt_dest, stride, stride + 1, width, height, w0, round << correction, shift + correction, offset); ref(short_test_buff[index] + j, ref_dest, stride, stride + 1, width, height, w0, round << correction, shift + correction, offset); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) { opt(short_test_buff[index] + j, opt_dest, stride, stride + 1, width, height, w0, round << correction, shift + correction, offset); return false; } reportfail(); j += INCR; } return true; } bool PixelHarness::check_weightp(weightp_pp_t ref, weightp_pp_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); memset(ref_dest, 0, 64 * 64 * sizeof(pixel)); memset(opt_dest, 0, 64 * 64 * sizeof(pixel)); int j = 0; bool enableavx512 = true; int width = 16 * (rand() % 4 + 1); int cpuid = X265_NS::cpu_detect(enableavx512); if (cpuid & X265_CPU_AVX512) width = 32 * (rand() % 2 + 1); int height = 8; int w0 = rand() % 128; int shift = rand() % 8; // maximum is 7, see setFromWeightAndOffset() int round = shift ? 
(1 << (shift - 1)) : 0; int offset = (rand() % 256) - 128; intptr_t stride = 64; const int correction = (IF_INTERNAL_PREC - X265_DEPTH); for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round << correction, shift + correction, offset); ref(pixel_test_buff[index] + j, ref_dest, stride, width, height, w0, round << correction, shift + correction, offset); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) { checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round << correction, shift + correction, offset); return false; } reportfail(); j += INCR; } return true; } bool PixelHarness::check_downscale_t(downscale_t ref, downscale_t opt) { ALIGN_VAR_16(pixel, ref_destf[32 * 32]); ALIGN_VAR_16(pixel, opt_destf[32 * 32]); ALIGN_VAR_16(pixel, ref_desth[32 * 32]); ALIGN_VAR_16(pixel, opt_desth[32 * 32]); ALIGN_VAR_16(pixel, ref_destv[32 * 32]); ALIGN_VAR_16(pixel, opt_destv[32 * 32]); ALIGN_VAR_16(pixel, ref_destc[32 * 32]); ALIGN_VAR_16(pixel, opt_destc[32 * 32]); intptr_t src_stride = 64; intptr_t dst_stride = 32; int bx = 32; int by = 32; int j = 0; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; ref(pixel_test_buff[index] + j, ref_destf, ref_desth, ref_destv, ref_destc, src_stride, dst_stride, bx, by); checked(opt, pixel_test_buff[index] + j, opt_destf, opt_desth, opt_destv, opt_destc, src_stride, dst_stride, bx, by); if (memcmp(ref_destf, opt_destf, 32 * 32 * sizeof(pixel))) return false; if (memcmp(ref_desth, opt_desth, 32 * 32 * sizeof(pixel))) return false; if (memcmp(ref_destv, opt_destv, 32 * 32 * sizeof(pixel))) return false; if (memcmp(ref_destc, opt_destc, 32 * 32 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_downscaleluma_t(downscaleluma_t ref, downscaleluma_t opt) { ALIGN_VAR_16(pixel, ref_destf[32 * 32]); ALIGN_VAR_16(pixel, opt_destf[32 * 32]); intptr_t src_stride = 64; 
intptr_t dst_stride = 32; int bx = 32; int by = 32; int j = 0; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; ref(pixel_test_buff[index] + j, ref_destf, src_stride, dst_stride, bx, by); checked(opt, pixel_test_buff[index] + j, opt_destf, src_stride, dst_stride, bx, by); if (memcmp(ref_destf, opt_destf, 32 * 32 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_cpy2Dto1D_shl_t(cpy2Dto1D_shl_t ref, cpy2Dto1D_shl_t opt) { ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int shift = (rand() % 7 + 1); int index = i % TEST_CASES; checked(opt, opt_dest, short_test_buff[index] + j, stride, shift); ref(ref_dest, short_test_buff[index] + j, stride, shift); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_cpy2Dto1D_shr_t(cpy2Dto1D_shr_t ref, cpy2Dto1D_shr_t opt) { ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int shift = (rand() % 7 + 1); int index = i % TEST_CASES; checked(opt, opt_dest, short_test_buff[index] + j, stride, shift); ref(ref_dest, short_test_buff[index] + j, stride, shift); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_copy_cnt_t(copy_cnt_t ref, copy_cnt_t opt) { ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; int opt_cnt = 
(int)checked(opt, opt_dest, short_test_buff1[index] + j, stride); int ref_cnt = ref(ref_dest, short_test_buff1[index] + j, stride); if ((ref_cnt != opt_cnt) || memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_cpy1Dto2D_shl_t(cpy1Dto2D_shl_t ref, cpy1Dto2D_shl_t opt) { ALIGN_VAR_64(int16_t, ref_dest[64 * 64]); ALIGN_VAR_64(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int shift = (rand() % 7 + 1); int index = i % TEST_CASES; checked(opt, opt_dest, short_test_buff[index] + j, stride, shift); ref(ref_dest, short_test_buff[index] + j, stride, shift); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_cpy1Dto2D_shl_aligned_t(cpy1Dto2D_shl_t ref, cpy1Dto2D_shl_t opt) { ALIGN_VAR_64(int16_t, ref_dest[64 * 64]); ALIGN_VAR_64(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int shift = (rand() % 7 + 1); int index = i % TEST_CASES; checked(opt, opt_dest, short_test_buff[index] + j, stride, shift); ref(ref_dest, short_test_buff[index] + j, stride, shift); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR + 32; } return true; } bool PixelHarness::check_cpy1Dto2D_shr_t(cpy1Dto2D_shr_t ref, cpy1Dto2D_shr_t opt) { ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int shift = (rand() % 7 + 1); int index = i % TEST_CASES; checked(opt, opt_dest, short_test_buff[index] + j, stride, shift); ref(ref_dest, 
short_test_buff[index] + j, stride, shift); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt) { ALIGN_VAR_64(pixel, ref_dest[64 * 64]); ALIGN_VAR_64(pixel, opt_dest[64 * 64]); int j = 0; memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; checked(ref, ref_dest, stride, pixel_test_buff[index1] + j, stride, pixel_test_buff[index2] + j, stride, 32); opt(opt_dest, stride, pixel_test_buff[index1] + j, stride, pixel_test_buff[index2] + j, stride, 32); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_pixelavg_pp_aligned(pixelavg_pp_t ref, pixelavg_pp_t opt) { ALIGN_VAR_64(pixel, ref_dest[64 * 64]); ALIGN_VAR_64(pixel, opt_dest[64 * 64]); int j = 0; memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; checked(ref, ref_dest, stride, pixel_test_buff[index1] + j, stride, pixel_test_buff[index2] + j, stride, 32); opt(opt_dest, stride, pixel_test_buff[index1] + j, stride, pixel_test_buff[index2] + j, stride, 32); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR + 32; } return true; } bool PixelHarness::check_copy_pp(copy_pp_t ref, copy_pp_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); // we don't know the partition size so we are checking the entire output buffer so // we must initialize the buffers memset(ref_dest, 0, sizeof(ref_dest)); memset(opt_dest, 0, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int 
index = i % TEST_CASES; checked(opt, opt_dest, stride, pixel_test_buff[index] + j, stride); ref(ref_dest, stride, pixel_test_buff[index] + j, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_copy_sp(copy_sp_t ref, copy_sp_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); // we don't know the partition size so we are checking the entire output buffer so // we must initialize the buffers memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride1 = 64, stride2 = STRIDE; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, opt_dest, stride1, short_test_buff1[index] + j, stride2); ref(ref_dest, stride1, short_test_buff1[index] + j, stride2); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_copy_ps(copy_ps_t ref, copy_ps_t opt) { ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); // we don't know the partition size so we are checking the entire output buffer so // we must initialize the buffers memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, opt_dest, stride, pixel_test_buff[index] + j, stride); ref(ref_dest, stride, pixel_test_buff[index] + j, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_copy_ss(copy_ss_t ref, copy_ss_t opt) { ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); // we don't know the partition size so we are checking the entire output buffer so // we must initialize the buffers memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, 
sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, opt_dest, stride, short_test_buff1[index] + j, stride); ref(ref_dest, stride, short_test_buff1[index] + j, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt) { ALIGN_VAR_64(int16_t, ref_dest[64 * 64]); ALIGN_VAR_64(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); intptr_t stride = 64; for (int i = 0; i < ITERS; i++) { int16_t value = (rand() % SHORT_MAX) + 1; checked(opt, opt_dest, stride, value); ref(ref_dest, stride, value); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); } return true; } bool PixelHarness::check_blockfill_s_aligned(blockfill_s_t ref, blockfill_s_t opt) { ALIGN_VAR_64(int16_t, ref_dest[64 * 64]); ALIGN_VAR_64(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); intptr_t stride = 64; for (int i = 0; i < ITERS; i++) { int16_t value = (rand() % SHORT_MAX) + 1; checked(opt, opt_dest, stride, value); ref(ref_dest, stride, value); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); } return true; } bool PixelHarness::check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt) { ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride2 = 64, stride = STRIDE; for (int i = 0; i < 1; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; checked(opt, opt_dest, stride2, pixel_test_buff[index1] + j, pixel_test_buff[index2] + j, stride, stride); ref(ref_dest, stride2, pixel_test_buff[index1] + j, pixel_test_buff[index2] + j, stride, 
stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_scale1D_pp(scale1D_t ref, scale1D_t opt) { ALIGN_VAR_64(pixel, ref_dest[64 * 64]); ALIGN_VAR_64(pixel, opt_dest[64 * 64]); memset(ref_dest, 0, sizeof(ref_dest)); memset(opt_dest, 0, sizeof(opt_dest)); int j = 0; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, opt_dest, pixel_test_buff[index] + j); ref(ref_dest, pixel_test_buff[index] + j); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_scale1D_pp_aligned(scale1D_t ref, scale1D_t opt) { ALIGN_VAR_64(pixel, ref_dest[64 * 64]); ALIGN_VAR_64(pixel, opt_dest[64 * 64]); memset(ref_dest, 0, sizeof(ref_dest)); memset(opt_dest, 0, sizeof(opt_dest)); int j = 0; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, opt_dest, pixel_test_buff[index] + j); ref(ref_dest, pixel_test_buff[index] + j); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR * 2; } return true; } bool PixelHarness::check_scale2D_pp(scale2D_t ref, scale2D_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); memset(ref_dest, 0, sizeof(ref_dest)); memset(opt_dest, 0, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, opt_dest, pixel_test_buff[index] + j, stride); ref(ref_dest, pixel_test_buff[index] + j, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_transpose(transpose_t ref, transpose_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); memset(ref_dest, 0, sizeof(ref_dest)); memset(opt_dest, 0, sizeof(opt_dest)); int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { 
int index = i % TEST_CASES; checked(opt, opt_dest, pixel_test_buff[index] + j, stride); ref(ref_dest, pixel_test_buff[index] + j, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_pixel_add_ps(pixel_add_ps_t ref, pixel_add_ps_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride2 = 64, stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; checked(opt, opt_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride); ref(ref_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_pixel_add_ps_aligned(pixel_add_ps_t ref, pixel_add_ps_t opt) { ALIGN_VAR_64(pixel, ref_dest[64 * 64]); ALIGN_VAR_64(pixel, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; intptr_t stride2 = 64, stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; checked(opt, opt_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride); ref(ref_dest, stride2, pixel_test_buff[index1] + j, short_test_buff[index2] + j, stride, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += 2 * INCR; } return true; } bool PixelHarness::check_pixel_var(var_t ref, var_t opt) { int j = 0; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; uint64_t vres = checked(opt, pixel_test_buff[index], stride); uint64_t cres = ref(pixel_test_buff[index], stride); if (vres != cres) return false; 
reportfail(); j += INCR; } return true; } bool PixelHarness::check_ssim_4x4x2_core(ssim_4x4x2_core_t ref, ssim_4x4x2_core_t opt) { ALIGN_VAR_32(int, sum0[2][4]); ALIGN_VAR_32(int, sum1[2][4]); for (int i = 0; i < ITERS; i++) { intptr_t stride = rand() % 64; int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; ref(pixel_test_buff[index1] + i, stride, pixel_test_buff[index2] + i, stride, sum0); checked(opt, pixel_test_buff[index1] + i, stride, pixel_test_buff[index2] + i, stride, sum1); if (memcmp(sum0, sum1, sizeof(sum0))) return false; reportfail(); } return true; } bool PixelHarness::check_ssim_end(ssim_end4_t ref, ssim_end4_t opt) { ALIGN_VAR_32(int, sum0[5][4]); ALIGN_VAR_32(int, sum1[5][4]); for (int i = 0; i < ITERS; i++) { for (int j = 0; j < 5; j++) { for (int k = 0; k < 4; k++) { sum0[j][k] = rand() % (1 << 12); sum1[j][k] = rand() % (1 << 12); } } int width = (rand() % 4) + 1; // range[1-4] float cres = ref(sum0, sum1, width); float vres = checked_float(opt, sum0, sum1, width); if (fabs(vres - cres) > 0.001) return false; reportfail(); } return true; } bool PixelHarness::check_addAvg(addAvg_t ref, addAvg_t opt) { ALIGN_VAR_64(pixel, ref_dest[64 * 64]); ALIGN_VAR_64(pixel, opt_dest[64 * 64]); int j = 0; memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; ref(short_test_buff2[index1] + j, short_test_buff2[index2] + j, ref_dest, stride, stride, stride); checked(opt, short_test_buff2[index1] + j, short_test_buff2[index2] + j, opt_dest, stride, stride, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_addAvg_aligned(addAvg_t ref, addAvg_t opt) { ALIGN_VAR_64(pixel, ref_dest[64 * 64]); ALIGN_VAR_64(pixel, opt_dest[64 * 64]); int j = 0; memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 
0xCD, sizeof(opt_dest)); intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { int index1 = rand() % TEST_CASES; int index2 = rand() % TEST_CASES; ref(short_test_buff2[index1] + j, short_test_buff2[index2] + j, ref_dest, stride, stride, stride); checked(opt, short_test_buff2[index1] + j, short_test_buff2[index2] + j, opt_dest, stride, stride, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR * 2; } return true; } bool PixelHarness::check_calSign(sign_t ref, sign_t opt) { ALIGN_VAR_16(int8_t, ref_dest[64 * 2]); ALIGN_VAR_16(int8_t, opt_dest[64 * 2]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; for (int i = 0; i < ITERS; i++) { int width = (rand() % 64) + 1; ref(ref_dest, pbuf2 + j, pbuf3 + j, width); checked(opt, opt_dest, pbuf2 + j, pbuf3 + j, width); if (memcmp(ref_dest, opt_dest, sizeof(ref_dest))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuOrgE0_t(saoCuOrgE0_t ref, saoCuOrgE0_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); for (int i = 0; i < 64 * 64; i++) ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX); int j = 0; for (int i = 0; i < ITERS; i++) { int width = 16 * (rand() % 4 + 1); int stride = width + 1; ref(ref_dest, psbuf1 + j, width, psbuf2 + j, stride); checked(opt, opt_dest, psbuf1 + j, width, psbuf5 + j, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuOrgE1_t(saoCuOrgE1_t ref, saoCuOrgE1_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); for (int i = 0; i < 64 * 64; i++) ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX); int j = 0; for (int i = 0; i < ITERS; i++) { int width = 16 * (rand() % 4 + 1); int stride = width + 1; ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, width); checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, 
stride, width); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)) || memcmp(psbuf2, psbuf5, BUFFSIZE)) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuOrgE2_t(saoCuOrgE2_t ref[2], saoCuOrgE2_t opt[2]) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); for (int i = 0; i < 64 * 64; i++) ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX); for (int id = 0; id < 2; id++) { int j = 0; if (opt[id]) { for (int i = 0; i < ITERS; i++) { int width = 16 * (1 << (id * (rand() % 2 + 1))) - (rand() % 2); int stride = width + 1; ref[width > 16](ref_dest, psbuf1 + j, psbuf2 + j, psbuf3 + j, width, stride); checked(opt[width > 16], opt_dest, psbuf4 + j, psbuf2 + j, psbuf3 + j, width, stride); if (memcmp(psbuf1 + j, psbuf4 + j, width * sizeof(int8_t))) return false; if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } } } return true; } bool PixelHarness::check_saoCuOrgE3_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); for (int i = 0; i < 64 * 64; i++) ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX); int j = 0; for (int i = 0; i < ITERS; i++) { int stride = 16 * (rand() % 4 + 1); int start = rand() % 2; int end = 16 - rand() % 2; ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, start, end); checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, stride, start, end); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)) || memcmp(psbuf2, psbuf5, BUFFSIZE)) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuStatsBO_t(saoCuStatsBO_t ref, saoCuStatsBO_t opt) { enum { NUM_EDGETYPE = 33 }; // classIdx = 1 + (rec[x] >> 3); int32_t stats_ref[NUM_EDGETYPE]; int32_t stats_vec[NUM_EDGETYPE]; int32_t count_ref[NUM_EDGETYPE]; int32_t count_vec[NUM_EDGETYPE]; int j = 0; for (int i = 0; i < ITERS; i++) { // initialize input data to random, the dynamic range wrong but good to verify our 
asm code for (int x = 0; x < NUM_EDGETYPE; x++) { stats_ref[x] = stats_vec[x] = rand(); count_ref[x] = count_vec[x] = rand(); } intptr_t stride = 16 * (rand() % 4 + 1); int endX = MAX_CU_SIZE - (rand() % 5); int endY = MAX_CU_SIZE - (rand() % 4) - 1; ref(sbuf2 + j + 1, pbuf3 + 1, stride, endX, endY, stats_ref, count_ref); checked(opt, sbuf2 + j + 1, pbuf3 + 1, stride, endX, endY, stats_vec, count_vec); if (memcmp(stats_ref, stats_vec, sizeof(stats_ref)) || memcmp(count_ref, count_vec, sizeof(count_ref))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuStatsE0_t(saoCuStatsE0_t ref, saoCuStatsE0_t opt) { enum { NUM_EDGETYPE = 5 }; int32_t stats_ref[NUM_EDGETYPE]; int32_t stats_vec[NUM_EDGETYPE]; int32_t count_ref[NUM_EDGETYPE]; int32_t count_vec[NUM_EDGETYPE]; int j = 0; for (int i = 0; i < ITERS; i++) { // initialize input data to random, the dynamic range wrong but good to verify our asm code for (int x = 0; x < NUM_EDGETYPE; x++) { stats_ref[x] = stats_vec[x] = rand(); count_ref[x] = count_vec[x] = rand(); } intptr_t stride = 16 * (rand() % 4 + 1); int endX = MAX_CU_SIZE - (rand() % 5) - 1; int endY = MAX_CU_SIZE - (rand() % 4) - 1; ref(sbuf2 + j + 1, pbuf3 + j + 1, stride, endX, endY, stats_ref, count_ref); checked(opt, sbuf2 + j + 1, pbuf3 + j + 1, stride, endX, endY, stats_vec, count_vec); if (memcmp(stats_ref, stats_vec, sizeof(stats_ref)) || memcmp(count_ref, count_vec, sizeof(count_ref))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuStatsE1_t(saoCuStatsE1_t ref, saoCuStatsE1_t opt) { enum { NUM_EDGETYPE = 5 }; int32_t stats_ref[NUM_EDGETYPE]; int32_t stats_vec[NUM_EDGETYPE]; int32_t count_ref[NUM_EDGETYPE]; int32_t count_vec[NUM_EDGETYPE]; int8_t _upBuff1_ref[MAX_CU_SIZE + 2], *upBuff1_ref = _upBuff1_ref + 1; int8_t _upBuff1_vec[MAX_CU_SIZE + 2], *upBuff1_vec = _upBuff1_vec + 1; int j = 0; for (int i = 0; i < ITERS; i++) { // initialize input data to random, the dynamic range 
wrong but good to verify our asm code for (int x = 0; x < NUM_EDGETYPE; x++) { stats_ref[x] = stats_vec[x] = rand(); count_ref[x] = count_vec[x] = rand(); } // initial sign for (int x = 0; x < MAX_CU_SIZE + 2; x++) _upBuff1_ref[x] = _upBuff1_vec[x] = (rand() % 3) - 1; intptr_t stride = 16 * (rand() % 4 + 1); int endX = MAX_CU_SIZE - (rand() % 5); int endY = MAX_CU_SIZE - (rand() % 4) - 1; ref(sbuf2 + 1, pbuf3 + 1, stride, upBuff1_ref, endX, endY, stats_ref, count_ref); checked(opt, sbuf2 + 1, pbuf3 + 1, stride, upBuff1_vec, endX, endY, stats_vec, count_vec); if ( memcmp(stats_ref, stats_vec, sizeof(stats_ref)) || memcmp(count_ref, count_vec, sizeof(count_ref))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuStatsE2_t(saoCuStatsE2_t ref, saoCuStatsE2_t opt) { enum { NUM_EDGETYPE = 5 }; int32_t stats_ref[NUM_EDGETYPE]; int32_t stats_vec[NUM_EDGETYPE]; int32_t count_ref[NUM_EDGETYPE]; int32_t count_vec[NUM_EDGETYPE]; int8_t _upBuff1_ref[MAX_CU_SIZE + 2], *upBuff1_ref = _upBuff1_ref + 1; int8_t _upBufft_ref[MAX_CU_SIZE + 2], *upBufft_ref = _upBufft_ref + 1; int8_t _upBuff1_vec[MAX_CU_SIZE + 2], *upBuff1_vec = _upBuff1_vec + 1; int8_t _upBufft_vec[MAX_CU_SIZE + 2], *upBufft_vec = _upBufft_vec + 1; int j = 0; // NOTE: verify more times since our asm is NOT exact match to C, the output of upBuff* will be DIFFERENT for (int i = 0; i < ITERS * 10; i++) { // initialize input data to random, the dynamic range wrong but good to verify our asm code for (int x = 0; x < NUM_EDGETYPE; x++) { stats_ref[x] = stats_vec[x] = rand(); count_ref[x] = count_vec[x] = rand(); } // initial sign for (int x = 0; x < MAX_CU_SIZE + 2; x++) { _upBuff1_ref[x] = _upBuff1_vec[x] = (rand() % 3) - 1; _upBufft_ref[x] = _upBufft_vec[x] = (rand() % 3) - 1; } intptr_t stride = 16 * (rand() % 4 + 1); int endX = MAX_CU_SIZE - (rand() % 5) - 1; int endY = MAX_CU_SIZE - (rand() % 4) - 1; ref(sbuf2 + 1, pbuf3 + 1, stride, upBuff1_ref, upBufft_ref, endX, endY, stats_ref, 
count_ref); checked(opt, sbuf2 + 1, pbuf3 + 1, stride, upBuff1_vec, upBufft_vec, endX, endY, stats_vec, count_vec); if ( memcmp(stats_ref, stats_vec, sizeof(stats_ref)) || memcmp(count_ref, count_vec, sizeof(count_ref))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuStatsE3_t(saoCuStatsE3_t ref, saoCuStatsE3_t opt) { enum { NUM_EDGETYPE = 5 }; int32_t stats_ref[NUM_EDGETYPE]; int32_t stats_vec[NUM_EDGETYPE]; int32_t count_ref[NUM_EDGETYPE]; int32_t count_vec[NUM_EDGETYPE]; int8_t _upBuff1_ref[MAX_CU_SIZE + 2], *upBuff1_ref = _upBuff1_ref + 1; int8_t _upBuff1_vec[MAX_CU_SIZE + 2], *upBuff1_vec = _upBuff1_vec + 1; int j = 0; // (const pixel *fenc, const pixel *rec, intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats, int32_t *count) for (int i = 0; i < ITERS; i++) { // initialize input data to random, the dynamic range wrong but good to verify our asm code for (int x = 0; x < NUM_EDGETYPE; x++) { stats_ref[x] = stats_vec[x] = rand(); count_ref[x] = count_vec[x] = rand(); } // initial sign for (int x = 0; x < (int)sizeof(_upBuff1_ref); x++) { _upBuff1_ref[x] = _upBuff1_vec[x] = (rand() % 3) - 1; } intptr_t stride = 16 * (rand() % 4 + 1); int endX = MAX_CU_SIZE - (rand() % 5) - 1; int endY = MAX_CU_SIZE - (rand() % 4) - 1; ref(sbuf2, pbuf3, stride, upBuff1_ref, endX, endY, stats_ref, count_ref); checked(opt, sbuf2, pbuf3, stride, upBuff1_vec, endX, endY, stats_vec, count_vec); if ( memcmp(stats_ref, stats_vec, sizeof(stats_ref)) || memcmp(count_ref, count_vec, sizeof(count_ref))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuOrgE3_32_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); for (int i = 0; i < 64 * 64; i++) ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX); int j = 0; for (int i = 0; i < ITERS; i++) { int stride = 32 * (rand() % 2 + 1); int start = rand() % 2; int end = (32 * (rand() % 2 + 1)) - 
rand() % 2; ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, start, end); checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, stride, start, end); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)) || memcmp(psbuf2, psbuf5, BUFFSIZE)) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_planecopy_sp(planecopy_sp_t ref, planecopy_sp_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int width = 32 + (rand() % 32); int height = 32 + (rand() % 32); intptr_t srcStride = 64; intptr_t dstStride = width; int j = 0; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, ushort_test_buff[index] + j, srcStride, opt_dest, dstStride, width, height, (int)8, (uint16_t)((1 << X265_DEPTH) - 1)); ref(ushort_test_buff[index] + j, srcStride, ref_dest, dstStride, width, height, (int)8, (uint16_t)((1 << X265_DEPTH) - 1)); if (memcmp(ref_dest, opt_dest, dstStride * height * sizeof(pixel))) { memcpy(opt_dest, ref_dest, sizeof(ref_dest)); opt(ushort_test_buff[index] + j, srcStride, opt_dest, dstStride, width, height, (int)8, (uint16_t)((1 << X265_DEPTH) - 1)); return false; } // check tail memory area for(int x = width; x < dstStride; x++) { if (opt_dest[(height - 1 * dstStride) + x] != 0xCD) return false; } reportfail(); j += INCR; } return true; } bool PixelHarness::check_planecopy_cp(planecopy_cp_t ref, planecopy_cp_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64 * 2]); ALIGN_VAR_16(pixel, opt_dest[64 * 64 * 2]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int width = 16 + rand() % 48; int height = 16 + rand() % 48; intptr_t srcStride = 64; intptr_t dstStride = width; int j = 0; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; checked(opt, uchar_test_buff[index] + j, srcStride, opt_dest, dstStride, width, height, (int)2); ref(uchar_test_buff[index] + j, srcStride, 
ref_dest, dstStride, width, height, (int)2); if (memcmp(ref_dest, opt_dest, sizeof(ref_dest))) return false; // check tail memory area for(int x = width; x < dstStride; x++) { if (opt_dest[(height - 1 * dstStride) + x] != 0xCD) return false; } reportfail(); j += INCR; } return true; } bool PixelHarness::check_cutree_propagate_cost(cutree_propagate_cost ref, cutree_propagate_cost opt) { ALIGN_VAR_16(int, ref_dest[64 * 64]); ALIGN_VAR_16(int, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); double fps = 1.0; int j = 0; for (int i = 0; i < ITERS; i++) { int width = 16 + rand() % 64; int index = i % TEST_CASES; checked(opt, opt_dest, ushort_test_buff[index] + j, int_test_buff[index] + j, ushort_test_buff[index] + j, int_test_buff[index] + j, &fps, width); ref(ref_dest, ushort_test_buff[index] + j, int_test_buff[index] + j, ushort_test_buff[index] + j, int_test_buff[index] + j, &fps, width); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_cutree_fix8_pack(cutree_fix8_pack ref, cutree_fix8_pack opt) { ALIGN_VAR_32(uint16_t, ref_dest[64 * 64]); ALIGN_VAR_32(uint16_t, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; for (int i = 0; i < ITERS; i++) { int count = 256 + i; int index = i % TEST_CASES; checked(opt, opt_dest, double_test_buff[index] + j, count); ref(ref_dest, double_test_buff[index] + j, count); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(uint16_t))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_cutree_fix8_unpack(cutree_fix8_unpack ref, cutree_fix8_unpack opt) { ALIGN_VAR_32(double, ref_dest[64 * 64]); ALIGN_VAR_32(double, opt_dest[64 * 64]); memset(ref_dest, 0xCD, sizeof(ref_dest)); memset(opt_dest, 0xCD, sizeof(opt_dest)); int j = 0; for (int i = 0; i < ITERS; i++) { int count = 256 + i; int index = i % 
TEST_CASES; checked(opt, opt_dest, ushort_test_buff[index] + j, count); ref(ref_dest, ushort_test_buff[index] + j, count); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(double))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_psyCost_pp(pixelcmp_t ref, pixelcmp_t opt) { int j = 0, index1, index2, optres, refres; intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { index1 = rand() % TEST_CASES; index2 = rand() % TEST_CASES; optres = (int)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride); refres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride); if (optres != refres) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_saoCuOrgB0_t(saoCuOrgB0_t ref, saoCuOrgB0_t opt) { ALIGN_VAR_16(pixel, ref_dest[64 * 64]); ALIGN_VAR_16(pixel, opt_dest[64 * 64]); for (int i = 0; i < 64 * 64; i++) ref_dest[i] = opt_dest[i] = rand() % (PIXEL_MAX); int j = 0; for (int i = 0; i < ITERS; i++) { int width = 16 * (rand() % 4 + 1); int height = rand() % 63 + 2; int stride = width; ref(ref_dest, psbuf1 + j, width, height, stride); checked(opt, opt_dest, psbuf1 + j, width, height, stride); if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel))) return false; reportfail(); j += INCR; } return true; } bool PixelHarness::check_scanPosLast(scanPosLast_t ref, scanPosLast_t opt) { ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 2]); uint8_t ref_coeffNum[MLS_GRP_NUM], opt_coeffNum[MLS_GRP_NUM]; // value range[0, 16] uint16_t ref_coeffSign[MLS_GRP_NUM], opt_coeffSign[MLS_GRP_NUM]; // bit mask map for non-zero coeff sign uint16_t ref_coeffFlag[MLS_GRP_NUM], opt_coeffFlag[MLS_GRP_NUM]; // bit mask map for non-zero coeff int totalCoeffs = 0; for (int i = 0; i < 32 * 32; i++) { ref_src[i] = rand() & SHORT_MAX; // more zero coeff if (ref_src[i] < SHORT_MAX * 2 / 3) ref_src[i] = 0; // more negtive if ((rand() % 10) < 8) ref_src[i] *= -1; totalCoeffs += (ref_src[i] != 0); } // 
extra test area all of 0x1234 for (int i = 0; i < ITERS * 2; i++) { ref_src[32 * 32 + i] = 0x1234; } memset(ref_coeffNum, 0xCD, sizeof(ref_coeffNum)); memset(ref_coeffSign, 0xCD, sizeof(ref_coeffSign)); memset(ref_coeffFlag, 0xCD, sizeof(ref_coeffFlag)); memset(opt_coeffNum, 0xCD, sizeof(opt_coeffNum)); memset(opt_coeffSign, 0xCD, sizeof(opt_coeffSign)); memset(opt_coeffFlag, 0xCD, sizeof(opt_coeffFlag)); for (int i = 0; i < ITERS; i++) { int rand_scan_type = rand() % NUM_SCAN_TYPE; int rand_scan_size = rand() % NUM_SCAN_SIZE; int rand_numCoeff = 0; for (int j = 0; j < 1 << (2 * (rand_scan_size + 2)); j++) rand_numCoeff += (ref_src[i + j] != 0); // at least one coeff in transform block if (rand_numCoeff == 0) { ref_src[i + (1 << (2 * (rand_scan_size + 2))) - 1] = -1; rand_numCoeff = 1; } const int trSize = (1 << (rand_scan_size + 2)); const uint16_t* const scanTbl = g_scanOrder[rand_scan_type][rand_scan_size]; const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <= (MDCS_LOG2_MAX_SIZE - 2) ? 
rand_scan_type : SCAN_DIAG]; int ref_scanPos = ref(scanTbl, ref_src + i, ref_coeffSign, ref_coeffFlag, ref_coeffNum, rand_numCoeff, scanTblCG4x4, trSize); int opt_scanPos = (int)checked(opt, scanTbl, ref_src + i, opt_coeffSign, opt_coeffFlag, opt_coeffNum, rand_numCoeff, scanTblCG4x4, trSize); if (ref_scanPos != opt_scanPos) return false; for (int j = 0; rand_numCoeff; j++) { if (ref_coeffSign[j] != opt_coeffSign[j]) return false; if (ref_coeffFlag[j] != opt_coeffFlag[j]) return false; if (ref_coeffNum[j] != opt_coeffNum[j]) return false; rand_numCoeff -= ref_coeffNum[j]; } if (rand_numCoeff != 0) return false; reportfail(); } return true; } bool PixelHarness::check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt) { ALIGN_VAR_16(coeff_t, ref_src[4 * 32 + ITERS * 2]); memset(ref_src, 0, sizeof(ref_src)); // minus ITERS for keep probability to generate all zeros block for (int i = 0; i < 4 * 32 - ITERS; i++) { ref_src[i] = rand() & SHORT_MAX; } // extra test area all of Zeros for (int i = 0; i < ITERS; i++) { int rand_scan_type = rand() % NUM_SCAN_TYPE; int rand_scan_size = (rand() % NUM_SCAN_SIZE) + 2; const int trSize = (1 << rand_scan_size); coeff_t *rand_src = ref_src + i; const uint16_t* const scanTbl = g_scan4x4[rand_scan_type]; int j; for (j = 0; j < SCAN_SET_SIZE; j++) { const uint32_t idxY = j / MLS_CG_SIZE; const uint32_t idxX = j % MLS_CG_SIZE; if (rand_src[idxY * trSize + idxX]) break; } uint32_t ref_scanPos = ref(rand_src, trSize, scanTbl); uint32_t opt_scanPos = (int)checked(opt, rand_src, trSize, scanTbl); // specially case: all coeff group are zero if (j >= SCAN_SET_SIZE) { // all zero block the high 24-bits undefined if ((uint8_t)ref_scanPos != (uint8_t)opt_scanPos) return false; } else if (ref_scanPos != opt_scanPos) return false; reportfail(); } return true; } bool PixelHarness::check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt) { ALIGN_VAR_16(coeff_t, ref_src[32 * 32 + ITERS * 3]); ALIGN_VAR_32(uint16_t, ref_absCoeff[(1 << 
MLS_CG_SIZE)]); ALIGN_VAR_32(uint16_t, opt_absCoeff[(1 << MLS_CG_SIZE) + 4]); memset(ref_absCoeff, 0xCD, sizeof(ref_absCoeff)); memset(opt_absCoeff, 0xCD, sizeof(opt_absCoeff)); int totalCoeffs = 0; for (int i = 0; i < 32 * 32; i++) { ref_src[i] = rand() & SHORT_MAX; // more zero coeff if (ref_src[i] < SHORT_MAX * 2 / 3) ref_src[i] = 0; // more negtive if ((rand() % 10) < 8) ref_src[i] *= -1; totalCoeffs += (ref_src[i] != 0); } // extra test area all of 0x1234 for (int i = 0; i < ITERS * 3; i++) { ref_src[32 * 32 + i] = 0x1234; } // Safe check magic opt_absCoeff[(1 << MLS_CG_SIZE) + 0] = 0x0123; opt_absCoeff[(1 << MLS_CG_SIZE) + 1] = 0x4567; opt_absCoeff[(1 << MLS_CG_SIZE) + 2] = 0xBA98; opt_absCoeff[(1 << MLS_CG_SIZE) + 3] = 0xFEDC; // generate CABAC context table uint8_t m_contextState_ref[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA]; uint8_t m_contextState_opt[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA]; for (int k = 0; k < (OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA); k++) { m_contextState_ref[k] = (rand() % (125 - 2)) + 2; m_contextState_opt[k] = m_contextState_ref[k]; } uint8_t *const ref_baseCtx = m_contextState_ref; uint8_t *const opt_baseCtx = m_contextState_opt; for (int i = 0; i < ITERS * 2; i++) { int rand_scan_type = rand() % NUM_SCAN_TYPE; int rand_scanPosSigOff = rand() % 16; //rand_scanPosSigOff range is [1,15] int rand_patternSigCtx = rand() % 4; //range [0,3] int rand_scan_size = rand() % NUM_SCAN_SIZE; int offset; // the value have a exact range, details in CoeffNxN() if (rand_scan_size == 2) offset = 0; else if (rand_scan_size == 3) offset = 9; else offset = 12; const int trSize = (1 << (rand_scan_size + 2)); ALIGN_VAR_32(static const uint8_t, table_cnt[5][SCAN_SET_SIZE]) = { // patternSigCtx = 0 { 2, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, }, // patternSigCtx = 1 { 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, // patternSigCtx = 2 { 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, }, // patternSigCtx = 3 { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, }, // 4x4 { 0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8 } }; const uint8_t *rand_tabSigCtx = table_cnt[(rand_scan_size == 2) ? 4 : (uint32_t)rand_patternSigCtx]; const uint16_t* const scanTbl = g_scanOrder[rand_scan_type][rand_scan_size]; const uint16_t* const scanTblCG4x4 = g_scan4x4[rand_scan_size <= (MDCS_LOG2_MAX_SIZE - 2) ? rand_scan_type : SCAN_DIAG]; int rand_scanPosCG = rand() % (trSize * trSize / MLS_CG_BLK_SIZE); int subPosBase = rand_scanPosCG * MLS_CG_BLK_SIZE; int rand_numCoeff = 0; uint32_t scanFlagMask = 0; const int numNonZero = (rand_scanPosSigOff < (MLS_CG_BLK_SIZE - 1)) ? 1 : 0; for(int k = 0; k <= rand_scanPosSigOff; k++) { uint32_t pos = scanTbl[subPosBase + k]; coeff_t tmp_coeff = ref_src[i + pos]; if (tmp_coeff != 0) { rand_numCoeff++; } scanFlagMask = scanFlagMask * 2 + (tmp_coeff != 0); } // can't process all zeros block if (rand_numCoeff == 0) continue; const uint32_t blkPosBase = scanTbl[subPosBase]; uint32_t ref_sum = ref(scanTblCG4x4, &ref_src[blkPosBase + i], (intptr_t)trSize, ref_absCoeff + numNonZero, rand_tabSigCtx, scanFlagMask, (uint8_t*)ref_baseCtx, offset, rand_scanPosSigOff, subPosBase); uint32_t opt_sum = (uint32_t)checked(opt, scanTblCG4x4, &ref_src[blkPosBase + i], (intptr_t)trSize, opt_absCoeff + numNonZero, rand_tabSigCtx, scanFlagMask, (uint8_t*)opt_baseCtx, offset, rand_scanPosSigOff, subPosBase); if (ref_sum != opt_sum) return false; if (memcmp(ref_baseCtx, opt_baseCtx, sizeof(m_contextState_ref))) return false; // NOTE: just first rand_numCoeff valid, but I check full buffer for confirm no overwrite bug if (memcmp(ref_absCoeff, opt_absCoeff, rand_numCoeff * sizeof(ref_absCoeff[0]))) return false; // Check memory beyond-bound write if ( opt_absCoeff[(1 << MLS_CG_SIZE) + 1] != 0x4567 || opt_absCoeff[(1 << MLS_CG_SIZE) + 2] != 0xBA98 || opt_absCoeff[(1 << MLS_CG_SIZE) + 3] != 0xFEDC) return false; reportfail(); } return true; } bool PixelHarness::check_costCoeffRemain(costCoeffRemain_t ref, 
costCoeffRemain_t opt) { ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + ITERS]); for (int i = 0; i < (1 << MLS_CG_SIZE) + ITERS; i++) { absCoeff[i] = rand() & SHORT_MAX; // more coeff with value one if (absCoeff[i] < SHORT_MAX * 2 / 3) absCoeff[i] = 1; } for (int i = 0; i < ITERS; i++) { uint32_t firstC2Idx = 0; int k = 0; int numNonZero = rand() % 17; //can be random, range[1, 16] for (k = 0; k < C1FLAG_NUMBER; k++) { if (absCoeff[i + k] >= 2) { break; } } firstC2Idx = k; // it is index of exact first coeff that value more than 2 int ref_sum = ref(absCoeff + i, numNonZero, firstC2Idx); int opt_sum = (int)checked(opt, absCoeff + i, numNonZero, firstC2Idx); if (ref_sum != opt_sum) return false; } return true; } bool PixelHarness::check_costC1C2Flag(costC1C2Flag_t ref, costC1C2Flag_t opt) { ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE)]); // generate CABAC context table uint8_t ref_baseCtx[8]; uint8_t opt_baseCtx[8]; for (int k = 0; k < 8; k++) { ref_baseCtx[k] = opt_baseCtx[k] = (rand() % (125 - 2)) + 2; } for (int i = 0; i < ITERS; i++) { int rand_offset = rand() % 4; int numNonZero = 0; // generate test data, all are Absolute value and Aligned for (int k = 0; k < C1FLAG_NUMBER; k++) { int value = rand() & SHORT_MAX; // more coeff with value [0,2] if (value < SHORT_MAX * 1 / 3) value = 0; else if (value < SHORT_MAX * 2 / 3) value = 1; else if (value < SHORT_MAX * 3 / 4) value = 2; if (value) { absCoeff[numNonZero] = (uint16_t)value; numNonZero++; } } if (numNonZero == 0) { numNonZero = 1; absCoeff[0] = 1; } int ref_sum = ref(absCoeff, (intptr_t)numNonZero, ref_baseCtx, (intptr_t)rand_offset); int opt_sum = (int)checked(opt, absCoeff, (intptr_t)numNonZero, opt_baseCtx, (intptr_t)rand_offset); if (ref_sum != opt_sum) { ref_sum = ref(absCoeff, (intptr_t)numNonZero, ref_baseCtx, (intptr_t)rand_offset); opt_sum = opt(absCoeff, (intptr_t)numNonZero, opt_baseCtx, (intptr_t)rand_offset); return false; } } return true; } bool 
PixelHarness::check_pelFilterLumaStrong_H(pelFilterLumaStrong_t ref, pelFilterLumaStrong_t opt) { intptr_t srcStep = 1, offset = 64; int32_t tcP, tcQ, maskP, maskQ, tc; int j = 0; pixel pixel_test_buff1[TEST_CASES][BUFFSIZE]; for (int i = 0; i < TEST_CASES; i++) memcpy(pixel_test_buff1[i], pixel_test_buff[i], sizeof(pixel) * BUFFSIZE); for (int i = 0; i < ITERS; i++) { tc = rand() % PIXEL_MAX; maskP = (rand() % PIXEL_MAX) - 1; maskQ = (rand() % PIXEL_MAX) - 1; tcP = (tc & maskP); tcQ = (tc & maskQ); int index = rand() % 3; ref(pixel_test_buff[index] + 4 * offset + j, srcStep, offset, tcP, tcQ); checked(opt, pixel_test_buff1[index] + 4 * offset + j, srcStep, offset, tcP, tcQ); if (memcmp(pixel_test_buff[index], pixel_test_buff1[index], sizeof(pixel) * BUFFSIZE)) return false; reportfail() j += INCR; } return true; } bool PixelHarness::check_pelFilterLumaStrong_V(pelFilterLumaStrong_t ref, pelFilterLumaStrong_t opt) { intptr_t srcStep = 64, offset = 1; int32_t tcP, tcQ, maskP, maskQ, tc; int j = 0; pixel pixel_test_buff1[TEST_CASES][BUFFSIZE]; for (int i = 0; i < TEST_CASES; i++) memcpy(pixel_test_buff1[i], pixel_test_buff[i], sizeof(pixel) * BUFFSIZE); for (int i = 0; i < ITERS; i++) { tc = rand() % PIXEL_MAX; maskP = (rand() % PIXEL_MAX) - 1; maskQ = (rand() % PIXEL_MAX) - 1; tcP = (tc & maskP); tcQ = (tc & maskQ); int index = rand() % 3; ref(pixel_test_buff[index] + 4 + j, srcStep, offset, tcP, tcQ); checked(opt, pixel_test_buff1[index] + 4 + j, srcStep, offset, tcP, tcQ); if (memcmp(pixel_test_buff[index], pixel_test_buff1[index], sizeof(pixel) * BUFFSIZE)) return false; reportfail() j += INCR; } return true; } bool PixelHarness::check_pelFilterChroma_H(pelFilterChroma_t ref, pelFilterChroma_t opt) { intptr_t srcStep = 1, offset = 64; int32_t maskP, maskQ, tc; int j = 0; pixel pixel_test_buff1[TEST_CASES][BUFFSIZE]; for (int i = 0; i < TEST_CASES; i++) memcpy(pixel_test_buff1[i], pixel_test_buff[i], sizeof(pixel)* BUFFSIZE); for (int i = 0; i < ITERS; i++) { tc = 
rand() % PIXEL_MAX; maskP = (rand() % PIXEL_MAX) - 1; maskQ = (rand() % PIXEL_MAX) - 1; int index = rand() % 3; ref(pixel_test_buff[index] + 4 * offset + j, srcStep, offset, tc, maskP, maskQ); checked(opt, pixel_test_buff1[index] + 4 * offset + j, srcStep, offset, tc, maskP, maskQ); if (memcmp(pixel_test_buff[index], pixel_test_buff1[index], sizeof(pixel)* BUFFSIZE)) return false; reportfail() j += INCR; } return true; } bool PixelHarness::check_pelFilterChroma_V(pelFilterChroma_t ref, pelFilterChroma_t opt) { intptr_t srcStep = 64, offset = 1; int32_t maskP, maskQ, tc; int j = 0; pixel pixel_test_buff1[TEST_CASES][BUFFSIZE]; for (int i = 0; i < TEST_CASES; i++) memcpy(pixel_test_buff1[i], pixel_test_buff[i], sizeof(pixel)* BUFFSIZE); for (int i = 0; i < ITERS; i++) { tc = rand() % PIXEL_MAX; maskP = (rand() % PIXEL_MAX) - 1; maskQ = (rand() % PIXEL_MAX) - 1; int index = rand() % 3; ref(pixel_test_buff[index] + 4 + j, srcStep, offset, tc, maskP, maskQ); checked(opt, pixel_test_buff1[index] + 4 + j, srcStep, offset, tc, maskP, maskQ); if (memcmp(pixel_test_buff[index], pixel_test_buff1[index], sizeof(pixel)* BUFFSIZE)) return false; reportfail() j += INCR; } return true; } bool PixelHarness::check_integral_initv(integralv_t ref, integralv_t opt) { intptr_t srcStep = 64; int j = 0; uint32_t dst_ref[BUFFSIZE] = { 0 }; uint32_t dst_opt[BUFFSIZE] = { 0 }; for (int i = 0; i < 64; i++) { dst_ref[i] = pixel_test_buff[0][i]; dst_opt[i] = pixel_test_buff[0][i]; } for (int i = 0, k = 0; i < BUFFSIZE; i++) { if (i % 64 == 0) k++; dst_ref[i] = dst_ref[i % 64] + k; dst_opt[i] = dst_opt[i % 64] + k; } int padx = 4; int pady = 4; uint32_t *dst_ref_ptr = dst_ref + srcStep * pady + padx; uint32_t *dst_opt_ptr = dst_opt + srcStep * pady + padx; for (int i = 0; i < ITERS; i++) { ref(dst_ref_ptr, srcStep); checked(opt, dst_opt_ptr, srcStep); if (memcmp(dst_ref, dst_opt, sizeof(uint32_t) * BUFFSIZE)) return false; reportfail() j += INCR; } return true; } bool 
PixelHarness::check_integral_inith(integralh_t ref, integralh_t opt) { /* Since stride is always a multiple of 8 and data movement in AVX2 is 16 elements at a time for 8 bit pixel, we need * to check correctness for two cases: stride multiple of 16 and stride not a multiple of 16; fine for High bit depth * where data movement in AVX2 is 8 elements at a time */ intptr_t srcStep[2] = { 56, 64 }; int j = 0; uint32_t dst_ref[BUFFSIZE] = { 0 }; uint32_t dst_opt[BUFFSIZE] = { 0 }; int padx = 4; int pady = 4; for (int l = 0; l < 2; l++) { uint32_t *dst_ref_ptr = dst_ref + srcStep[l] * pady + padx; uint32_t *dst_opt_ptr = dst_opt + srcStep[l] * pady + padx; for (int k = 0; k < ITERS; k++) { ref(dst_ref_ptr, pixel_test_buff[0], srcStep[l]); checked(opt, dst_opt_ptr, pixel_test_buff[0], srcStep[l]); if (memcmp(dst_ref, dst_opt, sizeof(uint32_t) * BUFFSIZE)) return false; reportfail() j += INCR; } } return true; } bool PixelHarness::check_ssimDist(ssimDistortion_t ref, ssimDistortion_t opt) { uint32_t srcStride[5] = { 4, 8, 16, 32, 64 }; intptr_t dstStride[5] = { 4, 8, 16, 32, 64 }; int shift = X265_DEPTH - 8; uint64_t opt_dest1 = 0, ref_dest1 = 0, opt_dest2 = 0, ref_dest2 = 0; int j = 0; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; int k1 = rand() % 5, k2 = rand() % 5; ref(pixel_test_buff[index] + j, srcStride[k1], pixel_test_buff[index + 10] + j, dstStride[k2], &ref_dest1, shift, &ref_dest2); opt(pixel_test_buff[index] + j, srcStride[k1], pixel_test_buff[index + 10] + j, dstStride[k2], &opt_dest1, shift, &opt_dest2); if (opt_dest1 != ref_dest1 && opt_dest2 != ref_dest2) { return false; } reportfail() j += INCR; } return true; } bool PixelHarness::check_normFact(normFactor_t ref, normFactor_t opt, int block) { int shift = X265_DEPTH - 8; uint64_t opt_dest = 0, ref_dest = 0; int j = 0; int blockSize = 4 << block; for (int i = 0; i < ITERS; i++) { int index = i % TEST_CASES; ref(pixel_test_buff[index] + j, blockSize, shift, &ref_dest); 
opt(pixel_test_buff[index] + j, blockSize, shift, &opt_dest); if (opt_dest != ref_dest) { return false; } reportfail() j += INCR; } return true; } bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt) { if (opt.pu[part].satd) { if (!check_pixelcmp(ref.pu[part].satd, opt.pu[part].satd)) { printf("satd[%s]: failed!\n", lumaPartStr[part]); return false; } } if (opt.pu[part].sad) { if (!check_pixelcmp(ref.pu[part].sad, opt.pu[part].sad)) { printf("sad[%s]: failed!\n", lumaPartStr[part]); return false; } } if (opt.pu[part].sad_x3) { if (!check_pixelcmp_x3(ref.pu[part].sad_x3, opt.pu[part].sad_x3)) { printf("sad_x3[%s]: failed!\n", lumaPartStr[part]); return false; } } if (opt.pu[part].sad_x4) { if (!check_pixelcmp_x4(ref.pu[part].sad_x4, opt.pu[part].sad_x4)) { printf("sad_x4[%s]: failed!\n", lumaPartStr[part]); return false; } } if (opt.pu[part].pixelavg_pp[NONALIGNED]) { if (!check_pixelavg_pp(ref.pu[part].pixelavg_pp[NONALIGNED], opt.pu[part].pixelavg_pp[NONALIGNED])) { printf("pixelavg_pp[%s]: failed!\n", lumaPartStr[part]); return false; } } if (opt.pu[part].pixelavg_pp[ALIGNED]) { if (!check_pixelavg_pp_aligned(ref.pu[part].pixelavg_pp[ALIGNED], opt.pu[part].pixelavg_pp[ALIGNED])) { printf("pixelavg_pp_aligned[%s]: failed!\n", lumaPartStr[part]); return false; } } if (opt.pu[part].copy_pp) { if (!check_copy_pp(ref.pu[part].copy_pp, opt.pu[part].copy_pp)) { printf("copy_pp[%s] failed\n", lumaPartStr[part]); return false; } } if (opt.pu[part].addAvg[NONALIGNED]) { if (!check_addAvg(ref.pu[part].addAvg[NONALIGNED], opt.pu[part].addAvg[NONALIGNED])) { printf("addAvg[%s] failed\n", lumaPartStr[part]); return false; } } if (opt.pu[part].addAvg[ALIGNED]) { if (!check_addAvg_aligned(ref.pu[part].addAvg[ALIGNED], opt.pu[part].addAvg[ALIGNED])) { printf("addAvg_aligned[%s] failed\n", lumaPartStr[part]); return false; } } if (part < NUM_CU_SIZES) { if (opt.cu[part].sse_pp) { if (!check_pixel_sse(ref.cu[part].sse_pp, 
opt.cu[part].sse_pp)) { printf("sse_pp[%s]: failed!\n", lumaPartStr[part]); return false; } } if (opt.cu[part].sse_ss) { if (!check_pixel_sse_ss(ref.cu[part].sse_ss, opt.cu[part].sse_ss)) { printf("sse_ss[%s]: failed!\n", lumaPartStr[part]); return false; } } if (opt.cu[part].sub_ps) { if (!check_pixel_sub_ps(ref.cu[part].sub_ps, opt.cu[part].sub_ps)) { printf("sub_ps[%s] failed\n", lumaPartStr[part]); return false; } } if (opt.cu[part].add_ps[NONALIGNED]) { if (!check_pixel_add_ps(ref.cu[part].add_ps[NONALIGNED], opt.cu[part].add_ps[NONALIGNED])) { printf("add_ps[%s] failed\n", lumaPartStr[part]); return false; } } if (opt.cu[part].add_ps[ALIGNED]) { if (!check_pixel_add_ps_aligned(ref.cu[part].add_ps[ALIGNED], opt.cu[part].add_ps[ALIGNED])) { printf("add_ps_aligned[%s] failed\n", lumaPartStr[part]); return false; } } if (opt.cu[part].copy_ss) { if (!check_copy_ss(ref.cu[part].copy_ss, opt.cu[part].copy_ss)) { printf("copy_ss[%s] failed\n", lumaPartStr[part]); return false; } } if (opt.cu[part].copy_sp) { if (!check_copy_sp(ref.cu[part].copy_sp, opt.cu[part].copy_sp)) { printf("copy_sp[%s] failed\n", lumaPartStr[part]); return false; } } if (opt.cu[part].copy_ps) { if (!check_copy_ps(ref.cu[part].copy_ps, opt.cu[part].copy_ps)) { printf("copy_ps[%s] failed\n", lumaPartStr[part]); return false; } } } for (int i = 0; i < X265_CSP_COUNT; i++) { if (opt.chroma[i].pu[part].copy_pp) { if (!check_copy_pp(ref.chroma[i].pu[part].copy_pp, opt.chroma[i].pu[part].copy_pp)) { printf("chroma_copy_pp[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].pu[part].addAvg[NONALIGNED]) { if (!check_addAvg(ref.chroma[i].pu[part].addAvg[NONALIGNED], opt.chroma[i].pu[part].addAvg[NONALIGNED])) { printf("chroma_addAvg[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].pu[part].addAvg[ALIGNED]) { if (!check_addAvg_aligned(ref.chroma[i].pu[part].addAvg[ALIGNED], 
opt.chroma[i].pu[part].addAvg[ALIGNED])) { printf("chroma_addAvg_aligned[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].pu[part].satd) { if (!check_pixelcmp(ref.chroma[i].pu[part].satd, opt.chroma[i].pu[part].satd)) { printf("chroma_satd[%s][%s] failed!\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (part < NUM_CU_SIZES) { if (opt.chroma[i].cu[part].sse_pp) { if (!check_pixel_sse(ref.chroma[i].cu[part].sse_pp, opt.chroma[i].cu[part].sse_pp)) { printf("chroma_sse_pp[%s][%s]: failed!\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].cu[part].sub_ps) { if (!check_pixel_sub_ps(ref.chroma[i].cu[part].sub_ps, opt.chroma[i].cu[part].sub_ps)) { printf("chroma_sub_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].cu[part].add_ps[NONALIGNED]) { if (!check_pixel_add_ps(ref.chroma[i].cu[part].add_ps[NONALIGNED], opt.chroma[i].cu[part].add_ps[NONALIGNED])) { printf("chroma_add_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].cu[part].add_ps[ALIGNED]) { if (!check_pixel_add_ps_aligned(ref.chroma[i].cu[part].add_ps[ALIGNED], opt.chroma[i].cu[part].add_ps[ALIGNED])) { printf("chroma_add_ps_aligned[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].cu[part].copy_sp) { if (!check_copy_sp(ref.chroma[i].cu[part].copy_sp, opt.chroma[i].cu[part].copy_sp)) { printf("chroma_copy_sp[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].cu[part].copy_ps) { if (!check_copy_ps(ref.chroma[i].cu[part].copy_ps, opt.chroma[i].cu[part].copy_ps)) { printf("chroma_copy_ps[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].cu[part].copy_ss) { if (!check_copy_ss(ref.chroma[i].cu[part].copy_ss, 
opt.chroma[i].cu[part].copy_ss)) { printf("chroma_copy_ss[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } if (opt.chroma[i].cu[part].sa8d) { if (!check_pixelcmp(ref.chroma[i].cu[part].sa8d, opt.chroma[i].cu[part].sa8d)) { printf("chroma_sa8d[%s][%s] failed\n", x265_source_csp_names[i], chromaPartStr[i][part]); return false; } } } } return true; } bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt) { for (int size = 4; size <= 64; size *= 2) { int part = partitionFromSizes(size, size); // 2Nx2N if (!testPU(part, ref, opt)) return false; if (size > 4) { part = partitionFromSizes(size, size >> 1); // 2NxN if (!testPU(part, ref, opt)) return false; part = partitionFromSizes(size >> 1, size); // Nx2N if (!testPU(part, ref, opt)) return false; } if (size > 8) { // 4 AMP modes part = partitionFromSizes(size, size >> 2); if (!testPU(part, ref, opt)) return false; part = partitionFromSizes(size, 3 * (size >> 2)); if (!testPU(part, ref, opt)) return false; part = partitionFromSizes(size >> 2, size); if (!testPU(part, ref, opt)) return false; part = partitionFromSizes(3 * (size >> 2), size); if (!testPU(part, ref, opt)) return false; } } for (int i = 0; i < NUM_CU_SIZES; i++) { if (opt.cu[i].sa8d) { if (!check_pixelcmp(ref.cu[i].sa8d, opt.cu[i].sa8d)) { printf("sa8d[%dx%d]: failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].blockfill_s[NONALIGNED]) { if (!check_blockfill_s(ref.cu[i].blockfill_s[NONALIGNED], opt.cu[i].blockfill_s[NONALIGNED])) { printf("blockfill_s[%dx%d]: failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].blockfill_s[ALIGNED]) { if (!check_blockfill_s_aligned(ref.cu[i].blockfill_s[ALIGNED], opt.cu[i].blockfill_s[ALIGNED])) { printf("blockfill_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].var) { if (!check_pixel_var(ref.cu[i].var, opt.cu[i].var)) { printf("var[%dx%d] failed\n", 4 << i, 4 << i); return false; } } if 
(opt.cu[i].psy_cost_pp) { if (!check_psyCost_pp(ref.cu[i].psy_cost_pp, opt.cu[i].psy_cost_pp)) { printf("\npsy_cost_pp[%dx%d] failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].ssimDist) { if (!check_ssimDist(ref.cu[i].ssimDist, opt.cu[i].ssimDist)) { printf("\nssimDist[%dx%d] failed!\n", 4 << i, 4 << i); return false; } } if (i < BLOCK_64x64) { /* TU only primitives */ if (opt.cu[i].calcresidual[NONALIGNED]) { if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED])) { printf("calcresidual width: %d failed!\n", 4 << i); return false; } } if (opt.cu[i].calcresidual[ALIGNED]) { if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED])) { printf("calcresidual_aligned width: %d failed!\n", 4 << i); return false; } } if (opt.cu[i].transpose) { if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose)) { printf("transpose[%dx%d] failed\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].ssd_s[NONALIGNED]) { if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED])) { printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].ssd_s[ALIGNED]) { if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED])) { printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].copy_cnt) { if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt)) { printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].cpy2Dto1D_shl) { if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl)) { printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].cpy2Dto1D_shr) { if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr)) { printf("cpy2Dto1D_shr failed!\n"); return false; } } if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED]) { if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], 
opt.cu[i].cpy1Dto2D_shl[NONALIGNED])) { printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].cpy1Dto2D_shl[ALIGNED]) { if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED])) { printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i); return false; } } if (opt.cu[i].cpy1Dto2D_shr) { if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr)) { printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i); return false; } } } } if (opt.weight_pp) { if (!check_weightp(ref.weight_pp, opt.weight_pp)) { printf("Weighted Prediction (pixel) failed!\n"); return false; } } if (opt.weight_sp) { if (!check_weightp(ref.weight_sp, opt.weight_sp)) { printf("Weighted Prediction (short) failed!\n"); return false; } } if (opt.frameInitLowres) { if (!check_downscale_t(ref.frameInitLowres, opt.frameInitLowres)) { printf("downscale failed!\n"); return false; } } if (opt.frameSubSampleLuma) { if (!check_downscaleluma_t(ref.frameSubSampleLuma, opt.frameSubSampleLuma)) { printf("SubSample Luma failed!\n"); return false; } } if (opt.scale1D_128to64[NONALIGNED]) { if (!check_scale1D_pp(ref.scale1D_128to64[NONALIGNED], opt.scale1D_128to64[NONALIGNED])) { printf("scale1D_128to64 failed!\n"); return false; } } if (opt.scale1D_128to64[ALIGNED]) { if (!check_scale1D_pp_aligned(ref.scale1D_128to64[ALIGNED], opt.scale1D_128to64[ALIGNED])) { printf("scale1D_128to64_aligned failed!\n"); return false; } } if (opt.scale2D_64to32) { if (!check_scale2D_pp(ref.scale2D_64to32, opt.scale2D_64to32)) { printf("scale2D_64to32 failed!\n"); return false; } } if (opt.ssim_4x4x2_core) { if (!check_ssim_4x4x2_core(ref.ssim_4x4x2_core, opt.ssim_4x4x2_core)) { printf("ssim_4x4x2_core failed!\n"); return false; } } if (opt.ssim_end_4) { if (!check_ssim_end(ref.ssim_end_4, opt.ssim_end_4)) { printf("ssim_end_4 failed!\n"); return false; } } if (opt.sign) { if (!check_calSign(ref.sign, opt.sign)) { printf("calSign 
failed\n"); return false; } } if (opt.saoCuOrgE0) { if (!check_saoCuOrgE0_t(ref.saoCuOrgE0, opt.saoCuOrgE0)) { printf("SAO_EO_0 failed\n"); return false; } } if (opt.saoCuOrgE1) { if (!check_saoCuOrgE1_t(ref.saoCuOrgE1, opt.saoCuOrgE1)) { printf("SAO_EO_1 failed\n"); return false; } } if (opt.saoCuOrgE1_2Rows) { if (!check_saoCuOrgE1_t(ref.saoCuOrgE1_2Rows, opt.saoCuOrgE1_2Rows)) { printf("SAO_EO_1_2Rows failed\n"); return false; } } if (opt.saoCuOrgE2[0] || opt.saoCuOrgE2[1]) { saoCuOrgE2_t ref1[] = { ref.saoCuOrgE2[0], ref.saoCuOrgE2[1] }; saoCuOrgE2_t opt1[] = { opt.saoCuOrgE2[0], opt.saoCuOrgE2[1] }; if (!check_saoCuOrgE2_t(ref1, opt1)) { printf("SAO_EO_2[0] && SAO_EO_2[1] failed\n"); return false; } } if (opt.saoCuOrgE3[0]) { if (!check_saoCuOrgE3_t(ref.saoCuOrgE3[0], opt.saoCuOrgE3[0])) { printf("SAO_EO_3[0] failed\n"); return false; } } if (opt.saoCuOrgE3[1]) { if (!check_saoCuOrgE3_32_t(ref.saoCuOrgE3[1], opt.saoCuOrgE3[1])) { printf("SAO_EO_3[1] failed\n"); return false; } } if (opt.saoCuOrgB0) { if (!check_saoCuOrgB0_t(ref.saoCuOrgB0, opt.saoCuOrgB0)) { printf("SAO_BO_0 failed\n"); return false; } } if (opt.saoCuStatsBO) { if (!check_saoCuStatsBO_t(ref.saoCuStatsBO, opt.saoCuStatsBO)) { printf("saoCuStatsBO failed\n"); return false; } } if (opt.saoCuStatsE0) { if (!check_saoCuStatsE0_t(ref.saoCuStatsE0, opt.saoCuStatsE0)) { printf("saoCuStatsE0 failed\n"); return false; } } if (opt.saoCuStatsE1) { if (!check_saoCuStatsE1_t(ref.saoCuStatsE1, opt.saoCuStatsE1)) { printf("saoCuStatsE1 failed\n"); return false; } } if (opt.saoCuStatsE2) { if (!check_saoCuStatsE2_t(ref.saoCuStatsE2, opt.saoCuStatsE2)) { printf("saoCuStatsE2 failed\n"); return false; } } if (opt.saoCuStatsE3) { if (!check_saoCuStatsE3_t(ref.saoCuStatsE3, opt.saoCuStatsE3)) { printf("saoCuStatsE3 failed\n"); return false; } } if (opt.planecopy_sp) { if (!check_planecopy_sp(ref.planecopy_sp, opt.planecopy_sp)) { printf("planecopy_sp failed\n"); return false; } } if (opt.planecopy_sp_shl) { if 
(!check_planecopy_sp(ref.planecopy_sp_shl, opt.planecopy_sp_shl)) { printf("planecopy_sp_shl failed\n"); return false; } } if (opt.planecopy_cp) { if (!check_planecopy_cp(ref.planecopy_cp, opt.planecopy_cp)) { printf("planecopy_cp failed\n"); return false; } } if (opt.propagateCost) { if (!check_cutree_propagate_cost(ref.propagateCost, opt.propagateCost)) { printf("propagateCost failed\n"); return false; } } if (opt.fix8Pack) { if (!check_cutree_fix8_pack(ref.fix8Pack, opt.fix8Pack)) { printf("cuTreeFix8Pack failed\n"); return false; } } if (opt.fix8Unpack) { if (!check_cutree_fix8_unpack(ref.fix8Unpack, opt.fix8Unpack)) { printf("cuTreeFix8Unpack failed\n"); return false; } } if (opt.scanPosLast) { if (!check_scanPosLast(ref.scanPosLast, opt.scanPosLast)) { printf("scanPosLast failed!\n"); return false; } } if (opt.findPosFirstLast) { if (!check_findPosFirstLast(ref.findPosFirstLast, opt.findPosFirstLast)) { printf("findPosFirstLast failed!\n"); return false; } } if (opt.costCoeffNxN) { if (!check_costCoeffNxN(ref.costCoeffNxN, opt.costCoeffNxN)) { printf("costCoeffNxN failed!\n"); return false; } } if (opt.costCoeffRemain) { if (!check_costCoeffRemain(ref.costCoeffRemain, opt.costCoeffRemain)) { printf("costCoeffRemain failed!\n"); return false; } } if (opt.costC1C2Flag) { if (!check_costC1C2Flag(ref.costC1C2Flag, opt.costC1C2Flag)) { printf("costC1C2Flag failed!\n"); return false; } } if (opt.pelFilterLumaStrong[0]) { if (!check_pelFilterLumaStrong_V(ref.pelFilterLumaStrong[0], opt.pelFilterLumaStrong[0])) { printf("pelFilterLumaStrong Vertical failed!\n"); return false; } } if (opt.pelFilterLumaStrong[1]) { if (!check_pelFilterLumaStrong_H(ref.pelFilterLumaStrong[1], opt.pelFilterLumaStrong[1])) { printf("pelFilterLumaStrong Horizontal failed!\n"); return false; } } if (opt.pelFilterChroma[0]) { if (!check_pelFilterChroma_V(ref.pelFilterChroma[0], opt.pelFilterChroma[0])) { printf("pelFilterChroma Vertical failed!\n"); return false; } } if 
(opt.pelFilterChroma[1]) { if (!check_pelFilterChroma_H(ref.pelFilterChroma[1], opt.pelFilterChroma[1])) { printf("pelFilterChroma Horizontal failed!\n"); return false; } } for (int k = 0; k < NUM_INTEGRAL_SIZE; k++) { if (opt.integral_initv[k] && !check_integral_initv(ref.integral_initv[k], opt.integral_initv[k])) { switch (k) { case 0: printf("Integral4v failed!\n"); break; case 1: printf("Integral8v failed!\n"); break; case 2: printf("Integral12v failed!\n"); break; case 3: printf("Integral16v failed!\n"); break; case 4: printf("Integral24v failed!\n"); break; case 5: printf("Integral32v failed!\n"); break; } return false; } } for (int k = 0; k < NUM_INTEGRAL_SIZE; k++) { if (opt.integral_inith[k] && !check_integral_inith(ref.integral_inith[k], opt.integral_inith[k])) { switch (k) { case 0: printf("Integral4h failed!\n"); break; case 1: printf("Integral8h failed!\n"); break; case 2: printf("Integral12h failed!\n"); break; case 3: printf("Integral16h failed!\n"); break; case 4: printf("Integral24h failed!\n"); break; case 5: printf("Integral32h failed!\n"); break; } return false; } } for (int i = BLOCK_8x8; i < NUM_CU_SIZES; i++) { if (opt.cu[i].normFact) { if (!check_normFact(ref.cu[i].normFact, opt.cu[i].normFact, i)) { printf("\nnormFact[%dx%d] failed!\n", 4 << i, 4 << i); return false; } } } return true; } void PixelHarness::measurePartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt) { ALIGN_VAR_16(int, cres[16]); pixel *fref = pbuf2 + 2 * INCR; char header[128]; #define HEADER(str, ...) 
sprintf(header, str, __VA_ARGS__); printf("%22s", header); if (opt.pu[part].satd) { HEADER("satd[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.pu[part].satd, ref.pu[part].satd, pbuf1, STRIDE, fref, STRIDE); } if (opt.pu[part].pixelavg_pp[NONALIGNED]) { HEADER("avg_pp[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.pu[part].pixelavg_pp[NONALIGNED], ref.pu[part].pixelavg_pp[NONALIGNED], pbuf1, STRIDE, pbuf2, STRIDE, pbuf3, STRIDE, 32); } if (opt.pu[part].pixelavg_pp[ALIGNED]) { HEADER("avg_pp_aligned[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.pu[part].pixelavg_pp[ALIGNED], ref.pu[part].pixelavg_pp[ALIGNED], pbuf1, STRIDE, pbuf2, STRIDE, pbuf3, STRIDE, 32); } if (opt.pu[part].sad) { HEADER("sad[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.pu[part].sad, ref.pu[part].sad, pbuf1, STRIDE, fref, STRIDE); } if (opt.pu[part].sad_x3) { HEADER("sad_x3[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.pu[part].sad_x3, ref.pu[part].sad_x3, pbuf1, fref, fref + 1, fref - 1, FENC_STRIDE + 5, &cres[0]); } if (opt.pu[part].sad_x4) { HEADER("sad_x4[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.pu[part].sad_x4, ref.pu[part].sad_x4, pbuf1, fref, fref + 1, fref - 1, fref - INCR, FENC_STRIDE + 5, &cres[0]); } if (opt.pu[part].copy_pp) { HEADER("copy_pp[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.pu[part].copy_pp, ref.pu[part].copy_pp, pbuf1, 64, pbuf2, 64); } if (opt.pu[part].addAvg[NONALIGNED]) { HEADER("addAvg[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.pu[part].addAvg[NONALIGNED], ref.pu[part].addAvg[NONALIGNED], sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE); } if (opt.pu[part].addAvg[ALIGNED]) { HEADER("addAvg_aligned[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.pu[part].addAvg[ALIGNED], ref.pu[part].addAvg[ALIGNED], sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE); } if (part < NUM_CU_SIZES) { if (opt.cu[part].sse_pp) { HEADER("sse_pp[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.cu[part].sse_pp, ref.cu[part].sse_pp, pbuf1, STRIDE, fref, STRIDE); } if (opt.cu[part].sse_ss) { HEADER("sse_ss[%s]", 
lumaPartStr[part]); REPORT_SPEEDUP(opt.cu[part].sse_ss, ref.cu[part].sse_ss, (int16_t*)pbuf1, STRIDE, (int16_t*)fref, STRIDE); } if (opt.cu[part].sub_ps) { HEADER("sub_ps[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.cu[part].sub_ps, ref.cu[part].sub_ps, (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE); } if (opt.cu[part].add_ps[NONALIGNED]) { HEADER("add_ps[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.cu[part].add_ps[NONALIGNED], ref.cu[part].add_ps[NONALIGNED], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE); } if (opt.cu[part].add_ps[ALIGNED]) { HEADER("add_ps_aligned[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.cu[part].add_ps[ALIGNED], ref.cu[part].add_ps[ALIGNED], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE); } if (opt.cu[part].copy_ss) { HEADER("copy_ss[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.cu[part].copy_ss, ref.cu[part].copy_ss, sbuf1, 128, sbuf2, 128); } if (opt.cu[part].copy_sp) { HEADER("copy_sp[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.cu[part].copy_sp, ref.cu[part].copy_sp, pbuf1, 64, sbuf3, 128); } if (opt.cu[part].copy_ps) { HEADER("copy_ps[%s]", lumaPartStr[part]); REPORT_SPEEDUP(opt.cu[part].copy_ps, ref.cu[part].copy_ps, sbuf1, 128, pbuf1, 64); } } for (int i = 0; i < X265_CSP_COUNT; i++) { if (opt.chroma[i].pu[part].copy_pp) { HEADER("[%s] copy_pp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].pu[part].copy_pp, ref.chroma[i].pu[part].copy_pp, pbuf1, 64, pbuf2, 128); } if (opt.chroma[i].pu[part].addAvg[NONALIGNED]) { HEADER("[%s] addAvg[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].pu[part].addAvg[NONALIGNED], ref.chroma[i].pu[part].addAvg[NONALIGNED], sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE); } if (opt.chroma[i].pu[part].addAvg[ALIGNED]) { HEADER("[%s] addAvg_aligned[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].pu[part].addAvg[ALIGNED], ref.chroma[i].pu[part].addAvg[ALIGNED], sbuf1, sbuf2, pbuf1, 
STRIDE, STRIDE, STRIDE); } if (opt.chroma[i].pu[part].satd) { HEADER("[%s] satd[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].pu[part].satd, ref.chroma[i].pu[part].satd, pbuf1, STRIDE, fref, STRIDE); } if (part < NUM_CU_SIZES) { if (opt.chroma[i].cu[part].copy_ss) { HEADER("[%s] copy_ss[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].cu[part].copy_ss, ref.chroma[i].cu[part].copy_ss, sbuf1, 64, sbuf2, 128); } if (opt.chroma[i].cu[part].copy_ps) { HEADER("[%s] copy_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].cu[part].copy_ps, ref.chroma[i].cu[part].copy_ps, sbuf1, 64, pbuf1, 128); } if (opt.chroma[i].cu[part].copy_sp) { HEADER("[%s] copy_sp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].cu[part].copy_sp, ref.chroma[i].cu[part].copy_sp, pbuf1, 64, sbuf3, 128); } if (opt.chroma[i].cu[part].sse_pp) { HEADER("[%s] sse_pp[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].cu[part].sse_pp, ref.chroma[i].cu[part].sse_pp, pbuf1, STRIDE, fref, STRIDE); } if (opt.chroma[i].cu[part].sub_ps) { HEADER("[%s] sub_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].cu[part].sub_ps, ref.chroma[i].cu[part].sub_ps, (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE); } if (opt.chroma[i].cu[part].add_ps[NONALIGNED]) { HEADER("[%s] add_ps[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].cu[part].add_ps[NONALIGNED], ref.chroma[i].cu[part].add_ps[NONALIGNED], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE); } if (opt.chroma[i].cu[part].add_ps[ALIGNED]) { HEADER("[%s] add_ps_aligned[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].cu[part].add_ps[ALIGNED], ref.chroma[i].cu[part].add_ps[ALIGNED], pbuf1, FENC_STRIDE, pbuf2, sbuf1, STRIDE, STRIDE); } if 
(opt.chroma[i].cu[part].sa8d) { HEADER("[%s] sa8d[%s]", x265_source_csp_names[i], chromaPartStr[i][part]); REPORT_SPEEDUP(opt.chroma[i].cu[part].sa8d, ref.chroma[i].cu[part].sa8d, pbuf1, STRIDE, pbuf2, STRIDE); } } } #undef HEADER } void PixelHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt) { char header[128]; #define HEADER(str, ...) sprintf(header, str, __VA_ARGS__); printf("%22s", header); #define HEADER0(str) printf("%22s", str); for (int size = 4; size <= 64; size *= 2) { int part = partitionFromSizes(size, size); // 2Nx2N measurePartition(part, ref, opt); if (size > 4) { part = partitionFromSizes(size, size >> 1); // 2NxN measurePartition(part, ref, opt); part = partitionFromSizes(size >> 1, size); // Nx2N measurePartition(part, ref, opt); } if (size > 8) { // 4 AMP modes part = partitionFromSizes(size, size >> 2); measurePartition(part, ref, opt); part = partitionFromSizes(size, 3 * (size >> 2)); measurePartition(part, ref, opt); part = partitionFromSizes(size >> 2, size); measurePartition(part, ref, opt); part = partitionFromSizes(3 * (size >> 2), size); measurePartition(part, ref, opt); } } for (int i = 0; i < NUM_CU_SIZES; i++) { if ((i <= BLOCK_32x32) && opt.cu[i].ssd_s[NONALIGNED]) { HEADER("ssd_s[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].ssd_s[NONALIGNED], ref.cu[i].ssd_s[NONALIGNED], sbuf1, STRIDE); } if ((i <= BLOCK_32x32) && opt.cu[i].ssd_s[ALIGNED]) { HEADER("ssd_s_aligned[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].ssd_s[ALIGNED], ref.cu[i].ssd_s[ALIGNED], sbuf1, STRIDE); } if (opt.cu[i].sa8d) { HEADER("sa8d[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].sa8d, ref.cu[i].sa8d, pbuf1, STRIDE, pbuf2, STRIDE); } if (opt.cu[i].calcresidual[NONALIGNED]) { HEADER("residual[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].calcresidual[NONALIGNED], ref.cu[i].calcresidual[NONALIGNED], pbuf1, pbuf2, sbuf1, 64); } if (opt.cu[i].calcresidual[ALIGNED]) { HEADER("residual_aligned[%dx%d]", 4 << i, 4 << i); 
REPORT_SPEEDUP(opt.cu[i].calcresidual[ALIGNED], ref.cu[i].calcresidual[ALIGNED], pbuf1, pbuf2, sbuf1, 64); } if (opt.cu[i].blockfill_s[NONALIGNED]) { HEADER("blkfill[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].blockfill_s[NONALIGNED], ref.cu[i].blockfill_s[NONALIGNED], sbuf1, 64, SHORT_MAX); } if (opt.cu[i].blockfill_s[ALIGNED]) { HEADER("blkfill_aligned[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].blockfill_s[ALIGNED], ref.cu[i].blockfill_s[ALIGNED], sbuf1, 64, SHORT_MAX); } if (opt.cu[i].transpose) { HEADER("transpose[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].transpose, ref.cu[i].transpose, pbuf1, pbuf2, STRIDE); } if (opt.cu[i].var) { HEADER("var[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].var, ref.cu[i].var, pbuf1, STRIDE); } if ((i < BLOCK_64x64) && opt.cu[i].cpy2Dto1D_shl) { HEADER("cpy2Dto1D_shl[%dx%d]", 4 << i, 4 << i); const int shift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - (i + 2); REPORT_SPEEDUP(opt.cu[i].cpy2Dto1D_shl, ref.cu[i].cpy2Dto1D_shl, sbuf1, sbuf2, STRIDE, X265_MAX(0, shift)); } if ((i < BLOCK_64x64) && opt.cu[i].cpy2Dto1D_shr) { HEADER("cpy2Dto1D_shr[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].cpy2Dto1D_shr, ref.cu[i].cpy2Dto1D_shr, sbuf1, sbuf2, STRIDE, 3); } if ((i < BLOCK_64x64) && opt.cu[i].cpy1Dto2D_shl[NONALIGNED]) { HEADER("cpy1Dto2D_shl[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].cpy1Dto2D_shl[NONALIGNED], ref.cu[i].cpy1Dto2D_shl[NONALIGNED], sbuf1, sbuf2, STRIDE, 64); } if ((i < BLOCK_64x64) && opt.cu[i].cpy1Dto2D_shl[ALIGNED]) { HEADER("cpy1Dto2D_shl_aligned[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].cpy1Dto2D_shl[ALIGNED], ref.cu[i].cpy1Dto2D_shl[ALIGNED], sbuf1, sbuf2, STRIDE, 64); } if ((i < BLOCK_64x64) && opt.cu[i].cpy1Dto2D_shr) { HEADER("cpy1Dto2D_shr[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].cpy1Dto2D_shr, ref.cu[i].cpy1Dto2D_shr, sbuf1, sbuf2, STRIDE, 64); } if ((i < BLOCK_64x64) && opt.cu[i].copy_cnt) { HEADER("copy_cnt[%dx%d]", 4 << i, 4 << i); 
REPORT_SPEEDUP(opt.cu[i].copy_cnt, ref.cu[i].copy_cnt, sbuf1, sbuf2, STRIDE); } if (opt.cu[i].psy_cost_pp) { HEADER("psy_cost_pp[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].psy_cost_pp, ref.cu[i].psy_cost_pp, pbuf1, STRIDE, pbuf2, STRIDE); } if (opt.cu[i].ssimDist) { uint64_t dst1 = 0, dst2 = 0; int shift = X265_DEPTH - 8; printf("ssimDist[%dx%d]", 4 << i, 4 << i); REPORT_SPEEDUP(opt.cu[i].ssimDist, ref.cu[i].ssimDist, pixel_test_buff[0], 32, pixel_test_buff[5], 64, &dst1, shift, &dst2); } } if (opt.weight_pp) { HEADER0("weight_pp"); REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 32, 32, 128, 1 << 9, 10, 100); } if (opt.weight_sp) { HEADER0("weight_sp"); REPORT_SPEEDUP(opt.weight_sp, ref.weight_sp, (int16_t*)sbuf1, pbuf1, 64, 64, 32, 32, 128, 1 << 9, 10, 100); } if (opt.frameInitLowres) { HEADER0("downscale"); REPORT_SPEEDUP(opt.frameInitLowres, ref.frameInitLowres, pbuf2, pbuf1, pbuf2, pbuf3, pbuf4, 64, 64, 64, 64); } if (opt.frameSubSampleLuma) { HEADER0("downscaleluma"); REPORT_SPEEDUP(opt.frameSubSampleLuma, ref.frameSubSampleLuma, pbuf2, pbuf1, 64, 64, 64, 64); } if (opt.scale1D_128to64[NONALIGNED]) { HEADER0("scale1D_128to64"); REPORT_SPEEDUP(opt.scale1D_128to64[NONALIGNED], ref.scale1D_128to64[NONALIGNED], pbuf2, pbuf1); } if (opt.scale1D_128to64[ALIGNED]) { HEADER0("scale1D_128to64_aligned"); REPORT_SPEEDUP(opt.scale1D_128to64[ALIGNED], ref.scale1D_128to64[ALIGNED], pbuf2, pbuf1); } if (opt.scale2D_64to32) { HEADER0("scale2D_64to32"); REPORT_SPEEDUP(opt.scale2D_64to32, ref.scale2D_64to32, pbuf2, pbuf1, 64); } if (opt.ssim_4x4x2_core) { HEADER0("ssim_4x4x2_core"); REPORT_SPEEDUP(opt.ssim_4x4x2_core, ref.ssim_4x4x2_core, pbuf1, 64, pbuf2, 64, (int(*)[4])sbuf1); } if (opt.ssim_end_4) { HEADER0("ssim_end_4"); REPORT_SPEEDUP(opt.ssim_end_4, ref.ssim_end_4, (int(*)[4])pbuf2, (int(*)[4])pbuf1, 4); } if (opt.sign) { HEADER0("calSign"); REPORT_SPEEDUP(opt.sign, ref.sign, psbuf1, pbuf1, pbuf2, 64); } if (opt.saoCuOrgE0) { HEADER0("SAO_EO_0"); 
REPORT_SPEEDUP(opt.saoCuOrgE0, ref.saoCuOrgE0, pbuf1, psbuf1, 64, psbuf2, 64); } if (opt.saoCuOrgE1) { HEADER0("SAO_EO_1"); REPORT_SPEEDUP(opt.saoCuOrgE1, ref.saoCuOrgE1, pbuf1, psbuf2, psbuf1, 64, 64); } if (opt.saoCuOrgE1_2Rows) { HEADER0("SAO_EO_1_2Rows"); REPORT_SPEEDUP(opt.saoCuOrgE1_2Rows, ref.saoCuOrgE1_2Rows, pbuf1, psbuf2, psbuf1, 64, 64); } if (opt.saoCuOrgE2[0]) { HEADER0("SAO_EO_2[0]"); REPORT_SPEEDUP(opt.saoCuOrgE2[0], ref.saoCuOrgE2[0], pbuf1, psbuf1, psbuf2, psbuf3, 16, 64); } if (opt.saoCuOrgE2[1]) { HEADER0("SAO_EO_2[1]"); REPORT_SPEEDUP(opt.saoCuOrgE2[1], ref.saoCuOrgE2[1], pbuf1, psbuf1, psbuf2, psbuf3, 64, 64); } if (opt.saoCuOrgE3[0]) { HEADER0("SAO_EO_3[0]"); REPORT_SPEEDUP(opt.saoCuOrgE3[0], ref.saoCuOrgE3[0], pbuf1, psbuf2, psbuf1, 64, 0, 16); } if (opt.saoCuOrgE3[1]) { HEADER0("SAO_EO_3[1]"); REPORT_SPEEDUP(opt.saoCuOrgE3[1], ref.saoCuOrgE3[1], pbuf1, psbuf2, psbuf1, 64, 0, 64); } if (opt.saoCuOrgB0) { HEADER0("SAO_BO_0"); REPORT_SPEEDUP(opt.saoCuOrgB0, ref.saoCuOrgB0, pbuf1, psbuf1, 64, 64, 64); } if (opt.saoCuStatsBO) { int32_t stats[33], count[33]; HEADER0("saoCuStatsBO"); REPORT_SPEEDUP(opt.saoCuStatsBO, ref.saoCuStatsBO, sbuf2, pbuf3, 64, 60, 61, stats, count); } if (opt.saoCuStatsE0) { int32_t stats[33], count[33]; HEADER0("saoCuStatsE0"); REPORT_SPEEDUP(opt.saoCuStatsE0, ref.saoCuStatsE0, sbuf2, pbuf3, 64, 60, 61, stats, count); } if (opt.saoCuStatsE1) { int32_t stats[5], count[5]; int8_t upBuff1[MAX_CU_SIZE + 2]; memset(upBuff1, 1, sizeof(upBuff1)); HEADER0("saoCuStatsE1"); REPORT_SPEEDUP(opt.saoCuStatsE1, ref.saoCuStatsE1, sbuf2, pbuf3, 64, upBuff1 + 1,60, 61, stats, count); } if (opt.saoCuStatsE2) { int32_t stats[5], count[5]; int8_t upBuff1[MAX_CU_SIZE + 2]; int8_t upBufft[MAX_CU_SIZE + 2]; memset(upBuff1, 1, sizeof(upBuff1)); memset(upBufft, -1, sizeof(upBufft)); HEADER0("saoCuStatsE2"); REPORT_SPEEDUP(opt.saoCuStatsE2, ref.saoCuStatsE2, sbuf2, pbuf3, 64, upBuff1 + 1, upBufft + 1, 60, 61, stats, count); } if (opt.saoCuStatsE3) { 
int8_t upBuff1[MAX_CU_SIZE + 2]; int32_t stats[5], count[5]; memset(upBuff1, 1, sizeof(upBuff1)); HEADER0("saoCuStatsE3"); REPORT_SPEEDUP(opt.saoCuStatsE3, ref.saoCuStatsE3, sbuf2, pbuf3, 64, upBuff1 + 1, 60, 61, stats, count); } if (opt.planecopy_sp) { HEADER0("planecopy_sp"); REPORT_SPEEDUP(opt.planecopy_sp, ref.planecopy_sp, ushort_test_buff[0], 64, pbuf1, 64, 64, 64, 8, 255); } if (opt.planecopy_cp) { HEADER0("planecopy_cp"); REPORT_SPEEDUP(opt.planecopy_cp, ref.planecopy_cp, uchar_test_buff[0], 64, pbuf1, 64, 64, 64, 2); } if (opt.propagateCost) { HEADER0("propagateCost"); REPORT_SPEEDUP(opt.propagateCost, ref.propagateCost, ibuf1, ushort_test_buff[0], int_test_buff[0], ushort_test_buff[0], int_test_buff[0], double_test_buff[0], 80); } if (opt.fix8Pack) { HEADER0("cuTreeFix8Pack"); REPORT_SPEEDUP(opt.fix8Pack, ref.fix8Pack, ushort_test_buff[0], double_test_buff[0], 390); } if (opt.fix8Unpack) { HEADER0("cuTreeFix8Unpack"); REPORT_SPEEDUP(opt.fix8Unpack, ref.fix8Unpack, double_test_buff[0], ushort_test_buff[0], 390); } if (opt.scanPosLast) { HEADER0("scanPosLast"); coeff_t coefBuf[32 * 32]; memset(coefBuf, 0, sizeof(coefBuf)); memset(coefBuf + 32 * 31, 1, 32 * sizeof(coeff_t)); REPORT_SPEEDUP(opt.scanPosLast, ref.scanPosLast, g_scanOrder[SCAN_DIAG][NUM_SCAN_SIZE - 1], coefBuf, (uint16_t*)sbuf1, (uint16_t*)sbuf2, (uint8_t*)psbuf1, 32, g_scan4x4[SCAN_DIAG], 32); } if (opt.findPosFirstLast) { HEADER0("findPosFirstLast"); coeff_t coefBuf[32 * MLS_CG_SIZE]; memset(coefBuf, 0, sizeof(coefBuf)); // every CG can't be all zeros! 
coefBuf[3 + 0 * 32] = 0x0BAD; coefBuf[3 + 1 * 32] = 0x0BAD; coefBuf[3 + 2 * 32] = 0x0BAD; coefBuf[3 + 3 * 32] = 0x0BAD; REPORT_SPEEDUP(opt.findPosFirstLast, ref.findPosFirstLast, coefBuf, 32, g_scan4x4[SCAN_DIAG]); } if (opt.costCoeffNxN) { HEADER0("costCoeffNxN"); coeff_t coefBuf[32 * 32]; uint16_t tmpOut[16]; memset(coefBuf, 1, sizeof(coefBuf)); ALIGN_VAR_32(static uint8_t const, ctxSig[]) = { 0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8 }; uint8_t ctx[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA]; memset(ctx, 120, sizeof(ctx)); REPORT_SPEEDUP(opt.costCoeffNxN, ref.costCoeffNxN, g_scan4x4[SCAN_DIAG], coefBuf, 32, tmpOut, ctxSig, 0xFFFF, ctx, 1, 15, 32); } if (opt.costCoeffRemain) { HEADER0("costCoeffRemain"); uint16_t abscoefBuf[32 * 32]; memset(abscoefBuf, 0, sizeof(abscoefBuf)); memset(abscoefBuf + 32 * 31, 1, 32 * sizeof(uint16_t)); REPORT_SPEEDUP(opt.costCoeffRemain, ref.costCoeffRemain, abscoefBuf, 16, 3); } if (opt.costC1C2Flag) { HEADER0("costC1C2Flag"); ALIGN_VAR_32(uint16_t, abscoefBuf[C1FLAG_NUMBER]); memset(abscoefBuf, 1, sizeof(abscoefBuf)); abscoefBuf[C1FLAG_NUMBER - 2] = 2; abscoefBuf[C1FLAG_NUMBER - 1] = 3; REPORT_SPEEDUP(opt.costC1C2Flag, ref.costC1C2Flag, abscoefBuf, C1FLAG_NUMBER, (uint8_t*)psbuf1, 1); } if (opt.pelFilterLumaStrong[0]) { int32_t tcP = (rand() % PIXEL_MAX) - 1; int32_t tcQ = (rand() % PIXEL_MAX) - 1; HEADER0("pelFilterLumaStrong_Vertical"); REPORT_SPEEDUP(opt.pelFilterLumaStrong[0], ref.pelFilterLumaStrong[0], pbuf1, STRIDE, 1, tcP, tcQ); } if (opt.pelFilterLumaStrong[1]) { int32_t tcP = (rand() % PIXEL_MAX) - 1; int32_t tcQ = (rand() % PIXEL_MAX) - 1; HEADER0("pelFilterLumaStrong_Horizontal"); REPORT_SPEEDUP(opt.pelFilterLumaStrong[1], ref.pelFilterLumaStrong[1], pbuf1, 1, STRIDE, tcP, tcQ); } if (opt.pelFilterChroma[0]) { int32_t tc = (rand() % PIXEL_MAX); int32_t maskP = (rand() % PIXEL_MAX) - 1; int32_t maskQ = (rand() % PIXEL_MAX) - 1; HEADER0("pelFilterChroma_Vertical"); REPORT_SPEEDUP(opt.pelFilterChroma[0], 
ref.pelFilterChroma[0], pbuf1, STRIDE, 1, tc, maskP, maskQ); } if (opt.pelFilterChroma[1]) { int32_t tc = (rand() % PIXEL_MAX); int32_t maskP = (rand() % PIXEL_MAX) - 1; int32_t maskQ = (rand() % PIXEL_MAX) - 1; HEADER0("pelFilterChroma_Horizontal"); REPORT_SPEEDUP(opt.pelFilterChroma[1], ref.pelFilterChroma[1], pbuf1, 1, STRIDE, tc, maskP, maskQ); } for (int k = 0; k < NUM_INTEGRAL_SIZE; k++) { if (opt.integral_initv[k]) { switch (k) { case 0: HEADER0("integral_init4v"); break; case 1: HEADER0("integral_init8v"); break; case 2: HEADER0("integral_init12v"); break; case 3: HEADER0("integral_init16v"); break; case 4: HEADER0("integral_init24v"); break; case 5: HEADER0("integral_init32v"); break; default: break; } REPORT_SPEEDUP(opt.integral_initv[k], ref.integral_initv[k], (uint32_t*)pbuf1, STRIDE); } } for (int k = 0; k < NUM_INTEGRAL_SIZE; k++) { if (opt.integral_inith[k]) { uint32_t dst_buf[BUFFSIZE] = { 0 }; switch (k) { case 0: HEADER0("integral_init4h"); break; case 1: HEADER0("integral_init8h"); break; case 2: HEADER0("integral_init12h"); break; case 3: HEADER0("integral_init16h"); break; case 4: HEADER0("integral_init24h"); break; case 5: HEADER0("integral_init32h"); break; default: break; } REPORT_SPEEDUP(opt.integral_inith[k], ref.integral_inith[k], dst_buf, pbuf1, STRIDE); } } for (int i = BLOCK_8x8; i < NUM_CU_SIZES; i++) { if (opt.cu[i].normFact) { uint64_t dst = 0; int blockSize = 4 << i; int shift = X265_DEPTH - 8; printf("normFact[%dx%d]", blockSize, blockSize); REPORT_SPEEDUP(opt.cu[i].normFact, ref.cu[i].normFact, pixel_test_buff[0], blockSize, shift, &dst); } } }