/*
 * Copyright © 2023, VideoLAN and dav1d authors
 * Copyright © 2023, Loongson Technology Corporation Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/loongarch/loongson_asm.S"

/*
 * static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
 *                        const int bx4, const int bw4, int bh4)
 */
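/*
 * Rough scalar sketch of the operation this routine vectorizes, for reference
 * only (paraphrased from the splat_mv_c prototype above, not verbatim dav1d
 * source). Each refmvs_block is 12 bytes, which is why bx4 is scaled by 12
 * (alsl.d + slli.w) before being added to each row pointer:
 *
 *     for (int h = 0; h < bh4; h++) {
 *         refmvs_block *row = rr[h] + bx4;   // 12-byte entries
 *         for (int x = 0; x < bw4; x++)
 *             row[x] = *rmv;                 // copy mv pair, refs, bs, mf
 *     }
 *
 * The LSX version keeps three byte-rotated copies of the 12-byte pattern in
 * vr1/vr2/vr3 so each row can be filled with whole 16-byte stores.
 */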
function splat_mv_lsx
    vld         vr0,  a1,  0         // 0 1 ... 11 ...
    clz.w       t4,   a3
    vaddi.bu    vr1,  vr0, 0
    addi.w      t4,   t4,  -26
    vextrins.w  vr1,  vr0, 0x30      // 0 1 2 ... 11 0 1 2 3
    la.local    t5,   .SPLAT_LSX_JRTABLE
    vbsrl.v     vr2,  vr1, 4         // 4 5 6 7 ... 11 0 1 2 3 0 0 0 0
    alsl.d      t6,   t4,  t5,  1
    vextrins.w  vr2,  vr0, 0x31      // 4 5 6 7 ... 11 0 1 2 3 4 5 6 7
    ld.h        t7,   t6,  0
    vbsrl.v     vr3,  vr2, 4         // 8 9 10 11 0 1 2 3 4 5 6 7 0 0 0 0
    add.d       t8,   t5,  t7
    alsl.d      a2,   a2,  a2,  1
    vextrins.w  vr3,  vr0, 0x32      // 8 9 10 11 0 1 2 3 4 5 6 7 8 9 10 11
    slli.w      a2,   a2,  2
    jirl        $r0,  t8,  0

.SPLAT_LSX_JRTABLE:
    .hword .SPLAT_W32_LSX - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W16_LSX - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W8_LSX  - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W4_LSX  - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W2_LSX  - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W1_LSX  - .SPLAT_LSX_JRTABLE

.SPLAT_W1_LSX:
    ld.d        t3,   a0,  0
    addi.d      a0,   a0,  8
    addi.d      a4,   a4,  -1
    add.d       t3,   t3,  a2
    fst.d       f1,   t3,  0
    fst.s       f3,   t3,  8
    blt         zero, a4,  .SPLAT_W1_LSX
    b           .splat_end
.SPLAT_W2_LSX:
    ld.d        t3,   a0,  0
    addi.d      a0,   a0,  8
    addi.d      a4,   a4,  -1
    add.d       t3,   t3,  a2
    vst         vr1,  t3,  0
    fst.d       f2,   t3,  16
    blt         zero, a4,  .SPLAT_W2_LSX
    b           .splat_end
.SPLAT_W4_LSX:
    ld.d        t3,   a0,  0
    addi.d      a0,   a0,  8
    addi.d      a4,   a4,  -1
    add.d       t3,   t3,  a2
    vst         vr1,  t3,  0
    vst         vr2,  t3,  16
    vst         vr3,  t3,  32
    blt         zero, a4,  .SPLAT_W4_LSX
    b           .splat_end
.SPLAT_W8_LSX:
    ld.d        t3,   a0,  0
    addi.d      a0,   a0,  8
    addi.d      a4,   a4,  -1
    add.d       t3,   t3,  a2
    vst         vr1,  t3,  0
    vst         vr2,  t3,  16
    vst         vr3,  t3,  32
    vst         vr1,  t3,  48
    vst         vr2,  t3,  64
    vst         vr3,  t3,  80
    blt         zero, a4,  .SPLAT_W8_LSX
    b           .splat_end
.SPLAT_W16_LSX:
    ld.d        t3,   a0,  0
    addi.d      a0,   a0,  8
    addi.d      a4,   a4,  -1
    add.d       t3,   t3,  a2
.rept 2
    vst         vr1,  t3,  0
    vst         vr2,  t3,  16
    vst         vr3,  t3,  32
    vst         vr1,  t3,  48
    vst         vr2,  t3,  64
    vst         vr3,  t3,  80
    addi.d      t3,   t3,  96
.endr
    blt         zero, a4,  .SPLAT_W16_LSX
    b           .splat_end
.SPLAT_W32_LSX:
    ld.d        t3,   a0,  0
    addi.d      a0,   a0,  8
    addi.d      a4,   a4,  -1
    add.d       t3,   t3,  a2
.rept 4
    vst         vr1,  t3,  0
    vst         vr2,  t3,  16
    vst         vr3,  t3,  32
    vst         vr1,  t3,  48
    vst         vr2,  t3,  64
    vst         vr3,  t3,  80
    addi.d      t3,   t3,  96
.endr
    blt         zero, a4,  .SPLAT_W32_LSX
.splat_end:
endfunc

const la_div_mult
    .short 0, 16384, 8192, 5461, 4096, 3276, 2730, 2340
    .short 2048, 1820, 1638, 1489, 1365, 1260, 1170, 1092
    .short 1024, 963, 910, 862, 819, 780, 744, 712
    .short 682, 655, 630, 606, 585, 564, 546, 528
endconst

/*
 * temp reg: a6 a7
 */
.macro LOAD_SET_LOOP is_odd
    slli.d      a6,   t6,  2
    add.d       a6,   a6,  t6        // col_w * 5
0:
    addi.d      a7,   zero, 0        // x
.if \is_odd
    stx.w       t7,   t3,  a7
    addi.d      a7,   a7,  5
    bge         a7,   a6,  2f
.endif
1:
    stx.w       t7,   t3,  a7
    addi.d      a7,   a7,  5
    stx.w       t7,   t3,  a7
    addi.d      a7,   a7,  5
    blt         a7,   a6,  1b
2:
    add.d       t3,   t3,  t2
    addi.d      t5,   t5,  1
    blt         t5,   a5,  0b
.endm

/*
 * static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
 *                         const int col_start8, const int col_end8,
 *                         const int row_start8, int row_end8)
 */
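/*
 * The la_div_mult table above holds div_mult[d] = 16384 / d, the fixed-point
 * reciprocals used for temporal MV projection. A rough scalar sketch of the
 * projection this routine vectorizes (paraphrased, not verbatim dav1d code;
 * the rounding and clipping match the vssrarni.h.w/vclip.h pair used below):
 *
 *     int frac = ref2cur * div_mult[ref2ref];
 *     int py   = mv.y * frac, px = mv.x * frac;
 *     int off_y = iclip((py + 8192 + (py >> 31)) >> 14, -0x3fff, 0x3fff);
 *     int off_x = iclip((px + 8192 + (px >> 31)) >> 14, -0x3fff, 0x3fff);
 *     int pos_y = y + apply_sign(abs(off_y) >> 6, off_y ^ ref_sign);
 *     int pos_x = x + apply_sign(abs(off_x) >> 6, off_x ^ ref_sign);
 *
 * Projected blocks are written into rf->rp_proj as 5-byte
 * refmvs_temporal_block entries, hence the recurring "* 5" address math.
 */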
function load_tmvs_lsx
    addi.d      sp,   sp,  -80
    st.d        s0,   sp,  0
    st.d        s1,   sp,  8
    st.d        s2,   sp,  16
    st.d        s3,   sp,  24
    st.d        s4,   sp,  32
    st.d        s5,   sp,  40
    st.d        s6,   sp,  48
    st.d        s7,   sp,  56
    st.d        s8,   sp,  64
    vld         vr16, a0,  16
    vld         vr0,  a0,  52        // rf->mfmv_ref
    ld.w        s8,   a0,  152       // rf->n_mfmvs
    vld         vr17, a0,  168       // [0] rf->rp_ref, [1] rf->rp_proj
    ld.d        t1,   a0,  184       // stride
    ld.w        t0,   a0,  200
    addi.w      t0,   t0,  -1
    bnez        t0,   1f
    addi.w      a1,   zero, 0
1:
    addi.d      t0,   a3,  8
    vinsgr2vr.w vr1,  t0,  0
    vinsgr2vr.w vr1,  a5,  1
    vmin.w      vr1,  vr1, vr16      // [0] col_end8i, [1] row_end8
    addi.d      t0,   a2,  -8
    bge         t0,   zero, 2f
    addi.w      t0,   zero, 0        // t0 col_start8i
2:
    vpickve2gr.d t4,  vr17, 1        // rf->rp_proj
    slli.d      t2,   t1,  2
    add.d       t2,   t2,  t1        // stride * 5
    slli.d      a1,   a1,  4         // tile_row_idx * 16
    andi        t3,   a4,  0xf
    add.d       t3,   t3,  a1        // tile_row_idx * 16 + (row_start8 & 15)
    mul.w       t3,   t3,  t2
    mul.w       t8,   a1,  t2
    vpickve2gr.w a5,  vr1,  1
    addi.d      t5,   a4,  0
    sub.d       t6,   a3,  a2        // col_end8 - col_start8
    li.w        t7,   0x80008000
    slli.d      a7,   a2,  2
    add.d       t3,   t3,  a2
    add.d       t3,   t3,  a7
    add.d       t3,   t3,  t4        // rp_proj
    andi        a6,   t6,  1
    bnez        a6,   3f
    LOAD_SET_LOOP 0
    b           4f
3:
    LOAD_SET_LOOP 1
4:
    addi.d      a6,   zero, 0        // n
    bge         a6,   s8,  .end_load
    add.d       t3,   t8,  t4        // rp_proj
    mul.w       t6,   a4,  t2
    addi.d      s7,   zero, 40
    vpickve2gr.w t1,  vr1,  0        // col_end8i
    vbsrl.v     vr2,  vr0,  4        // rf->mfmv_ref2cur
    addi.d      t5,   a0,   64       // rf->mfmv_ref2ref
    la.local    t8,   la_div_mult
    vld         vr6,  t8,  0
    vld         vr7,  t8,  16
    vld         vr8,  t8,  32
    vld         vr9,  t8,  48
    li.w        t8,   0x3fff
    vreplgr2vr.h vr21, t8
    vxor.v      vr18, vr18, vr18     // zero
    vsub.h      vr20, vr18, vr21
    vpickev.b   vr12, vr7,  vr6
    vpickod.b   vr13, vr7,  vr6
    vpickev.b   vr14, vr9,  vr8
    vpickod.b   vr15, vr9,  vr8
    vpickve2gr.d s6,  vr17, 0        // rf->rp_ref
5:
    vld         vr10, t5,  0
    vld         vr11, t5,  16
    vpickev.h   vr10, vr11, vr10
    vpickev.b   vr10, vr11, vr10     // [1...7]
    vbsrl.v     vr0,  vr0,  1
    vpickve2gr.wu t8, vr2,  0        // ref2cur
    vbsrl.v     vr2,  vr2,  4
    srli.d      t4,   t8,  24
    xori        t4,   t4,  0x80
    beqz        t4,   8f
    vreplgr2vr.h vr23, t8
    vshuf.b     vr6,  vr14, vr12, vr10
    vshuf.b     vr7,  vr15, vr13, vr10
    vilvl.b     vr8,  vr7,  vr6
    vmulwev.w.h vr6,  vr8,  vr23
    vmulwod.w.h vr7,  vr8,  vr23
    vpickve2gr.b s0,  vr0,  0        // ref
    slli.d      t8,   s0,  3
    ldx.d       s1,   s6,  t8        // rf->rp_ref[ref]
    addi.d      s0,   s0,  -4        // ref_sign
    vreplgr2vr.h vr19, s0
    add.d       s1,   s1,  t6        // &rf->rp_ref[ref][row_start8 * stride]
    addi.d      s2,   a4,  0         // y
    vilvl.w     vr8,  vr7,  vr6
    vilvh.w     vr9,  vr7,  vr6
6:  // for (int y = row_start8;
    andi        s3,   s2,  0xff8
    addi.d      s4,   s3,  8
    blt         a4,   s3,  0f
    addi.d      s3,   a4,  0         // y_proj_start
0:
    blt         s4,   a5,  0f
    addi.d      s4,   a5,  0         // y_proj_end
0:
    addi.d      s5,   t0,  0         // x
7:  // for (int x = col_start8i;
    slli.d      a7,   s5,  2
    add.d       a7,   a7,  s5
    add.d       a7,   s1,  a7        // rb
    vld         vr3,  a7,  0         // [rb]
    vpickve2gr.b t4,  vr3,  4        // b_ref
    beqz        t4,   .end_x
    vreplve.b   vr11, vr10, t4
    vpickve2gr.b t7,  vr11, 4        // ref2ref
    beqz        t7,   .end_x
    vsllwil.w.h vr4,  vr3,  0
    vreplgr2vr.w vr6, t4
    vshuf.w     vr6,  vr9,  vr8      // frac
    vmul.w      vr5,  vr6,  vr4
    vsrai.w     vr4,  vr5,  31
    vadd.w      vr4,  vr4,  vr5
    vssrarni.h.w vr4, vr4,  14
    vclip.h     vr4,  vr4,  vr20, vr21  // offset
    vxor.v      vr5,  vr4,  vr19     // offset.x ^ ref_sign
    vori.b      vr5,  vr5,  0x1      // offset.x ^ ref_sign
    vabsd.h     vr4,  vr4,  vr18
    vsrli.h     vr4,  vr4,  6        // abs(offset.x) >> 6
    vsigncov.h  vr4,  vr5,  vr4      // apply_sign
    vpickve2gr.h s0,  vr4,  0
    add.d       s0,   s2,  s0        // pos_y
    blt         s0,   s3,  .n_posy
    bge         s0,   s4,  .n_posy
    andi        s0,   s0,  0xf
    mul.w       s0,   s0,  t2        // pos
    vpickve2gr.h t7,  vr4,  1
    add.d       t7,   t7,  s5        // pos_x
    add.d       s0,   t3,  s0        // rp_proj + pos
.loop_posx:
    andi        t4,   s5,  0xff8     // x_sb_align
    blt         t7,   a2,  .n_posx
    addi.d      t8,   t4,  -8
    blt         t7,   t8,  .n_posx
    bge         t7,   a3,  .n_posx
    addi.d      t4,   t4,  16
    bge         t7,   t4,  .n_posx
    slli.d      t4,   t7,  2
    add.d       t4,   t4,  t7        // pos_x * 5
    add.d       t4,   s0,  t4        // rp_proj[pos + pos_x]
    vstelm.w    vr3,  t4,  0,  0
    vstelm.b    vr11, t4,  4,  4
.n_posx:
    addi.d      s5,   s5,  1         // x + 1
    bge         s5,   t1,  .ret_posx
    addi.d      a7,   a7,  5         // rb + 1
    vld         vr4,  a7,  0         // [rb]
    vseq.b      vr5,  vr4,  vr3
    vpickve2gr.d t8,  vr5,  0
    cto.d       t8,   t8
    blt         t8,   s7,  7b
    addi.d      t7,   t7,  1         // pos_x + 1
    /* Core computing loop expansion (second) */
    andi        t4,   s5,  0xff8     // x_sb_align
    blt         t7,   a2,  .n_posx
    addi.d      t8,   t4,  -8
    blt         t7,   t8,  .n_posx
    bge         t7,   a3,  .n_posx
    addi.d      t4,   t4,  16
    bge         t7,   t4,  .n_posx
    slli.d      t4,   t7,  2
    add.d       t4,   t4,  t7        // pos_x * 5
    add.d       t4,   s0,  t4        // rp_proj[pos + pos_x]
    vstelm.w    vr3,  t4,  0,  0
    vstelm.b    vr11, t4,  4,  4
    addi.d      s5,   s5,  1         // x + 1
    bge         s5,   t1,  .ret_posx
    addi.d      a7,   a7,  5         // rb + 1
    vld         vr4,  a7,  0         // [rb]
    vseq.b      vr5,  vr4,  vr3
    vpickve2gr.d t8,  vr5,  0
    cto.d       t8,   t8
    blt         t8,   s7,  7b
    addi.d      t7,   t7,  1         // pos_x + 1
    /* Core computing loop expansion (third) */
    andi        t4,   s5,  0xff8     // x_sb_align
    blt         t7,   a2,  .n_posx
    addi.d      t8,   t4,  -8
    blt         t7,   t8,  .n_posx
    bge         t7,   a3,  .n_posx
    addi.d      t4,   t4,  16
    bge         t7,   t4,  .n_posx
    slli.d      t4,   t7,  2
    add.d       t4,   t4,  t7        // pos_x * 5
    add.d       t4,   s0,  t4        // rp_proj[pos + pos_x]
    vstelm.w    vr3,  t4,  0,  0
    vstelm.b    vr11, t4,  4,  4
    addi.d      s5,   s5,  1         // x + 1
    bge         s5,   t1,  .ret_posx
    addi.d      a7,   a7,  5         // rb + 1
    vld         vr4,  a7,  0         // [rb]
    vseq.b      vr5,  vr4,  vr3
    vpickve2gr.d t8,  vr5,  0
    cto.d       t8,   t8
    blt         t8,   s7,  7b
    addi.d      t7,   t7,  1         // pos_x + 1
    b           .loop_posx
.n_posy:
    addi.d      s5,   s5,  1         // x + 1
    bge         s5,   t1,  .ret_posx
    addi.d      a7,   a7,  5         // rb + 1
    vld         vr4,  a7,  0         // [rb]
    vseq.b      vr5,  vr4,  vr3
    vpickve2gr.d t8,  vr5,  0
    cto.d       t8,   t8
    blt         t8,   s7,  7b
    addi.d      s5,   s5,  1         // x + 1
    bge         s5,   t1,  .ret_posx
    addi.d      a7,   a7,  5         // rb + 1
    vld         vr4,  a7,  0         // [rb]
    vseq.b      vr5,  vr4,  vr3
    vpickve2gr.d t8,  vr5,  0
    cto.d       t8,   t8
    blt         t8,   s7,  7b
    b           .n_posy
.end_x:
    addi.d      s5,   s5,  1         // x + 1
    blt         s5,   t1,  7b
.ret_posx:
    add.d       s1,   s1,  t2        // r + stride
    addi.d      s2,   s2,  1         // y + 1
    blt         s2,   a5,  6b
8:
    addi.d      a6,   a6,  1         // n + 1
    addi.d      t5,   t5,  28        // mfmv_ref2ref(offset) + 28
    blt         a6,   s8,  5b
.end_load:
    ld.d        s0,   sp,  0
    ld.d        s1,   sp,  8
    ld.d        s2,   sp,  16
    ld.d        s3,   sp,  24
    ld.d        s4,   sp,  32
    ld.d        s5,   sp,  40
    ld.d        s6,   sp,  48
    ld.d        s7,   sp,  56
    ld.d        s8,   sp,  64
    addi.d      sp,   sp,  80
endfunc

const mv_tbls
    .byte 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
    .byte 0, 1, 2, 3, 8, 0, 1, 2, 3, 8, 0, 1, 2, 3, 8, 0
    .byte 4, 5, 6, 7, 9, 4, 5, 6, 7, 9, 4, 5, 6, 7, 9, 4
    .byte 4, 5, 6, 7, 9, 4, 5, 6, 7, 9, 4, 5, 6, 7, 9, 4
endconst

const mask_mult
    .byte 1, 0, 2, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0
endconst

const mask_mv0
    .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
endconst

const mask_mv1
    .byte 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
endconst

// void dav1d_save_tmvs_lsx(refmvs_temporal_block *rp, ptrdiff_t stride,
//                          refmvs_block **rr, const uint8_t *ref_sign,
//                          int col_end8, int row_end8,
//                          int col_start8, int row_start8)
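// Rough scalar sketch of the per-candidate selection this routine performs
// (paraphrased, not verbatim dav1d code). For each candidate block, the
// second reference is preferred if it is a valid forward reference with a
// small enough motion vector, otherwise the first one, otherwise an empty
// entry is stored; bw8 consecutive 5-byte refmvs_temporal_block entries are
// then filled with the chosen { mv, ref } pair:
//
//     if (ref[1] > 0 && ref_sign[ref[1] - 1] &&
//         (abs(mv[1].y) | abs(mv[1].x)) < 4096)
//         store(mv[1], ref[1]);
//     else if (ref[0] > 0 && ref_sign[ref[0] - 1] &&
//              (abs(mv[0].y) | abs(mv[0].x)) < 4096)
//         store(mv[0], ref[0]);
//     else
//         store(zero_mv, 0);
//
// The mv_tbls/mask_* constants above implement this choice as byte shuffles,
// and the 10:/20:/40:/80:/160: kernels below store 1/2/4/8/16 entries at a
// time, selected through .save_tevs_tbl by the candidate's block size.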
function save_tmvs_lsx
    addi.d      sp,   sp,  -0x28
    st.d        s0,   sp,  0x00
    st.d        s1,   sp,  0x08
    st.d        s2,   sp,  0x10
    st.d        s3,   sp,  0x18
    st.d        s4,   sp,  0x20
    move        t0,   ra
    vxor.v      vr10, vr10, vr10
    vld         vr11, a3,  0         // Load ref_sign[0] ... ref_sign[7]
    la.local    t2,   .save_tevs_tbl
    la.local    s1,   mask_mult
    la.local    t7,   mv_tbls
    vld         vr9,  s1,  0         // Load mask_mult
    vslli.d     vr11, vr11, 8        // 0, ref_sign[0], ..., ref_sign[6]
    la.local    s3,   mask_mv0
    vld         vr8,  s3,  0         // Load mask_mv0
    la.local    s4,   mask_mv1
    vld         vr7,  s4,  0         // Load mask_mv1
    li.d        s0,   5
    li.d        t8,   12 * 2
    mul.d       a1,   a1,  s0        // stride *= 5
    sub.d       a5,   a5,  a7        // h = row_end8 - row_start8
    slli.d      a7,   a7,  1         // row_start8 <<= 1
1:
    li.d        s0,   5
    andi        t3,   a7,  30        // (y & 15) * 2
    slli.d      s4,   t3,  3
    ldx.d       t3,   a2,  s4        // b = rr[(y & 15) * 2]
    addi.d      t3,   t3,  12        // &b[... + 1]
    mul.d       s4,   a4,  t8
    add.d       t4,   s4,  t3        // end_cand_b = &b[col_end8*2 + 1]
    mul.d       s3,   a6,  t8
    add.d       t3,   s3,  t3        // cand_b = &b[x*2 + 1]
    mul.d       s4,   a6,  s0
    add.d       a3,   s4,  a0        // &rp[x]
2:
    /* First cand_b */
    ld.b        t5,   t3,  10        // cand_b->bs
    vld         vr0,  t3,  0         // cand_b->mv and ref
    alsl.d      t5,   t5,  t2,  2    // bt2 index
    ld.h        s3,   t3,  8         // cand_b->ref
    ld.h        t6,   t5,  0         // bt2
    move        s0,   t2
    alsl.d      t3,   t6,  t3,  1    // Next cand_b += bt2 * 2
    vor.v       vr2,  vr0,  vr0
    vinsgr2vr.h vr1,  s3,  0
    move        t1,   t3
    bge         t3,   t4,  3f
    /* Next cand_b */
    ld.b        s0,   t3,  10        // cand_b->bs
    vld         vr4,  t3,  0         // cand_b->mv and ref
    alsl.d      s0,   s0,  t2,  2    // bt2 index
    ld.h        s4,   t3,  8         // cand_b->ref
    ld.h        t6,   s0,  0         // bt2
    alsl.d      t3,   t6,  t3,  1    // Next cand_b += bt2 * 2
    vpackev.d   vr2,  vr4,  vr0      // a0.mv[0] a0.mv[1] a1.mv[0] a1.mv[1]
    vinsgr2vr.h vr1,  s4,  1         // a0.ref[0] a0.ref[1] a1.ref[0] a1.ref[1]
3:
    vabsd.h     vr2,  vr2,  vr10     // abs(mv[].xy)
    vsle.b      vr16, vr10, vr1
    vand.v      vr1,  vr16, vr1
    vshuf.b     vr1,  vr11, vr11, vr1  // ref_sign[ref]
    vsrli.h     vr2,  vr2,  12       // abs(mv[].xy) >> 12
    vilvl.b     vr1,  vr1,  vr1
    vmulwev.h.bu vr1, vr1,  vr9      // ref_sign[ref] * {1, 2}
    vseqi.w     vr2,  vr2,  0        // abs(mv[].xy) < 4096
    vpickev.h   vr2,  vr2,  vr2      // abs() condition to 16 bit
    vand.v      vr1,  vr2,  vr1      // h[0-3] contains conditions for mv[0-1]
    vhaddw.wu.hu vr1, vr1,  vr1      // Combine condition for [1] and [0]
    vpickve2gr.wu s1, vr1,  0        // Extract case for first block
    vpickve2gr.wu s2, vr1,  1
    ld.hu       t5,   t5,  2         // Fetch jump table entry
    ld.hu       s0,   s0,  2
    alsl.d      s3,   s1,  t7,  4    // Load permutation table based on case
    vld         vr1,  s3,  0
    alsl.d      s4,   s2,  t7,  4
    vld         vr5,  s4,  0
    sub.d       t5,   t2,  t5        // Find jump table target
    sub.d       s0,   t2,  s0
    vshuf.b     vr0,  vr0,  vr0, vr1 // Permute cand_b to output refmvs_temporal_block
    vshuf.b     vr4,  vr4,  vr4, vr5
    vsle.b      vr16, vr10, vr1
    vand.v      vr0,  vr16, vr0
    vsle.b      vr17, vr10, vr5
    vand.v      vr4,  vr17, vr4
    // vr1 follows on vr0, with another 3 full repetitions of the pattern.
    vshuf.b     vr1,  vr0,  vr0, vr8 // 1, 2, 3, ..., 15, 16
    vshuf.b     vr5,  vr4,  vr4, vr8 // 1, 2, 3, ..., 15, 16
    // vr2 ends with 3 complete repetitions of the pattern.
    vshuf.b     vr2,  vr1,  vr0, vr7
    vshuf.b     vr6,  vr5,  vr4, vr7 // 4, 5, 6, 7, ..., 16, 17, 18, 19
    jirl        ra,   t5,  0
    bge         t1,   t4,  4f        // if (cand_b >= end)
    vor.v       vr0,  vr4,  vr4
    vor.v       vr1,  vr5,  vr5
    vor.v       vr2,  vr6,  vr6
    jirl        ra,   s0,  0
    blt         t3,   t4,  2b        // if (cand_b < end)
4:
    addi.d      a5,   a5,  -1        // h--
    addi.d      a7,   a7,  2         // y += 2
    add.d       a0,   a0,  a1        // rp += stride
    blt         zero, a5,  1b
    ld.d        s0,   sp,  0x00
    ld.d        s1,   sp,  0x08
    ld.d        s2,   sp,  0x10
    ld.d        s3,   sp,  0x18
    ld.d        s4,   sp,  0x20
    addi.d      sp,   sp,  0x28
    move        ra,   t0
    jirl        zero, ra,  0x00

10:
    addi.d      s1,   a3,  4
    vstelm.w    vr0,  a3,  0,  0     // .mv
    vstelm.b    vr0,  s1,  0,  4     // .ref
    addi.d      a3,   a3,  5
    jirl        zero, ra,  0x00
20:
    addi.d      s1,   a3,  8
    vstelm.d    vr0,  a3,  0,  0     // .mv
    vstelm.h    vr0,  s1,  0,  4     // .ref
    addi.d      a3,   a3,  2 * 5
    jirl        zero, ra,  0x00
40:
    vst         vr0,  a3,  0
    vstelm.w    vr1,  a3,  0x10, 0
    addi.d      a3,   a3,  4 * 5
    jirl        zero, ra,  0x00
80:
    vst         vr0,  a3,  0
    vst         vr1,  a3,  0x10      // This writes 6 full entries plus 2 extra bytes
    vst         vr2,  a3,  5 * 8 - 16 // Write the last few, overlapping with the first write
    addi.d      a3,   a3,  8 * 5
    jirl        zero, ra,  0x00
160:
    addi.d      s1,   a3,  6 * 5
    addi.d      s2,   a3,  12 * 5
    vst         vr0,  a3,  0
    vst         vr1,  a3,  0x10      // This writes 6 full entries plus 2 extra bytes
    vst         vr0,  a3,  6 * 5
    vst         vr1,  a3,  6 * 5 + 16 // Write another 6 full entries, slightly overlapping with the first set
    vstelm.d    vr0,  s2,  0,  0     // Write 8 bytes (one full entry) after the first 12
    vst         vr2,  a3,  5 * 16 - 16 // Write the last 3 entries
    addi.d      a3,   a3,  16 * 5
    jirl        zero, ra,  0x00

.save_tevs_tbl:
    .hword 16 * 12                   // bt2 * 12, 12 is sizeof(refmvs_block)
    .hword .save_tevs_tbl - 160b
    .hword 16 * 12
    .hword .save_tevs_tbl - 160b
    .hword 8 * 12
    .hword .save_tevs_tbl - 80b
    .hword 8 * 12
    .hword .save_tevs_tbl - 80b
    .hword 8 * 12
    .hword .save_tevs_tbl - 80b
    .hword 8 * 12
    .hword .save_tevs_tbl - 80b
    .hword 4 * 12
    .hword .save_tevs_tbl - 40b
    .hword 4 * 12
    .hword .save_tevs_tbl - 40b
    .hword 4 * 12
    .hword .save_tevs_tbl - 40b
    .hword 4 * 12
    .hword .save_tevs_tbl - 40b
    .hword 2 * 12
    .hword .save_tevs_tbl - 20b
    .hword 2 * 12
    .hword .save_tevs_tbl - 20b
    .hword 2 * 12
    .hword .save_tevs_tbl - 20b
    .hword 2 * 12
    .hword .save_tevs_tbl - 20b
    .hword 2 * 12
    .hword .save_tevs_tbl - 20b
    .hword 1 * 12
    .hword .save_tevs_tbl - 10b
    .hword 1 * 12
    .hword .save_tevs_tbl - 10b
    .hword 1 * 12
    .hword .save_tevs_tbl - 10b
    .hword 1 * 12
    .hword .save_tevs_tbl - 10b
    .hword 1 * 12
    .hword .save_tevs_tbl - 10b
    .hword 1 * 12
    .hword .save_tevs_tbl - 10b
    .hword 1 * 12
    .hword .save_tevs_tbl - 10b
endfunc