#include "video_utils_p5p.h"
/* NOTE(review): the header name of the following #include is missing in this
   copy of the file (probably lost during extraction) - restore it. */
#include

#ifdef HAS_VIDEO_BLOCKLINE_TO_MACRO_BLOCKS

#include "config-tcm.h"

        .section ".text.itcm","ax"

        .global video_blockline_to_macro_blocks
        .global video_blockline_patch_block_1
        .global video_blockline_patch_block_2_start
        .global video_blockline_patch_block_2
        .global video_blockline_patch_block_3_start
        .global video_blockline_patch_block_3
        .global video_blockline_patch_block_4_start
        .global video_blockline_patch_block_4
        .global video_blockline_patch_fix_y
        .global video_blockline_patch_block_cb
        .global video_blockline_patch_fix_cb
        .global video_blockline_patch_block_cr
        .global video_blockline_patch_fix_cr

        .type   video_blockline_to_macro_blocks, %function

/*
 * video_blockline_to_macro_blocks
 *
 * Syntax : GNU as, ARM (32-bit) instruction set, preprocessed by cpp.
 *
 * For each macro block, six 8x8 blocks (four read through y_src, one through
 * cb_src, one through cr_src) are read as 8-bit samples, widened to 16 bits
 * and written contiguously to dst: 6 blocks * 8 lines * 16 bytes = 768 bytes
 * per macro block.
 *
 * In:
 *   r0 : ctx; its first three words are y_src, cb_src and cr_src. They are
 *        loaded on entry and the advanced pointers are stored back on exit.
 *   r1 : dst
 *   r2 : num_macro_blocks (0 returns immediately, ctx is left untouched)
 *
 * Register usage:
 *   r3           : y_src
 *   r4           : cb_src
 *   r5           : cr_src
 *   r6-r9        : pixels in 16-bit format (written to dst)
 *   r10, r11     : pixels in 8-bit format (read from y_src, cb_src or cr_src)
 *   ip/r12       : currently unused
 *   lr/r14       : line counter of the inner loop (8 lines per block)
 *
 * r4-r11 and lr are saved on the stack and restored on return.
 *
 * Patch points:
 *   Every instruction that follows a video_blockline_patch_* label carries a
 *   placeholder immediate of 0. The labels are exported, presumably so that
 *   code outside this file can rewrite the immediates (source line stride and
 *   block offsets) before the routine is used - the patching code and its
 *   values are not visible here.
 *
 * All six blocks are generated by the same macro, so every block - including
 * the second luma block, which used to load without base writeback - advances
 * its source pointer by 8 bytes per line.
 * NOTE(review): confirm that the values patched at
 * video_blockline_patch_block_2 and video_blockline_patch_block_3_start
 * assume this post-increment behaviour.
 */

/*
 * COPY_BLOCK_8X8 src, patch
 *
 * Converts one 8x8 block: 8 lines of 8 bytes read through \src (post
 * incremented by 8 for every line) become 8 lines of eight 16-bit values
 * stored through r1 (post incremented by 16 for every line).
 *
 * \patch is defined as a label on the "addne \src, \src, #0" instruction that
 * runs after every line except the last one of the block.
 *
 * Clobbers r6-r11, lr and the condition flags.
 */
        .macro  COPY_BLOCK_8X8 src, patch
        mov     lr, #8                      @ lines left in this block
1:
        ldmia   \src!, {r10-r11}            @ fetch 8 pixels, src += 8

        @ r10 holds pixels 0..3: spread them over r6 (0,1) and r7 (2,3)
        and     r6, r10, #0x00FF
        and     r8, r10, #0xFF00
        mov     r10, r10, lsr #16
        and     r7, r10, #0x00FF
        and     r9, r10, #0xFF00
        orr     r6, r6, r8, lsl #8
        orr     r7, r7, r9, lsl #8

        @ r11 holds pixels 4..7: spread them over r8 (4,5) and r9 (6,7)
        and     r8, r11, #0x00FF
        and     r10, r11, #0xFF00
        mov     r11, r11, lsr #16
        and     r9, r11, #0x00FF
        and     r11, r11, #0xFF00
        orr     r8, r8, r10, lsl #8
        orr     r9, r9, r11, lsl #8

        stmia   r1!, {r6-r9}                @ store 8 widened pixels, dst += 16

        subs    lr, lr, #1
\patch:
        addne   \src, \src, #0              @ patch point: step to next line
        bne     1b
        .endm

video_blockline_to_macro_blocks:
        stmdb   sp!, {r4-r11, lr}

        cmp     r2, #0                      @ a zero count would otherwise wrap
        beq     .Lblockline_return          @ and loop about 2^32 times

        ldmia   r0, {r3-r5}                 @ r3 = y_src, r4 = cb_src, r5 = cr_src

video_blockline_to_macro_blocks_loop:

        @@ Luminance: first block
        COPY_BLOCK_8X8 r3, video_blockline_patch_block_1

        @@ Luminance: second block
video_blockline_patch_block_2_start:
        sub     r3, r3, #0                  @ patch point: move to block start
        COPY_BLOCK_8X8 r3, video_blockline_patch_block_2

        @@ Luminance: third block
video_blockline_patch_block_3_start:
        add     r3, r3, #0                  @ patch point: move to block start
        COPY_BLOCK_8X8 r3, video_blockline_patch_block_3

        @@ Luminance: fourth block
video_blockline_patch_block_4_start:
        sub     r3, r3, #0                  @ patch point: move to block start
        COPY_BLOCK_8X8 r3, video_blockline_patch_block_4

video_blockline_patch_fix_y:
        sub     r3, r3, #0                  @ patch point: fix r3 for next iteration

        @@ Chrominance: fifth block (cb)
        COPY_BLOCK_8X8 r4, video_blockline_patch_block_cb

video_blockline_patch_fix_cb:
        sub     r4, r4, #0                  @ patch point: fix r4 for next iteration

        @@ Chrominance: sixth block (cr)
        COPY_BLOCK_8X8 r5, video_blockline_patch_block_cr

video_blockline_patch_fix_cr:
        sub     r5, r5, #0                  @ patch point: fix r5 for next iteration

        subs    r2, r2, #1                  @ one macro block done
        bne     video_blockline_to_macro_blocks_loop

        stmia   r0, {r3-r5}                 @ write the advanced pointers back to ctx

.Lblockline_return:
        ldmia   sp!, {r4-r11, pc}

#endif // HAS_VIDEO_BLOCKLINE_TO_MACRO_BLOCKS