1 #include <VP_Os/vp_os_print.h>
3 #include <VLIB/Platform/video_utils.h>
4 #include <VLIB/video_picture.h>
5 #include <VP_Os/vp_os_malloc.h>
7 #ifndef HAS_VIDEO_BLOCKLINE_TO_MACRO_BLOCKS
9 // Convert an 8x8 block of 8-bit data to an 8x8 block of 16-bit data.
//
// dst        : destination block of 16-bit samples
// dst_offset : extra distance, in 16-bit elements, added to dst after each pass of the loop
// src        : source block of 8-bit samples
// src_offset : extra distance, in bytes, added to src after each pass of the loop
//
// Both buffers are walked through 32-bit pointers, so this presumably requires
// 4-byte-aligned src/dst -- TODO confirm against callers.
10 static void copy_block_8_16(int16_t* dst, int32_t dst_offset, uint8_t* src, int32_t src_offset)
12 uint32_t* src32 = (uint32_t*) src;
13 uint32_t* dst32 = (uint32_t*) dst;
// Rescale both offsets to 32-bit words (4 source bytes or 2 destination
// int16_t per word). Any remainder is discarded by the shifts.
15 uint32_t src_offset32 = src_offset >> 2;
16 uint32_t dst_offset32 = dst_offset >> 1;
// i advances by MCU_WIDTH per pass; the row offsets are applied in the loop increment.
22 for( i = 0; i < MCU_BLOCK_SIZE; i += MCU_WIDTH, src32 += src_offset32, dst32 += dst_offset32 )
// Each store packs two bytes of temp into one 32-bit word, each zero-extended
// to 16 bits: one byte lands in bits 0-7, the other in bits 16-23.
// NOTE(review): the load of temp is not visible in this excerpt; presumably one
// 32-bit source word (4 samples) feeds each pair of stores. The in-memory order
// of the two 16-bit halves depends on endianness -- confirm target is little-endian.
26 *dst32++ = ((temp << 8) & 0x00FF0000) | (temp & 0x000000FF);
27 *dst32++ = ((temp >> 8) & 0x00FF0000) | ((temp >> 16) & 0x000000FF);
31 *dst32++ = ((temp << 8) & 0x00FF0000) | (temp & 0x000000FF);
32 *dst32++ = ((temp >> 8) & 0x00FF0000) | ((temp >> 16) & 0x000000FF);
39 // Convert an 8x8 block of 16-bit data to an 8x8 block of 8-bit data.
//
// dst        : destination block of 8-bit samples
// dst_offset : extra distance, in bytes, added to dst after each pass of the loop
// src        : source block of 16-bit samples
// src_offset : extra distance, in 16-bit elements, added to src after each pass of the loop
40 static void copy_block_16_8(uint8_t* dst, int32_t dst_offset, int16_t* src, int32_t src_offset)
// i advances by MCU_WIDTH per pass; each pass converts the eight samples below.
45 for( i = 0; i < MCU_BLOCK_SIZE; i += MCU_WIDTH, dst += dst_offset, src += src_offset )
// Per sample: read, clamp to [0, 0xff], store as a byte. The "&= 0xff" is a
// no-op once the value has been clamped; it is kept as written.
47 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
48 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
49 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
50 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
51 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
52 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
53 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
54 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
59 // Transform a blockline into macro blocks
75 // _______________________
76 // | 1 | 2 | 3 | 4 | 5 | 6 | ...
77 // |___|___|___|___|___|___|
80 #ifndef HAS_VIDEO_BLOCKLINE_TO_MACRO_BLOCKS
// Pack the 8-bit planar pixels referenced by ctx into consecutive 8x8 blocks
// of 16-bit samples in dst, one macro block per pass of the while loop.
//
// ctx              : picture context; its y/cb/cr source pointers are read here and
//                    the chroma pointers are written back at the end
// dst              : output buffer, advanced by MCU_BLOCK_SIZE after each block
// num_macro_blocks : number of macro blocks to convert
//
// NOTE(review): several lines of this function (some call arguments, the loop
// counter update, the return statement) are not visible in this excerpt.
82 C_RESULT video_blockline_to_macro_blocks(video_picture_context_t* ctx, int16_t* dst, int32_t num_macro_blocks)
// Work on local copies of the plane pointers.
84 uint8_t* y_src = ctx->y_src;
85 uint8_t* cb_src = ctx->cb_src;
86 uint8_t* cr_src = ctx->cr_src;
88 while( num_macro_blocks > 0 )
// Blocks 1-4 use the luma line offset minus MCU_WIDTH as source row offset;
// presumably copy_block_8_16 has already advanced MCU_WIDTH bytes within the row.
94 ctx->y_woffset - MCU_WIDTH );
96 dst += MCU_BLOCK_SIZE; // skip block 1
101 ctx->y_woffset - MCU_WIDTH );
103 dst += MCU_BLOCK_SIZE; // skip block 2
// Blocks 3 and 4 are read y_hoffset bytes further into the luma plane than
// the first pair, block 4 being MCU_WIDTH bytes to the right of block 3.
105 copy_block_8_16( dst,
107 y_src + ctx->y_hoffset,
108 ctx->y_woffset - MCU_WIDTH );
110 dst += MCU_BLOCK_SIZE; // skip block 3
112 copy_block_8_16( dst,
114 y_src + ctx->y_hoffset + MCU_WIDTH,
115 ctx->y_woffset - MCU_WIDTH );
117 dst += MCU_BLOCK_SIZE; // skip block 4
// Blocks 5 and 6 use the chroma line offset instead of the luma one.
119 copy_block_8_16( dst,
122 ctx->c_woffset - MCU_WIDTH );
124 dst += MCU_BLOCK_SIZE; // skip block 5
126 copy_block_8_16( dst,
129 ctx->c_woffset - MCU_WIDTH );
131 dst += MCU_BLOCK_SIZE; // skip block 6
// Step the luma pointer right by the width of two 8-pixel blocks.
133 y_src += MCU_WIDTH*2;
// Persist the advanced chroma pointers so the next call resumes where this one stopped.
141 ctx->cb_src = cb_src;
142 ctx->cr_src = cr_src;
149 // Transform macro blocks into a picture of the specified format
// Format-specific workers, defined later in this file.
150 static C_RESULT video_blockline_from_macro_blocks_yuv420(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks);
151 static C_RESULT video_blockline_from_macro_blocks_rgb565(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks);
// Dispatch on the requested output pixel format.
//
// ctx              : destination picture context, forwarded to the worker
// src              : macro blocks of 16-bit samples, forwarded to the worker
// num_macro_blocks : number of macro blocks to convert
// format           : requested output format
//
// NOTE(review): the switch skeleton and the return statement are not visible in
// this excerpt; presumably res is returned to the caller.
153 C_RESULT video_blockline_from_macro_blocks(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks, enum PixelFormat format)
159 case PIX_FMT_YUV420P:
160 res = video_blockline_from_macro_blocks_yuv420(ctx, src, num_macro_blocks);
// The case label guarding this call is not visible here (presumably the RGB565 format).
163 res = video_blockline_from_macro_blocks_rgb565(ctx, src, num_macro_blocks);
// Any other format is reported through PRINT.
167 PRINT("In file %s, in function %s, format %d not supported\n", __FILE__, __FUNCTION__, format);
// Unpack macro blocks of 16-bit samples into the 8-bit planes referenced by
// ctx, one macro block per pass of the while loop.
//
// ctx              : picture context; despite their *_src names, its plane pointers
//                    are used as destinations here and the chroma ones are written back
// src              : input macro blocks, advanced by MCU_BLOCK_SIZE after each block
// num_macro_blocks : number of macro blocks to convert
//
// This definition omits "static", but the earlier prototype in this file is
// static, so the function keeps internal linkage.
// NOTE(review): some call arguments, the loop counter update and the return
// statement are not visible in this excerpt.
175 C_RESULT video_blockline_from_macro_blocks_yuv420(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks)
177 uint8_t *y_dst, *cb_dst, *cr_dst;
180 cb_dst = ctx->cb_src;
181 cr_dst = ctx->cr_src;
183 while( num_macro_blocks > 0 )
// First luma block, written at y_dst.
186 copy_block_16_8( y_dst,
187 ctx->y_woffset - MCU_WIDTH,
191 src += MCU_BLOCK_SIZE;
// Second luma block, MCU_WIDTH bytes to the right of the first.
193 copy_block_16_8( y_dst + MCU_WIDTH,
194 ctx->y_woffset - MCU_WIDTH,
198 src += MCU_BLOCK_SIZE;
// Third luma block, y_hoffset bytes further into the luma plane.
200 copy_block_16_8( y_dst + ctx->y_hoffset,
201 ctx->y_woffset - MCU_WIDTH,
205 src += MCU_BLOCK_SIZE;
// Fourth luma block, MCU_WIDTH bytes to the right of the third.
207 copy_block_16_8( y_dst + ctx->y_hoffset + MCU_WIDTH,
208 ctx->y_woffset - MCU_WIDTH,
212 src += MCU_BLOCK_SIZE;
// The two chroma blocks use the chroma line offset.
214 copy_block_16_8( cb_dst,
215 ctx->c_woffset - MCU_WIDTH,
219 src += MCU_BLOCK_SIZE;
221 copy_block_16_8( cr_dst,
222 ctx->c_woffset - MCU_WIDTH,
226 src += MCU_BLOCK_SIZE;
// Step the luma destination right by the width of two 8-pixel blocks.
228 y_dst += MCU_WIDTH*2;
// Persist the advanced chroma pointers for the next call.
236 ctx->cb_src = cb_dst;
237 ctx->cr_src = cr_dst;
// Pack red, green and blue into one RGB565 value: r in bits 11-15, g in bits
// 5-10, b in bits 0-4. Arguments are not masked, so each must already fit its
// field (5, 6 and 5 bits). Despite the name, no alpha component is involved.
242 #define MAKE_RGBA_565(r, g, b) ( ((r) << 11) | ((g) << 5) | (b) )
// Variants selected for ARM iPhone builds; their bodies are not visible in this excerpt.
// NOTE(review): saturate5/saturate6 return uint32_t here but uint16_t in the
// versions further down -- confirm the difference is intentional.
244 #if TARGET_CPU_ARM == 1 && defined(TARGET_OS_IPHONE)
245 static inline int32_t saturate8(int32_t x)
252 static inline uint32_t saturate5(int32_t x)
259 static inline uint32_t saturate6(int32_t x)
// Presumably the fallback (#else) implementations; the directive itself is not visible here.
268 // To make sure that you are bounding your inputs in the range 0 to 255
270 static inline int32_t saturate8(int32_t x)
// Upper clamp to 8 bits. Any lower-bound check or rescaling done before this
// line is not visible in this excerpt.
279 return x > 0xFF ? 0xFF : x;
282 static inline uint16_t saturate5(int32_t x)
// Upper clamp to 5 bits (0x1F). Callers pass values built from "sample << 8",
// so presumably x is scaled down earlier in the body -- TODO confirm.
291 return x > 0x1F ? 0x1F : x;
294 static inline uint16_t saturate6(int32_t x)
// Upper clamp to 6 bits (0x3F); same caveat as saturate5.
303 return x > 0x3F ? 0x3F : x;
// Convert macro blocks of 16-bit Y/Cb/Cr samples into RGB565 pixels written
// through ctx->y_src.
//
// ctx              : destination picture context; y_src is used as a 16-bit RGB565
//                    output pointer and is updated before returning
// src              : input macro blocks (four luma blocks, then cb, then cr)
// num_macro_blocks : number of macro blocks to convert
//
// Each pass of the loop produces a 2x2 pixel patch in each of the four luma
// blocks of the current macro block; every patch shares one cb/cr pair.
// NOTE(review): many statements (derivation of vr/ug/vg/ub from u and v, counter
// updates, per-pass pointer increments, braces, return) are not visible in this
// excerpt; the comments below only describe what is shown.
308 static C_RESULT video_blockline_from_macro_blocks_rgb565(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks)
310 uint32_t y_up_read, y_down_read, cr_current, cb_current;
311 int32_t u, v, vr, ug, vg, ub, r, g, b;
312 int16_t *y_buf1, *y_buf2, *cr_buf, *cb_buf;
313 uint16_t *dst_up, *dst_down;
316 int32_t line_size, block_size, y_woffset, y_hoffset;
// y_buf2 reads the luma row directly below y_buf1 inside the same 8x8 block.
319 y_buf2 = y_buf1 + MCU_WIDTH;
// Chroma follows the four luma blocks: cb first, then cr one block later.
321 cb_buf = y_buf1 + MCU_BLOCK_SIZE * 4;
322 cr_buf = cb_buf + MCU_BLOCK_SIZE;
324 // Our ptrs are 16 bits
// The context offsets are halved to express them in uint16_t units
// (presumably they are stored in bytes -- TODO confirm).
325 y_woffset = ctx->y_woffset / 2;
326 y_hoffset = ctx->y_hoffset / 2;
// dst_up / dst_down address two output rows, one y_woffset apart.
328 dst_up = (uint16_t*) ctx->y_src;
329 dst_down = dst_up + y_woffset;
331 line_size = MCU_WIDTH / 2; // We compute two pixels at a time
332 block_size = MCU_HEIGHT / 2; // We compute two lines at a time
334 while( num_macro_blocks > 0 )
// ---- Patch in the first luma block (chroma sample at index 0) ----
337 cb_current = cb_buf[0];
338 cr_current = cr_buf[0];
// Re-centre chroma around zero.
// NOTE(review): cb_current/cr_current are unsigned while the buffers are int16_t.
340 u = cb_current - 128;
343 v = cr_current - 128;
// Luma is scaled by 256 before the chroma terms are added.
// NOTE(review): shifting a negative int16_t sample left is undefined behaviour
// in C; presumably samples are non-negative here -- confirm.
347 y_up_read = y_buf1[0] << 8;
348 y_down_read = y_buf2[0] << 8;
350 r = saturate5((y_up_read + vr));
351 g = saturate6((y_up_read - ug - vg));
352 b = saturate5((y_up_read + ub));
354 dst_up[0] = MAKE_RGBA_565(r, g, b);
356 r = saturate5((y_down_read + vr));
357 g = saturate6((y_down_read - ug - vg));
358 b = saturate5((y_down_read + ub));
360 dst_down[0] = MAKE_RGBA_565(r, g, b);
// Second column of the same patch, same chroma.
362 y_up_read = y_buf1[1] << 8;
363 y_down_read = y_buf2[1] << 8;
365 r = saturate5((y_up_read + vr));
366 g = saturate6((y_up_read - ug - vg));
367 b = saturate5((y_up_read + ub));
369 dst_up[1] = MAKE_RGBA_565(r, g, b);
371 r = saturate5((y_down_read + vr));
372 g = saturate6((y_down_read - ug - vg));
373 b = saturate5((y_down_read + ub));
375 dst_down[1] = MAKE_RGBA_565(r, g, b);
// ---- Patch in the second luma block: luma at +MCU_BLOCK_SIZE, output at
// +MCU_WIDTH, chroma at +MCU_WIDTH/2 within the chroma row ----
378 cr_current = cr_buf[MCU_WIDTH / 2];
379 cb_current = cb_buf[MCU_WIDTH / 2];
381 u = cb_current - 128;
384 v = cr_current - 128;
388 y_up_read = y_buf1[MCU_BLOCK_SIZE] << 8;
389 y_down_read = y_buf2[MCU_BLOCK_SIZE] << 8;
391 r = saturate5((y_up_read + vr));
392 g = saturate6((y_up_read - ug - vg));
393 b = saturate5((y_up_read + ub));
395 dst_up[MCU_WIDTH] = MAKE_RGBA_565(r, g, b);
397 r = saturate5((y_down_read + vr));
398 g = saturate6((y_down_read - ug - vg));
399 b = saturate5((y_down_read + ub));
401 dst_down[MCU_WIDTH] = MAKE_RGBA_565(r, g, b);
403 y_up_read = y_buf1[MCU_BLOCK_SIZE + 1] << 8;
404 y_down_read = y_buf2[MCU_BLOCK_SIZE + 1] << 8;
406 r = saturate5((y_up_read + vr));
407 g = saturate6((y_up_read - ug - vg));
408 b = saturate5((y_up_read + ub));
410 dst_up[MCU_WIDTH+1] = MAKE_RGBA_565(r, g, b);
412 r = saturate5((y_down_read + vr));
413 g = saturate6((y_down_read - ug - vg));
414 b = saturate5((y_down_read + ub));
416 dst_down[MCU_WIDTH+1] = MAKE_RGBA_565(r, g, b);
// ---- Patch in the third luma block: luma at +2*MCU_BLOCK_SIZE, output at
// +y_hoffset, chroma taken half a chroma block further ----
419 cr_current = cr_buf[MCU_BLOCK_SIZE/2];
420 cb_current = cb_buf[MCU_BLOCK_SIZE/2];
422 u = cb_current - 128;
425 v = cr_current - 128;
429 y_up_read = y_buf1[MCU_BLOCK_SIZE*2] << 8;
430 y_down_read = y_buf2[MCU_BLOCK_SIZE*2] << 8;
432 r = saturate5((y_up_read + vr));
433 g = saturate6((y_up_read - ug - vg));
434 b = saturate5((y_up_read + ub));
436 dst_up[y_hoffset] = MAKE_RGBA_565(r, g, b);
438 r = saturate5((y_down_read + vr));
439 g = saturate6((y_down_read - ug - vg));
440 b = saturate5((y_down_read + ub));
442 dst_down[y_hoffset] = MAKE_RGBA_565(r, g, b);
444 y_up_read = y_buf1[MCU_BLOCK_SIZE*2 + 1] << 8;
445 y_down_read = y_buf2[MCU_BLOCK_SIZE*2 + 1] << 8;
447 r = saturate5((y_up_read + vr));
448 g = saturate6((y_up_read - ug - vg));
449 b = saturate5((y_up_read + ub));
451 dst_up[y_hoffset + 1] = MAKE_RGBA_565(r, g, b);
453 r = saturate5((y_down_read + vr));
454 g = saturate6((y_down_read - ug - vg));
455 b = saturate5((y_down_read + ub));
457 dst_down[y_hoffset + 1] = MAKE_RGBA_565(r, g, b);
// ---- Patch in the fourth luma block: luma at +3*MCU_BLOCK_SIZE, output at
// +y_hoffset+MCU_WIDTH, chroma at the combined half-block and half-row offset ----
460 cr_current = cr_buf[MCU_BLOCK_SIZE/2 + MCU_WIDTH/2];
461 cb_current = cb_buf[MCU_BLOCK_SIZE/2 + MCU_WIDTH/2];
463 u = cb_current - 128;
466 v = cr_current - 128;
470 y_up_read = y_buf1[MCU_BLOCK_SIZE*3] << 8;
471 y_down_read = y_buf2[MCU_BLOCK_SIZE*3] << 8;
473 r = saturate5((y_up_read + vr));
474 g = saturate6((y_up_read - ug - vg));
475 b = saturate5((y_up_read + ub));
477 dst_up[y_hoffset + MCU_WIDTH] = MAKE_RGBA_565(r, g, b);
479 r = saturate5((y_down_read + vr));
480 g = saturate6((y_down_read - ug - vg));
481 b = saturate5((y_down_read + ub));
483 dst_down[y_hoffset + MCU_WIDTH] = MAKE_RGBA_565(r, g, b);
485 y_up_read = y_buf1[MCU_BLOCK_SIZE*3 + 1] << 8;
486 y_down_read = y_buf2[MCU_BLOCK_SIZE*3 + 1] << 8;
488 r = saturate5((y_up_read + vr));
489 g = saturate6((y_up_read - ug - vg));
490 b = saturate5((y_up_read + ub));
492 dst_up[y_hoffset + MCU_WIDTH + 1] = MAKE_RGBA_565(r, g, b);
494 r = saturate5((y_down_read + vr));
495 g = saturate6((y_down_read - ug - vg));
496 b = saturate5((y_down_read + ub));
498 dst_down[y_hoffset + MCU_WIDTH + 1] = MAKE_RGBA_565(r, g, b);
509 if( line_size == 0 ) // We computed one line of a luma-block
// Drop both output pointers two picture lines down; subtracting MCU_WIDTH
// presumably undoes the horizontal advance made while walking the row.
511 dst_up += y_woffset*2 - MCU_WIDTH;
512 dst_down += y_woffset*2 - MCU_WIDTH;
516 if( block_size == 0 )
// All row pairs of this macro block are done: re-base the source pointers
// on the next macro block using the same layout as at function entry.
518 y_buf1 = cr_buf + MCU_BLOCK_SIZE/2 + MCU_WIDTH/2;
519 y_buf2 = y_buf1 + MCU_WIDTH;
521 cb_buf = y_buf1 + MCU_BLOCK_SIZE * 4;
522 cr_buf = cb_buf + MCU_BLOCK_SIZE;
// NOTE(review): the initial value above uses MCU_HEIGHT / 2; the two only agree
// if MCU_WIDTH == MCU_HEIGHT -- confirm.
524 block_size = MCU_WIDTH / 2; // We compute two lines at a time
// Move the output right by two block widths and back by y_hoffset,
// presumably to the top-left pixel of the next macro block.
526 dst_up += 2*MCU_WIDTH - y_hoffset;
527 dst_down = dst_up + y_woffset;
// Advance both chroma pointers by half a row (the branch this belongs to is
// not visible in this excerpt).
536 cr_buf += MCU_WIDTH / 2;
537 cb_buf += MCU_WIDTH / 2;
540 line_size = MCU_WIDTH / 2; // We compute two pixels at a time
// Persist the output position so the next call continues from here.
544 ctx->y_src = (uint8_t*) dst_up;
551 // Transform a YUV 4:2:0 blockline into a picture of the specified format
// Format-specific workers, defined later in this file.
552 static C_RESULT video_blockline_from_blockline_yuv420(video_picture_context_t* ctx, video_picture_context_t* src, int32_t num_macro_blocks);
553 static C_RESULT video_blockline_from_blockline_rgb565(video_picture_context_t* ctx, video_picture_context_t* src, int32_t num_macro_blocks);
// Dispatch on the requested output pixel format.
//
// ctx              : destination picture context, forwarded to the worker
// src              : source picture context, forwarded to the worker
// num_macro_blocks : number of macro blocks in the blockline
// format           : requested output format
//
// NOTE(review): the switch skeleton and the return statement are not visible in
// this excerpt; presumably res is returned to the caller.
555 C_RESULT video_blockline_from_blockline(video_picture_context_t* ctx, video_picture_context_t* src, int32_t num_macro_blocks, enum PixelFormat format)
561 case PIX_FMT_YUV420P:
562 res = video_blockline_from_blockline_yuv420(ctx, src, num_macro_blocks);
// The case label guarding this call is not visible here (presumably the RGB565 format).
565 res = video_blockline_from_blockline_rgb565(ctx, src, num_macro_blocks);
// Any other format is reported through PRINT.
569 PRINT("In file %s, in function %s, format %d not supported\n", __FILE__, __FUNCTION__, format);
// Copy one blockline from blockline_src to blockline_ctx, plane by plane
// (y, then cb, then cr), using vp_os_memcpy on each line.
//
// blockline_ctx    : destination picture context
// blockline_src    : source picture context
// num_macro_blocks : number of macro blocks in the blockline; sets the bytes copied per line
//
// NOTE(review): the loop headers that repeat each memcpy, the local
// declarations of dest/src and the return statement are not visible in this excerpt.
577 static C_RESULT video_blockline_from_blockline_yuv420(video_picture_context_t* blockline_ctx, video_picture_context_t* blockline_src, int32_t num_macro_blocks)
582 uint32_t copy_line_size;
// Luma plane.
// NOTE(review): the per-line byte count is derived from MB_HEIGHT_Y rather than a
// width constant; correct only if macro blocks are square -- confirm.
585 copy_line_size = num_macro_blocks * MB_HEIGHT_Y;
586 dest = blockline_ctx->y_src;
587 src = blockline_src->y_src;
// Copy one line, then step each side by its own line offset.
591 vp_os_memcpy(dest,src,copy_line_size);
592 dest += blockline_ctx->y_woffset;
593 src += blockline_src->y_woffset;
// Cb plane, using the chroma constants and offsets.
597 copy_line_size = num_macro_blocks * MB_HEIGHT_C;
598 dest = blockline_ctx->cb_src;
599 src = blockline_src->cb_src;
603 vp_os_memcpy(dest,src,copy_line_size);
604 dest += blockline_ctx->c_woffset;
605 src += blockline_src->c_woffset;
// Cr plane, handled like the cb plane.
609 copy_line_size = num_macro_blocks * MB_HEIGHT_C;
610 dest = blockline_ctx->cr_src;
611 src = blockline_src->cr_src;
615 vp_os_memcpy(dest,src,copy_line_size);
616 dest += blockline_ctx->c_woffset;
617 src += blockline_src->c_woffset;
625 video_blockline_from_blockline_rgb565(video_picture_context_t* ctx,
626 video_picture_context_t* src, int32_t num_macro_blocks)
628 uint32_t y_up_read, y_down_read, cr_current, cb_current;
629 int32_t u, v, vr, ug, vg, ub, r, g, b;
630 uint8_t *y_buf_up, *y_buf_down, *cr_buf, *cb_buf;
631 uint16_t *dst_up, *dst_down;
632 int32_t line_size,dest_y_woffset;
638 y_buf_up = src->y_src;
639 y_buf_down = y_buf_up + src->y_woffset;
641 cb_buf = src->cb_src;
642 cr_buf = src->cr_src;
644 // Out ptrs are 16 bits
645 dest_y_woffset = ctx->y_woffset / 2;
647 dst_up = (uint16_t*) ctx->y_src;
648 dst_down = dst_up + dest_y_woffset;
650 // We compute two pixels at a time
651 line_size = MB_WIDTH_Y*num_macro_blocks;
653 pixel = line_size>>1;
654 line = MB_WIDTH_Y>>1;
659 cb_current = *cb_buf++;
660 cr_current = *cr_buf++;
662 u = cb_current - 128;
665 v = cr_current - 128;
669 // compute pixel(0,0)
670 y_up_read = (*y_buf_up++) << 8;
671 r = saturate5((y_up_read + vr));
672 g = saturate6((y_up_read - ug - vg));
673 b = saturate5((y_up_read + ub));
675 *dst_up++ = MAKE_RGBA_565(r, g, b);
677 // compute pixel(1,0)
678 y_up_read = (*y_buf_up++) << 8;
680 r = saturate5((y_up_read + vr));
681 g = saturate6((y_up_read - ug - vg));
682 b = saturate5((y_up_read + ub));
684 *dst_up++ = MAKE_RGBA_565(r, g, b);
686 // compute pixel (0,1)
687 y_down_read = (*y_buf_down++) << 8;
689 r = saturate5((y_down_read + vr));
690 g = saturate6((y_down_read - ug - vg));
691 b = saturate5((y_down_read + ub));
693 *dst_down++ = MAKE_RGBA_565(r, g, b);
695 // compute pixel (1,1)
696 y_down_read = (*y_buf_down++) << 8;
698 r = saturate5((y_down_read + vr));
699 g = saturate6((y_down_read - ug - vg));
700 b = saturate5((y_down_read + ub));
702 *dst_down++ = MAKE_RGBA_565(r, g, b);
708 y_buf_up += 2*src->y_woffset - line_size;
709 y_buf_down += 2*src->y_woffset - line_size;
710 cb_buf += src->c_woffset - (line_size>>1);
711 cr_buf += src->c_woffset - (line_size>>1);
713 dst_up += 2*dest_y_woffset - line_size;
714 dst_down += 2*dest_y_woffset - line_size;
716 pixel = line_size>>1;