// Build-time switch and prototypes for helpers used further down.
// NOTE(review): the number at the start of every line looks like the line index of
// the original file carried over by an extraction tool; gaps in the numbering mean
// the corresponding lines are not shown here.
// With this macro defined, the uvlc_decode_blockline variant guarded by
// "#ifndef HAS_UVLC_DECODE_BLOCKLINE" (original line 927) is skipped by the
// preprocessor, unless the macro is undefined again on a line not shown.
8 #define HAS_UVLC_DECODE_BLOCKLINE
// Called from video_codec_open() and video_codec_close(); no definition is
// visible in this listing.
10 extern C_RESULT video_utils_init( video_controller_t* controller );
11 extern C_RESULT video_utils_close( video_controller_t* controller );
// Definitions appear later in this listing (original lines 605 and 616).
13 extern void uvlc_codec_alloc( video_controller_t* controller );
14 extern void uvlc_codec_free( video_controller_t* controller );
// Only referenced from commented-out calls (original lines 174 and 212).
16 extern void p263_codec_alloc( video_controller_t* controller );
17 extern void p263_codec_free( video_controller_t* controller );
// aligned_realloc: obtains a new block from aligned_malloc(size, align_size). On
// the copying path it memcpy()s `size` bytes from the old block and then releases
// the old underlying allocation with free().
// Only part of the body is shown: the return type, the conditions selecting
// between the two aligned_malloc() calls, the assignment of aligned_ptr and the
// return statement are on lines missing from this listing.
21 aligned_realloc(void* ptr, size_t size, size_t align_size)
// Points one int in front of the user pointer. aligned_malloc() stores the base
// offset two ints in front, so this slot presumably holds the saved allocation
// size - TODO confirm against the hidden lines.
36 int* ptr2 = (int*)ptr - 1;
43 ptr_ret = aligned_malloc(size, align_size);
// Compute smallest size
45 // Compute smallest size
// NOTE(review): presumably `size` has been clamped to the old size by a hidden
// line before this copy - confirm.
52 memcpy( ptr_ret, aligned_ptr, size );
// NOTE(review): aligned_free() reads the base offset from (int*)ptr - 2. This
// free() only matches it if a hidden line moved ptr2 back by one int - confirm.
54 free( ((char*)ptr - *ptr2) );
// Second allocation site: nothing is copied in the visible code on this path.
58 ptr_ret = aligned_malloc(size, align_size);
// aligned_malloc: over-allocates with malloc() and derives from it a pointer
// aligned on align_size. The distance back to the malloc() base is recorded just
// in front of the aligned pointer so that aligned_free() can recover the base.
// The return statement, the declarations of ptr2/allocation_size and the handling
// of a failed malloc() or a rejected align_size are on lines not shown here.
66 void* aligned_malloc(size_t size, size_t align_size)
68 char *ptr, *aligned_ptr;
71 size_t align_mask = align_size - 1;
73 // Check if align_size is a power of two
74 // If the result of this test is non zero then align_size is not a power of two
75 if( align_size & align_mask )
78 // Allocation size is :
79 // - Requested user size
80 // - a size (align_size) to make sure we can align on the requested boundary
81 // - 2*sizeof(int) more bytes (8 when int is 4 bytes) to record base address offset & allocation size
82 allocation_size = size + align_size + 2*sizeof(int);
84 ptr = (char*) malloc(allocation_size);
// Skip the two-int header, then advance to the next align_size boundary. When the
// address after the header is already aligned this still advances by a full
// align_size, which the extra align_size bytes in allocation_size account for.
88 ptr2 = (int*)(ptr + 2*sizeof(int));
89 aligned_ptr = ptr + 2*sizeof(int) + (align_size - ((size_t) ptr2 & align_mask));
// Header slot two ints before aligned_ptr: offset from the malloc() base. The
// post-increment leaves ptr2 on the slot one int before aligned_ptr; what gets
// written there is not visible in this listing.
91 ptr2 = (int*)(aligned_ptr - 2*sizeof(int));
92 *ptr2++ = (int) (aligned_ptr - ptr);
// aligned_free: releases a block obtained from aligned_malloc().
// Reads the int stored two slots in front of ptr (the offset aligned_malloc()
// wrote there) and subtracts it from ptr to get the address to pass to free().
// No NULL check on ptr is visible; callers in this file test for NULL first.
98 void aligned_free(void *ptr)
100 int* ptr2 = (int*)ptr - 2;
102 free( ((char*)ptr - *ptr2) );
// video_codec_open: (re)initialises a video controller and attaches a codec.
// Visible sequence: close whatever the controller held, run video_utils_init(),
// reset the controller fields to defaults, allocate the blockline cache and the
// macroblock cache, initialise packetizer and quantizer, then allocate the codec.
// The selection on codec_type (presumably a switch), local declarations and the
// return value are on lines not shown here.
106 C_RESULT video_codec_open( video_controller_t* controller, codec_type_t codec_type )
109 // Data used to initialize macroblock's cache
112 video_macroblock_t* mb;
114 // Close any previously allocated codec for this controller
// NOTE(review): video_codec_close() reads controller->blockline_cache,
// in_stream.bytes and codec_type, so the controller presumably has to be
// zero-initialised before the first open - confirm with callers.
115 video_codec_close( controller );
117 video_utils_init( controller );
119 controller->mode = 0;
120 controller->use_me = FALSE;
121 controller->do_azq = FALSE;
124 controller->target_bitrate = VLIB_DEFAULT_BITRATE;
125 controller->num_frames = 0;
126 controller->picture_type = 0;
127 controller->width = 0;
128 controller->height = 0;
129 controller->num_blockline = 0;
130 controller->mb_blockline = 0;
131 controller->blockline = 0;
132 controller->picture_complete= 0;
// Two assignments to quant are visible; the #else/#endif separating them are
// presumably on the hidden lines 135 and 137.
133 #ifdef USE_TABLE_QUANTIZATION
134 controller->quant = TABLE_QUANTIZATION;
136 controller->quant = DEFAULT_QUANTIZATION;
138 controller->dquant = 0;
140 controller->invQp = 1;
141 controller->gobs = NULL;
142 controller->cache = NULL;
143 controller->codec_type = 0;
144 controller->video_codec = NULL;
// NOTE(review): video_codec_close() frees blockline_cache, but no visible line
// resets it to NULL. Unless video_controller_cleanup() or a hidden line does,
// this test would see a stale pointer after a re-open - confirm.
146 if( controller->blockline_cache == NULL )
148 // We alloc two buffers to be compatible with an asynchronous DCT
149 // While a DCT is performed on one buffer, the other can be used for caching or computing purposes
150 // DCT_BUFFER_SIZE = MAX_NUM_MACRO_BLOCKS_PER_CALL * 6 * MCU_BLOCK_SIZE
151 controller->blockline_cache = (int16_t*)aligned_malloc( 2*DCT_BUFFER_SIZE*sizeof(int16_t), VLIB_ALLOC_ALIGN );
// NOTE(review): the malloc() result is used without a visible NULL check, and no
// matching free of cache_mbs is visible in video_codec_close() - confirm.
154 controller->cache_mbs = malloc( 2 * MAX_NUM_MACRO_BLOCKS_PER_CALL * sizeof(video_macroblock_t) );
// Walk the macroblock cache and the blockline cache in lockstep, one macroblock
// (6 blocks of MCU_BLOCK_SIZE coefficients) per iteration. The statement binding
// each mb to its slice of `cache` is presumably on the hidden lines 158-159.
155 mb = &controller->cache_mbs[0];
156 cache = controller->blockline_cache;
157 for(i = 2*MAX_NUM_MACRO_BLOCKS_PER_CALL; i > 0; i-- )
160 cache += MCU_BLOCK_SIZE*6;
164 video_packetizer_init( controller );
165 video_quantizer_init( controller );
// Codec selection: only the UVLC allocation is active; the P263 call is commented
// out and the remaining visible branch leaves video_codec at NULL.
170 uvlc_codec_alloc( controller );
174 //p263_codec_alloc( controller );
178 controller->video_codec = NULL;
// codec_type is recorded only when a codec object was actually attached.
182 if( controller->video_codec != NULL )
184 controller->codec_type = codec_type;
// video_codec_close: releases what video_codec_open() set up on the controller.
// Visible steps: video_utils_close(), free the aligned blockline cache, close the
// packetizer if its buffer exists, free the codec object according to
// controller->codec_type, then video_controller_cleanup().
// The case labels of the switch and the return value are not shown here.
195 C_RESULT video_codec_close( video_controller_t* controller )
197 video_utils_close( controller );
// NOTE(review): no visible line sets blockline_cache back to NULL after the free;
// presumably video_controller_cleanup() does - confirm.
199 if( controller->blockline_cache != NULL )
200 aligned_free( controller->blockline_cache );
202 if( controller->in_stream.bytes != NULL )
203 video_packetizer_close( controller );
205 switch( controller->codec_type )
208 uvlc_codec_free( controller );
212 //p263_codec_free( controller );
220 video_controller_cleanup( controller );
// video_encode_picture: encodes `picture` blockline by blockline into the
// controller's internal stream (controller->in_stream).
// A local copy of the picture descriptor is turned into a one-blockline view and
// advanced through the Y/Cb/Cr planes until the controller reports the picture
// complete. got_image is not touched by any visible line; the return value is on
// a line not shown.
225 C_RESULT video_encode_picture( video_controller_t* controller, const vp_api_picture_t* picture, bool_t* got_image )
227 vp_api_picture_t blockline = { 0 };
229 controller->mode = VIDEO_ENCODE;
231 video_controller_set_format( controller, picture->width, picture->height );
// Start from the caller's descriptor, then restrict it to a single blockline.
233 blockline = *picture;
234 blockline.height = MB_HEIGHT_Y;
235 blockline.complete = 1;
236 blockline.vision_complete = 0;
238 // Reset internal stream for new blockline/picture
239 controller->in_stream.used = 0;
240 controller->in_stream.index = 0;
// Loop ends when the encoder sets controller->picture_complete, which it does
// when the third argument (last-blockline flag) is true.
242 while( !controller->picture_complete )
244 video_encode_blockline( controller, &blockline, blockline.blockline == (controller->num_blockline-1) );
// Step each plane pointer down by one blockline worth of rows.
246 blockline.y_buf += MB_HEIGHT_Y * picture->y_line_size;
247 blockline.cb_buf += MB_HEIGHT_C * picture->cb_line_size;
248 blockline.cr_buf += MB_HEIGHT_C * picture->cr_line_size;
250 blockline.blockline++;
253 if( picture->complete )
// Writing (free bits + 1) zero bits makes video_write_data() take its
// flush-the-word path once, pushing out the partially filled code word.
// NOTE(review): if in_stream.length is 32 here, video_write_data() shifts the
// code word by 32; uvlc_flush_stream() guards the same call with
// "length != 32" - confirm whether a guard is needed here too.
255 video_write_data( &controller->in_stream, 0, controller->in_stream.length+1 );
256 controller->in_stream.length = 32;
257 controller->picture_complete = 0;
// video_decode_picture: feeds the external stream ex_stream into the controller
// and decodes blocklines for as long as video_cache_stream() succeeds.
// The return value is on a line not shown here.
264 C_RESULT video_decode_picture( video_controller_t* controller, vp_api_picture_t* picture, video_stream_t* ex_stream, bool_t* got_image )
266 vp_api_picture_t blockline = { 0 };
268 controller->mode = VIDEO_DECODE; // mandatory because of video_cache_stream
// One-blockline view of the caller's picture descriptor.
270 blockline = *picture;
271 blockline.height = MB_HEIGHT_Y;
272 blockline.complete = 1;
273 blockline.vision_complete = 0;
// Each successful cache step is followed by one video_decode_blockline() call.
275 while( SUCCEED(video_cache_stream( controller, ex_stream )) )
277 video_decode_blockline( controller, &blockline, got_image );
// video_packetizer_init: allocates the controller's internal stream buffer
// (DEFAULT_INTERNAL_STREAM_SIZE bytes) and resets its bookkeeping fields.
// length starts at 32, which the writer treats as "32 free bits in the code word".
// NOTE(review): the malloc() result is stored without a visible NULL check.
282 C_RESULT video_packetizer_init( video_controller_t* controller )
284 // Internal buffer configuration
285 controller->in_stream.bytes = malloc( DEFAULT_INTERNAL_STREAM_SIZE );
286 controller->in_stream.used = 0;
287 controller->in_stream.size = DEFAULT_INTERNAL_STREAM_SIZE;
288 controller->in_stream.index = 0;
289 controller->in_stream.length = 32;
290 controller->in_stream.code = 0;
291 controller->in_stream.endianess = VIDEO_STREAM_LITTLE_ENDIAN;
// video_packetizer_close: frees the internal stream buffer and zeroes the stream
// bookkeeping. bytes is set to NULL afterwards, which is what video_codec_close()
// tests before calling this function.
296 C_RESULT video_packetizer_close( video_controller_t* controller )
298 free( controller->in_stream.bytes );
300 controller->in_stream.bytes = NULL;
301 controller->in_stream.used = 0;
302 controller->in_stream.size = 0;
303 controller->in_stream.index = 0;
304 controller->in_stream.length = 0;
305 controller->in_stream.code = 0;
// video_cache_stream: forwards to the cache_stream callback of the codec attached
// to the controller and returns its result.
// NOTE(review): controller->video_codec is dereferenced without a NULL check;
// video_codec_open() can leave it NULL for an unsupported codec type.
310 C_RESULT video_cache_stream( video_controller_t* controller, video_stream_t* in )
312 video_codec_t* video_codec = controller->video_codec;
314 return video_codec->cache_stream( controller, in );
317 #ifndef HAS_VIDEO_WRITE_DATA
319 // Fill stream->code from right to left with data in parameters (code & length)
320 // New bits are always inserted at the right of stream->code (least significant bits)
321 // This way old bits are pushed towards the most significant bits
322 // 31 .... 0 (length-1) .... 0
323 // stream <= ------------ <= -----------------------
// video_write_data: appends the `length` low bits of `code` to the stream.
// Here stream->length is the number of free bits left in stream->code (it starts
// at 32 in video_packetizer_init() and is decremented below).
325 void video_write_data( video_stream_t* const stream, uint32_t code, int32_t length )
327 while( length > stream->length )
329 // code's length is bigger than the number of free bits
330 // we put as many bits in cache as possible
// NOTE(review): when stream->length is 32 this shifts by 32, which is undefined
// for a 32-bit operand - confirm the type of stream->code and the callers.
331 stream->code <<= stream->length;
332 stream->code |= code >> (length - stream->length);
334 length -= stream->length; // Compute number of bits left
335 code &= (1 << length) - 1; // We keep only bits we didn't push in cache
// Store the completed word. Advancing index/used and resetting code/length are
// presumably done on the hidden lines 338-345.
337 stream->bytes[stream->index] = stream->code;
347 // In this case, previous loop ended with case length < stream->length
348 stream->code <<= length;
349 stream->code |= code;
351 stream->length -= length;
// video_stuff8: pads the write position with zero bits up to a byte boundary.
// The free-bit count is rounded down to a multiple of 8 and the code word is
// shifted left by the difference, i.e. by the number of padding bits.
// The declaration of length8 and the return value are on lines not shown.
357 C_RESULT video_stuff8( video_stream_t* const stream )
361 length8 = (stream->length & ~7); // TODO: Check if generated code uses bic on ARM
363 stream->code <<= ( stream->length - length8 );
364 stream->length = length8;
369 // Fill code from right to left with length bits from stream->code
370 // Next bits in stream->code to take are always at the left (most significant bits)
371 // This way new bits are put in least significant bits
372 // (length-1) .... 0 31 .... 0
373 // ----------------------- <= ------------ <= stream
// video_read_data: shifts `length` bits from the stream into *code, appending
// them below the bits *code already holds.
// On the read side stream->length counts the bits already consumed from
// stream->code, so (32 - stream->length) bits remain available.
// Storing out_code back into *code and the return value are on hidden lines.
375 C_RESULT video_read_data( video_stream_t* const stream, uint32_t* code, int32_t length )
377 uint32_t out_code = *code;
379 while( length > (32 - stream->length) )
381 /// We need more bits than available in current read bits
// NOTE(review): the decoders reset in_stream.length to 32; in that state the
// right shift below has a count of 32, undefined for a 32-bit operand - confirm.
383 out_code = (out_code << (32 - stream->length)) | (stream->code >> stream->length);
384 length -= (32 - stream->length);
// Load the next word. Resetting stream->length and advancing stream->index are
// presumably done on the hidden lines 387-388.
386 stream->code = stream->bytes[stream->index];
// NOTE(review): with length == 0 the right shift below would be by 32; a guard
// may sit on the hidden line 391 - confirm.
393 out_code = (out_code << length) | (stream->code >> ( 32 - length ));
395 stream->code <<= length;
396 stream->length += length;
// video_peek_data: same bit extraction as video_read_data(), but performed on
// local copies of the code word and consumed-bit count, so the stream (passed as
// const) is left unchanged.
// Storing out_code back into *code and the return value are on hidden lines.
404 C_RESULT video_peek_data( const video_stream_t* const stream, uint32_t* code, int32_t length )
406 uint32_t out_code = *code;
407 uint32_t stream_code = stream->code;
408 uint32_t stream_length = stream->length;
410 while( length > (32 - stream_length) )
412 /// We need more bits than available in current read bits
414 out_code = (out_code << (32 - stream_length)) | (stream_code >> stream_length);
415 length -= (32 - stream_length);
// Peeks the word at the current index without advancing it. Resetting
// stream_length is presumably done on a hidden line, otherwise the loop
// condition would not change.
417 stream_code = stream->bytes[stream->index];
423 out_code = (out_code << length) | (stream_code >> ( 32 - length ));
// video_align8: moves the read position to a byte boundary by discarding bits
// from the code word when the consumed-bit count is not a multiple of 8.
// The outer guard mentioned in the comment below and the return value are on
// lines not shown here.
431 C_RESULT video_align8( video_stream_t* const stream )
433 uint32_t length8, length = stream->length;
437 // Do alignment only when stream->length > 0
438 length8 = ( length & ~7); // TODO: Check if generated code uses bic on ARM
439 if( length8 != length )
// NOTE(review): as shown, length8 <= length, so (length8 - length) would wrap
// around as an unsigned value. Presumably a hidden line (440-441) first raises
// length8 to the next multiple of 8 - confirm.
442 stream->code <<= ( length8 - length );
443 stream->length = length8;
450 #ifndef HAS_UVLC_ENCODE
// PACK_BITS: makes room for length_in bits at the bottom of bits_out and adds
// length_in to the running bit count length_out.
// The last visible line ends with a continuation backslash, so the macro goes on
// on a line not shown here (presumably merging bits_in into bits_out).
// The expansion is a plain statement sequence, not wrapped in do { } while (0).
452 #define PACK_BITS( bits_out, length_out, bits_in, length_in ) \
453 bits_out <<= length_in; \
454 length_out += length_in; \
// uvlc_encode: builds one variable-length code for a (run, level) pair in
// value_code/value_length and writes it to the stream with a single
// video_write_data() call.
// Many lines of the body are missing from this listing (assignments to data and
// sign, the branch conditions); the notes below cover only what is visible.
457 void uvlc_encode( video_stream_t* const stream, int32_t level, int32_t run, int32_t not_last )
459 int32_t sign, length, data, value_code, value_length;
461 /// Encode number of zeros
// clz is presumably a count-leading-zeros helper, so `length` is the bit width
// of `data`; the next line removes the top set bit and keeps the remainder.
469 length = 32 - clz(data); // compute number of bits used in run ( = length of run )
470 data -= 1 << ( length - 1 ); // compute value of run
473 value_length = length + 1;
478 PACK_BITS( value_code, value_length, data, length );
492 // TODO Check saturation & if level == -128
493 length = 32 - clz(data); // number of bits used in level ( = length of level )
496 data -= 1 << (length - 1);
500 PACK_BITS( value_code, value_length, 1, length );
502 assert( length != 2 );
507 PACK_BITS( value_code, value_length, data, length );
// One bit for the sign of the level.
510 PACK_BITS( value_code, value_length, sign, 1 );
513 // add sequence for end of block if required
516 PACK_BITS( value_code, value_length, 0x5, 3 );
520 video_write_data( stream, value_code, value_length );
523 #endif // HAS_UVLC_ENCODE
// uvlc_decode: decodes one (run, level, last) triple from the stream.
// It peeks 32 bits, parses them locally while counting the bits used in
// stream_length, and finally consumes exactly that many bits with
// video_read_data().
// Branch conditions, the handling of *last and the return value are on lines
// missing from this listing.
525 C_RESULT uvlc_decode( video_stream_t* const stream, int32_t* run, int32_t* level, int32_t* last)
527 uint32_t stream_code, stream_length;
528 int32_t r = 0, z, sign;
530 stream_code = stream_length = 0;
532 // Peek 32 bits from the stream because we know our data fits in them
533 video_peek_data( stream, &stream_code, 32 );
536 /// Decode number of zeros
// z = number of leading zero bits (clz is presumably count-leading-zeros).
537 z = clz(stream_code);
539 stream_code <<= z + 1; // Skip all zeros & 1
540 stream_length += z + 1;
// Take the next (z-1) bits as the remainder and add back the implicit top bit.
// NOTE(review): for z == 1 the shift count is 32; presumably a hidden condition
// keeps this path to z > 1 - confirm.
544 r = stream_code >> (32 - (z-1));
546 stream_code <<= (z-1);
547 stream_length += (z-1);
549 *run = r + (1 << (z-1));
557 /// Decode level / last
558 z = clz(stream_code);
560 stream_code <<= z + 1; // Skip all zeros & 1
561 stream_length += z + 1;
// Keep the top z bits: the lowest of them is the sign, the rest the magnitude.
578 stream_code >>= (32 - z);
579 sign = stream_code & 1;
583 r = stream_code >> 1;
587 *level = sign ? -r : r;
591 // Do the real read in the stream to consume what we used
592 video_read_data( stream, &stream_code, stream_length );
// uvlc_codec: constant template for the UVLC codec object. uvlc_codec_alloc()
// copies it into a heap block with memcpy().
// Its first two members are the blockline encode and decode entry points; the
// remaining initialisers and the closing brace are on lines not shown here.
597 const uvlc_codec_t uvlc_codec = {
598 uvlc_encode_blockline,
599 uvlc_decode_blockline,
// uvlc_codec_alloc: allocates a codec object of sizeof(uvlc_codec) bytes, fills
// it with a copy of the constant uvlc_codec template and attaches it to the
// controller.
// NOTE(review): no NULL check on the malloc() result is visible before memcpy().
605 void uvlc_codec_alloc( video_controller_t* controller )
607 video_codec_t* video_codec;
609 video_codec = (video_codec_t*) malloc( sizeof(uvlc_codec) );
611 memcpy(video_codec, &uvlc_codec, sizeof(uvlc_codec));
613 controller->video_codec = video_codec;
// uvlc_codec_free: detaches the codec object from the controller.
// The local variable shadows the file-level constant uvlc_codec inside this
// function. The statement that actually releases the object is presumably on the
// hidden lines 621-622.
616 void uvlc_codec_free( video_controller_t* controller )
618 uvlc_codec_t* uvlc_codec = (uvlc_codec_t*) controller->video_codec;
620 if( uvlc_codec != NULL )
623 controller->video_codec = NULL;
// uvlc_flush_stream: copies encoded data from the internal stream `in` to the
// caller's stream `out` (used by uvlc_cache() on the encode side).
// Updates of in->used / out->used and the return value are on lines not shown.
627 static C_RESULT uvlc_flush_stream( video_stream_t* out, video_stream_t* in )
629 // There is still data in the cache
630 // Always copy a number of bytes that is a multiple of 4.
631 // Only for the last copy can we have exactly the number of bytes left
632 int32_t offset, size;
633 uint32_t out_stream_size;
// length != 32 means the code word holds pending bits (32 = all bits free).
635 if( in->length != 32 )
637 // flush & reset internal stream
// (free bits + 1) zero bits force video_write_data() to store the partial word.
638 video_write_data( in, 0, in->length+1 );
642 out_stream_size = out->size & ~3; // Round down to the largest multiple of 4 available
// offset is a word index: current write index minus the words still pending.
// This presumes in->bytes is addressed in 32-bit words - TODO confirm its type.
644 offset = in->index - (in->used >> 2);
645 size = ( in->used < out_stream_size ) ? in->used : out_stream_size;
647 memcpy(out->bytes, in->bytes + offset, size);
649 out->index = size >> 2;
// uvlc_load_stream: appends data from the external stream `in` to the internal
// stream `out` (used by uvlc_cache() on the decode side).
// On the first call for a buffer (in->index == 0) it scans backwards from the end
// of `in` for two consecutive zero bytes, so that only complete blocklines are
// cached; the byte-by-byte shifts of `value`, several branches and the return
// value are on lines missing from this listing.
657 static C_RESULT uvlc_load_stream( video_stream_t* out, video_stream_t* in )
659 // We cache as many blocklines as possible
661 bool_t found, last_zero, last_zero_temp;
664 int32_t value, nb_bytes;
// Index of the last complete 32-bit word in `in`.
// NOTE(review): wraps around when in->used < 4 - confirm callers never pass that.
665 uint32_t in_index = (in->used >> 2) - 1;
667 // -> start looking for last blockline's end
670 if( in->index == 0 ) // First call, we look for full blocklines
674 while( (in_index > in->index) && !found )
676 value = in->bytes[in_index];
// Each of the four tests below checks the low byte of `value`; presumably
// `value` is shifted right by 8 between them on hidden lines. `found` becomes
// true when the current and the previously tested byte are both zero.
678 last_zero_temp = (value & 0xFF) == 0; // 0x??????00
679 found = last_zero_temp & last_zero;
683 last_zero = last_zero_temp;
686 last_zero_temp = (value & 0xFF) == 0; // 0x????00??
687 found = last_zero_temp & last_zero;
691 last_zero = last_zero_temp;
694 last_zero_temp = (value & 0xFF) == 0; // 0x??00????
695 found = last_zero_temp & last_zero;
699 in_index--; // Handle both the special case where blockline is dword aligned &
700 // blockline start is still not found
702 last_zero = last_zero_temp;
705 last_zero_temp = (value & 0xFF) == 0; // 0x00??????
706 found = last_zero_temp & last_zero;
710 last_zero = last_zero_temp;
720 // configure parameters for memcpy
// Two ways of sizing the copy: everything that is left, or only the words up to
// the blockline boundary found above.
724 nb_bytes = in->used - in->index * 4;
730 // cache only data containing full blocklines
731 nb_bytes = (in_index - in->index) * 4;
736 // Realloc internal stream to have enough space to hold all required data
737 while( out->used + nb_bytes >= out->size )
// NOTE(review): assigning realloc() straight back loses the old buffer if it
// fails. The matching update of out->size is presumably on the hidden line 740.
739 out->bytes = realloc( out->bytes, out->size + 2048 ); // Add 2KB to internal stream
// dst/src are set to the start of each buffer here; any offset into them is
// presumably applied on the hidden lines that follow each assignment.
743 dst = (uint8_t*)&out->bytes[0];
746 src = (uint8_t*)&in->bytes[0];
749 memcpy( dst, src, nb_bytes );
751 out->used += nb_bytes;
752 in->index = in_index;
754 ASSERT( out->used <= out->size );
// uvlc_pack_controller: writes the header that precedes a blockline in the
// internal stream: byte-align, emit a 22-bit start code built from the blockline
// number, then the picture layer (blockline 0) and/or the GOB layer.
// Whether the GOB-layer call sits in an else branch is not visible here; the
// return value is on a line not shown.
759 C_RESULT uvlc_pack_controller( video_controller_t* controller )
761 video_stream_t* stream = &controller->in_stream;
762 uvlc_codec_t* uvlc_codec = (uvlc_codec_t*) controller->video_codec;
763 uvlc_picture_layer_t* picture_layer;
764 uvlc_gob_layer_t* gob;
767 picture_layer = &uvlc_codec->picture_layer;
769 video_stuff8( stream );
// The controller's gobs array is viewed as uvlc_gob_layer_t entries.
771 picture_layer->gobs = (uvlc_gob_layer_t*) controller->gobs;
772 gob = &picture_layer->gobs[controller->blockline];
774 video_write_data( stream, MAKE_START_CODE(controller->blockline), 22 );
776 if( controller->blockline == 0 )
// The first blockline carries the picture-level quantizer.
778 picture_layer->quant = gob->quant;
779 uvlc_write_picture_layer( controller, stream );
783 uvlc_write_gob_layer( stream, gob );
// uvlc_unpack_controller: reads the header that precedes a blockline: byte-align,
// read a 22-bit start code, split it into the blockline number (low 5 bits) and
// the start-code pattern, then read the picture layer or the GOB layer.
// A blockline number of 0x1F marks the end of the picture.
// Else branches and the return value are on lines missing from this listing.
789 C_RESULT uvlc_unpack_controller( video_controller_t* controller )
791 uint32_t start_code = 0;
792 video_stream_t* stream = &controller->in_stream;
793 uvlc_codec_t* uvlc_codec = (uvlc_codec_t*) controller->video_codec;
794 uvlc_picture_layer_t* picture_layer;
795 uvlc_gob_layer_t* gob;
798 picture_layer = &uvlc_codec->picture_layer;
800 video_align8( stream );
801 video_read_data( stream, &start_code, 22 );
803 controller->blockline = start_code & 0x1F;
804 start_code &= ~0x1F; // TODO Check if compiler uses ARM instruction bic
// Debug-time sanity check only; the blockline is used as an array index below.
806 ASSERT( controller->blockline == 0x1F ||
807 controller->num_blockline == 0 || // Check if cache is allocated for current picture
808 (controller->num_blockline > 0 && controller->blockline < controller->num_blockline) );
810 if( start_code == PICTURE_START_CODE )
812 if( controller->blockline == 0x1F )
814 controller->picture_complete = TRUE;
818 if( controller->blockline == 0 )
// First blockline: the picture layer supplies the quantizer for this GOB.
820 uvlc_read_picture_layer( controller, stream );
822 picture_layer->gobs = (uvlc_gob_layer_t*) controller->gobs;
823 gob = &picture_layer->gobs[controller->blockline];
825 gob->quant = picture_layer->quant;
// Other blocklines: the quantizer comes from the GOB layer itself.
829 picture_layer->gobs = (uvlc_gob_layer_t*) controller->gobs;
830 gob = &picture_layer->gobs[controller->blockline];
832 uvlc_read_gob_layer( stream, gob );
// uvlc_encode_blockline: encodes one blockline of a picture into the internal
// stream. Visible pipeline: grow the stream if needed, write the header, convert
// pixels to macroblocks and run the forward DCT in batches, quantize, write the
// macroblock layer, and append the end code when this is the last blockline.
// The declaration of gobs and the return value are on lines not shown here.
840 C_RESULT uvlc_encode_blockline( video_controller_t* controller, const vp_api_picture_t* blockline, bool_t picture_complete )
842 video_codec_t* video_codec;
843 int16_t *in = NULL, *out = NULL;
844 int32_t num_macro_blocks = 0;
845 video_macroblock_t* macroblock = NULL;
846 video_picture_context_t blockline_ctx;
849 video_stream_t* stream = &controller->in_stream;
// Grow the buffer by 2KB once it is at least half full.
// NOTE(review): assigning realloc() straight back loses the old buffer if it
// fails, and the result is not checked.
851 if( stream->used*2 >= stream->size )
853 stream->bytes = realloc( stream->bytes, stream->size + 2048 ); // Add 2KB to internal stream
854 stream->size += 2048;
// video_codec is assigned here but not used by any visible line.
857 video_codec = controller->video_codec;
858 controller->picture_complete = picture_complete;
859 controller->blockline = blockline->blockline;
861 uvlc_pack_controller( controller );
// Source planes and strides for the pixel-to-macroblock conversion.
863 blockline_ctx.y_src = blockline->y_buf;
864 blockline_ctx.cb_src = blockline->cb_buf;
865 blockline_ctx.cr_src = blockline->cr_buf;
866 blockline_ctx.y_woffset = blockline->y_line_size;
867 blockline_ctx.c_woffset = blockline->cb_line_size;
868 blockline_ctx.y_hoffset = blockline->y_line_size * MCU_HEIGHT;
870 gobs = &controller->gobs[controller->blockline];
871 gobs->quant = controller->quant;
872 macroblock = &gobs->macroblocks[0];
874 in = controller->blockline_cache;
875 out = macroblock->data;
877 num_macro_blocks = controller->mb_blockline;
879 ///> Cache blockline in dct format & perform dct
// Full batches of MAX_NUM_MACRO_BLOCKS_PER_CALL, alternating between the two
// halves of blockline_cache (each DCT_BUFFER_SIZE long).
880 while( num_macro_blocks > MAX_NUM_MACRO_BLOCKS_PER_CALL )
882 RTMON_USTART(VIDEO_VLIB_BLOCKLINE_TO_MB);
883 video_blockline_to_macro_blocks(&blockline_ctx, in, MAX_NUM_MACRO_BLOCKS_PER_CALL);
884 RTMON_USTOP(VIDEO_VLIB_BLOCKLINE_TO_MB);
886 out = video_fdct_compute(in, out, MAX_NUM_MACRO_BLOCKS_PER_CALL);
888 if( in == controller->blockline_cache )
889 in += DCT_BUFFER_SIZE;
891 in -= DCT_BUFFER_SIZE;
893 num_macro_blocks -= MAX_NUM_MACRO_BLOCKS_PER_CALL;
// Remaining macroblocks (at most one batch).
896 RTMON_USTART(VIDEO_VLIB_BLOCKLINE_TO_MB);
897 video_blockline_to_macro_blocks(&blockline_ctx, in, num_macro_blocks);
898 RTMON_USTOP(VIDEO_VLIB_BLOCKLINE_TO_MB);
900 video_fdct_compute(in, out, num_macro_blocks);
903 ///> Quantize each macroblock
904 RTMON_USTART(VIDEO_VLIB_QUANTIZE);
905 video_quantize( controller, &controller->gobs[controller->blockline].macroblocks[0], controller->mb_blockline );
906 RTMON_USTOP(VIDEO_VLIB_QUANTIZE);
909 ///> Packetize Data to output buffer
910 RTMON_USTART(VIDEO_VLIB_PACKET);
911 uvlc_write_mb_layer( stream, macroblock, controller->mb_blockline );
912 RTMON_USTOP(VIDEO_VLIB_PACKET);
// Last blockline: byte-align and append the 22-bit picture end code.
915 if( controller->picture_complete )
917 video_stuff8( stream );
918 video_write_data( stream, PICTURE_END_CODE, 22 );
921 // Update controller according to user inputs & video statistics
922 video_controller_update( controller, picture_complete );
927 #ifndef HAS_UVLC_DECODE_BLOCKLINE
// uvlc_decode_blockline (generic variant): decodes every blockline available in
// the internal stream into `picture`, using separate read / unquantize / IDCT
// passes over batches of macroblocks.
// This variant sits behind "#ifndef HAS_UVLC_DECODE_BLOCKLINE"; the macro is
// defined near the top of the file, so it is normally compiled out.
// The declaration of gobs, closing braces and the return value are not shown.
928 C_RESULT uvlc_decode_blockline( video_controller_t* controller, vp_api_picture_t* picture, bool_t* got_image )
930 video_codec_t* video_codec;
931 vp_api_picture_t blockline = { 0 };
932 int16_t *in = NULL, *out = NULL;
933 int32_t num_macro_blocks = 0;
934 video_macroblock_t* macroblock = NULL;
935 video_picture_context_t blockline_ctx;
938 controller->mode = VIDEO_DECODE;
939 video_codec = controller->video_codec;
// One-blockline view of the caller's picture descriptor.
941 blockline = *picture;
942 blockline.height = MB_HEIGHT_Y;
943 blockline.complete = 1;
944 blockline.vision_complete = 0;
946 picture->complete = controller->picture_complete;
948 blockline_ctx.y_woffset = blockline.y_line_size;
949 blockline_ctx.c_woffset = blockline.cb_line_size;
950 blockline_ctx.y_hoffset = blockline.y_line_size * MCU_HEIGHT;
952 // At least a complete blockline is found
// Keep decoding while cached words remain and no end-of-picture code was seen.
953 while( !controller->picture_complete && controller->in_stream.index < (controller->in_stream.used >> 2) )
955 uvlc_unpack_controller( controller );
957 if( !controller->picture_complete )
959 blockline.blockline = controller->blockline;
// Destination rows of this blockline inside each plane of the picture.
961 blockline_ctx.y_src = picture->y_buf + blockline.blockline * MB_HEIGHT_Y * picture->y_line_size;
962 blockline_ctx.cb_src = picture->cb_buf + blockline.blockline * MB_HEIGHT_C * picture->cb_line_size;
963 blockline_ctx.cr_src = picture->cr_buf + blockline.blockline * MB_HEIGHT_C * picture->cr_line_size;
965 picture->blockline = controller->blockline;
966 num_macro_blocks = controller->mb_blockline;
968 macroblock = &controller->cache_mbs[0];
969 gobs = &controller->gobs[controller->blockline];
970 out = gobs->macroblocks->data;
// Re-derive quantizer state when this blockline uses a different quant value.
972 if( gobs->quant != controller->quant )
974 controller->quant = gobs->quant;
975 video_quantizer_update( controller );
// Full batches, alternating between the two halves of cache_mbs.
978 while( num_macro_blocks > MAX_NUM_MACRO_BLOCKS_PER_CALL )
// NOTE(review): the right-hand side below looks like a mis-encoded
// "&macroblock->data[0]" (an HTML entity decoded by the extraction tool) -
// confirm against the original file.
980 in = ¯oblock->data[0];
982 uvlc_read_mb_layer( &controller->in_stream, macroblock, MAX_NUM_MACRO_BLOCKS_PER_CALL );
984 video_unquantize( controller, macroblock, MAX_NUM_MACRO_BLOCKS_PER_CALL );
986 out = video_idct_compute( in, out, MAX_NUM_MACRO_BLOCKS_PER_CALL );
988 if( macroblock == &controller->cache_mbs[0] )
989 macroblock += MAX_NUM_MACRO_BLOCKS_PER_CALL;
991 macroblock -= MAX_NUM_MACRO_BLOCKS_PER_CALL;
993 num_macro_blocks -= MAX_NUM_MACRO_BLOCKS_PER_CALL;
// Remaining macroblocks (at most one batch).
996 in = macroblock->data;
998 uvlc_read_mb_layer( &controller->in_stream, macroblock, num_macro_blocks );
1000 video_unquantize( controller, macroblock, num_macro_blocks );
1002 video_idct_compute( in, out, num_macro_blocks );
// Write the reconstructed macroblocks back into the picture planes.
1004 video_blockline_from_macro_blocks(&blockline_ctx, gobs->macroblocks->data, controller->mb_blockline, picture->format);
1006 // Update controller according to video statistics
1007 video_controller_update( controller, controller->picture_complete );
// End of picture: report completion, reset per-picture state, count the frame.
1011 if( controller->picture_complete )
1013 picture->complete = controller->picture_complete;
1014 picture->blockline = 0;
1016 controller->picture_complete = 0;
// 32 consumed bits = empty code word, so the next read loads a fresh word.
1017 controller->in_stream.length = 32;
1018 controller->num_frames++;
// All cached data has been consumed; restart the internal stream.
1024 controller->in_stream.used = 0;
1025 controller->in_stream.index = 0;
// uvlc_update: per-codec update hook taking the controller.
// NOTE(review): only the signature is present in this listing; the body is on
// lines not shown, so its behaviour cannot be described from here.
1032 C_RESULT uvlc_update( video_controller_t* controller )
// uvlc_cache: moves data between the controller's internal stream and the
// external stream, in a direction chosen by controller->mode.
// One branch flushes internal -> external (uvlc_flush_stream), the other loads
// external -> internal (uvlc_load_stream). The case labels, the declaration of
// res and the return statement are on lines not shown here; presumably the
// branches correspond to VIDEO_ENCODE and VIDEO_DECODE.
1037 C_RESULT uvlc_cache( video_controller_t* controller, video_stream_t* ex_stream)
1041 video_stream_t* in_stream = &controller->in_stream;
1043 switch( controller->mode )
1046 res = uvlc_flush_stream( ex_stream, in_stream );
1050 res = uvlc_load_stream( in_stream, ex_stream );
// uvlc_write_gob_layer: writes the GOB header, which in the visible code is the
// quantizer value on 5 bits. The return value is on a line not shown.
1061 C_RESULT uvlc_write_gob_layer( video_stream_t* stream, uvlc_gob_layer_t* gob )
1063 video_write_data( stream, gob->quant, 5 );
// uvlc_read_gob_layer: reads the 5-bit quantizer written by
// uvlc_write_gob_layer() into gob->quant.
// video_read_data() shifts the new bits in below the previous contents of its
// target, so gob->quant presumably has to be cleared first; the hidden lines
// 1069-1071 may do that - confirm.
1068 C_RESULT uvlc_read_gob_layer( video_stream_t* stream, uvlc_gob_layer_t* gob )
1072 video_read_data( stream, &gob->quant, 5 );
1077 #ifndef HAS_UVLC_WRITE_BLOCK
// uvlc_write_block: writes one block of coefficients to the stream: a first
// value on 10 bits, then (run, level) codes through uvlc_encode() while
// coefficients remain.
// The zigzag table pointer starts at entry 1 of video_zztable_t81, i.e. after the
// first coefficient. How code/run are computed inside the loop is on lines
// missing from this listing.
1079 void uvlc_write_block( video_stream_t* const stream, int16_t* data, int32_t num_coeff )
1082 int32_t index, code, run;
1084 zztable = &video_zztable_t81[1];
1088 video_write_data( stream, code, 10 );
1093 while( num_coeff > 0 )
// The remaining coefficient count is passed as the not_last argument.
1104 uvlc_encode( stream, code, run, num_coeff );
1110 #endif // HAS_UVLC_WRITE_BLOCK
1112 #ifndef HAS_UVLC_READ_BLOCK
// uvlc_read_block: reads one block of coefficients from the stream: a first
// value on 10 bits, then (run, level, last) triples through uvlc_decode().
// The loop around the decode call, the stores into data[] via the zigzag table,
// the update of *num_coeff and the return value are on lines not shown here.
1114 C_RESULT uvlc_read_block( video_stream_t* stream, int16_t* data, int32_t* num_coeff )
1117 int32_t index, code, run, last, nc;
1119 zztable = &video_zztable_t81[0];
// Cleared first because video_read_data() shifts new bits into the existing value.
1124 code = run = last = 0;
1125 video_read_data( stream, (uint32_t*) &code, 10 );
1133 code = run = last = 0;
1134 uvlc_decode( stream, &run, &code, &last);
// uvlc_write_mb_layer: writes num_macro_blocks macroblocks to the stream.
// Per macroblock, as far as visible: 1 bit azq flag, an 8-bit descriptor, an
// optional 2-bit dquant, then the six blocks Y0..Y3, Cb, Cr.
// The initialisation of code and data, any condition on azq, the step to the
// next macroblock and the return value are on lines missing from this listing.
1156 C_RESULT uvlc_write_mb_layer( video_stream_t* stream, video_macroblock_t* mb, int32_t num_macro_blocks )
1161 while( num_macro_blocks > 0 )
1163 video_write_data( stream, mb->azq, 1 );
// Descriptor bits 0..5: the block has more than one coefficient;
// bit 6: a dquant field follows.
1167 code |= (mb->num_coeff_y0 > 1) << 0;
1168 code |= (mb->num_coeff_y1 > 1) << 1;
1169 code |= (mb->num_coeff_y2 > 1) << 2;
1170 code |= (mb->num_coeff_y3 > 1) << 3;
1171 code |= (mb->num_coeff_cb > 1) << 4;
1172 code |= (mb->num_coeff_cr > 1) << 5;
1173 code |= (mb->dquant != 0) << 6;
1175 video_write_data( stream, code, 8 );
1177 if( mb->dquant != 0 )
// Negative dquant is sent as its bitwise complement; only 2 bits are written.
1179 code = ( mb->dquant < 0 ) ? ~mb->dquant : mb->dquant;
1180 video_write_data( stream, code, 2 );
1183 /**************** Block Y0 ****************/
1185 uvlc_write_block( stream, data, mb->num_coeff_y0 );
1187 /**************** Block Y1 ****************/
1188 data += MCU_BLOCK_SIZE;
1189 uvlc_write_block( stream, data, mb->num_coeff_y1 );
1191 /**************** Block Y2 ****************/
1192 data += MCU_BLOCK_SIZE;
1193 uvlc_write_block( stream, data, mb->num_coeff_y2 );
1195 /**************** Block Y3 ****************/
1196 data += MCU_BLOCK_SIZE;
1197 uvlc_write_block( stream, data, mb->num_coeff_y3 );
1199 /**************** Block CB ****************/
1200 data += MCU_BLOCK_SIZE;
1201 uvlc_write_block( stream, data, mb->num_coeff_cb );
1203 /**************** Block CR ****************/
1204 data += MCU_BLOCK_SIZE;
1205 uvlc_write_block( stream, data, mb->num_coeff_cr );
1209 num_macro_blocks --;
// uvlc_read_mb_layer: reads num_macro_blocks macroblocks from the stream, the
// counterpart of uvlc_write_mb_layer().
// The coefficient storage of all requested macroblocks is zeroed up front,
// starting at mb->data, so the macroblocks' data is presumably contiguous.
// The initialisation of code and data, any condition on azq, the step to the
// next macroblock and the return value are on lines missing from this listing.
1215 C_RESULT uvlc_read_mb_layer( video_stream_t* stream, video_macroblock_t* mb, int32_t num_macro_blocks )
1220 memset( mb->data, 0, num_macro_blocks * 6 * MCU_BLOCK_SIZE * sizeof(int16_t) );
1221 while( num_macro_blocks > 0 )
1224 video_read_data( stream, (uint32_t*)&mb->azq, 1 );
1228 video_read_data( stream, &code, 8 );
// Descriptor bits 0..5 seed each block's coefficient count with 0 or 1; the
// address of each count is then handed to uvlc_read_block().
1230 mb->num_coeff_y0 = (code >> 0) & 1;
1231 mb->num_coeff_y1 = (code >> 1) & 1;
1232 mb->num_coeff_y2 = (code >> 2) & 1;
1233 mb->num_coeff_y3 = (code >> 3) & 1;
1234 mb->num_coeff_cb = (code >> 4) & 1;
1235 mb->num_coeff_cr = (code >> 5) & 1;
// Bit 6: a 2-bit dquant field follows.
1238 if( (code >> 6) & 1 )
1240 video_read_data( stream, &code, 2 );
// Values below 2 are complemented, values 2 and 3 are kept as read.
1242 mb->dquant = (code < 2) ? ~code : code;
1245 /**************** Block Y0 ****************/
1247 uvlc_read_block( stream, data, &mb->num_coeff_y0 );
1249 /**************** Block Y1 ****************/
1250 data += MCU_BLOCK_SIZE;
1251 uvlc_read_block( stream, data, &mb->num_coeff_y1 );
1253 /**************** Block Y2 ****************/
1254 data += MCU_BLOCK_SIZE;
1255 uvlc_read_block( stream, data, &mb->num_coeff_y2 );
1257 /**************** Block Y3 ****************/
1258 data += MCU_BLOCK_SIZE;
1259 uvlc_read_block( stream, data, &mb->num_coeff_y3 );
1261 /**************** Block CB ****************/
1262 data += MCU_BLOCK_SIZE;
1263 uvlc_read_block( stream, data, &mb->num_coeff_cb );
1265 /**************** Block CR ****************/
1266 data += MCU_BLOCK_SIZE;
1267 uvlc_read_block( stream, data, &mb->num_coeff_cr );
1272 num_macro_blocks --;
// uvlc_write_picture_layer: derives the (format, resolution) pair describing the
// controller's picture size and writes the picture header: format (2 bits),
// resolution (3 bits), picture type (3 bits), quantizer (5 bits) and the frame
// number (32 bits).
// The steps inside the search loop that change width and resolution, and the
// return value, are on lines missing from this listing.
1278 C_RESULT uvlc_write_picture_layer( video_controller_t* controller, video_stream_t* stream )
1280 uint32_t format = 0, resolution = 0, width, height;
1282 uvlc_codec_t* uvlc_codec = (uvlc_codec_t*) controller->video_codec;
1283 uvlc_picture_layer_t* picture_layer = &uvlc_codec->picture_layer;
1285 width = controller->width;
1286 height = controller->height;
// Loop until the width matches one of the two base widths.
// NOTE(review): presumably width is halved and resolution incremented on hidden
// lines; a width unrelated to either base could keep this loop from ending.
1288 while( format == 0 )
1290 if( width == QQCIF_WIDTH )
1291 format = UVLC_FORMAT_CIF;
1293 if( width == QQVGA_WIDTH )
1294 format = UVLC_FORMAT_VGA;
1302 picture_layer->format = format;
1303 picture_layer->resolution = resolution;
1305 video_write_data( stream, picture_layer->format, 2 );
1306 video_write_data( stream, picture_layer->resolution, 3 );
1307 video_write_data( stream, picture_layer->picture_type, 3 );
1308 video_write_data( stream, picture_layer->quant, 5 );
1309 video_write_data( stream, controller->num_frames, 32 );
// uvlc_read_picture_layer: reads the picture header written by
// uvlc_write_picture_layer(), turns (format, resolution) back into a width and
// height, and applies them with video_controller_set_format().
// The default branch of the switch and the return value are not shown here.
1314 C_RESULT uvlc_read_picture_layer( video_controller_t* controller, video_stream_t* stream )
1316 uint32_t width, height;
1318 uvlc_codec_t* uvlc_codec = (uvlc_codec_t*) controller->video_codec;
1319 uvlc_picture_layer_t* picture_layer = &uvlc_codec->picture_layer;
// Cleared first because video_read_data() shifts new bits into the existing value.
1321 picture_layer->format = 0;
1322 picture_layer->resolution = 0;
1323 picture_layer->picture_type = 0;
1324 picture_layer->quant = 0;
1326 video_read_data( stream, &picture_layer->format, 2 );
1327 video_read_data( stream, &picture_layer->resolution, 3 );
1328 video_read_data( stream, &picture_layer->picture_type, 3 );
1329 video_read_data( stream, &picture_layer->quant, 5 );
// NOTE(review): num_frames is not cleared before this read; with a full 32-bit
// read the previous value is presumably shifted out entirely - confirm.
1330 video_read_data( stream, &controller->num_frames, 32 );
// Size = base size doubled (resolution - 1) times.
// NOTE(review): resolution comes straight from the stream; a value of 0 would
// make (resolution-1) an invalid shift count. No validation is visible.
1332 switch( picture_layer->format )
1334 case UVLC_FORMAT_CIF:
1335 width = QQCIF_WIDTH << (picture_layer->resolution-1);
1336 height = QQCIF_HEIGHT << (picture_layer->resolution-1);
1339 case UVLC_FORMAT_VGA:
1340 width = QQVGA_WIDTH << (picture_layer->resolution-1);
1341 height = QQVGA_HEIGHT << (picture_layer->resolution-1);
1350 video_controller_set_format( controller, width, height );
// uvlc_read_block_unquantize: reads one block of coefficients from the
// controller's internal stream and rescales the values while reading, instead of
// doing it in a separate unquantize pass.
// Two modes are visible: table quantization (values multiplied by entries of
// iquant_tab) and constant quantization (values rebuilt as quant*(2*code+1)).
// Loops, stores into data[], the use of nc and the return value are on lines
// missing from this listing.
1355 C_RESULT uvlc_read_block_unquantize( video_controller_t* controller, int16_t* data, int32_t quant, int32_t nc )
1357 video_stream_t* stream = &controller->in_stream;
1359 int32_t index, code, run, last;
1361 zztable = &video_zztable_t81[0];
1362 code = run = last = 0;
1363 if (quant == TABLE_QUANTIZATION)
1365 // table quantization mode
// First value: 10 bits, scaled by the first table entry.
1366 video_read_data( stream, (uint32_t*) &code, 10 );
1367 int16_t* p_iquant_table = (int16_t*)(&iquant_tab[0]);
1368 code *= *p_iquant_table;
1373 uvlc_decode( stream, &run, &code, &last);
// Following values: scaled by the table entry at `index`.
1380 code *= p_iquant_table[index];
1383 uvlc_decode( stream, &run, &code, &last);
1389 // const quantization mode
1391 video_read_data( stream, (uint32_t*) &code, 10 );
1393 if( controller->picture_type == VIDEO_PICTURE_INTRA ) // intra
1399 code = quant*( 2*code + 1 );
1407 uvlc_decode( stream, &run, &code, &last);
1416 code = quant*( 2*code + 1 );
1422 uvlc_decode( stream, &run, &code, &last);
// uvlc_read_mb_layer_unquantize: reads ONE macroblock from the controller's
// internal stream, rescaling each block as it is read through
// uvlc_read_block_unquantize().
// `out` is advanced by MCU_BLOCK_SIZE after each of the six blocks. What is
// written through it (presumably by an IDCT call on the hidden lines between
// blocks) and the returned pointer are not visible in this listing.
1430 uint16_t* uvlc_read_mb_layer_unquantize( video_controller_t* controller, video_macroblock_t* mb, uint16_t* out )
// Clears the macroblock's coefficients. The second argument is
// 6 * MCU_BLOCK_SIZE / 2, which matches a count of 32-bit words covering
// 6 * MCU_BLOCK_SIZE 16-bit values.
1435 video_zeromem32( (uint32_t*)mb->data, 6 * MCU_BLOCK_SIZE / 2 );
1438 video_read_data( &controller->in_stream, (uint32_t*)&mb->azq, 1 );
1442 video_read_data( &controller->in_stream, &code, 8 );
// Descriptor bits 0..5: per-block flag; bit 6: a 2-bit dquant field follows.
1444 mb->num_coeff_y0 = (code >> 0) & 1;
1445 mb->num_coeff_y1 = (code >> 1) & 1;
1446 mb->num_coeff_y2 = (code >> 2) & 1;
1447 mb->num_coeff_y3 = (code >> 3) & 1;
1448 mb->num_coeff_cb = (code >> 4) & 1;
1449 mb->num_coeff_cr = (code >> 5) & 1;
1452 if( (code >> 6) & 1 )
1454 video_read_data( &controller->in_stream, &code, 2 );
// Values below 2 are complemented, values 2 and 3 are kept as read.
1456 mb->dquant = (code < 2) ? ~code : code;
// The quantizer used for the blocks below includes this macroblock's dquant.
1459 controller->quant += mb->dquant;
1461 /**************** Block Y0 ****************/
1463 uvlc_read_block_unquantize( controller, data, controller->quant, mb->num_coeff_y0 );
1465 out += MCU_BLOCK_SIZE;
1467 /**************** Block Y1 ****************/
1468 data += MCU_BLOCK_SIZE;
1469 uvlc_read_block_unquantize( controller, data, controller->quant, mb->num_coeff_y1 );
1471 out += MCU_BLOCK_SIZE;
1473 /**************** Block Y2 ****************/
1474 data += MCU_BLOCK_SIZE;
1475 uvlc_read_block_unquantize( controller, data, controller->quant, mb->num_coeff_y2 );
1477 out += MCU_BLOCK_SIZE;
1479 /**************** Block Y3 ****************/
1480 data += MCU_BLOCK_SIZE;
1481 uvlc_read_block_unquantize( controller, data, controller->quant, mb->num_coeff_y3 );
1483 out += MCU_BLOCK_SIZE;
1485 /**************** Block CB ****************/
1486 data += MCU_BLOCK_SIZE;
1487 uvlc_read_block_unquantize( controller, data, controller->quant, mb->num_coeff_cb );
1489 out += MCU_BLOCK_SIZE;
1491 /**************** Block CR ****************/
1492 data += MCU_BLOCK_SIZE;
1493 uvlc_read_block_unquantize( controller, data, controller->quant, mb->num_coeff_cr );
1495 out += MCU_BLOCK_SIZE;
1503 C_RESULT uvlc_decode_blockline( video_controller_t* controller, vp_api_picture_t* picture, bool_t* got_image )
1505 video_codec_t* video_codec;
1506 vp_api_picture_t blockline = { 0 };
1508 uint16_t *out = NULL;
1509 int32_t num_macro_blocks = 0;
1510 video_macroblock_t* macroblock = NULL;
1511 video_picture_context_t blockline_ctx;
1514 controller->mode = VIDEO_DECODE;
1515 video_codec = controller->video_codec;
1517 blockline = *picture;
1518 blockline.height = MB_HEIGHT_Y;
1519 blockline.complete = 1;
1520 blockline.vision_complete = 0;
1522 picture->complete = controller->picture_complete;
1524 blockline_ctx.y_woffset = blockline.y_line_size;
1525 blockline_ctx.c_woffset = blockline.cb_line_size;
1526 blockline_ctx.y_hoffset = blockline.y_line_size * MCU_HEIGHT;
1528 // At least a complete blockline is found
1529 while( !controller->picture_complete && controller->in_stream.index < (controller->in_stream.used >> 2) )
1531 uvlc_unpack_controller( controller );
1533 if( !controller->picture_complete )
1535 blockline.blockline = controller->blockline;
1537 blockline_ctx.y_src = picture->y_buf + blockline.blockline * MB_HEIGHT_Y * picture->y_line_size;
1538 blockline_ctx.cb_src = picture->cb_buf + blockline.blockline * MB_HEIGHT_C * picture->cb_line_size;
1539 blockline_ctx.cr_src = picture->cr_buf + blockline.blockline * MB_HEIGHT_C * picture->cr_line_size;
1541 picture->blockline = controller->blockline;
1542 num_macro_blocks = controller->mb_blockline;
1544 macroblock = &controller->cache_mbs[0];
1545 gobs = &controller->gobs[controller->blockline];
1546 out = (uint16_t*) gobs->macroblocks->data;
1548 if( gobs->quant != controller->quant )
1550 controller->quant = gobs->quant;
1551 video_quantizer_update( controller );
1554 while( num_macro_blocks > 0 )
1556 in = &macroblock->data[0];
1558 out = uvlc_read_mb_layer_unquantize( controller, macroblock, out );
1560 num_macro_blocks --;
1563 video_blockline_from_macro_blocks(&blockline_ctx, gobs->macroblocks->data, controller->mb_blockline, picture->format);
1565 // Update controller according to video statistics
1566 video_controller_update( controller, controller->picture_complete );
1570 if( controller->picture_complete )
1572 picture->complete = controller->picture_complete;
1573 picture->blockline = 0;
1575 controller->picture_complete = 0;
1576 controller->in_stream.length = 32;
1577 //controller->num_frames++;
1583 controller->in_stream.used = 0;
1584 controller->in_stream.index = 0;
1590 C_RESULT video_zeromem32( uint32_t* dst, uint32_t length )
1603 C_RESULT video_copy32(uint32_t* dst, uint32_t* src, uint32_t nb)
1607 for( i = 0; i < nb; i++ )
1615 C_RESULT video_copy32_swap(uint32_t* dst, uint32_t* src, uint32_t nb)
1619 for( i = 0; i < nb; i++ )
1621 dst[i] = bswap( src[i] );
1627 #ifndef HAS_VIDEO_BLOCKLINE_TO_MACRO_BLOCKS
1629 // Convert an 8x8 block of 8-bit data to an 8x8 block of 16-bit data
1630 static void copy_block_8_16(int16_t* dst, int32_t dst_offset, uint8_t* src, int32_t src_offset)
1632 uint32_t* src32 = (uint32_t*) src;
1633 uint32_t* dst32 = (uint32_t*) dst;
1635 uint32_t src_offset32 = src_offset >> 2;
1636 uint32_t dst_offset32 = dst_offset >> 1;
1642 for( i = 0; i < MCU_BLOCK_SIZE; i += MCU_WIDTH, src32 += src_offset32, dst32 += dst_offset32 )
1646 *dst32++ = ((temp << 8) & 0x00FF0000) | (temp & 0x000000FF);
1647 *dst32++ = ((temp >> 8) & 0x00FF0000) | ((temp >> 16) & 0x000000FF);
1651 *dst32++ = ((temp << 8) & 0x00FF0000) | (temp & 0x000000FF);
1652 *dst32++ = ((temp >> 8) & 0x00FF0000) | ((temp >> 16) & 0x000000FF);
1659 // Convert an 8x8 block of 16-bit data to an 8x8 block of 8-bit data
1660 static void copy_block_16_8(uint8_t* dst, int32_t dst_offset, int16_t* src, int32_t src_offset)
1665 for( i = 0; i < MCU_BLOCK_SIZE; i += MCU_WIDTH, dst += dst_offset, src += src_offset )
1667 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
1668 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
1669 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
1670 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
1671 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
1672 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
1673 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
1674 temp = *src++; if( temp > 0xff ) temp = 0xff; if(temp < 0) temp = 0; temp &= 0xff; *dst++ = (uint8_t) temp;
1679 // Transform a blockline into macro blocks
1695 // _______________________
1696 // | 1 | 2 | 3 | 4 | 5 | 6 | ...
1697 // |___|___|___|___|___|___|
1700 #ifndef HAS_VIDEO_BLOCKLINE_TO_MACRO_BLOCKS
1702 C_RESULT video_blockline_to_macro_blocks(video_picture_context_t* ctx, int16_t* dst, int32_t num_macro_blocks)
1704 uint8_t* y_src = ctx->y_src;
1705 uint8_t* cb_src = ctx->cb_src;
1706 uint8_t* cr_src = ctx->cr_src;
1708 while( num_macro_blocks > 0 )
1711 copy_block_8_16( dst,
1714 ctx->y_woffset - MCU_WIDTH );
1716 dst += MCU_BLOCK_SIZE; // skip block 1
1718 copy_block_8_16( dst,
1721 ctx->y_woffset - MCU_WIDTH );
1723 dst += MCU_BLOCK_SIZE; // skip block 2
1725 copy_block_8_16( dst,
1727 y_src + ctx->y_hoffset,
1728 ctx->y_woffset - MCU_WIDTH );
1730 dst += MCU_BLOCK_SIZE; // skip block 3
1732 copy_block_8_16( dst,
1734 y_src + ctx->y_hoffset + MCU_WIDTH,
1735 ctx->y_woffset - MCU_WIDTH );
1737 dst += MCU_BLOCK_SIZE; // skip block 4
1739 copy_block_8_16( dst,
1742 ctx->c_woffset - MCU_WIDTH );
1744 dst += MCU_BLOCK_SIZE; // skip block 5
1746 copy_block_8_16( dst,
1749 ctx->c_woffset - MCU_WIDTH );
1751 dst += MCU_BLOCK_SIZE; // skip block 6
1753 y_src += MCU_WIDTH*2;
1754 cb_src += MCU_WIDTH;
1755 cr_src += MCU_WIDTH;
1757 num_macro_blocks --;
1761 ctx->cb_src = cb_src;
1762 ctx->cr_src = cr_src;
1769 // Transform macro blocks into a picture of the specified format
1770 static C_RESULT video_blockline_from_macro_blocks_yuv420(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks);
1771 static C_RESULT video_blockline_from_macro_blocks_rgb565(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks);
1773 C_RESULT video_blockline_from_macro_blocks(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks, enum PixelFormat format)
1779 case PIX_FMT_YUV420P:
1780 res = video_blockline_from_macro_blocks_yuv420(ctx, src, num_macro_blocks);
1782 case PIX_FMT_RGB565:
1783 res = video_blockline_from_macro_blocks_rgb565(ctx, src, num_macro_blocks);
1787 //PRINT("In file %s, in function %s, format %d not supported\n", __FILE__, __FUNCTION__, format);
1795 C_RESULT video_blockline_from_macro_blocks_yuv420(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks)
1797 uint8_t *y_dst, *cb_dst, *cr_dst;
1800 cb_dst = ctx->cb_src;
1801 cr_dst = ctx->cr_src;
1803 while( num_macro_blocks > 0 )
1806 copy_block_16_8( y_dst,
1807 ctx->y_woffset - MCU_WIDTH,
1811 src += MCU_BLOCK_SIZE;
1813 copy_block_16_8( y_dst + MCU_WIDTH,
1814 ctx->y_woffset - MCU_WIDTH,
1818 src += MCU_BLOCK_SIZE;
1820 copy_block_16_8( y_dst + ctx->y_hoffset,
1821 ctx->y_woffset - MCU_WIDTH,
1825 src += MCU_BLOCK_SIZE;
1827 copy_block_16_8( y_dst + ctx->y_hoffset + MCU_WIDTH,
1828 ctx->y_woffset - MCU_WIDTH,
1832 src += MCU_BLOCK_SIZE;
1834 copy_block_16_8( cb_dst,
1835 ctx->c_woffset - MCU_WIDTH,
1839 src += MCU_BLOCK_SIZE;
1841 copy_block_16_8( cr_dst,
1842 ctx->c_woffset - MCU_WIDTH,
1846 src += MCU_BLOCK_SIZE;
1848 y_dst += MCU_WIDTH*2;
1849 cb_dst += MCU_WIDTH;
1850 cr_dst += MCU_WIDTH;
1856 ctx->cb_src = cb_dst;
1857 ctx->cr_src = cr_dst;
1862 #define MAKE_RGBA_565(r, g, b) ( ((r) << 11) | ((g) << 5) | (b) )
1864 #if (TARGET_CPU_ARM == 1) && defined(_IPHONE)
1865 static inline int32_t saturate8(int32_t x)
1872 static inline uint32_t saturate5(int32_t x)
1879 static inline uint32_t saturate6(int32_t x)
1888 // Makes sure that inputs are bounded to the range 0 to 255
1890 static inline int32_t saturate8(int32_t x)
1899 return x > 0xFF ? 0xFF : x;
1902 static inline uint16_t saturate5(int32_t x)
1911 return x > 0x1F ? 0x1F : x;
1914 static inline uint16_t saturate6(int32_t x)
1923 return x > 0x3F ? 0x3F : x;
1927 static C_RESULT video_blockline_from_macro_blocks_rgb565(video_picture_context_t* ctx, int16_t* src, int32_t num_macro_blocks)
1929 uint32_t y_up_read, y_down_read, cr_current, cb_current;
1930 int32_t u, v, vr, ug, vg, ub, r, g, b;
1931 int16_t *y_buf1, *y_buf2, *cr_buf, *cb_buf;
1932 uint16_t *dst_up, *dst_down;
1934 // Control variables
1935 int32_t line_size, block_size, y_woffset, y_hoffset;
1938 y_buf2 = y_buf1 + MCU_WIDTH;
1940 cb_buf = y_buf1 + MCU_BLOCK_SIZE * 4;
1941 cr_buf = cb_buf + MCU_BLOCK_SIZE;
1943 // Our ptrs are 16 bits
1944 y_woffset = ctx->y_woffset / 2;
1945 y_hoffset = ctx->y_hoffset / 2;
1947 dst_up = (uint16_t*) ctx->y_src;
1948 dst_down = dst_up + y_woffset;
1950 line_size = MCU_WIDTH / 2; // We compute two pixels at a time
1951 block_size = MCU_HEIGHT / 2; // We compute two lines at a time
1953 while( num_macro_blocks > 0 )
1956 cb_current = cb_buf[0];
1957 cr_current = cr_buf[0];
1959 u = cb_current - 128;
1962 v = cr_current - 128;
1966 y_up_read = y_buf1[0] << 8;
1967 y_down_read = y_buf2[0] << 8;
1969 r = saturate5((y_up_read + vr));
1970 g = saturate6((y_up_read - ug - vg));
1971 b = saturate5((y_up_read + ub));
1973 dst_up[0] = MAKE_RGBA_565(r, g, b);
1975 r = saturate5((y_down_read + vr));
1976 g = saturate6((y_down_read - ug - vg));
1977 b = saturate5((y_down_read + ub));
1979 dst_down[0] = MAKE_RGBA_565(r, g, b);
1981 y_up_read = y_buf1[1] << 8;
1982 y_down_read = y_buf2[1] << 8;
1984 r = saturate5((y_up_read + vr));
1985 g = saturate6((y_up_read - ug - vg));
1986 b = saturate5((y_up_read + ub));
1988 dst_up[1] = MAKE_RGBA_565(r, g, b);
1990 r = saturate5((y_down_read + vr));
1991 g = saturate6((y_down_read - ug - vg));
1992 b = saturate5((y_down_read + ub));
1994 dst_down[1] = MAKE_RGBA_565(r, g, b);
1997 cr_current = cr_buf[MCU_WIDTH / 2];
1998 cb_current = cb_buf[MCU_WIDTH / 2];
2000 u = cb_current - 128;
2003 v = cr_current - 128;
2007 y_up_read = y_buf1[MCU_BLOCK_SIZE] << 8;
2008 y_down_read = y_buf2[MCU_BLOCK_SIZE] << 8;
2010 r = saturate5((y_up_read + vr));
2011 g = saturate6((y_up_read - ug - vg));
2012 b = saturate5((y_up_read + ub));
2014 dst_up[MCU_WIDTH] = MAKE_RGBA_565(r, g, b);
2016 r = saturate5((y_down_read + vr));
2017 g = saturate6((y_down_read - ug - vg));
2018 b = saturate5((y_down_read + ub));
2020 dst_down[MCU_WIDTH] = MAKE_RGBA_565(r, g, b);
2022 y_up_read = y_buf1[MCU_BLOCK_SIZE + 1] << 8;
2023 y_down_read = y_buf2[MCU_BLOCK_SIZE + 1] << 8;
2025 r = saturate5((y_up_read + vr));
2026 g = saturate6((y_up_read - ug - vg));
2027 b = saturate5((y_up_read + ub));
2029 dst_up[MCU_WIDTH+1] = MAKE_RGBA_565(r, g, b);
2031 r = saturate5((y_down_read + vr));
2032 g = saturate6((y_down_read - ug - vg));
2033 b = saturate5((y_down_read + ub));
2035 dst_down[MCU_WIDTH+1] = MAKE_RGBA_565(r, g, b);
2038 cr_current = cr_buf[MCU_BLOCK_SIZE/2];
2039 cb_current = cb_buf[MCU_BLOCK_SIZE/2];
2041 u = cb_current - 128;
2044 v = cr_current - 128;
2048 y_up_read = y_buf1[MCU_BLOCK_SIZE*2] << 8;
2049 y_down_read = y_buf2[MCU_BLOCK_SIZE*2] << 8;
2051 r = saturate5((y_up_read + vr));
2052 g = saturate6((y_up_read - ug - vg));
2053 b = saturate5((y_up_read + ub));
2055 dst_up[y_hoffset] = MAKE_RGBA_565(r, g, b);
2057 r = saturate5((y_down_read + vr));
2058 g = saturate6((y_down_read - ug - vg));
2059 b = saturate5((y_down_read + ub));
2061 dst_down[y_hoffset] = MAKE_RGBA_565(r, g, b);
2063 y_up_read = y_buf1[MCU_BLOCK_SIZE*2 + 1] << 8;
2064 y_down_read = y_buf2[MCU_BLOCK_SIZE*2 + 1] << 8;
2066 r = saturate5((y_up_read + vr));
2067 g = saturate6((y_up_read - ug - vg));
2068 b = saturate5((y_up_read + ub));
2070 dst_up[y_hoffset + 1] = MAKE_RGBA_565(r, g, b);
2072 r = saturate5((y_down_read + vr));
2073 g = saturate6((y_down_read - ug - vg));
2074 b = saturate5((y_down_read + ub));
2076 dst_down[y_hoffset + 1] = MAKE_RGBA_565(r, g, b);
2079 cr_current = cr_buf[MCU_BLOCK_SIZE/2 + MCU_WIDTH/2];
2080 cb_current = cb_buf[MCU_BLOCK_SIZE/2 + MCU_WIDTH/2];
2082 u = cb_current - 128;
2085 v = cr_current - 128;
2089 y_up_read = y_buf1[MCU_BLOCK_SIZE*3] << 8;
2090 y_down_read = y_buf2[MCU_BLOCK_SIZE*3] << 8;
2092 r = saturate5((y_up_read + vr));
2093 g = saturate6((y_up_read - ug - vg));
2094 b = saturate5((y_up_read + ub));
2096 dst_up[y_hoffset + MCU_WIDTH] = MAKE_RGBA_565(r, g, b);
2098 r = saturate5((y_down_read + vr));
2099 g = saturate6((y_down_read - ug - vg));
2100 b = saturate5((y_down_read + ub));
2102 dst_down[y_hoffset + MCU_WIDTH] = MAKE_RGBA_565(r, g, b);
2104 y_up_read = y_buf1[MCU_BLOCK_SIZE*3 + 1] << 8;
2105 y_down_read = y_buf2[MCU_BLOCK_SIZE*3 + 1] << 8;
2107 r = saturate5((y_up_read + vr));
2108 g = saturate6((y_up_read - ug - vg));
2109 b = saturate5((y_up_read + ub));
2111 dst_up[y_hoffset + MCU_WIDTH + 1] = MAKE_RGBA_565(r, g, b);
2113 r = saturate5((y_down_read + vr));
2114 g = saturate6((y_down_read - ug - vg));
2115 b = saturate5((y_down_read + ub));
2117 dst_down[y_hoffset + MCU_WIDTH + 1] = MAKE_RGBA_565(r, g, b);
2128 if( line_size == 0 ) // We computed one line of a luma-block
2130 dst_up += y_woffset*2 - MCU_WIDTH;
2131 dst_down += y_woffset*2 - MCU_WIDTH;
2135 if( block_size == 0 )
2137 y_buf1 = cr_buf + MCU_BLOCK_SIZE/2 + MCU_WIDTH/2;
2138 y_buf2 = y_buf1 + MCU_WIDTH;
2140 cb_buf = y_buf1 + MCU_BLOCK_SIZE * 4;
2141 cr_buf = cb_buf + MCU_BLOCK_SIZE;
2143 block_size = MCU_WIDTH / 2; // We compute two lines at a time
2145 dst_up += 2*MCU_WIDTH - y_hoffset;
2146 dst_down = dst_up + y_woffset;
2153 y_buf2 += MCU_WIDTH;
2155 cr_buf += MCU_WIDTH / 2;
2156 cb_buf += MCU_WIDTH / 2;
2159 line_size = MCU_WIDTH / 2; // We compute two pixels at a time
2163 ctx->y_src = (uint8_t*) dst_up;
2168 extern C_RESULT video_utils_set_format( uint32_t width, uint32_t height );
2170 C_RESULT video_controller_update( video_controller_t* controller, bool_t complete )
2172 video_codec_t* video_codec = controller->video_codec;
2174 controller->current_bits += controller->in_stream.index << 5; // Index is an index in a int32_t array (32 bits)
2178 controller->num_frames += 1;
2179 controller->output_bits = controller->current_bits;
2180 controller->current_bits = 0;
2183 video_codec->update( controller );
2188 C_RESULT video_controller_set_mode( video_controller_t* controller, uint32_t mode )
2190 controller->mode = mode;
2195 C_RESULT video_controller_set_bitrate( video_controller_t* controller, uint32_t target )
2197 controller->target_bitrate = target;
2202 static void video_realloc_buffers( video_controller_t* controller, int32_t num_prev_blockline )
2207 video_macroblock_t* mb;
2209 // Realloc global cache (YUV420 format)
2210 controller->cache = (int16_t*) aligned_realloc( controller->cache,
2211 3 * controller->width * controller->height * sizeof(int16_t) / 2,
2213 memset( controller->cache, 0, 3 * controller->width * controller->height * sizeof(int16_t) / 2 );
2216 cache = controller->cache;
2217 i = controller->num_blockline;
2219 controller->gobs = (video_gob_t*) realloc(controller->gobs, i * sizeof(video_gob_t));
2220 memset( controller->gobs, 0, i * sizeof(video_gob_t) );
2222 gob = &controller->gobs[0];
2225 j = controller->mb_blockline;
2227 if( --num_prev_blockline < 0 )
2228 gob->macroblocks = NULL;
2230 gob->macroblocks = (video_macroblock_t*) realloc( gob->macroblocks, j * sizeof(video_macroblock_t));
2231 memset( gob->macroblocks, 0, j * sizeof(video_macroblock_t));
2233 mb = &gob->macroblocks[0];
2237 cache += MCU_BLOCK_SIZE*6;
2245 C_RESULT video_controller_cleanup( video_controller_t* controller )
2250 if( controller->gobs != NULL )
2252 gob = &controller->gobs[0];
2254 for( i = controller->num_blockline; i > 0; i-- )
2256 free( gob->macroblocks );
2261 free( controller->gobs );
2264 if( controller->cache != NULL )
2266 aligned_free( controller->cache );
2272 C_RESULT video_controller_set_format( video_controller_t* controller, int32_t width, int32_t height )
2274 int32_t num_prev_blockline;
2276 ASSERT( (width != 0) && (height != 0) );
2278 if( width != controller->width || controller->height != height )
2280 controller->width = width;
2281 controller->height = height;
2283 num_prev_blockline = controller->num_blockline;
2285 controller->num_blockline = height >> 4;
2286 controller->mb_blockline = width >> 4;
2288 video_realloc_buffers( controller, num_prev_blockline );
2290 video_utils_set_format( width, height );
2296 C_RESULT video_controller_set_picture_type( video_controller_t* controller, uint32_t type )
2298 controller->picture_type = type;
2303 C_RESULT video_controller_set_motion_estimation( video_controller_t* controller, bool_t use_me )
2305 controller->use_me = use_me;
2310 C_RESULT video_quantizer_init( video_controller_t* controller )
2312 // Init quantizer's value
2313 // This value is between 1 and 31
2315 int32_t quant = controller->quant;
2319 else if( quant >= 32 )
2322 if( controller->picture_type == VIDEO_PICTURE_INTRA )
2325 controller->invQp = (1 << 16) / (2*quant);
2327 else // VIDEO_PICTURE_INTER
2329 controller->Qp = quant / 2;
2330 controller->invQp = (1 << 16) / (2*quant);
2333 controller->dquant = 0;
2338 C_RESULT video_quantizer_update( video_controller_t* controller )
2340 // Update quantizer's value
2341 int32_t quant = controller->quant;
2345 else if( quant >= 32 )
2348 if( controller->picture_type == VIDEO_PICTURE_INTRA )
2351 controller->invQp = (1 << 16) / (2*quant);
2353 else // VIDEO_PICTURE_INTER
2355 controller->Qp = quant / 2;
2356 controller->invQp = (1 << 16) / (2*quant);
2359 controller->dquant = 0;
2364 C_RESULT video_quantize( video_controller_t* controller, video_macroblock_t* macroblock, int32_t num_macro_blocks )
2366 int32_t sum_y, sum_c, dc0, dc1, dc2, dc3, dcb, dcr;
2369 y0 = macroblock->data;
2371 while( num_macro_blocks > 0 )
2373 if( controller->do_azq == TRUE )
2375 int16_t *y1, *y2, *y3, *cb, *cr;
2377 y1 = y0 + MCU_BLOCK_SIZE;
2378 y2 = y1 + MCU_BLOCK_SIZE;
2379 y3 = y2 + MCU_BLOCK_SIZE;
2380 cb = y3 + MCU_BLOCK_SIZE;
2381 cr = cb + MCU_BLOCK_SIZE;
2383 // Test for azq (all zero quantized) in luma blocks
2391 sum_y = dc0 + dc1 + dc2 + dc3;
2400 macroblock->azq = (sum_y < controller->aq) & (sum_c < controller->bq);
2401 macroblock->dquant = controller->dquant;
2403 // Perform quantization on coefficients if necessary
2404 if( !macroblock->azq )
2406 RTMON_USTART(VIDEO_VLIB_DOQUANTIZE);
2407 if( controller->picture_type == VIDEO_PICTURE_INTRA ) // intra
2409 y0 = do_quantize_intra_mb(y0, controller->invQp, &macroblock->num_coeff_y0);
2413 y0 = do_quantize_inter_mb(y0, controller->Qp, controller->invQp, &macroblock->num_coeff_y0);
2415 RTMON_USTOP(VIDEO_VLIB_DOQUANTIZE);
2421 if( macroblock->azq )
2423 y0 = macroblock->data;
2430 C_RESULT video_unquantize( video_controller_t* controller, video_macroblock_t* macroblock, int32_t num_macro_blocks )
2434 controller->quant += macroblock->dquant;
2436 dst = macroblock->data;
2438 while( num_macro_blocks > 0 )
2440 // TODO Check generated code
2442 if( !macroblock->azq )
2443 do_unquantize(dst, controller->picture_type, controller->quant, macroblock->num_coeff_y0);
2445 dst += MCU_BLOCK_SIZE;
2447 if( !macroblock->azq )
2448 do_unquantize(dst, controller->picture_type, controller->quant, macroblock->num_coeff_y1);
2450 dst += MCU_BLOCK_SIZE;
2452 if( !macroblock->azq )
2453 do_unquantize(dst, controller->picture_type, controller->quant, macroblock->num_coeff_y2);
2455 dst += MCU_BLOCK_SIZE;
2457 if( !macroblock->azq )
2458 do_unquantize(dst, controller->picture_type, controller->quant, macroblock->num_coeff_y3);
2460 dst += MCU_BLOCK_SIZE;
2462 if( !macroblock->azq )
2463 do_unquantize(dst, controller->picture_type, controller->quant, macroblock->num_coeff_cb);
2465 dst += MCU_BLOCK_SIZE;
2467 if( !macroblock->azq )
2468 do_unquantize(dst, controller->picture_type, controller->quant, macroblock->num_coeff_cr);
2470 dst += MCU_BLOCK_SIZE;
2479 #ifndef HAS_DO_QUANTIZE_INTRA_MB
2480 int16_t* do_quantize_intra_mb(int16_t* ptr, int32_t invQuant, int32_t* last_ptr)
2482 int32_t i, num_coeff;
2483 int32_t coeff, sign, last;
2485 for( i = 6; i > 0; i-- )
2487 // LEVEL = (COF + 4)/(2*4) see III.3.2.3
2489 coeff = (*ptr + 4) >> 3;
2496 num_coeff = MCU_BLOCK_SIZE-1;
2498 while( num_coeff > 0 )
2504 // |LEVEL| = |COF| / (2 * QUANT) see III.3.2.2
2534 #endif // HAS_DO_QUANTIZE_INTRA_MB
2536 int16_t* do_quantize_inter_mb(int16_t* ptr, int32_t quant, int32_t invQuant, int32_t* last_ptr)
2538 int32_t i, num_coeff;
2539 int32_t coeff, sign, last;
2541 // LEVEL = ( |COF| - QUANT/2 ) / (2*QUANT) see III.3.2.1
2543 for( i = 6; i > 0; i-- )
2546 num_coeff = MCU_BLOCK_SIZE;
2548 while( num_coeff > 0 )
2583 #ifndef HAS_DO_UNQUANTIZE
2584 C_RESULT do_unquantize(int16_t* ptr, int32_t picture_type, int32_t quant, int32_t num_coeff)
2588 if (quant == TABLE_QUANTIZATION)
2590 // table quantization mode
2591 int16_t* p_iquant_table = (int16_t*)(&iquant_tab[0]);
2597 coeff *= (*p_iquant_table);
2604 } while( num_coeff > 0 );
2608 // constant quantization mode
2609 if( picture_type == VIDEO_PICTURE_INTRA ) // intra
2612 *ptr = (coeff << 3); // see III.3.2
2618 while( num_coeff > 0 )
2624 coeff = quant*( 2*coeff + 1 );
2640 static uint32_t num_references = 0;
2642 C_RESULT video_utils_init( video_controller_t* controller )
2644 if( num_references == 0 )
2653 C_RESULT video_utils_close( video_controller_t* controller )
2655 if( num_references > 0 )
2663 C_RESULT video_utils_set_format( uint32_t width, uint32_t height )
2667 int32_t video_zztable_t81[MCU_BLOCK_SIZE] = {
2668 0, 1, 8, 16, 9, 2, 3, 10,
2669 17, 24, 32, 25, 18, 11, 4, 5,
2670 12, 19, 26, 33, 40, 48, 41, 34,
2671 27, 20, 13, 6, 7, 14, 21, 28,
2672 35, 42, 49, 56, 57, 50, 43, 36,
2673 29, 22, 15, 23, 30, 37, 44, 51,
2674 58, 59, 52, 45, 38, 31, 39, 46,
2675 53, 60, 61, 54, 47, 55, 62, 63,
2678 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
2679 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
2680 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
2681 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
2682 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
2683 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
2684 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
2685 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
2686 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
2687 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
2688 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
2689 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
2695 #define CONST_BITS 13
2696 #define PASS1_BITS 1
2697 #define ONE ((INT32) 1)
2698 #define MULTIPLY(var,const) ((var) * (const))
2699 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
2700 #define RIGHT_SHIFT(x,shft) ((x) >> (shft))
2702 #ifndef HAS_FDCT_COMPUTE
2703 void fdct(const unsigned short* in, short* out)
2705 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2706 INT32 tmp10, tmp11, tmp12, tmp13;
2707 INT32 z1, z2, z3, z4, z5;
2711 int data[DCTSIZE * DCTSIZE];
2713 int* dataptr = data;
2715 for( i = 0; i < DCTSIZE; i++ )
2717 for( j = 0; j < DCTSIZE; j++ )
2721 temp = in[i*DCTSIZE + j];
2722 dataptr[i*DCTSIZE + j] = temp;
2726 /* Pass 1: process rows. */
2727 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2728 /* furthermore, we scale the results by 2**PASS1_BITS. */
2731 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2732 tmp0 = dataptr[0] + dataptr[7];
2733 tmp7 = dataptr[0] - dataptr[7];
2734 tmp1 = dataptr[1] + dataptr[6];
2735 tmp6 = dataptr[1] - dataptr[6];
2736 tmp2 = dataptr[2] + dataptr[5];
2737 tmp5 = dataptr[2] - dataptr[5];
2738 tmp3 = dataptr[3] + dataptr[4];
2739 tmp4 = dataptr[3] - dataptr[4];
2741 /* Even part per LL&M figure 1 --- note that published figure is faulty;
2742 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
2745 tmp10 = tmp0 + tmp3;
2746 tmp13 = tmp0 - tmp3;
2747 tmp11 = tmp1 + tmp2;
2748 tmp12 = tmp1 - tmp2;
2750 dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
2751 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
2753 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
2754 dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS);
2755 dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
2757 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
2758 * cK represents cos(K*pi/16).
2759 * i0..i3 in the paper are tmp4..tmp7 here.
2766 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
2768 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
2769 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
2770 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
2771 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
2772 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
2773 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
2774 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
2775 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
2780 dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
2781 dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
2782 dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
2783 dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
2785 dataptr += DCTSIZE; /* advance pointer to next row */
2788 /* Pass 2: process columns.
2789 * We remove the PASS1_BITS scaling, but leave the results scaled up
2790 * by an overall factor of 8.
2794 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2795 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
2796 tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
2797 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
2798 tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
2799 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
2800 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
2801 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
2802 tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
2804 /* Even part per LL&M figure 1 --- note that published figure is faulty;
2805 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
2808 tmp10 = tmp0 + tmp3;
2809 tmp13 = tmp0 - tmp3;
2810 tmp11 = tmp1 + tmp2;
2811 tmp12 = tmp1 - tmp2;
2813 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
2814 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
2816 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
2817 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS);
2818 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS);
2820 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
2821 * cK represents cos(K*pi/16).
2822 * i0..i3 in the paper are tmp4..tmp7 here.
2829 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
2831 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
2832 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
2833 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
2834 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
2835 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
2836 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
2837 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
2838 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
2843 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS);
2844 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS);
2845 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS);
2846 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS);
2848 dataptr++; /* advance pointer to next column */
2851 for( i = 0; i < DCTSIZE; i++ )
2852 for( j = 0; j < DCTSIZE; j++ )
2853 out[i*DCTSIZE + j] = data[i*DCTSIZE + j] >> 3;
2855 #endif // HAS_FDCT_COMPUTE
2857 #ifndef HAS_IDCT_COMPUTE
2858 void idct(const short* in, unsigned short* out)
2860 INT32 tmp0, tmp1, tmp2, tmp3;
2861 INT32 tmp10, tmp11, tmp12, tmp13;
2862 INT32 z1, z2, z3, z4, z5;
2867 int workspace[DCTSIZE2]; /* buffers data between passes */
2871 /* Pass 1: process columns from input, store into work array. */
2872 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
2873 /* furthermore, we scale the results by 2**PASS1_BITS. */
2877 for (ctr = DCTSIZE; ctr > 0; ctr--) {
2878 /* Due to quantization, we will usually find that many of the input
2879 * coefficients are zero, especially the AC terms. We can exploit this
2880 * by short-circuiting the IDCT calculation for any column in which all
2881 * the AC terms are zero. In that case each output is equal to the
2882 * DC coefficient (with scale factor as needed).
2883 * With typical images and quantization tables, half or more of the
2884 * column DCT calculations can be simplified this way.
2887 if( inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2888 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2889 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2890 inptr[DCTSIZE*7] == 0 ) {
2891 /* AC terms all zero */
2892 int dcval = inptr[DCTSIZE*0] << PASS1_BITS;
2894 wsptr[DCTSIZE*0] = dcval;
2895 wsptr[DCTSIZE*1] = dcval;
2896 wsptr[DCTSIZE*2] = dcval;
2897 wsptr[DCTSIZE*3] = dcval;
2898 wsptr[DCTSIZE*4] = dcval;
2899 wsptr[DCTSIZE*5] = dcval;
2900 wsptr[DCTSIZE*6] = dcval;
2901 wsptr[DCTSIZE*7] = dcval;
2903 inptr++; /* advance pointers to next column */
2908 /* Even part: reverse the even part of the forward DCT. */
2909 /* The rotator is sqrt(2)*c(-6). */
2911 z2 = inptr[DCTSIZE*2];
2912 z3 = inptr[DCTSIZE*6];
2914 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
2915 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
2916 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
2918 z2 = inptr[DCTSIZE*0];
2919 z3 = inptr[DCTSIZE*4];
2921 tmp0 = (z2 + z3) << CONST_BITS;
2922 tmp1 = (z2 - z3) << CONST_BITS;
2924 tmp10 = tmp0 + tmp3;
2925 tmp13 = tmp0 - tmp3;
2926 tmp11 = tmp1 + tmp2;
2927 tmp12 = tmp1 - tmp2;
2929 /* Odd part per figure 8; the matrix is unitary and hence its
2930 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2933 tmp0 = inptr[DCTSIZE*7];
2934 tmp1 = inptr[DCTSIZE*5];
2935 tmp2 = inptr[DCTSIZE*3];
2936 tmp3 = inptr[DCTSIZE*1];
2942 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
2944 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
2945 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
2946 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
2947 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
2948 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
2949 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
2950 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
2951 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
2961 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2963 wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2964 wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2965 wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2966 wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2967 wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2968 wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2969 wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2970 wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2972 inptr++; /* advance pointers to next column */
2976 /* Pass 2: process rows from work array, store into output array. */
2977 /* Note that we must descale the results by a factor of 8 == 2**3, */
2978 /* and also undo the PASS1_BITS scaling. */
2982 for (ctr = 0; ctr < DCTSIZE; ctr++) {
2983 /* Even part: reverse the even part of the forward DCT. */
2984 /* The rotator is sqrt(2)*c(-6). */
2986 z2 = (INT32) wsptr[2];
2987 z3 = (INT32) wsptr[6];
2989 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
2990 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
2991 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
2993 tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
2994 tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
2996 tmp10 = tmp0 + tmp3;
2997 tmp13 = tmp0 - tmp3;
2998 tmp11 = tmp1 + tmp2;
2999 tmp12 = tmp1 - tmp2;
3001 /* Odd part per figure 8; the matrix is unitary and hence its
3002 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3005 tmp0 = (INT32) wsptr[7];
3006 tmp1 = (INT32) wsptr[5];
3007 tmp2 = (INT32) wsptr[3];
3008 tmp3 = (INT32) wsptr[1];
3014 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
3016 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
3017 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
3018 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
3019 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
3020 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
3021 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
3022 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
3023 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
3033 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3035 outptr[0] = (tmp10 + tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
3036 outptr[7] = (tmp10 - tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
3037 outptr[1] = (tmp11 + tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
3038 outptr[6] = (tmp11 - tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
3039 outptr[2] = (tmp12 + tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
3040 outptr[5] = (tmp12 - tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
3041 outptr[3] = (tmp13 + tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
3042 outptr[4] = (tmp13 - tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
3044 wsptr += DCTSIZE; /* advance pointer to next row */
3048 for(ctr = 0; ctr < DCTSIZE2; ctr++)
3049 out[ctr] = data[ctr];
3051 #endif // HAS_IDCT_COMPUTE
3053 #ifndef HAS_FDCT_COMPUTE
// Forward-DCT driver: applies fdct() to a run of macroblocks.
//
// While num_macro_blocks is greater than zero, the loop body calls fdct()
// six times in a row. After every call both in and out are advanced by
// MCU_BLOCK_SIZE elements, so one loop pass moves each pointer forward by
// 6 * MCU_BLOCK_SIZE int16_t values.
//
//   in               : source buffer, passed to fdct() through a uint16_t* cast
//   out              : destination buffer, passed to fdct() as-is
//   num_macro_blocks : the loop runs while this value is > 0
//
// NOTE(review): six blocks per macroblock presumably corresponds to
// 4 luma + 2 chroma blocks (4:2:0 layout) -- confirm against the callers.
// NOTE(review): presumably returns the advanced out pointer -- confirm.
3054 int16_t* video_fdct_compute(int16_t* in, int16_t* out, int32_t num_macro_blocks)
3056 while( num_macro_blocks > 0 )
// Block 1 of 6. fdct() apparently takes its input as uint16_t*, hence the cast.
3058 fdct((uint16_t*)in, out);
// Step both buffers to the next block.
3060 in += MCU_BLOCK_SIZE;
3061 out += MCU_BLOCK_SIZE;
// Block 2 of 6.
3063 fdct((uint16_t*)in, out);
3065 in += MCU_BLOCK_SIZE;
3066 out += MCU_BLOCK_SIZE;
// Block 3 of 6.
3068 fdct((uint16_t*)in, out);
3070 in += MCU_BLOCK_SIZE;
3071 out += MCU_BLOCK_SIZE;
// Block 4 of 6.
3073 fdct((uint16_t*)in, out);
3075 in += MCU_BLOCK_SIZE;
3076 out += MCU_BLOCK_SIZE;
// Block 5 of 6.
3078 fdct((uint16_t*)in, out);
3080 in += MCU_BLOCK_SIZE;
3081 out += MCU_BLOCK_SIZE;
// Block 6 of 6. The pointers are left at the start of the next macroblock.
3083 fdct((uint16_t*)in, out);
3085 in += MCU_BLOCK_SIZE;
3086 out += MCU_BLOCK_SIZE;
3093 #endif // HAS_FDCT_COMPUTE
3095 #ifndef HAS_IDCT_COMPUTE
// Inverse-DCT driver: applies idct() to a run of macroblocks.
//
// While num_macro_blocks is greater than zero, the loop body calls idct()
// six times in a row. After every call both in and out are advanced by
// MCU_BLOCK_SIZE elements, so one loop pass moves each pointer forward by
// 6 * MCU_BLOCK_SIZE int16_t values.
//
//   in               : source coefficient buffer, passed to idct() as-is
//   out              : destination buffer, passed to idct() through a uint16_t* cast
//   num_macro_blocks : the loop runs while this value is > 0
//
// NOTE(review): six blocks per macroblock presumably corresponds to
// 4 luma + 2 chroma blocks (4:2:0 layout) -- confirm against the callers.
// NOTE(review): presumably returns the advanced out pointer -- confirm.
3096 int16_t* video_idct_compute(int16_t* in, int16_t* out, int32_t num_macro_blocks)
3098 while( num_macro_blocks > 0 )
// Block 1 of 6. idct() apparently takes its output as uint16_t*, hence the cast.
3100 idct(in, (uint16_t*)out);
// Step both buffers to the next block.
3102 in += MCU_BLOCK_SIZE;
3103 out += MCU_BLOCK_SIZE;
// Block 2 of 6.
3105 idct(in, (uint16_t*)out);
3107 in += MCU_BLOCK_SIZE;
3108 out += MCU_BLOCK_SIZE;
// Block 3 of 6.
3110 idct(in, (uint16_t*)out);
3112 in += MCU_BLOCK_SIZE;
3113 out += MCU_BLOCK_SIZE;
// Block 4 of 6.
3115 idct(in, (uint16_t*)out);
3117 in += MCU_BLOCK_SIZE;
3118 out += MCU_BLOCK_SIZE;
// Block 5 of 6.
3120 idct(in, (uint16_t*)out);
3122 in += MCU_BLOCK_SIZE;
3123 out += MCU_BLOCK_SIZE;
// Block 6 of 6. The pointers are left at the start of the next macroblock.
3125 idct(in, (uint16_t*)out);
3127 in += MCU_BLOCK_SIZE;
3128 out += MCU_BLOCK_SIZE;
3135 #endif // HAS_IDCT_COMPUTE