4 #include <VP_Api/vp_api.h>
5 #include <VP_Api/vp_api_thread_helper.h>
6 #include <VP_Api/vp_api_error.h>
7 #include <VP_Api/vp_api_picture.h>
8 #include <VP_Stages/vp_stages_configs.h>
9 #include <VP_Stages/vp_stages_io_console.h>
10 #include <VP_Stages/vp_stages_o_sdl.h>
11 #include <VP_Stages/vp_stages_io_file.h>
12 #include <VP_Os/vp_os_print.h>
13 #include <VP_Os/vp_os_malloc.h>
14 #include <VP_Os/vp_os_delay.h>
16 #include <MJPEG/mjpeg.h>
17 #include <MJPEG/dct.h>
19 // #define USE_EULER_ANGLES
21 // #define ACQ_WIDTH (176+0*16)
22 // #define ACQ_HEIGHT (144+0*16)
24 // #define QVGA_WIDTH 352
25 // #define QVGA_HEIGHT 288
// Frame width and height, in pixels, used for the picture configuration and
// for every buffer size computed in this file.
27 #define ACQ_WIDTH (352)
28 #define ACQ_HEIGHT (288)
// Handle filled in by vp_api_open() and handed back to vp_api_close() by the app thread.
33 PIPELINE_HANDLE pipeline_handle;
// Prototypes of the two thread routines defined in this file.
36 PROTO_THREAD_ROUTINE(app, nomParams);
37 PROTO_THREAD_ROUTINE(dct, nomParams);
// Thread table entries for both routines.
// NOTE(review): the meaning of the second argument (20) is not visible here -- confirm against the macro definition.
40 THREAD_TABLE_ENTRY(app, 20)
41 THREAD_TABLE_ENTRY(dct, 20)
45 ///*******************************************************************************************************************///
// Configuration of the buffer-to-picture stage: the picture whose Y, Cb and Cr
// planes receive the data copied from the incoming buffer.
48 typedef struct _buffer_to_picture_config_t
50 vp_api_picture_t* picture;
52 } buffer_to_picture_config_t;
// Open callback of the buffer-to-picture stage (registered in buffer_to_picture_funcs).
// The body is not visible in this listing.
55 buffer_to_picture_open(buffer_to_picture_config_t *cfg)
// Transform callback of the buffer-to-picture stage.
// Copies the Y, Cb and Cr planes stored back to back in in->buffers[0] into the
// planes of cfg->picture and exposes the picture through out->buffers.
// Every access to out is made with out->lock held.
61 buffer_to_picture_transform(buffer_to_picture_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
63 vp_os_mutex_lock(&out->lock);
// First call: describe the output and switch to PROCESSING.
66 if(out->status == VP_API_STATUS_INIT)
// One frame is W*H luma bytes plus two chroma planes of W*H/4 bytes each,
// matching the three copies made below.
69 out->size = (ACQ_WIDTH*ACQ_HEIGHT*3)/2;
// NOTE(review): the picture pointer itself is cast here, whereas
// mjpeg_stage_decoding_transform stores the address of its picture pointer in
// out->buffers -- confirm which form the next stage expects.
70 out->buffers = (int8_t **) cfg->picture;
72 out->status = VP_API_STATUS_PROCESSING;
75 if(out->status == VP_API_STATUS_ENDED)
79 if(out->status == VP_API_STATUS_PROCESSING)
// Input layout: Y plane (W*H bytes), then Cb (W*H/4 bytes), then Cr (W*H/4 bytes).
81 vp_os_memcpy( cfg->picture->y_buf, in->buffers[0], ACQ_WIDTH*ACQ_HEIGHT );
82 vp_os_memcpy( cfg->picture->cb_buf, in->buffers[0] + ACQ_WIDTH*ACQ_HEIGHT, ACQ_WIDTH*ACQ_HEIGHT/4 );
83 vp_os_memcpy( cfg->picture->cr_buf, in->buffers[0] + ACQ_WIDTH*ACQ_HEIGHT + ACQ_WIDTH*ACQ_HEIGHT/4, ACQ_WIDTH*ACQ_HEIGHT/4 );
// Forward the status of the input to the output.
86 out->status = in->status;
88 vp_os_mutex_unlock(&out->lock);
// Close callback of the buffer-to-picture stage (registered in buffer_to_picture_funcs).
// The body is not visible in this listing.
94 buffer_to_picture_close(buffer_to_picture_config_t *cfg)
// Callback table of the buffer-to-picture stage; the app thread installs it in
// the encoder pipeline. The open, transform and close entries are visible here.
99 const vp_api_stage_funcs_t buffer_to_picture_funcs =
102 (vp_api_stage_open_t)buffer_to_picture_open,
103 (vp_api_stage_transform_t)buffer_to_picture_transform,
104 (vp_api_stage_close_t)buffer_to_picture_close
108 ///*******************************************************************************************************************///
// Configuration of the picture-to-buffer stage: the picture whose Y, Cb and Cr
// planes are packed into the outgoing buffer.
111 typedef struct _picture_to_buffer_config_t
113 vp_api_picture_t* picture;
115 } picture_to_buffer_config_t;
// Open callback of the picture-to-buffer stage (registered in picture_to_buffer_funcs).
// The body is not visible in this listing.
// NOTE(review): cfg is declared as buffer_to_picture_config_t although this
// stage has its own picture_to_buffer_config_t; this looks like a copy/paste
// slip -- confirm and align the parameter type.
118 picture_to_buffer_open(buffer_to_picture_config_t *cfg)
// Transform callback of the picture-to-buffer stage.
// Packs the Y, Cb and Cr planes of cfg->picture back to back into
// out->buffers[0]. No visible line reads the in parameter.
// Every access to out is made with out->lock held.
// NOTE(review): cfg is declared as buffer_to_picture_config_t rather than
// picture_to_buffer_config_t -- confirm and align the parameter type.
124 picture_to_buffer_transform(buffer_to_picture_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
126 vp_os_mutex_lock(&out->lock);
// First call: allocate the output storage and switch to PROCESSING.
128 if(out->status == VP_API_STATUS_INIT)
131 out->size = (ACQ_WIDTH*ACQ_HEIGHT*3)/2;
// A single allocation holds one pointer slot followed by out->size data bytes.
// NOTE(review): no check of the allocation result is visible in this listing.
132 out->buffers = (int8_t **) vp_os_malloc(out->size*sizeof(int8_t) + sizeof(int8_t*));
133 out->indexBuffer = 0;
134 out->status = VP_API_STATUS_PROCESSING;
// buffers[0] points just past the pointer slot, into the same allocation.
136 out->buffers[0] = (int8_t *)(out->buffers+1);
139 if(out->status == VP_API_STATUS_PROCESSING)
// Output layout: Y plane (W*H bytes), then Cb (W*H/4 bytes), then Cr (W*H/4 bytes).
144 vp_os_memcpy( out->buffers[0], cfg->picture->y_buf, ACQ_WIDTH*ACQ_HEIGHT );
145 vp_os_memcpy( out->buffers[0] + ACQ_WIDTH*ACQ_HEIGHT, cfg->picture->cb_buf, ACQ_WIDTH*ACQ_HEIGHT/4);
146 vp_os_memcpy( out->buffers[0] + ACQ_WIDTH*ACQ_HEIGHT + ACQ_WIDTH*ACQ_HEIGHT/4, cfg->picture->cr_buf, ACQ_WIDTH*ACQ_HEIGHT/4);
// Status propagation from the input is disabled in this stage.
150 // out->status = in->status;
152 vp_os_mutex_unlock(&out->lock);
// Close callback of the picture-to-buffer stage (registered in picture_to_buffer_funcs).
// The body is not visible in this listing.
// NOTE(review): cfg is declared as buffer_to_picture_config_t rather than
// picture_to_buffer_config_t -- confirm and align the parameter type.
158 picture_to_buffer_close(buffer_to_picture_config_t *cfg)
// Callback table of the picture-to-buffer stage; the app thread installs it in
// the decoder pipeline. The open, transform and close entries are visible here.
163 const vp_api_stage_funcs_t picture_to_buffer_funcs =
166 (vp_api_stage_open_t) picture_to_buffer_open,
167 (vp_api_stage_transform_t) picture_to_buffer_transform,
168 (vp_api_stage_close_t) picture_to_buffer_close
172 ///*******************************************************************************************************************///
// Configuration and state of the MJPEG encoding stage.
// The stage code also uses cfg->stream and cfg->mjpeg; their declarations are
// not visible in this listing.
180 typedef struct _mjpeg_stage_encoding_config_t
// Picture that is encoded on every run of the stage.
184 vp_api_picture_t* picture;
// Size, in bytes, of the output buffer allocated by the transform callback.
186 uint32_t out_buffer_size;
188 } mjpeg_stage_encoding_config_t;
// Open callback of the MJPEG encoding stage.
// Creates cfg->stream as an INPUT_STREAM and initialises cfg->mjpeg in
// MJPEG_ENCODE mode from the width, height and format of cfg->picture.
// Returns the result of mjpeg_init().
190 C_RESULT mjpeg_stage_encoding_open(mjpeg_stage_encoding_config_t *cfg)
192 stream_new( &cfg->stream, INPUT_STREAM );
194 return mjpeg_init( &cfg->mjpeg, MJPEG_ENCODE, cfg->picture->width, cfg->picture->height, cfg->picture->format );
// Transform callback of the MJPEG encoding stage.
// Encodes cfg->picture with mjpeg_encode() into out->buffers[0] and reports the
// number of bytes written through out->size. No visible line reads the in
// parameter. Every access to out is made with out->lock held.
// The declarations of res, got_image and num_frames are not visible in this listing.
197 C_RESULT mjpeg_stage_encoding_transform(mjpeg_stage_encoding_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
205 vp_os_mutex_lock(&out->lock);
// First call: allocate the output storage and switch to PROCESSING.
207 if( out->status == VP_API_STATUS_INIT )
// A single allocation holds one pointer slot followed by out_buffer_size data bytes.
// NOTE(review): no check of the allocation result is visible in this listing.
210 out->buffers = (int8_t**) vp_os_malloc( sizeof(int8_t*) + cfg->out_buffer_size*sizeof(int8_t) );
211 out->buffers[0] = (int8_t*) ( out->buffers + 1 );
212 out->indexBuffer = 0;
214 out->status = VP_API_STATUS_PROCESSING;
217 if( out->status == VP_API_STATUS_PROCESSING )
// Point the stream at the output buffer again before each encode.
219 stream_config( &cfg->stream, cfg->out_buffer_size, out->buffers[0] );
221 num_frames = cfg->mjpeg.num_frames;
222 res = mjpeg_encode( &cfg->mjpeg, cfg->picture, &cfg->stream, &got_image );
// NOTE(review): the condition guarding this message is not visible here;
// presumably it depends on got_image -- confirm against the full source.
225 PRINT("Frame complete. Size = %d bytes\n", cfg->stream.index);
// The stream index is reported as the amount of encoded data.
227 out->size = cfg->stream.index;
230 if( out->status == VP_API_STATUS_ENDED )
232 PRINT("End of data\n");
235 vp_os_mutex_unlock( &out->lock );
240 C_RESULT mjpeg_stage_encoding_close(mjpeg_stage_encoding_config_t *cfg)
242 return mjpeg_release( &cfg->mjpeg );
246 ///*******************************************************************************************************************///
// Configuration and state of the MJPEG decoding stage.
// The stage code also uses cfg->stream and cfg->mjpeg; their declarations are
// not visible in this listing.
249 typedef struct _mjpeg_stage_decoding_config_t
// Picture that receives every decoded frame.
253 vp_api_picture_t* picture;
// Set by the app thread; no visible line of the decoding stage reads it.
255 uint32_t out_buffer_size;
257 } mjpeg_stage_decoding_config_t;
// Open callback of the MJPEG decoding stage.
// Creates cfg->stream as an OUTPUT_STREAM and initialises cfg->mjpeg in
// MJPEG_DECODE mode from the width, height and format of cfg->picture.
// Returns the result of mjpeg_init().
259 C_RESULT mjpeg_stage_decoding_open(mjpeg_stage_decoding_config_t *cfg)
261 stream_new( &cfg->stream, OUTPUT_STREAM );
263 return mjpeg_init( &cfg->mjpeg, MJPEG_DECODE, cfg->picture->width, cfg->picture->height, cfg->picture->format );
// Transform callback of the MJPEG decoding stage.
// Feeds the data found in in->buffers[in->indexBuffer] to mjpeg_decode(), which
// writes into cfg->picture. When data is left in the stream after a decode the
// status becomes STILL_RUNNING, so the leftover data is consumed on the next
// run instead of being overwritten. Every access to out is made with out->lock held.
// The declaration of got_image is not visible in this listing.
266 C_RESULT mjpeg_stage_decoding_transform(mjpeg_stage_decoding_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
270 vp_os_mutex_lock( &out->lock );
// First call: expose the picture as the output and switch to PROCESSING.
272 if(out->status == VP_API_STATUS_INIT)
// out->buffers is set to the address of the picture pointer held in cfg.
275 out->buffers = (int8_t**)&cfg->picture;
276 out->indexBuffer = 0;
279 out->status = VP_API_STATUS_PROCESSING;
// Propagate the end of the input downstream.
282 if( in->status == VP_API_STATUS_ENDED )
283 out->status = in->status;
285 // Several cases must be handled in this stage
286 // 1st: Input buffer is too small to decode a complete picture
287 // 2nd: Input buffer is big enough to decode 1 frame
288 // 3rd: Input buffer is so big we can decode more than 1 frame
// The stream is rebound to the input only in PROCESSING state; in
// STILL_RUNNING state the previous stream contents are kept.
290 if( out->status == VP_API_STATUS_PROCESSING )
292 // Reinit stream with new data
293 stream_config( &cfg->stream, in->size, in->buffers[in->indexBuffer] );
296 if(out->status == VP_API_STATUS_PROCESSING || out->status == VP_API_STATUS_STILL_RUNNING)
298 // If out->size == 1 it means picture is ready
300 out->status = VP_API_STATUS_PROCESSING;
302 mjpeg_decode( &cfg->mjpeg, cfg->picture, &cfg->stream, &got_image );
306 // we got one picture (handle case 1)
309 PRINT( "%d picture decoded\n", cfg->mjpeg.num_frames );
// A FAILED result from stream_is_empty() is treated as data remaining in the stream.
312 if( FAILED(stream_is_empty( &cfg->stream )) )
314 // Some data are still in stream
315 // Next time we run this stage we don't want this data to be lost
317 out->status = VP_API_STATUS_STILL_RUNNING;
322 vp_os_mutex_unlock( &out->lock );
// Close callback of the MJPEG decoding stage.
// Deletes cfg->stream, then releases the MJPEG codec state and returns the
// result of mjpeg_release().
327 C_RESULT mjpeg_stage_decoding_close(mjpeg_stage_decoding_config_t *cfg)
329 stream_delete( &cfg->stream );
331 return mjpeg_release( &cfg->mjpeg );
335 ///*******************************************************************************************************************///
// Callback table of the MJPEG encoding stage; no message handler is provided.
338 const vp_api_stage_funcs_t mjpeg_encoding_funcs = {
339 (vp_api_stage_handle_msg_t) NULL,
340 (vp_api_stage_open_t) mjpeg_stage_encoding_open,
341 (vp_api_stage_transform_t) mjpeg_stage_encoding_transform,
342 (vp_api_stage_close_t) mjpeg_stage_encoding_close
// Callback table of the MJPEG decoding stage; no message handler is provided.
346 const vp_api_stage_funcs_t mjpeg_decoding_funcs = {
347 (vp_api_stage_handle_msg_t) NULL,
348 (vp_api_stage_open_t) mjpeg_stage_decoding_open,
349 (vp_api_stage_transform_t) mjpeg_stage_decoding_transform,
350 (vp_api_stage_close_t) mjpeg_stage_decoding_close
354 ///*******************************************************************************************************************///
// Selects the pipeline built by the app thread: MJPEG_ENCODER (the default set
// here) or MJPEG_DECODER.
356 int32_t codec = MJPEG_ENCODER;
// Program entry point: starts the app thread. argc and argv are not used by any
// visible line. Whether the thread is joined before returning is not visible in
// this listing.
359 main(int argc, char **argv)
361 // START_THREAD(escaper, NO_PARAM);
362 START_THREAD(app, 0);
364 // JOIN_THREAD(escaper);
// Application thread: configures one picture, builds a file-to-file pipeline
// around either the MJPEG encoder or the MJPEG decoder (selected by codec),
// then runs the pipeline until it stops reporting PROCESSING or STILL_RUNNING.
// The increments of num_stages between stages are not visible in this listing.
371 PROTO_THREAD_ROUTINE(app, params)
373 uint32_t num_stages = 0;
374 vp_api_picture_t picture;
376 vp_api_io_pipeline_t pipeline;
377 vp_api_io_data_t out;
378 vp_api_io_stage_t stages[NB_STAGES];
380 vp_stages_input_file_config_t ifc;
381 vp_stages_output_file_config_t ofc;
382 // vp_stages_output_sdl_config_t osc;
384 buffer_to_picture_config_t bpc;
385 mjpeg_stage_encoding_config_t mec;
387 picture_to_buffer_config_t pbc;
388 mjpeg_stage_decoding_config_t dec;
390 /// Picture configuration
391 picture.format = PIX_FMT_YUV420P;
393 picture.width = ACQ_WIDTH;
394 picture.height = ACQ_HEIGHT;
395 picture.framerate = 15;
// One full-size luma plane and two quarter-size chroma planes.
// NOTE(review): no allocation check and no matching free are visible in this listing.
397 picture.y_buf = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT );
398 picture.cr_buf = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
399 picture.cb_buf = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
// Chroma lines are half as wide as luma lines.
401 picture.y_line_size = ACQ_WIDTH;
402 picture.cb_line_size = ACQ_WIDTH / 2;
403 picture.cr_line_size = ACQ_WIDTH / 2;
// The input file stage reads one whole frame (W*H*3/2 bytes) per buffer.
408 vp_os_memset(&ifc,0,sizeof(vp_stages_input_file_config_t));
410 ifc.name = "../in.yuv";
411 ifc.buffer_size = (ACQ_WIDTH*ACQ_HEIGHT*3)/2;
// NOTE(review): unlike ifc, ofc is not zeroed by any visible line before its
// name is set, so its other fields may be uninitialised -- confirm.
413 ofc.name = "../temp.mjpg";
// First stage: file input.
415 stages[num_stages].type = VP_API_INPUT_FILE;
416 stages[num_stages].cfg = (void *)&ifc;
417 stages[num_stages].funcs = vp_stages_input_file_funcs;
// Encoder pipeline: raw buffer -> picture -> MJPEG encoder.
421 if( codec == MJPEG_ENCODER )
423 bpc.picture = &picture;
425 mec.picture = &picture;
426 mec.out_buffer_size = 4096 * 4;
428 stages[num_stages].type = VP_API_FILTER_DECODER;
429 stages[num_stages].cfg = (void *)&bpc;
430 stages[num_stages].funcs = buffer_to_picture_funcs;
434 stages[num_stages].type = MJPEG_ENCODER;
435 stages[num_stages].cfg = (void*)&mec;
436 stages[num_stages].funcs = mjpeg_encoding_funcs;
// Decoder pipeline: MJPEG decoder -> picture -> raw buffer.
// NOTE(review): the input and output file names set above are not changed for
// this mode by any visible line -- confirm this is intended.
438 else if( codec == MJPEG_DECODER )
440 dec.picture = &picture;
441 dec.out_buffer_size = 4096 * 4;
443 pbc.picture = &picture;
445 stages[num_stages].type = MJPEG_DECODER;
446 stages[num_stages].cfg = (void*)&dec;
447 stages[num_stages].funcs = mjpeg_decoding_funcs;
451 stages[num_stages].type = VP_API_FILTER_ENCODER;
452 stages[num_stages].cfg = (void *)&pbc;
453 stages[num_stages].funcs = picture_to_buffer_funcs;
// Last stage: file output.
458 stages[num_stages].type = VP_API_OUTPUT_FILE;
459 stages[num_stages].cfg = (void*)&ofc;
460 stages[num_stages].funcs = vp_stages_output_file_funcs;
464 pipeline.nb_stages = num_stages;
465 pipeline.stages = &stages[0];
467 PRINT("Pipeline configured with %d stages\n", num_stages);
// NOTE(review): the result of vp_api_open() is not checked here.
469 vp_api_open(&pipeline, &pipeline_handle);
470 out.status = VP_API_STATUS_PROCESSING;
// Empty-bodied loop: keep running the pipeline while vp_api_run() succeeds and
// the output status is PROCESSING or STILL_RUNNING.
471 while(SUCCEED(vp_api_run(&pipeline, &out)) && (out.status == VP_API_STATUS_PROCESSING || out.status == VP_API_STATUS_STILL_RUNNING));
473 vp_api_close(&pipeline, &pipeline_handle);
479 ///*******************************************************************************************************************///
482 // static THREAD_HANDLE dct_thread_handle;
// Mutex and condition the dct thread waits on while it has no request.
483 static vp_os_mutex_t dct_start_mutex;
484 static vp_os_cond_t dct_start_cond;
// Protects the hand-over from current_io_buffer to result_io_buffer in the dct thread.
485 static vp_os_mutex_t critical_section;
// Request being processed (NULL when there is none) and last finished request.
487 static dct_io_buffer_t* current_io_buffer;
488 static dct_io_buffer_t* result_io_buffer;
// Forward and inverse 8x8 transforms, defined further down in this file.
490 static void fdct(const unsigned short* in, short* out);
491 static void idct(const short* in, unsigned short* out);
494 //-----------------------------------------------------------------------------
496 //-----------------------------------------------------------------------------
// DCT worker thread: waits for a request in current_io_buffer, applies fdct()
// or idct() (according to dct_mode) to each of its num_total_blocks blocks,
// then publishes the request through result_io_buffer.
// The enclosing loop and the declaration of i are not visible in this listing.
498 PROTO_THREAD_ROUTINE(dct, params)
502 PRINT("DCT thread start\n");
// NOTE(review): the NULL test is made before dct_start_mutex is taken and no
// re-check after the wait is visible here, so a wakeup without a request, or a
// signal sent between the test and the wait, would not be handled -- confirm
// against the full source.
506 if( current_io_buffer == NULL )
508 vp_os_mutex_lock(&dct_start_mutex);
509 vp_os_cond_wait(&dct_start_cond);
510 vp_os_mutex_unlock(&dct_start_mutex);
// Run the requested transform on every block of the request.
513 if( current_io_buffer->dct_mode == DCT_MODE_FDCT )
515 for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
517 fdct(current_io_buffer->input[i], current_io_buffer->output[i]);
520 else if( current_io_buffer->dct_mode == DCT_MODE_IDCT )
522 for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
524 idct(current_io_buffer->input[i], current_io_buffer->output[i]);
// Hand-over: the result is set and the request cleared under critical_section.
528 vp_os_mutex_lock(&critical_section);
529 result_io_buffer = current_io_buffer;
530 current_io_buffer = NULL;
531 vp_os_mutex_unlock(&critical_section);
538 //-----------------------------------------------------------------------------
540 //-----------------------------------------------------------------------------
// Initialises the DCT module: both mutexes, the start condition (bound to
// dct_start_mutex) and the two request pointers, which start as NULL.
// The returned value is not visible in this listing.
543 bool_t dct_init(void)
545 vp_os_mutex_init(&dct_start_mutex);
546 vp_os_cond_init(&dct_start_cond, &dct_start_mutex);
548 vp_os_mutex_init(&critical_section);
550 current_io_buffer = NULL;
551 result_io_buffer = NULL;
// Submits io_buffer (must not be NULL) as the next DCT request. The request is
// accepted only when no request is pending and no result is waiting, that is
// when both module pointers are NULL.
// The returned value and any wake-up of the worker are not visible in this listing.
556 bool_t dct_compute( dct_io_buffer_t* io_buffer )
560 assert(io_buffer != NULL);
562 if( current_io_buffer == NULL && result_io_buffer == NULL )
565 current_io_buffer = io_buffer;
// Processes the pending request, if any, in the context of the caller: applies
// fdct() or idct() to each block, keeps the request in io_buffer and clears
// current_io_buffer.
// The declaration of i, the value of io_buffer when no request is pending and
// the return statement are not visible in this listing.
574 dct_io_buffer_t* dct_result( void )
577 dct_io_buffer_t* io_buffer;
581 if( current_io_buffer != NULL)
583 if( current_io_buffer->dct_mode == DCT_MODE_FDCT )
585 for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
587 fdct(current_io_buffer->input[i], current_io_buffer->output[i]);
590 else if( current_io_buffer->dct_mode == DCT_MODE_IDCT )
592 for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
594 idct(current_io_buffer->input[i], current_io_buffer->output[i]);
598 io_buffer = current_io_buffer;
599 current_io_buffer = NULL;
605 //-----------------------------------------------------------------------------
607 //-----------------------------------------------------------------------------
// Fixed-point constants: each value is the real number in its name multiplied
// by 2 to the power CONST_BITS (8192) and stored as an integer.
610 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
611 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
612 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
613 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
614 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
615 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
616 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
617 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
618 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
619 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
620 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
621 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
// Number of fractional bits of the constants above.
// The definition of PASS1_BITS, used by fdct and idct, is not visible in this listing.
627 #define CONST_BITS 13
629 #define ONE ((INT32) 1)
// Plain integer multiply of a variable by one of the constants.
630 #define MULTIPLY(var,const) ((var) * (const))
// Right shift by n with rounding: half of the divisor is added before shifting.
631 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
632 #define RIGHT_SHIFT(x,shft) ((x) >> (shft))
// Forward 8x8 DCT in fixed-point arithmetic.
// Reads DCTSIZE*DCTSIZE samples from in, works in the local array data (a row
// pass followed by a column pass) and writes the coefficients, shifted right by
// 3, to out.
// Several statements (the tmp10..tmp13 and z1..z4 assignments, the local
// declarations and the dataptr resets) are not visible in this listing.
634 static void fdct(const unsigned short* in, short* out)
636 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
637 INT32 tmp10, tmp11, tmp12, tmp13;
638 INT32 z1, z2, z3, z4, z5;
642 int data[DCTSIZE * DCTSIZE];
// Copy the input block into the int work array.
646 for( i = 0; i < DCTSIZE; i++ )
648 for( j = 0; j < DCTSIZE; j++ )
652 temp = in[i*DCTSIZE + j];
653 dataptr[i*DCTSIZE + j] = temp;
657 /* Pass 1: process rows. */
658 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
659 /* furthermore, we scale the results by 2**PASS1_BITS. */
662 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
663 tmp0 = dataptr[0] + dataptr[7];
664 tmp7 = dataptr[0] - dataptr[7];
665 tmp1 = dataptr[1] + dataptr[6];
666 tmp6 = dataptr[1] - dataptr[6];
667 tmp2 = dataptr[2] + dataptr[5];
668 tmp5 = dataptr[2] - dataptr[5];
669 tmp3 = dataptr[3] + dataptr[4];
670 tmp4 = dataptr[3] - dataptr[4];
672 /* Even part per LL&M figure 1 --- note that published figure is faulty;
673 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
681 dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
682 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
684 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
685 dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS);
686 dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
688 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
689 * cK represents cos(K*pi/16).
690 * i0..i3 in the paper are tmp4..tmp7 here.
697 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
699 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
700 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
701 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
702 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
703 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
704 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
705 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
706 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
711 dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
712 dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
713 dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
714 dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
716 dataptr += DCTSIZE; /* advance pointer to next row */
719 /* Pass 2: process columns.
720 * We remove the PASS1_BITS scaling, but leave the results scaled up
721 * by an overall factor of 8.
725 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
726 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
727 tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
728 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
729 tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
730 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
731 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
732 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
733 tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
735 /* Even part per LL&M figure 1 --- note that published figure is faulty;
736 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
744 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
745 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
747 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
748 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS);
749 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS);
751 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
752 * cK represents cos(K*pi/16).
753 * i0..i3 in the paper are tmp4..tmp7 here.
760 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
762 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
763 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
764 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
765 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
766 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
767 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
768 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
769 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
774 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS);
775 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS);
776 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS);
777 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS);
779 dataptr++; /* advance pointer to next column */
// Write the result out, shifting right by 3 to remove the overall factor of 8
// left by the two passes.
782 for( i = 0; i < DCTSIZE; i++ )
783 for( j = 0; j < DCTSIZE; j++ )
784 out[i*DCTSIZE + j] = data[i*DCTSIZE + j] >> 3;
// Inverse 8x8 DCT in fixed-point arithmetic.
// Pass 1 reads the coefficient columns from in and stores intermediate values
// in workspace; pass 2 processes the rows of workspace and stores the samples
// through outptr; the final loop copies DCTSIZE2 values from data to out.
// Several statements (the tmp10..tmp13 and z1..z4 assignments, the local
// declarations of ctr, inptr, wsptr, outptr and data, and the pointer setup)
// are not visible in this listing.
787 static void idct(const short* in, unsigned short* out)
789 INT32 tmp0, tmp1, tmp2, tmp3;
790 INT32 tmp10, tmp11, tmp12, tmp13;
791 INT32 z1, z2, z3, z4, z5;
796 int workspace[DCTSIZE2]; /* buffers data between passes */
800 /* Pass 1: process columns from input, store into work array. */
801 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
802 /* furthermore, we scale the results by 2**PASS1_BITS. */
806 for (ctr = DCTSIZE; ctr > 0; ctr--) {
807 /* Due to quantization, we will usually find that many of the input
808 * coefficients are zero, especially the AC terms. We can exploit this
809 * by short-circuiting the IDCT calculation for any column in which all
810 * the AC terms are zero. In that case each output is equal to the
811 * DC coefficient (with scale factor as needed).
812 * With typical images and quantization tables, half or more of the
813 * column DCT calculations can be simplified this way.
816 if( inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
817 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
818 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
819 inptr[DCTSIZE*7] == 0 ) {
820 /* AC terms all zero */
821 int dcval = inptr[DCTSIZE*0] << PASS1_BITS;
823 wsptr[DCTSIZE*0] = dcval;
824 wsptr[DCTSIZE*1] = dcval;
825 wsptr[DCTSIZE*2] = dcval;
826 wsptr[DCTSIZE*3] = dcval;
827 wsptr[DCTSIZE*4] = dcval;
828 wsptr[DCTSIZE*5] = dcval;
829 wsptr[DCTSIZE*6] = dcval;
830 wsptr[DCTSIZE*7] = dcval;
832 inptr++; /* advance pointers to next column */
837 /* Even part: reverse the even part of the forward DCT. */
838 /* The rotator is sqrt(2)*c(-6). */
840 z2 = inptr[DCTSIZE*2];
841 z3 = inptr[DCTSIZE*6];
843 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
844 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
845 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
847 z2 = inptr[DCTSIZE*0];
848 z3 = inptr[DCTSIZE*4];
850 tmp0 = (z2 + z3) << CONST_BITS;
851 tmp1 = (z2 - z3) << CONST_BITS;
858 /* Odd part per figure 8; the matrix is unitary and hence its
859 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
862 tmp0 = inptr[DCTSIZE*7];
863 tmp1 = inptr[DCTSIZE*5];
864 tmp2 = inptr[DCTSIZE*3];
865 tmp3 = inptr[DCTSIZE*1];
871 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
873 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
874 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
875 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
876 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
877 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
878 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
879 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
880 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
890 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
892 wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
893 wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
894 wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
895 wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
896 wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
897 wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
898 wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
899 wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
901 inptr++; /* advance pointers to next column */
905 /* Pass 2: process rows from work array, store into output array. */
906 /* Note that we must descale the results by a factor of 8 == 2**3, */
907 /* and also undo the PASS1_BITS scaling. */
911 for (ctr = 0; ctr < DCTSIZE; ctr++) {
912 /* Even part: reverse the even part of the forward DCT. */
913 /* The rotator is sqrt(2)*c(-6). */
915 z2 = (INT32) wsptr[2];
916 z3 = (INT32) wsptr[6];
918 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
919 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
920 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
922 tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
923 tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
930 /* Odd part per figure 8; the matrix is unitary and hence its
931 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
934 tmp0 = (INT32) wsptr[7];
935 tmp1 = (INT32) wsptr[5];
936 tmp2 = (INT32) wsptr[3];
937 tmp3 = (INT32) wsptr[1];
943 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
945 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
946 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
947 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
948 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
949 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
950 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
951 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
952 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
962 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
// The results are shifted right by CONST_BITS+PASS1_BITS+3 with a plain shift;
// unlike DESCALE, no rounding term is added and no clamping is applied here.
964 outptr[0] = (tmp10 + tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
965 outptr[7] = (tmp10 - tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
966 outptr[1] = (tmp11 + tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
967 outptr[6] = (tmp11 - tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
968 outptr[2] = (tmp12 + tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
969 outptr[5] = (tmp12 - tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
970 outptr[3] = (tmp13 + tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
971 outptr[4] = (tmp13 - tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
973 wsptr += DCTSIZE; /* advance pointer to next row */
// Copy the finished block to the caller.
// NOTE(review): data is not declared in any visible line; presumably outptr
// walks over it during pass 2 -- confirm against the full source.
977 for(ctr = 0; ctr < DCTSIZE2; ctr++)
978 out[ctr] = data[ctr];