/* Source: git.maemo.org Git - mardrone/blob - mardrone/ARDrone_SDK_Version_1_8_20110726/ARDroneLib/VP_SDK/Examples/win32/mjpeg_encoder.c */

   1 #include <stdlib.h>
   2 #include <ctype.h>
   3
   4 #include <VP_Api/vp_api.h>
   5 #include <VP_Api/vp_api_thread_helper.h>
   6 #include <VP_Api/vp_api_error.h>
   7 #include <VP_Api/vp_api_picture.h>
   8 #include <VP_Stages/vp_stages_configs.h>
   9 #include <VP_Stages/vp_stages_io_console.h>
  10 #include <VP_Stages/vp_stages_o_sdl.h>
  11 #include <VP_Stages/vp_stages_io_file.h>
  12 #include <VP_Os/vp_os_print.h>
  13 #include <VP_Os/vp_os_malloc.h>
  14 #include <VP_Os/vp_os_delay.h>
  15
  16 #include <MJPEG/mjpeg.h>
  17 #include <MJPEG/dct.h>
  18
  19 // #define USE_EULER_ANGLES
  20
  21 // #define ACQ_WIDTH  (176+0*16)
  22 // #define ACQ_HEIGHT (144+0*16)
  23
  24 // #define QVGA_WIDTH  352
  25 // #define QVGA_HEIGHT 288
  26
  27 #define ACQ_WIDTH  (352)
  28 #define ACQ_HEIGHT (288)
  29
  30 #define NB_STAGES 4
  31
  32
  33 PIPELINE_HANDLE pipeline_handle;
  34
  35
  36 PROTO_THREAD_ROUTINE(app, nomParams);
  37 PROTO_THREAD_ROUTINE(dct, nomParams);
  38
  39 BEGIN_THREAD_TABLE
  40   THREAD_TABLE_ENTRY(app, 20)
  41   THREAD_TABLE_ENTRY(dct, 20)
  42 END_THREAD_TABLE
  43
  44
  45 ///*******************************************************************************************************************///
  46
  47
  48 typedef struct _buffer_to_picture_config_t
  49 {
  50   vp_api_picture_t* picture;
  51
  52 } buffer_to_picture_config_t;
  53
  54 C_RESULT
  55 buffer_to_picture_open(buffer_to_picture_config_t *cfg)
  56 {
  57   return (SUCCESS);
  58 }
  59
  60 C_RESULT
  61 buffer_to_picture_transform(buffer_to_picture_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
  62 {
  63   vp_os_mutex_lock(&out->lock);
  64
  65
  66   if(out->status == VP_API_STATUS_INIT)
  67   {
  68     out->numBuffers   = 1;
  69     out->size         = (ACQ_WIDTH*ACQ_HEIGHT*3)/2;
  70     out->buffers      = (int8_t **) cfg->picture;
  71     out->indexBuffer  = 0;
  72     out->status       = VP_API_STATUS_PROCESSING;
  73   }
  74
  75   if(out->status == VP_API_STATUS_ENDED)
  76   {
  77   }
  78
  79   if(out->status == VP_API_STATUS_PROCESSING)
  80   {
  81     vp_os_memcpy( cfg->picture->y_buf, in->buffers[0], ACQ_WIDTH*ACQ_HEIGHT );
  82     vp_os_memcpy( cfg->picture->cb_buf, in->buffers[0] + ACQ_WIDTH*ACQ_HEIGHT, ACQ_WIDTH*ACQ_HEIGHT/4 );
  83     vp_os_memcpy( cfg->picture->cr_buf, in->buffers[0] + ACQ_WIDTH*ACQ_HEIGHT + ACQ_WIDTH*ACQ_HEIGHT/4, ACQ_WIDTH*ACQ_HEIGHT/4 );
  84   }
  85
  86   out->status = in->status;
  87
  88   vp_os_mutex_unlock(&out->lock);
  89
  90   return (SUCCESS);
  91 }
  92
  93 C_RESULT
  94 buffer_to_picture_close(buffer_to_picture_config_t *cfg)
  95 {
  96   return (SUCCESS);
  97 }
  98
  99 const vp_api_stage_funcs_t buffer_to_picture_funcs =
 100 {
 101   NULL,
 102   (vp_api_stage_open_t)buffer_to_picture_open,
 103   (vp_api_stage_transform_t)buffer_to_picture_transform,
 104   (vp_api_stage_close_t)buffer_to_picture_close
 105 };
 106
 107
 108 ///*******************************************************************************************************************///
 109
 110
 111 typedef struct _picture_to_buffer_config_t
 112 {
 113   vp_api_picture_t* picture;
 114
 115 } picture_to_buffer_config_t;
 116
 117 C_RESULT
 118 picture_to_buffer_open(buffer_to_picture_config_t *cfg)
 119 {
 120   return C_OK;
 121 }
 122
 123 C_RESULT
 124 picture_to_buffer_transform(buffer_to_picture_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
 125 {
 126   vp_os_mutex_lock(&out->lock);
 127
 128   if(out->status == VP_API_STATUS_INIT)
 129   {
 130     out->numBuffers   = 1;
 131     out->size         = (ACQ_WIDTH*ACQ_HEIGHT*3)/2;
 132     out->buffers      = (int8_t **) vp_os_malloc(out->size*sizeof(int8_t) + sizeof(int8_t*));
 133     out->indexBuffer  = 0;
 134     out->status       = VP_API_STATUS_PROCESSING;
 135
 136     out->buffers[0]   = (int8_t *)(out->buffers+1);
 137   }
 138
 139   if(out->status == VP_API_STATUS_PROCESSING)
 140   {
 141     if( in->size == 1 )
 142     {
 143       // got a picture
 144       vp_os_memcpy( out->buffers[0], cfg->picture->y_buf, ACQ_WIDTH*ACQ_HEIGHT );
 145       vp_os_memcpy( out->buffers[0] + ACQ_WIDTH*ACQ_HEIGHT, cfg->picture->cb_buf, ACQ_WIDTH*ACQ_HEIGHT/4);
 146       vp_os_memcpy( out->buffers[0] + ACQ_WIDTH*ACQ_HEIGHT + ACQ_WIDTH*ACQ_HEIGHT/4, cfg->picture->cr_buf, ACQ_WIDTH*ACQ_HEIGHT/4);
 147     }
 148   }
 149
 150   // out->status = in->status;
 151
 152   vp_os_mutex_unlock(&out->lock);
 153
 154   return (SUCCESS);
 155 }
 156
 157 C_RESULT
 158 picture_to_buffer_close(buffer_to_picture_config_t *cfg)
 159 {
 160   return (SUCCESS);
 161 }
 162
 163 const vp_api_stage_funcs_t picture_to_buffer_funcs =
 164 {
 165   NULL,
 166   (vp_api_stage_open_t) picture_to_buffer_open,
 167   (vp_api_stage_transform_t) picture_to_buffer_transform,
 168   (vp_api_stage_close_t) picture_to_buffer_close
 169 };
 170
 171
 172 ///*******************************************************************************************************************///
 173
 174
 175 enum {
 176   MJPEG_ENCODER,
 177   MJPEG_DECODER
 178 };
 179
 180 typedef struct _mjpeg_stage_encoding_config_t
 181 {
 182   stream_t          stream;
 183   mjpeg_t           mjpeg;
 184   vp_api_picture_t* picture;
 185
 186   uint32_t          out_buffer_size;
 187
 188 } mjpeg_stage_encoding_config_t;
 189
 190 C_RESULT mjpeg_stage_encoding_open(mjpeg_stage_encoding_config_t *cfg)
 191 {
 192   stream_new( &cfg->stream, INPUT_STREAM );
 193
 194   return mjpeg_init( &cfg->mjpeg, MJPEG_ENCODE, cfg->picture->width, cfg->picture->height, cfg->picture->format );
 195 }
 196
 197 C_RESULT mjpeg_stage_encoding_transform(mjpeg_stage_encoding_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
 198 {
 199   C_RESULT res;
 200   uint32_t num_frames;
 201   bool_t got_image;
 202
 203   res = C_OK;
 204
 205   vp_os_mutex_lock(&out->lock);
 206
 207   if( out->status == VP_API_STATUS_INIT )
 208   {
 209     out->numBuffers   = 1;
 210     out->buffers      = (int8_t**) vp_os_malloc( sizeof(int8_t*) + cfg->out_buffer_size*sizeof(int8_t) );
 211     out->buffers[0]   = (int8_t*) ( out->buffers + 1 );
 212     out->indexBuffer  = 0;
 213
 214     out->status = VP_API_STATUS_PROCESSING;
 215   }
 216
 217   if( out->status == VP_API_STATUS_PROCESSING )
 218   {
 219     stream_config( &cfg->stream, cfg->out_buffer_size, out->buffers[0] );
 220
 221     num_frames = cfg->mjpeg.num_frames;
 222     res = mjpeg_encode( &cfg->mjpeg, cfg->picture, &cfg->stream, &got_image );
 223     if( got_image )
 224     {
 225       PRINT("Frame complete. Size = %d bytes\n", cfg->stream.index);
 226     }
 227     out->size = cfg->stream.index;
 228   }
 229
 230   if( out->status == VP_API_STATUS_ENDED )
 231   {
 232     PRINT("End of data\n");
 233   }
 234
 235   vp_os_mutex_unlock( &out->lock );
 236
 237   return C_OK;
 238 }
 239
 240 C_RESULT mjpeg_stage_encoding_close(mjpeg_stage_encoding_config_t *cfg)
 241 {
 242   return mjpeg_release( &cfg->mjpeg );
 243 }
 244
 245
 246 ///*******************************************************************************************************************///
 247
 248
 249 typedef struct _mjpeg_stage_decoding_config_t
 250 {
 251   stream_t          stream;
 252   mjpeg_t           mjpeg;
 253   vp_api_picture_t* picture;
 254
 255   uint32_t          out_buffer_size;
 256
 257 } mjpeg_stage_decoding_config_t;
 258
 259 C_RESULT mjpeg_stage_decoding_open(mjpeg_stage_decoding_config_t *cfg)
 260 {
 261   stream_new( &cfg->stream, OUTPUT_STREAM );
 262
 263   return mjpeg_init( &cfg->mjpeg, MJPEG_DECODE, cfg->picture->width, cfg->picture->height, cfg->picture->format );
 264 }
 265
 266 C_RESULT mjpeg_stage_decoding_transform(mjpeg_stage_decoding_config_t *cfg, vp_api_io_data_t *in, vp_api_io_data_t *out)
 267 {
 268   bool_t got_image;
 269
 270   vp_os_mutex_lock( &out->lock );
 271
 272   if(out->status == VP_API_STATUS_INIT)
 273   {
 274     out->numBuffers   = 1;
 275     out->buffers      = (int8_t**)&cfg->picture;
 276     out->indexBuffer  = 0;
 277     out->lineSize     = 0;
 278
 279     out->status = VP_API_STATUS_PROCESSING;
 280   }
 281
 282   if( in->status == VP_API_STATUS_ENDED )
 283     out->status = in->status;
 284
 285   // Several cases must be handled in this stage
 286   // 1st: Input buffer is too small to decode a complete picture
 287   // 2nd: Input buffer is big enough to decode 1 frame
 288   // 3rd: Input buffer is so big we can decode more than 1 frame
 289
 290   if( out->status == VP_API_STATUS_PROCESSING )
 291   {
 292     // Reinit stream with new data
 293     stream_config( &cfg->stream, in->size, in->buffers[in->indexBuffer] );
 294   }
 295
 296   if(out->status == VP_API_STATUS_PROCESSING || out->status == VP_API_STATUS_STILL_RUNNING)
 297   {
 298     // If out->size == 1 it means picture is ready
 299     out->size = 0;
 300     out->status = VP_API_STATUS_PROCESSING;
 301
 302     mjpeg_decode( &cfg->mjpeg, cfg->picture, &cfg->stream, &got_image );
 303
 304     if( got_image )
 305     {
 306       // we got one picture (handle case 1)
 307       out->size = 1;
 308
 309       PRINT( "%d picture decoded\n", cfg->mjpeg.num_frames );
 310
 311       // handle case 2 & 3
 312       if( FAILED(stream_is_empty( &cfg->stream )) )
 313       {
 314         // Some data are still in stream
 315         // Next time we run this stage we don't want this data to be lost
 316         // So flag it!
 317         out->status = VP_API_STATUS_STILL_RUNNING;
 318       }
 319     }
 320   }
 321
 322   vp_os_mutex_unlock( &out->lock );
 323
 324   return C_OK;
 325 }
 326
 327 C_RESULT mjpeg_stage_decoding_close(mjpeg_stage_decoding_config_t *cfg)
 328 {
 329   stream_delete( &cfg->stream );
 330
 331   return mjpeg_release( &cfg->mjpeg );
 332 }
 333
 334
 335 ///*******************************************************************************************************************///
 336
 337
 338 const vp_api_stage_funcs_t mjpeg_encoding_funcs = {
 339   (vp_api_stage_handle_msg_t) NULL,
 340   (vp_api_stage_open_t) mjpeg_stage_encoding_open,
 341   (vp_api_stage_transform_t) mjpeg_stage_encoding_transform,
 342   (vp_api_stage_close_t) mjpeg_stage_encoding_close
 343 };
 344
 345
 346 const vp_api_stage_funcs_t mjpeg_decoding_funcs = {
 347   (vp_api_stage_handle_msg_t) NULL,
 348   (vp_api_stage_open_t) mjpeg_stage_decoding_open,
 349   (vp_api_stage_transform_t) mjpeg_stage_decoding_transform,
 350   (vp_api_stage_close_t) mjpeg_stage_decoding_close
 351 };
 352
 353
 354 ///*******************************************************************************************************************///
 355
 356 int32_t codec = MJPEG_ENCODER;
 357
 358 int
 359 main(int argc, char **argv)
 360 {
 361   // START_THREAD(escaper, NO_PARAM);
 362   START_THREAD(app, 0);
 363
 364   // JOIN_THREAD(escaper);
 365   JOIN_THREAD(app);
 366
 367   return EXIT_SUCCESS;
 368 }
 369
 370
 371 PROTO_THREAD_ROUTINE(app, params)
 372 {
 373   uint32_t num_stages = 0;
 374   vp_api_picture_t picture;
 375
 376   vp_api_io_pipeline_t    pipeline;
 377   vp_api_io_data_t        out;
 378   vp_api_io_stage_t       stages[NB_STAGES];
 379
 380   vp_stages_input_file_config_t     ifc;
 381   vp_stages_output_file_config_t    ofc;
 382   // vp_stages_output_sdl_config_t     osc;
 383
 384   buffer_to_picture_config_t        bpc;
 385   mjpeg_stage_encoding_config_t     mec;
 386
 387   picture_to_buffer_config_t        pbc;
 388   mjpeg_stage_decoding_config_t     dec;
 389
 390   /// Picture configuration
 391   picture.format              = PIX_FMT_YUV420P;
 392
 393   picture.width               = ACQ_WIDTH;
 394   picture.height              = ACQ_HEIGHT;
 395   picture.framerate           = 15;
 396
 397   picture.y_buf               = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT );
 398   picture.cr_buf              = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
 399   picture.cb_buf              = vp_os_malloc( ACQ_WIDTH*ACQ_HEIGHT/4 );
 400
 401   picture.y_line_size         = ACQ_WIDTH;
 402   picture.cb_line_size        = ACQ_WIDTH / 2;
 403   picture.cr_line_size        = ACQ_WIDTH / 2;
 404
 405   picture.y_pad               = 0;
 406   picture.c_pad               = 0;
 407
 408   vp_os_memset(&ifc,0,sizeof(vp_stages_input_file_config_t));
 409
 410   ifc.name                    = "../in.yuv";
 411   ifc.buffer_size             = (ACQ_WIDTH*ACQ_HEIGHT*3)/2;
 412
 413   ofc.name                    = "../temp.mjpg";
 414
 415   stages[num_stages].type     = VP_API_INPUT_FILE;
 416   stages[num_stages].cfg      = (void *)&ifc;
 417   stages[num_stages].funcs    = vp_stages_input_file_funcs;
 418
 419   num_stages++;
 420
 421   if( codec == MJPEG_ENCODER )
 422   {
 423     bpc.picture         = &picture;
 424
 425     mec.picture         = &picture;
 426     mec.out_buffer_size = 4096 * 4;
 427
 428     stages[num_stages].type      = VP_API_FILTER_DECODER;
 429     stages[num_stages].cfg       = (void *)&bpc;
 430     stages[num_stages].funcs     = buffer_to_picture_funcs;
 431
 432     num_stages++;
 433
 434     stages[num_stages].type      = MJPEG_ENCODER;
 435     stages[num_stages].cfg       = (void*)&mec;
 436     stages[num_stages].funcs     = mjpeg_encoding_funcs;
 437   }
 438   else if( codec == MJPEG_DECODER )
 439   {
 440     dec.picture         = &picture;
 441     dec.out_buffer_size = 4096 * 4;
 442
 443     pbc.picture         = &picture;
 444
 445     stages[num_stages].type      = MJPEG_DECODER;
 446     stages[num_stages].cfg       = (void*)&dec;
 447     stages[num_stages].funcs     = mjpeg_decoding_funcs;
 448
 449     num_stages++;
 450
 451     stages[num_stages].type      = VP_API_FILTER_ENCODER;
 452     stages[num_stages].cfg       = (void *)&pbc;
 453     stages[num_stages].funcs     = picture_to_buffer_funcs;
 454   }
 455
 456   num_stages++;
 457
 458   stages[num_stages].type      = VP_API_OUTPUT_FILE;
 459   stages[num_stages].cfg       = (void*)&ofc;
 460   stages[num_stages].funcs     = vp_stages_output_file_funcs;
 461
 462   num_stages++;
 463
 464   pipeline.nb_stages  = num_stages;
 465   pipeline.stages     = &stages[0];
 466
 467   PRINT("Pipeline configured with %d stages\n", num_stages);
 468
 469   vp_api_open(&pipeline, &pipeline_handle);
 470   out.status = VP_API_STATUS_PROCESSING;
 471   while(SUCCEED(vp_api_run(&pipeline, &out)) && (out.status == VP_API_STATUS_PROCESSING || out.status == VP_API_STATUS_STILL_RUNNING));
 472
 473   vp_api_close(&pipeline, &pipeline_handle);
 474
 475   return EXIT_SUCCESS;
 476 }
 477
 478
 479 ///*******************************************************************************************************************///
 480
 481
 482 // static THREAD_HANDLE dct_thread_handle;
 483 static vp_os_mutex_t dct_start_mutex;
 484 static vp_os_cond_t  dct_start_cond;
 485 static vp_os_mutex_t critical_section;
 486
 487 static dct_io_buffer_t* current_io_buffer;
 488 static dct_io_buffer_t* result_io_buffer;
 489
 490 static void fdct(const unsigned short* in, short* out);
 491 static void idct(const short* in, unsigned short* out);
 492
 493
 494 //-----------------------------------------------------------------------------
 495 // DCT Thread
 496 //-----------------------------------------------------------------------------
 497
 498 PROTO_THREAD_ROUTINE(dct, params)
 499 {
 500   uint32_t i;
 501
 502   PRINT("DCT thread start\n");
 503
 504   while(1)
 505   {
 506     if( current_io_buffer == NULL )
 507     {
 508       vp_os_mutex_lock(&dct_start_mutex);
 509         vp_os_cond_wait(&dct_start_cond);
 510       vp_os_mutex_unlock(&dct_start_mutex);
 511     }
 512
 513     if( current_io_buffer->dct_mode == DCT_MODE_FDCT )
 514     {
 515       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 516       {
 517         fdct(current_io_buffer->input[i], current_io_buffer->output[i]);
 518       }
 519     }
 520     else if( current_io_buffer->dct_mode == DCT_MODE_IDCT )
 521     {
 522       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 523       {
 524         idct(current_io_buffer->input[i], current_io_buffer->output[i]);
 525       }
 526     }
 527
 528     vp_os_mutex_lock(&critical_section);
 529       result_io_buffer = current_io_buffer;
 530       current_io_buffer = NULL;
 531     vp_os_mutex_unlock(&critical_section);
 532   }
 533
 534   return 0;
 535 }
 536
 537
 538 //-----------------------------------------------------------------------------
 539 // DCT API
 540 //-----------------------------------------------------------------------------
 541
 542
 543 bool_t dct_init(void)
 544 {
 545   vp_os_mutex_init(&dct_start_mutex);
 546   vp_os_cond_init(&dct_start_cond, &dct_start_mutex);
 547
 548   vp_os_mutex_init(&critical_section);
 549
 550   current_io_buffer = NULL;
 551   result_io_buffer  = NULL;
 552
 553   return TRUE;
 554 }
 555
 556 bool_t dct_compute( dct_io_buffer_t* io_buffer )
 557 {
 558   bool_t res = FALSE;
 559
 560   assert(io_buffer != NULL);
 561
 562   if( current_io_buffer == NULL && result_io_buffer == NULL )
 563   {
 564     {
 565       current_io_buffer = io_buffer;
 566
 567       res = TRUE;
 568     }
 569   }
 570
 571   return res;
 572 }
 573
 574 dct_io_buffer_t* dct_result( void )
 575 {
 576   uint32_t i;
 577   dct_io_buffer_t* io_buffer;
 578
 579   io_buffer = NULL;
 580
 581   if( current_io_buffer != NULL)
 582   {
 583     if( current_io_buffer->dct_mode == DCT_MODE_FDCT )
 584     {
 585       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 586       {
 587         fdct(current_io_buffer->input[i], current_io_buffer->output[i]);
 588       }
 589     }
 590     else if( current_io_buffer->dct_mode == DCT_MODE_IDCT )
 591     {
 592       for( i = 0; i < current_io_buffer->num_total_blocks; i++ )
 593       {
 594         idct(current_io_buffer->input[i], current_io_buffer->output[i]);
 595       }
 596     }
 597
 598     io_buffer = current_io_buffer;
 599     current_io_buffer = NULL;
 600   }
 601
 602   return io_buffer;
 603 }
 604
 605 //-----------------------------------------------------------------------------
 606 // DCT Computation
 607 //-----------------------------------------------------------------------------
 608
 609
 610 #define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
 611 #define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
 612 #define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
 613 #define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
 614 #define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
 615 #define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
 616 #define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
 617 #define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
 618 #define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
 619 #define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
 620 #define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
 621 #define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
 622
 623 #define INT32       int
 624 #define DCTELEM     int
 625 #define DCTSIZE     8
 626 #define DCTSIZE2    64
 627 #define CONST_BITS  13
 628 #define PASS1_BITS  1
 629 #define ONE     ((INT32) 1)
 630 #define MULTIPLY(var,const)  ((var) * (const))
 631 #define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
 632 #define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
 633
 634 static void fdct(const unsigned short* in, short* out)
 635 {
 636   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 637   INT32 tmp10, tmp11, tmp12, tmp13;
 638   INT32 z1, z2, z3, z4, z5;
 639   int ctr;
 640   // SHIFT_TEMPS
 641
 642   int data[DCTSIZE * DCTSIZE];
 643   int i, j;
 644   int* dataptr = data;
 645
 646   for( i = 0; i < DCTSIZE; i++ )
 647   {
 648     for( j = 0; j < DCTSIZE; j++ )
 649     {
 650       int temp;
 651
 652       temp = in[i*DCTSIZE + j];
 653       dataptr[i*DCTSIZE + j] = temp;
 654     }
 655   }
 656
 657   /* Pass 1: process rows. */
 658   /* Note results are scaled up by sqrt(8) compared to a true DCT; */
 659   /* furthermore, we scale the results by 2**PASS1_BITS. */
 660
 661   dataptr = data;
 662   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 663     tmp0 = dataptr[0] + dataptr[7];
 664     tmp7 = dataptr[0] - dataptr[7];
 665     tmp1 = dataptr[1] + dataptr[6];
 666     tmp6 = dataptr[1] - dataptr[6];
 667     tmp2 = dataptr[2] + dataptr[5];
 668     tmp5 = dataptr[2] - dataptr[5];
 669     tmp3 = dataptr[3] + dataptr[4];
 670     tmp4 = dataptr[3] - dataptr[4];
 671
 672     /* Even part per LL&M figure 1 --- note that published figure is faulty;
 673      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
 674      */
 675
 676     tmp10 = tmp0 + tmp3;
 677     tmp13 = tmp0 - tmp3;
 678     tmp11 = tmp1 + tmp2;
 679     tmp12 = tmp1 - tmp2;
 680
 681     dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
 682     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
 683
 684     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 685     dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS-PASS1_BITS);
 686     dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
 687
 688     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
 689      * cK represents cos(K*pi/16).
 690      * i0..i3 in the paper are tmp4..tmp7 here.
 691      */
 692
 693     z1 = tmp4 + tmp7;
 694     z2 = tmp5 + tmp6;
 695     z3 = tmp4 + tmp6;
 696     z4 = tmp5 + tmp7;
 697     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 698
 699     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 700     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 701     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 702     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 703     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 704     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 705     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 706     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 707
 708     z3 += z5;
 709     z4 += z5;
 710
 711     dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
 712     dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
 713     dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
 714     dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
 715
 716     dataptr += DCTSIZE;         /* advance pointer to next row */
 717   }
 718
 719   /* Pass 2: process columns.
 720    * We remove the PASS1_BITS scaling, but leave the results scaled up
 721    * by an overall factor of 8.
 722    */
 723
 724   dataptr = data;
 725   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 726     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
 727     tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
 728     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
 729     tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
 730     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
 731     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
 732     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
 733     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
 734
 735     /* Even part per LL&M figure 1 --- note that published figure is faulty;
 736      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
 737      */
 738
 739     tmp10 = tmp0 + tmp3;
 740     tmp13 = tmp0 - tmp3;
 741     tmp11 = tmp1 + tmp2;
 742     tmp12 = tmp1 - tmp2;
 743
 744     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
 745     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
 746
 747     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
 748     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), CONST_BITS+PASS1_BITS);
 749     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), CONST_BITS+PASS1_BITS);
 750
 751     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
 752      * cK represents cos(K*pi/16).
 753      * i0..i3 in the paper are tmp4..tmp7 here.
 754      */
 755
 756     z1 = tmp4 + tmp7;
 757     z2 = tmp5 + tmp6;
 758     z3 = tmp4 + tmp6;
 759     z4 = tmp5 + tmp7;
 760     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 761
 762     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 763     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 764     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 765     tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 766     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 767     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 768     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 769     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 770
 771     z3 += z5;
 772     z4 += z5;
 773
 774     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS);
 775     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS);
 776     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS);
 777     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS);
 778
 779     dataptr++;  /* advance pointer to next column */
 780   }
 781
 782   for( i = 0; i < DCTSIZE; i++ )
 783     for( j = 0; j < DCTSIZE; j++ )
 784       out[i*DCTSIZE + j] = data[i*DCTSIZE + j] >> 3;
 785 }
 786
 787 static void idct(const short* in, unsigned short* out)
 788 {
 789   INT32 tmp0, tmp1, tmp2, tmp3;
 790   INT32 tmp10, tmp11, tmp12, tmp13;
 791   INT32 z1, z2, z3, z4, z5;
 792   int* wsptr;
 793   int* outptr;
 794   const short* inptr;
 795   int ctr;
 796   int workspace[DCTSIZE2];      /* buffers data between passes */
 797   int data[DCTSIZE2];
 798   // SHIFT_TEMPS
 799
 800   /* Pass 1: process columns from input, store into work array. */
 801   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
 802   /* furthermore, we scale the results by 2**PASS1_BITS. */
 803
 804   inptr = in;
 805   wsptr = workspace;
 806   for (ctr = DCTSIZE; ctr > 0; ctr--) {
 807     /* Due to quantization, we will usually find that many of the input
 808      * coefficients are zero, especially the AC terms.  We can exploit this
 809      * by short-circuiting the IDCT calculation for any column in which all
 810      * the AC terms are zero.  In that case each output is equal to the
 811      * DC coefficient (with scale factor as needed).
 812      * With typical images and quantization tables, half or more of the
 813      * column DCT calculations can be simplified this way.
 814      */
 815
 816     if( inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
 817         inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
 818         inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
 819         inptr[DCTSIZE*7] == 0 ) {
 820       /* AC terms all zero */
 821       int dcval = inptr[DCTSIZE*0] << PASS1_BITS;
 822
 823       wsptr[DCTSIZE*0] = dcval;
 824       wsptr[DCTSIZE*1] = dcval;
 825       wsptr[DCTSIZE*2] = dcval;
 826       wsptr[DCTSIZE*3] = dcval;
 827       wsptr[DCTSIZE*4] = dcval;
 828       wsptr[DCTSIZE*5] = dcval;
 829       wsptr[DCTSIZE*6] = dcval;
 830       wsptr[DCTSIZE*7] = dcval;
 831
 832       inptr++;  /* advance pointers to next column */
 833       wsptr++;
 834       continue;
 835     }
 836
 837     /* Even part: reverse the even part of the forward DCT. */
 838     /* The rotator is sqrt(2)*c(-6). */
 839
 840     z2 = inptr[DCTSIZE*2];
 841     z3 = inptr[DCTSIZE*6];
 842
 843     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 844     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 845     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 846
 847     z2 = inptr[DCTSIZE*0];
 848     z3 = inptr[DCTSIZE*4];
 849
 850     tmp0 = (z2 + z3) << CONST_BITS;
 851     tmp1 = (z2 - z3) << CONST_BITS;
 852
 853     tmp10 = tmp0 + tmp3;
 854     tmp13 = tmp0 - tmp3;
 855     tmp11 = tmp1 + tmp2;
 856     tmp12 = tmp1 - tmp2;
 857
 858     /* Odd part per figure 8; the matrix is unitary and hence its
 859      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 860      */
 861
 862     tmp0 = inptr[DCTSIZE*7];
 863     tmp1 = inptr[DCTSIZE*5];
 864     tmp2 = inptr[DCTSIZE*3];
 865     tmp3 = inptr[DCTSIZE*1];
 866
 867     z1 = tmp0 + tmp3;
 868     z2 = tmp1 + tmp2;
 869     z3 = tmp0 + tmp2;
 870     z4 = tmp1 + tmp3;
 871     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 872
 873     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 874     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 875     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 876     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 877     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 878     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 879     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 880     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 881
 882     z3 += z5;
 883     z4 += z5;
 884
 885     tmp0 += z1 + z3;
 886     tmp1 += z2 + z4;
 887     tmp2 += z2 + z3;
 888     tmp3 += z1 + z4;
 889
 890     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 891
 892     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
 893     wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
 894     wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
 895     wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
 896     wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
 897     wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
 898     wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
 899     wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
 900
 901     inptr++;  /* advance pointers to next column */
 902     wsptr++;
 903   }
 904
 905   /* Pass 2: process rows from work array, store into output array. */
 906   /* Note that we must descale the results by a factor of 8 == 2**3, */
 907   /* and also undo the PASS1_BITS scaling. */
 908
 909   wsptr = workspace;
 910   outptr = data;
 911   for (ctr = 0; ctr < DCTSIZE; ctr++) {
 912     /* Even part: reverse the even part of the forward DCT. */
 913     /* The rotator is sqrt(2)*c(-6). */
 914
 915     z2 = (INT32) wsptr[2];
 916     z3 = (INT32) wsptr[6];
 917
 918     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 919     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 920     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 921
 922     tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
 923     tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
 924
 925     tmp10 = tmp0 + tmp3;
 926     tmp13 = tmp0 - tmp3;
 927     tmp11 = tmp1 + tmp2;
 928     tmp12 = tmp1 - tmp2;
 929
 930     /* Odd part per figure 8; the matrix is unitary and hence its
 931      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
 932      */
 933
 934     tmp0 = (INT32) wsptr[7];
 935     tmp1 = (INT32) wsptr[5];
 936     tmp2 = (INT32) wsptr[3];
 937     tmp3 = (INT32) wsptr[1];
 938
 939     z1 = tmp0 + tmp3;
 940     z2 = tmp1 + tmp2;
 941     z3 = tmp0 + tmp2;
 942     z4 = tmp1 + tmp3;
 943     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 944
 945     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 946     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 947     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 948     tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 949     z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 950     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 951     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 952     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 953
 954     z3 += z5;
 955     z4 += z5;
 956
 957     tmp0 += z1 + z3;
 958     tmp1 += z2 + z4;
 959     tmp2 += z2 + z3;
 960     tmp3 += z1 + z4;
 961
 962     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 963
 964     outptr[0] = (tmp10 + tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 965     outptr[7] = (tmp10 - tmp3) >> ( CONST_BITS+PASS1_BITS+3 );
 966     outptr[1] = (tmp11 + tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 967     outptr[6] = (tmp11 - tmp2) >> ( CONST_BITS+PASS1_BITS+3 );
 968     outptr[2] = (tmp12 + tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 969     outptr[5] = (tmp12 - tmp1) >> ( CONST_BITS+PASS1_BITS+3 );
 970     outptr[3] = (tmp13 + tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 971     outptr[4] = (tmp13 - tmp0) >> ( CONST_BITS+PASS1_BITS+3 );
 972
 973     wsptr += DCTSIZE; /* advance pointer to next row */
 974     outptr += DCTSIZE;
 975   }
 976
 977   for(ctr = 0; ctr < DCTSIZE2; ctr++)
 978     out[ctr] = data[ctr];
 979 }