Update to 2.0.0 tree from current Fremantle build
[opencv] / 3rdparty / libtiff / tif_lzw.c
1 /* $Id: tif_lzw.c,v 1.1 2005-06-17 13:54:52 vp153 Exp $ */
2
3 /*
4  * Copyright (c) 1988-1997 Sam Leffler
5  * Copyright (c) 1991-1997 Silicon Graphics, Inc.
6  *
7  * Permission to use, copy, modify, distribute, and sell this software and 
8  * its documentation for any purpose is hereby granted without fee, provided
9  * that (i) the above copyright notices and this permission notice appear in
10  * all copies of the software and related documentation, and (ii) the names of
11  * Sam Leffler and Silicon Graphics may not be used in any advertising or
12  * publicity relating to the software without the specific, prior written
13  * permission of Sam Leffler and Silicon Graphics.
14  * 
15  * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
16  * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
17  * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
18  * 
19  * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
20  * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
21  * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
22  * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
23  * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
24  * OF THIS SOFTWARE.
25  */
26
27 #include "tiffiop.h"
28 #ifdef LZW_SUPPORT
29 /*
30  * TIFF Library.
31  * Rev 5.0 Lempel-Ziv & Welch Compression Support
32  *
33  * This code is derived from the compress program whose code is
34  * derived from software contributed to Berkeley by James A. Woods,
35  * derived from original work by Spencer Thomas and Joseph Orost.
36  *
37  * The original Berkeley copyright notice appears below in its entirety.
38  */
39 #include "tif_predict.h"
40
41 #include <stdio.h>
42
43 /*
44  * NB: The 5.0 spec describes a different algorithm than Aldus
45  *     implements.  Specifically, Aldus does code length transitions
46  *     one code earlier than should be done (for real LZW).
47  *     Earlier versions of this library implemented the correct
48  *     LZW algorithm, but emitted codes in a bit order opposite
49  *     to the TIFF spec.  Thus, to maintain compatibility w/ Aldus
50  *     we interpret MSB-LSB ordered codes to be images written w/
51  *     old versions of this library, but otherwise adhere to the
52  *     Aldus "off by one" algorithm.
53  *
54  * Future revisions to the TIFF spec are expected to "clarify this issue".
55  */
56 #define LZW_COMPAT              /* include backwards compatibility code */
57 /*
58  * Each strip of data is supposed to be terminated by a CODE_EOI.
59  * If the following #define is included, the decoder will also
60  * check for end-of-strip w/o seeing this code.  This makes the
61  * library more robust, but also slower.
62  */
63 #define LZW_CHECKEOS            /* include checks for strips w/o EOI code */
64
65 #define MAXCODE(n)      ((1L<<(n))-1)
66 /*
67  * The TIFF spec specifies that encoded bit
68  * strings range from 9 to 12 bits.
69  */
70 #define BITS_MIN        9               /* start with 9 bits */
71 #define BITS_MAX        12              /* max of 12 bit strings */
72 /* predefined codes */
73 #define CODE_CLEAR      256             /* code to clear string table */
74 #define CODE_EOI        257             /* end-of-information code */
75 #define CODE_FIRST      258             /* first free code entry */
76 #define CODE_MAX        MAXCODE(BITS_MAX)
77 #define HSIZE           9001L           /* 91% occupancy */
78 #define HSHIFT          (13-8)
79 #ifdef LZW_COMPAT
80 /* NB: +1024 is for compatibility with old files */
81 #define CSIZE           (MAXCODE(BITS_MAX)+1024L)
82 #else
83 #define CSIZE           (MAXCODE(BITS_MAX)+1L)
84 #endif
85
86 /*
87  * State block for each open TIFF file using LZW
88  * compression/decompression.  Note that the predictor
89  * state block must be first in this data structure.
90  */
91 typedef struct {
92         TIFFPredictorState predict;     /* predictor super class */
93
94         unsigned short  nbits;          /* # of bits/code */
95         unsigned short  maxcode;        /* maximum code for lzw_nbits */
96         unsigned short  free_ent;       /* next free entry in hash table */
97         long            nextdata;       /* next bits of i/o */
98         long            nextbits;       /* # of valid bits in lzw_nextdata */
99
100         int             rw_mode;        /* preserve rw_mode from init */
101 } LZWBaseState;
102
103 #define lzw_nbits       base.nbits
104 #define lzw_maxcode     base.maxcode
105 #define lzw_free_ent    base.free_ent
106 #define lzw_nextdata    base.nextdata
107 #define lzw_nextbits    base.nextbits
108
109 /*
110  * Encoding-specific state.
111  */
112 typedef uint16 hcode_t;                 /* codes fit in 16 bits */
113 typedef struct {
114         long    hash;
115         hcode_t code;
116 } hash_t;
117
118 /*
119  * Decoding-specific state.
120  */
121 typedef struct code_ent {
122         struct code_ent *next;
123         unsigned short  length;         /* string len, including this token */
124         unsigned char   value;          /* data value */
125         unsigned char   firstchar;      /* first token of string */
126 } code_t;
127
128 typedef int (*decodeFunc)(TIFF*, tidata_t, tsize_t, tsample_t);
129
130 typedef struct {
131         LZWBaseState base;
132
133         /* Decoding specific data */
134         long    dec_nbitsmask;          /* lzw_nbits 1 bits, right adjusted */
135         long    dec_restart;            /* restart count */
136 #ifdef LZW_CHECKEOS
137         long    dec_bitsleft;           /* available bits in raw data */
138 #endif
139         decodeFunc dec_decode;          /* regular or backwards compatible */
140         code_t* dec_codep;              /* current recognized code */
141         code_t* dec_oldcodep;           /* previously recognized code */
142         code_t* dec_free_entp;          /* next free entry */
143         code_t* dec_maxcodep;           /* max available entry */
144         code_t* dec_codetab;            /* kept separate for small machines */
145
146         /* Encoding specific data */
147         int     enc_oldcode;            /* last code encountered */
148         long    enc_checkpoint;         /* point at which to clear table */
149 #define CHECK_GAP       10000           /* enc_ratio check interval */
150         long    enc_ratio;              /* current compression ratio */
151         long    enc_incount;            /* (input) data bytes encoded */
152         long    enc_outcount;           /* encoded (output) bytes */
153         tidata_t enc_rawlimit;          /* bound on tif_rawdata buffer */
154         hash_t* enc_hashtab;            /* kept separate for small machines */
155 } LZWCodecState;
156
157 #define LZWState(tif)           ((LZWBaseState*) (tif)->tif_data)
158 #define DecoderState(tif)       ((LZWCodecState*) LZWState(tif))
159 #define EncoderState(tif)       ((LZWCodecState*) LZWState(tif))
160
161 static  int LZWDecode(TIFF*, tidata_t, tsize_t, tsample_t);
162 #ifdef LZW_COMPAT
163 static  int LZWDecodeCompat(TIFF*, tidata_t, tsize_t, tsample_t);
164 #endif
165 static  void cl_hash(LZWCodecState*);
166
167 /*
168  * LZW Decoder.
169  */
170
171 #ifdef LZW_CHECKEOS
172 /*
173  * This check shouldn't be necessary because each
174  * strip is suppose to be terminated with CODE_EOI.
175  */
176 #define NextCode(_tif, _sp, _bp, _code, _get) {                         \
177         if ((_sp)->dec_bitsleft < nbits) {                              \
178                 TIFFWarning(_tif->tif_name,                             \
179                     "LZWDecode: Strip %d not terminated with EOI code", \
180                     _tif->tif_curstrip);                                \
181                 _code = CODE_EOI;                                       \
182         } else {                                                        \
183                 _get(_sp,_bp,_code);                                    \
184                 (_sp)->dec_bitsleft -= nbits;                           \
185         }                                                               \
186 }
187 #else
188 #define NextCode(tif, sp, bp, code, get) get(sp, bp, code)
189 #endif
190
191 static int
192 LZWSetupDecode(TIFF* tif)
193 {
194         LZWCodecState* sp = DecoderState(tif);
195         static const char module[] = " LZWSetupDecode";
196         int code;
197
198         if( sp == NULL )
199         {
200             /*
201              * Allocate state block so tag methods have storage to record 
202              * values.
203              */
204             tif->tif_data = (tidata_t) _TIFFmalloc(sizeof(LZWCodecState));
205             if (tif->tif_data == NULL)
206             {
207                 TIFFError("LZWPreDecode", "No space for LZW state block");
208                 return (0);
209             }
210
211             DecoderState(tif)->dec_codetab = NULL;
212             DecoderState(tif)->dec_decode = NULL;
213             
214             /*
215              * Setup predictor setup.
216              */
217             (void) TIFFPredictorInit(tif);
218
219             sp = DecoderState(tif);
220         }
221             
222         assert(sp != NULL);
223
224         if (sp->dec_codetab == NULL) {
225                 sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t));
226                 if (sp->dec_codetab == NULL) {
227                         TIFFError(module, "No space for LZW code table");
228                         return (0);
229                 }
230                 /*
231                  * Pre-load the table.
232                  */
233                 code = 255;
234                 do {
235                     sp->dec_codetab[code].value = code;
236                     sp->dec_codetab[code].firstchar = code;
237                     sp->dec_codetab[code].length = 1;
238                     sp->dec_codetab[code].next = NULL;
239                 } while (code--);
240         }
241         return (1);
242 }
243
244 /*
245  * Setup state for decoding a strip.
246  */
247 static int
248 LZWPreDecode(TIFF* tif, tsample_t s)
249 {
250         LZWCodecState *sp = DecoderState(tif);
251
252         (void) s;
253         assert(sp != NULL);
254         /*
255          * Check for old bit-reversed codes.
256          */
257         if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) {
258 #ifdef LZW_COMPAT
259                 if (!sp->dec_decode) {
260                         TIFFWarning(tif->tif_name,
261                             "Old-style LZW codes, convert file");
262                         /*
263                          * Override default decoding methods with
264                          * ones that deal with the old coding.
265                          * Otherwise the predictor versions set
266                          * above will call the compatibility routines
267                          * through the dec_decode method.
268                          */
269                         tif->tif_decoderow = LZWDecodeCompat;
270                         tif->tif_decodestrip = LZWDecodeCompat;
271                         tif->tif_decodetile = LZWDecodeCompat;
272                         /*
273                          * If doing horizontal differencing, must
274                          * re-setup the predictor logic since we
275                          * switched the basic decoder methods...
276                          */
277                         (*tif->tif_setupdecode)(tif);
278                         sp->dec_decode = LZWDecodeCompat;
279                 }
280                 sp->lzw_maxcode = MAXCODE(BITS_MIN);
281 #else /* !LZW_COMPAT */
282                 if (!sp->dec_decode) {
283                         TIFFError(tif->tif_name,
284                             "Old-style LZW codes not supported");
285                         sp->dec_decode = LZWDecode;
286                 }
287                 return (0);
288 #endif/* !LZW_COMPAT */
289         } else {
290                 sp->lzw_maxcode = MAXCODE(BITS_MIN)-1;
291                 sp->dec_decode = LZWDecode;
292         }
293         sp->lzw_nbits = BITS_MIN;
294         sp->lzw_nextbits = 0;
295         sp->lzw_nextdata = 0;
296
297         sp->dec_restart = 0;
298         sp->dec_nbitsmask = MAXCODE(BITS_MIN);
299 #ifdef LZW_CHECKEOS
300         sp->dec_bitsleft = tif->tif_rawcc << 3;
301 #endif
302         sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
303         /*
304          * Zero entries that are not yet filled in.  We do
305          * this to guard against bogus input data that causes
306          * us to index into undefined entries.  If you can
307          * come up with a way to safely bounds-check input codes
308          * while decoding then you can remove this operation.
309          */
310         _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
311         sp->dec_oldcodep = &sp->dec_codetab[-1];
312         sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
313         return (1);
314 }
315
316 /*
317  * Decode a "hunk of data".
318  */
319 #define GetNextCode(sp, bp, code) {                             \
320         nextdata = (nextdata<<8) | *(bp)++;                     \
321         nextbits += 8;                                          \
322         if (nextbits < nbits) {                                 \
323                 nextdata = (nextdata<<8) | *(bp)++;             \
324                 nextbits += 8;                                  \
325         }                                                       \
326         code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask);   \
327         nextbits -= nbits;                                      \
328 }
329
330 static void
331 codeLoop(TIFF* tif)
332 {
333         TIFFError(tif->tif_name,
334             "LZWDecode: Bogus encoding, loop in the code table; scanline %d",
335             tif->tif_row);
336 }
337
338 static int
339 LZWDecode(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s)
340 {
341         LZWCodecState *sp = DecoderState(tif);
342         char *op = (char*) op0;
343         long occ = (long) occ0;
344         char *tp;
345         unsigned char *bp;
346         hcode_t code;
347         int len;
348         long nbits, nextbits, nextdata, nbitsmask;
349         code_t *codep, *free_entp, *maxcodep, *oldcodep;
350
351         (void) s;
352         assert(sp != NULL);
353         /*
354          * Restart interrupted output operation.
355          */
356         if (sp->dec_restart) {
357                 long residue;
358
359                 codep = sp->dec_codep;
360                 residue = codep->length - sp->dec_restart;
361                 if (residue > occ) {
362                         /*
363                          * Residue from previous decode is sufficient
364                          * to satisfy decode request.  Skip to the
365                          * start of the decoded string, place decoded
366                          * values in the output buffer, and return.
367                          */
368                         sp->dec_restart += occ;
369                         do {
370                                 codep = codep->next;
371                         } while (--residue > occ && codep);
372                         if (codep) {
373                                 tp = op + occ;
374                                 do {
375                                         *--tp = codep->value;
376                                         codep = codep->next;
377                                 } while (--occ && codep);
378                         }
379                         return (1);
380                 }
381                 /*
382                  * Residue satisfies only part of the decode request.
383                  */
384                 op += residue, occ -= residue;
385                 tp = op;
386                 do {
387                         int t;
388                         --tp;
389                         t = codep->value;
390                         codep = codep->next;
391                         *tp = t;
392                 } while (--residue && codep);
393                 sp->dec_restart = 0;
394         }
395
396         bp = (unsigned char *)tif->tif_rawcp;
397         nbits = sp->lzw_nbits;
398         nextdata = sp->lzw_nextdata;
399         nextbits = sp->lzw_nextbits;
400         nbitsmask = sp->dec_nbitsmask;
401         oldcodep = sp->dec_oldcodep;
402         free_entp = sp->dec_free_entp;
403         maxcodep = sp->dec_maxcodep;
404
405         while (occ > 0) {
406                 NextCode(tif, sp, bp, code, GetNextCode);
407                 if (code == CODE_EOI)
408                         break;
409                 if (code == CODE_CLEAR) {
410                         free_entp = sp->dec_codetab + CODE_FIRST;
411                         nbits = BITS_MIN;
412                         nbitsmask = MAXCODE(BITS_MIN);
413                         maxcodep = sp->dec_codetab + nbitsmask-1;
414                         NextCode(tif, sp, bp, code, GetNextCode);
415                         if (code == CODE_EOI)
416                                 break;
417                         *op++ = (char)code, occ--;
418                         oldcodep = sp->dec_codetab + code;
419                         continue;
420                 }
421                 codep = sp->dec_codetab + code;
422
423                 /*
424                  * Add the new entry to the code table.
425                  */
426                 if (free_entp < &sp->dec_codetab[0] ||
427                         free_entp >= &sp->dec_codetab[CSIZE]) {
428                         TIFFError(tif->tif_name,
429                         "LZWDecode: Corrupted LZW table at scanline %d",
430                         tif->tif_row);
431                         return (0);
432                 }
433
434                 free_entp->next = oldcodep;
435                 if (free_entp->next < &sp->dec_codetab[0] ||
436                         free_entp->next >= &sp->dec_codetab[CSIZE]) {
437                         TIFFError(tif->tif_name,
438                         "LZWDecode: Corrupted LZW table at scanline %d",
439                         tif->tif_row);
440                         return (0);
441                 }
442                 free_entp->firstchar = free_entp->next->firstchar;
443                 free_entp->length = free_entp->next->length+1;
444                 free_entp->value = (codep < free_entp) ?
445                     codep->firstchar : free_entp->firstchar;
446                 if (++free_entp > maxcodep) {
447                         if (++nbits > BITS_MAX)         /* should not happen */
448                                 nbits = BITS_MAX;
449                         nbitsmask = MAXCODE(nbits);
450                         maxcodep = sp->dec_codetab + nbitsmask-1;
451                 }
452                 oldcodep = codep;
453                 if (code >= 256) {
454                         /*
455                          * Code maps to a string, copy string
456                          * value to output (written in reverse).
457                          */
458                         if(codep->length == 0) {
459                             TIFFError(tif->tif_name,
460                             "LZWDecode: Wrong length of decoded string: "
461                             "data probably corrupted at scanline %d",
462                             tif->tif_row);      
463                             return (0);
464                         }
465                         if (codep->length > occ) {
466                                 /*
467                                  * String is too long for decode buffer,
468                                  * locate portion that will fit, copy to
469                                  * the decode buffer, and setup restart
470                                  * logic for the next decoding call.
471                                  */
472                                 sp->dec_codep = codep;
473                                 do {
474                                         codep = codep->next;
475                                 } while (codep && codep->length > occ);
476                                 if (codep) {
477                                         sp->dec_restart = occ;
478                                         tp = op + occ;
479                                         do  {
480                                                 *--tp = codep->value;
481                                                 codep = codep->next;
482                                         }  while (--occ && codep);
483                                         if (codep)
484                                                 codeLoop(tif);
485                                 }
486                                 break;
487                         }
488                         len = codep->length;
489                         tp = op + len;
490                         do {
491                                 int t;
492                                 --tp;
493                                 t = codep->value;
494                                 codep = codep->next;
495                                 *tp = t;
496                         } while (codep && tp > op);
497                         if (codep) {
498                             codeLoop(tif);
499                             break;
500                         }
501                         op += len, occ -= len;
502                 } else
503                         *op++ = (char)code, occ--;
504         }
505
506         tif->tif_rawcp = (tidata_t) bp;
507         sp->lzw_nbits = (unsigned short) nbits;
508         sp->lzw_nextdata = nextdata;
509         sp->lzw_nextbits = nextbits;
510         sp->dec_nbitsmask = nbitsmask;
511         sp->dec_oldcodep = oldcodep;
512         sp->dec_free_entp = free_entp;
513         sp->dec_maxcodep = maxcodep;
514
515         if (occ > 0) {
516                 TIFFError(tif->tif_name,
517                 "LZWDecode: Not enough data at scanline %d (short %d bytes)",
518                     tif->tif_row, occ);
519                 return (0);
520         }
521         return (1);
522 }
523
524 #ifdef LZW_COMPAT
525 /*
526  * Decode a "hunk of data" for old images.
527  */
528 #define GetNextCodeCompat(sp, bp, code) {                       \
529         nextdata |= (unsigned long) *(bp)++ << nextbits;        \
530         nextbits += 8;                                          \
531         if (nextbits < nbits) {                                 \
532                 nextdata |= (unsigned long) *(bp)++ << nextbits;\
533                 nextbits += 8;                                  \
534         }                                                       \
535         code = (hcode_t)(nextdata & nbitsmask);                 \
536         nextdata >>= nbits;                                     \
537         nextbits -= nbits;                                      \
538 }
539
540 static int
541 LZWDecodeCompat(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s)
542 {
543         LZWCodecState *sp = DecoderState(tif);
544         char *op = (char*) op0;
545         long occ = (long) occ0;
546         char *tp;
547         unsigned char *bp;
548         int code, nbits;
549         long nextbits, nextdata, nbitsmask;
550         code_t *codep, *free_entp, *maxcodep, *oldcodep;
551
552         (void) s;
553         assert(sp != NULL);
554         /*
555          * Restart interrupted output operation.
556          */
557         if (sp->dec_restart) {
558                 long residue;
559
560                 codep = sp->dec_codep;
561                 residue = codep->length - sp->dec_restart;
562                 if (residue > occ) {
563                         /*
564                          * Residue from previous decode is sufficient
565                          * to satisfy decode request.  Skip to the
566                          * start of the decoded string, place decoded
567                          * values in the output buffer, and return.
568                          */
569                         sp->dec_restart += occ;
570                         do {
571                                 codep = codep->next;
572                         } while (--residue > occ);
573                         tp = op + occ;
574                         do {
575                                 *--tp = codep->value;
576                                 codep = codep->next;
577                         } while (--occ);
578                         return (1);
579                 }
580                 /*
581                  * Residue satisfies only part of the decode request.
582                  */
583                 op += residue, occ -= residue;
584                 tp = op;
585                 do {
586                         *--tp = codep->value;
587                         codep = codep->next;
588                 } while (--residue);
589                 sp->dec_restart = 0;
590         }
591
592         bp = (unsigned char *)tif->tif_rawcp;
593         nbits = sp->lzw_nbits;
594         nextdata = sp->lzw_nextdata;
595         nextbits = sp->lzw_nextbits;
596         nbitsmask = sp->dec_nbitsmask;
597         oldcodep = sp->dec_oldcodep;
598         free_entp = sp->dec_free_entp;
599         maxcodep = sp->dec_maxcodep;
600
601         while (occ > 0) {
602                 NextCode(tif, sp, bp, code, GetNextCodeCompat);
603                 if (code == CODE_EOI)
604                         break;
605                 if (code == CODE_CLEAR) {
606                         free_entp = sp->dec_codetab + CODE_FIRST;
607                         nbits = BITS_MIN;
608                         nbitsmask = MAXCODE(BITS_MIN);
609                         maxcodep = sp->dec_codetab + nbitsmask;
610                         NextCode(tif, sp, bp, code, GetNextCodeCompat);
611                         if (code == CODE_EOI)
612                                 break;
613                         *op++ = code, occ--;
614                         oldcodep = sp->dec_codetab + code;
615                         continue;
616                 }
617                 codep = sp->dec_codetab + code;
618
619                 /*
620                  * Add the new entry to the code table.
621                  */
622                 if (free_entp < &sp->dec_codetab[0] ||
623                         free_entp >= &sp->dec_codetab[CSIZE]) {
624                         TIFFError(tif->tif_name,
625                         "LZWDecodeCompat: Corrupted LZW table at scanline %d",
626                         tif->tif_row);
627                         return (0);
628                 }
629
630                 free_entp->next = oldcodep;
631                 if (free_entp->next < &sp->dec_codetab[0] ||
632                         free_entp->next >= &sp->dec_codetab[CSIZE]) {
633                         TIFFError(tif->tif_name,
634                         "LZWDecodeCompat: Corrupted LZW table at scanline %d",
635                         tif->tif_row);
636                         return (0);
637                 }
638                 free_entp->firstchar = free_entp->next->firstchar;
639                 free_entp->length = free_entp->next->length+1;
640                 free_entp->value = (codep < free_entp) ?
641                     codep->firstchar : free_entp->firstchar;
642                 if (++free_entp > maxcodep) {
643                         if (++nbits > BITS_MAX)         /* should not happen */
644                                 nbits = BITS_MAX;
645                         nbitsmask = MAXCODE(nbits);
646                         maxcodep = sp->dec_codetab + nbitsmask;
647                 }
648                 oldcodep = codep;
649                 if (code >= 256) {
650                         /*
651                          * Code maps to a string, copy string
652                          * value to output (written in reverse).
653                          */
654                         if(codep->length == 0) {
655                             TIFFError(tif->tif_name,
656                             "LZWDecodeCompat: Wrong length of decoded "
657                             "string: data probably corrupted at scanline %d",
658                             tif->tif_row);      
659                             return (0);
660                         }
661                         if (codep->length > occ) {
662                                 /*
663                                  * String is too long for decode buffer,
664                                  * locate portion that will fit, copy to
665                                  * the decode buffer, and setup restart
666                                  * logic for the next decoding call.
667                                  */
668                                 sp->dec_codep = codep;
669                                 do {
670                                         codep = codep->next;
671                                 } while (codep->length > occ);
672                                 sp->dec_restart = occ;
673                                 tp = op + occ;
674                                 do  {
675                                         *--tp = codep->value;
676                                         codep = codep->next;
677                                 }  while (--occ);
678                                 break;
679                         }
680                         op += codep->length, occ -= codep->length;
681                         tp = op;
682                         do {
683                                 *--tp = codep->value;
684                         } while( (codep = codep->next) != NULL);
685                 } else
686                         *op++ = code, occ--;
687         }
688
689         tif->tif_rawcp = (tidata_t) bp;
690         sp->lzw_nbits = nbits;
691         sp->lzw_nextdata = nextdata;
692         sp->lzw_nextbits = nextbits;
693         sp->dec_nbitsmask = nbitsmask;
694         sp->dec_oldcodep = oldcodep;
695         sp->dec_free_entp = free_entp;
696         sp->dec_maxcodep = maxcodep;
697
698         if (occ > 0) {
699                 TIFFError(tif->tif_name,
700             "LZWDecodeCompat: Not enough data at scanline %d (short %d bytes)",
701                     tif->tif_row, occ);
702                 return (0);
703         }
704         return (1);
705 }
706 #endif /* LZW_COMPAT */
707
708 /*
709  * LZW Encoding.
710  */
711
712 static int
713 LZWSetupEncode(TIFF* tif)
714 {
715         LZWCodecState* sp = EncoderState(tif);
716         static const char module[] = "LZWSetupEncode";
717
718         assert(sp != NULL);
719         sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t));
720         if (sp->enc_hashtab == NULL) {
721                 TIFFError(module, "No space for LZW hash table");
722                 return (0);
723         }
724         return (1);
725 }
726
727 /*
728  * Reset encoding state at the start of a strip.
729  */
730 static int
731 LZWPreEncode(TIFF* tif, tsample_t s)
732 {
733         LZWCodecState *sp = EncoderState(tif);
734
735         (void) s;
736         assert(sp != NULL);
737         sp->lzw_nbits = BITS_MIN;
738         sp->lzw_maxcode = MAXCODE(BITS_MIN);
739         sp->lzw_free_ent = CODE_FIRST;
740         sp->lzw_nextbits = 0;
741         sp->lzw_nextdata = 0;
742         sp->enc_checkpoint = CHECK_GAP;
743         sp->enc_ratio = 0;
744         sp->enc_incount = 0;
745         sp->enc_outcount = 0;
746         /*
747          * The 4 here insures there is space for 2 max-sized
748          * codes in LZWEncode and LZWPostDecode.
749          */
750         sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4;
751         cl_hash(sp);            /* clear hash table */
752         sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */
753         return (1);
754 }
755
756 #define CALCRATIO(sp, rat) {                                    \
757         if (incount > 0x007fffff) { /* NB: shift will overflow */\
758                 rat = outcount >> 8;                            \
759                 rat = (rat == 0 ? 0x7fffffff : incount/rat);    \
760         } else                                                  \
761                 rat = (incount<<8) / outcount;                  \
762 }
763 #define PutNextCode(op, c) {                                    \
764         nextdata = (nextdata << nbits) | c;                     \
765         nextbits += nbits;                                      \
766         *op++ = (unsigned char)(nextdata >> (nextbits-8));              \
767         nextbits -= 8;                                          \
768         if (nextbits >= 8) {                                    \
769                 *op++ = (unsigned char)(nextdata >> (nextbits-8));      \
770                 nextbits -= 8;                                  \
771         }                                                       \
772         outcount += nbits;                                      \
773 }
774
775 /*
776  * Encode a chunk of pixels.
777  *
778  * Uses an open addressing double hashing (no chaining) on the 
779  * prefix code/next character combination.  We do a variant of
780  * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
781  * relatively-prime secondary probe.  Here, the modular division
782  * first probe is gives way to a faster exclusive-or manipulation. 
783  * Also do block compression with an adaptive reset, whereby the
784  * code table is cleared when the compression ratio decreases,
785  * but after the table fills.  The variable-length output codes
786  * are re-sized at this point, and a CODE_CLEAR is generated
787  * for the decoder. 
788  */
789 static int
790 LZWEncode(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
791 {
792         register LZWCodecState *sp = EncoderState(tif);
793         register long fcode;
794         register hash_t *hp;
795         register int h, c;
796         hcode_t ent;
797         long disp;
798         long incount, outcount, checkpoint;
799         long nextdata, nextbits;
800         int free_ent, maxcode, nbits;
801         tidata_t op, limit;
802
803         (void) s;
804         if (sp == NULL)
805                 return (0);
806         /*
807          * Load local state.
808          */
809         incount = sp->enc_incount;
810         outcount = sp->enc_outcount;
811         checkpoint = sp->enc_checkpoint;
812         nextdata = sp->lzw_nextdata;
813         nextbits = sp->lzw_nextbits;
814         free_ent = sp->lzw_free_ent;
815         maxcode = sp->lzw_maxcode;
816         nbits = sp->lzw_nbits;
817         op = tif->tif_rawcp;
818         limit = sp->enc_rawlimit;
819         ent = sp->enc_oldcode;
820
821         if (ent == (hcode_t) -1 && cc > 0) {
822                 /*
823                  * NB: This is safe because it can only happen
824                  *     at the start of a strip where we know there
825                  *     is space in the data buffer.
826                  */
827                 PutNextCode(op, CODE_CLEAR);
828                 ent = *bp++; cc--; incount++;
829         }
830         while (cc > 0) {
831                 c = *bp++; cc--; incount++;
832                 fcode = ((long)c << BITS_MAX) + ent;
833                 h = (c << HSHIFT) ^ ent;        /* xor hashing */
834 #ifdef _WINDOWS
835                 /*
836                  * Check hash index for an overflow.
837                  */
838                 if (h >= HSIZE)
839                         h -= HSIZE;
840 #endif
841                 hp = &sp->enc_hashtab[h];
842                 if (hp->hash == fcode) {
843                         ent = hp->code;
844                         continue;
845                 }
846                 if (hp->hash >= 0) {
847                         /*
848                          * Primary hash failed, check secondary hash.
849                          */
850                         disp = HSIZE - h;
851                         if (h == 0)
852                                 disp = 1;
853                         do {
854                                 /*
855                                  * Avoid pointer arithmetic 'cuz of
856                                  * wraparound problems with segments.
857                                  */
858                                 if ((h -= disp) < 0)
859                                         h += HSIZE;
860                                 hp = &sp->enc_hashtab[h];
861                                 if (hp->hash == fcode) {
862                                         ent = hp->code;
863                                         goto hit;
864                                 }
865                         } while (hp->hash >= 0);
866                 }
867                 /*
868                  * New entry, emit code and add to table.
869                  */
870                 /*
871                  * Verify there is space in the buffer for the code
872                  * and any potential Clear code that might be emitted
873                  * below.  The value of limit is setup so that there
874                  * are at least 4 bytes free--room for 2 codes.
875                  */
876                 if (op > limit) {
877                         tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata);
878                         TIFFFlushData1(tif);
879                         op = tif->tif_rawdata;
880                 }
881                 PutNextCode(op, ent);
882                 ent = c;
883                 hp->code = free_ent++;
884                 hp->hash = fcode;
885                 if (free_ent == CODE_MAX-1) {
886                         /* table is full, emit clear code and reset */
887                         cl_hash(sp);
888                         sp->enc_ratio = 0;
889                         incount = 0;
890                         outcount = 0;
891                         free_ent = CODE_FIRST;
892                         PutNextCode(op, CODE_CLEAR);
893                         nbits = BITS_MIN;
894                         maxcode = MAXCODE(BITS_MIN);
895                 } else {
896                         /*
897                          * If the next entry is going to be too big for
898                          * the code size, then increase it, if possible.
899                          */
900                         if (free_ent > maxcode) {
901                                 nbits++;
902                                 assert(nbits <= BITS_MAX);
903                                 maxcode = (int) MAXCODE(nbits);
904                         } else if (incount >= checkpoint) {
905                                 long rat;
906                                 /*
907                                  * Check compression ratio and, if things seem
908                                  * to be slipping, clear the hash table and
909                                  * reset state.  The compression ratio is a
910                                  * 24+8-bit fractional number.
911                                  */
912                                 checkpoint = incount+CHECK_GAP;
913                                 CALCRATIO(sp, rat);
914                                 if (rat <= sp->enc_ratio) {
915                                         cl_hash(sp);
916                                         sp->enc_ratio = 0;
917                                         incount = 0;
918                                         outcount = 0;
919                                         free_ent = CODE_FIRST;
920                                         PutNextCode(op, CODE_CLEAR);
921                                         nbits = BITS_MIN;
922                                         maxcode = MAXCODE(BITS_MIN);
923                                 } else
924                                         sp->enc_ratio = rat;
925                         }
926                 }
927         hit:
928                 ;
929         }
930
931         /*
932          * Restore global state.
933          */
934         sp->enc_incount = incount;
935         sp->enc_outcount = outcount;
936         sp->enc_checkpoint = checkpoint;
937         sp->enc_oldcode = ent;
938         sp->lzw_nextdata = nextdata;
939         sp->lzw_nextbits = nextbits;
940         sp->lzw_free_ent = free_ent;
941         sp->lzw_maxcode = maxcode;
942         sp->lzw_nbits = nbits;
943         tif->tif_rawcp = op;
944         return (1);
945 }
946
947 /*
948  * Finish off an encoded strip by flushing the last
949  * string and tacking on an End Of Information code.
950  */
951 static int
952 LZWPostEncode(TIFF* tif)
953 {
954         register LZWCodecState *sp = EncoderState(tif);
955         tidata_t op = tif->tif_rawcp;
956         long nextbits = sp->lzw_nextbits;
957         long nextdata = sp->lzw_nextdata;
958         long outcount = sp->enc_outcount;
959         int nbits = sp->lzw_nbits;
960
961         if (op > sp->enc_rawlimit) {
962                 tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata);
963                 TIFFFlushData1(tif);
964                 op = tif->tif_rawdata;
965         }
966         if (sp->enc_oldcode != (hcode_t) -1) {
967                 PutNextCode(op, sp->enc_oldcode);
968                 sp->enc_oldcode = (hcode_t) -1;
969         }
970         PutNextCode(op, CODE_EOI);
971         if (nextbits > 0) 
972                 *op++ = (unsigned char)(nextdata << (8-nextbits));
973         tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata);
974         return (1);
975 }
976
977 /*
978  * Reset encoding hash table.
979  */
980 static void
981 cl_hash(LZWCodecState* sp)
982 {
983         register hash_t *hp = &sp->enc_hashtab[HSIZE-1];
984         register long i = HSIZE-8;
985
986         do {
987                 i -= 8;
988                 hp[-7].hash = -1;
989                 hp[-6].hash = -1;
990                 hp[-5].hash = -1;
991                 hp[-4].hash = -1;
992                 hp[-3].hash = -1;
993                 hp[-2].hash = -1;
994                 hp[-1].hash = -1;
995                 hp[ 0].hash = -1;
996                 hp -= 8;
997         } while (i >= 0);
998         for (i += 8; i > 0; i--, hp--)
999                 hp->hash = -1;
1000 }
1001
1002 static void
1003 LZWCleanup(TIFF* tif)
1004 {
1005         if (tif->tif_data) {
1006                 if (DecoderState(tif)->dec_codetab)
1007                         _TIFFfree(DecoderState(tif)->dec_codetab);
1008
1009                 if (EncoderState(tif)->enc_hashtab)
1010                         _TIFFfree(EncoderState(tif)->enc_hashtab);
1011
1012                 _TIFFfree(tif->tif_data);
1013                 tif->tif_data = NULL;
1014         }
1015 }
1016
1017 int
1018 TIFFInitLZW(TIFF* tif, int scheme)
1019 {
1020         assert(scheme == COMPRESSION_LZW);
1021         /*
1022          * Allocate state block so tag methods have storage to record values.
1023          */
1024         tif->tif_data = (tidata_t) _TIFFmalloc(sizeof (LZWCodecState));
1025         if (tif->tif_data == NULL)
1026                 goto bad;
1027         DecoderState(tif)->dec_codetab = NULL;
1028         DecoderState(tif)->dec_decode = NULL;
1029         EncoderState(tif)->enc_hashtab = NULL;
1030         LZWState(tif)->rw_mode = tif->tif_mode;
1031
1032         /*
1033          * Install codec methods.
1034          */
1035         tif->tif_setupdecode = LZWSetupDecode;
1036         tif->tif_predecode = LZWPreDecode;
1037         tif->tif_decoderow = LZWDecode;
1038         tif->tif_decodestrip = LZWDecode;
1039         tif->tif_decodetile = LZWDecode;
1040         tif->tif_setupencode = LZWSetupEncode;
1041         tif->tif_preencode = LZWPreEncode;
1042         tif->tif_postencode = LZWPostEncode;
1043         tif->tif_encoderow = LZWEncode;
1044         tif->tif_encodestrip = LZWEncode;
1045         tif->tif_encodetile = LZWEncode;
1046         tif->tif_cleanup = LZWCleanup;
1047         /*
1048          * Setup predictor setup.
1049          */
1050         (void) TIFFPredictorInit(tif);
1051         return (1);
1052 bad:
1053         TIFFError("TIFFInitLZW", "No space for LZW state block");
1054         return (0);
1055 }
1056
1057 /*
1058  * Copyright (c) 1985, 1986 The Regents of the University of California.
1059  * All rights reserved.
1060  *
1061  * This code is derived from software contributed to Berkeley by
1062  * James A. Woods, derived from original work by Spencer Thomas
1063  * and Joseph Orost.
1064  *
1065  * Redistribution and use in source and binary forms are permitted
1066  * provided that the above copyright notice and this paragraph are
1067  * duplicated in all such forms and that any documentation,
1068  * advertising materials, and other materials related to such
1069  * distribution and use acknowledge that the software was developed
1070  * by the University of California, Berkeley.  The name of the
1071  * University may not be used to endorse or promote products derived
1072  * from this software without specific prior written permission.
1073  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1074  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1075  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1076  */
1077 #endif /* LZW_SUPPORT */
1078
1079 /* vim: set ts=8 sts=8 sw=8 noet: */