--- /dev/null
+/* dictziplib.c --
+* http://stardict.sourceforge.net
+* Copyright (C) 2003-2003 Hu Zheng <huzheng_001@163.com>
+* This file is a modify version of dictd-1.9.7's data.c
+*
+* data.c --
+* Created: Tue Jul 16 12:45:41 1996 by faith@dict.org
+* Revised: Sat Mar 30 10:46:06 2002 by faith@dict.org
+* Copyright 1996, 1997, 1998, 2000, 2002 Rickard E. Faith (faith@dict.org)
+*
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Library General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+//#define HAVE_MMAP //it will defined in config.h. this can be done by configure.in with a AC_FUNC_MMAP.
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <QtGlobal>
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#ifdef Q_OS_UNIX
+#include <unistd.h>
+#endif
+#ifdef Q_OS_WIN32
+#include <io.h>
+#endif
+#include <limits.h>
+#include <fcntl.h>
+
+#include <sys/stat.h>
+
+
+#include "dictziplib.hpp"
+
+#define USE_CACHE 1
+
+#define BUFFERSIZE 10240
+
+/*
+ * Output buffer must be greater than or
+ * equal to 110% of input buffer size, plus
+ * 12 bytes.
+*/
+#define OUT_BUFFER_SIZE 0xffffL
+
+#define IN_BUFFER_SIZE ((unsigned long)((double)(OUT_BUFFER_SIZE - 12) * 0.89))
+
+/* For gzip-compatible header, as defined in RFC 1952 */
+
+/* Magic for GZIP (rfc1952) */
+#define GZ_MAGIC1 0x1f /* First magic byte */
+#define GZ_MAGIC2 0x8b /* Second magic byte */
+
+/* FLaGs (bitmapped), from rfc1952 */
+#define GZ_FTEXT 0x01 /* Set for ASCII text */
+#define GZ_FHCRC 0x02 /* Header CRC16 */
+#define GZ_FEXTRA 0x04 /* Optional field (random access index) */
+#define GZ_FNAME 0x08 /* Original name */
+#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
+#define GZ_MAX 2 /* Maximum compression */
+#define GZ_FAST 4 /* Fasted compression */
+
+/* These are from rfc1952 */
+#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
+#define GZ_OS_AMIGA 1 /* Amiga */
+#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
+#define GZ_OS_UNIX 3 /* Unix */
+#define GZ_OS_VMCMS 4 /* VM/CMS */
+#define GZ_OS_ATARI 5 /* Atari TOS */
+#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */
+#define GZ_OS_MAC 7 /* Macintosh */
+#define GZ_OS_Z 8 /* Z-System */
+#define GZ_OS_CPM 9 /* CP/M */
+#define GZ_OS_TOPS20 10 /* TOPS-20 */
+#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */
+#define GZ_OS_QDOS 12 /* QDOS */
+#define GZ_OS_ACORN 13 /* Acorn RISCOS */
+#define GZ_OS_UNKNOWN 255 /* unknown */
+
+#define GZ_RND_S1 'R' /* First magic for random access format */
+#define GZ_RND_S2 'A' /* Second magic for random access format */
+
+#define GZ_ID1 0 /* GZ_MAGIC1 */
+#define GZ_ID2 1 /* GZ_MAGIC2 */
+#define GZ_CM 2 /* Compression Method (Z_DEFALTED) */
+#define GZ_FLG 3 /* FLaGs (see above) */
+#define GZ_MTIME 4 /* Modification TIME */
+#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */
+#define GZ_OS 9 /* Operating System */
+#define GZ_XLEN 10 /* eXtra LENgth (16bit) */
+#define GZ_FEXTRA_START 12 /* Start of extra fields */
+#define GZ_SI1 12 /* Subfield ID1 */
+#define GZ_SI2 13 /* Subfield ID2 */
+#define GZ_SUBLEN 14 /* Subfield length (16bit) */
+#define GZ_VERSION 16 /* Version for subfield format */
+#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */
+#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
+#define GZ_RNDDATA 22 /* Random access data (16bit) */
+
+#define DICT_UNKNOWN 0
+#define DICT_TEXT 1
+#define DICT_GZIP 2
+#define DICT_DZIP 3
+
+
+int dictData::read_header(const std::string &fname, int computeCRC)
+{
+ FILE *str;
+ int id1, id2, si1, si2;
+ char buffer[BUFFERSIZE];
+ int extraLength, subLength;
+ int i;
+ char *pt;
+ int c;
+ struct stat sb;
+ unsigned long crc = crc32( 0L, Z_NULL, 0 );
+ int count;
+ unsigned long offset;
+
+ if (!(str = fopen(fname.c_str(), "rb")))
+ {
+ //err_fatal_errno( __FUNCTION__,
+ // "Cannot open data file \"%s\" for read\n", filename );
+ }
+
+ this->headerLength = GZ_XLEN - 1;
+ this->type = DICT_UNKNOWN;
+
+ id1 = getc( str );
+ id2 = getc( str );
+
+ if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2)
+ {
+ this->type = DICT_TEXT;
+ fstat( fileno( str ), &sb );
+ this->compressedLength = this->length = sb.st_size;
+ this->origFilename = fname;
+ this->mtime = sb.st_mtime;
+ if (computeCRC)
+ {
+ rewind( str );
+ while (!feof( str ))
+ {
+ if ((count = fread( buffer, 1, BUFFERSIZE, str )))
+ {
+ crc = crc32(crc, (Bytef *)buffer, count);
+ }
+ }
+ }
+ this->crc = crc;
+ fclose( str );
+ return 0;
+ }
+ this->type = DICT_GZIP;
+
+ this->method = getc( str );
+ this->flags = getc( str );
+ this->mtime = getc( str ) << 0;
+ this->mtime |= getc( str ) << 8;
+ this->mtime |= getc( str ) << 16;
+ this->mtime |= getc( str ) << 24;
+ this->extraFlags = getc( str );
+ this->os = getc( str );
+
+ if (this->flags & GZ_FEXTRA)
+ {
+ extraLength = getc( str ) << 0;
+ extraLength |= getc( str ) << 8;
+ this->headerLength += extraLength + 2;
+ si1 = getc( str );
+ si2 = getc( str );
+
+ if (si1 == GZ_RND_S1 || si2 == GZ_RND_S2)
+ {
+ subLength = getc( str ) << 0;
+ subLength |= getc( str ) << 8;
+ this->version = getc( str ) << 0;
+ this->version |= getc( str ) << 8;
+
+ if (this->version != 1)
+ {
+ //err_internal( __FUNCTION__,
+ // "dzip header version %d not supported\n",
+ // this->version );
+ }
+
+ this->chunkLength = getc( str ) << 0;
+ this->chunkLength |= getc( str ) << 8;
+ this->chunkCount = getc( str ) << 0;
+ this->chunkCount |= getc( str ) << 8;
+
+ if (this->chunkCount <= 0)
+ {
+ fclose( str );
+ return 5;
+ }
+ this->chunks = (int *)malloc(sizeof( this->chunks[0] )
+ * this->chunkCount );
+ for (i = 0; i < this->chunkCount; i++)
+ {
+ this->chunks[i] = getc( str ) << 0;
+ this->chunks[i] |= getc( str ) << 8;
+ }
+ this->type = DICT_DZIP;
+ }
+ else
+ {
+ fseek( str, this->headerLength, SEEK_SET );
+ }
+ }
+
+ if (this->flags & GZ_FNAME)
+ { /* FIXME! Add checking against header len */
+ pt = buffer;
+ while ((c = getc( str )) && c != EOF)
+ * pt++ = c;
+ *pt = '\0';
+
+ this->origFilename = buffer;
+ this->headerLength += this->origFilename.length() + 1;
+ }
+ else
+ {
+ this->origFilename = "";
+ }
+
+ if (this->flags & GZ_COMMENT)
+ { /* FIXME! Add checking for header len */
+ pt = buffer;
+ while ((c = getc( str )) && c != EOF)
+ * pt++ = c;
+ *pt = '\0';
+ comment = buffer;
+ headerLength += comment.length() + 1;
+ }
+ else
+ {
+ comment = "";
+ }
+
+ if (this->flags & GZ_FHCRC)
+ {
+ getc( str );
+ getc( str );
+ this->headerLength += 2;
+ }
+
+ if (ftell( str ) != this->headerLength + 1)
+ {
+ //err_internal( __FUNCTION__,
+ // "File position (%lu) != header length + 1 (%d)\n",
+ // ftell( str ), this->headerLength + 1 );
+ }
+
+ fseek( str, -8, SEEK_END );
+ this->crc = getc( str ) << 0;
+ this->crc |= getc( str ) << 8;
+ this->crc |= getc( str ) << 16;
+ this->crc |= getc( str ) << 24;
+ this->length = getc( str ) << 0;
+ this->length |= getc( str ) << 8;
+ this->length |= getc( str ) << 16;
+ this->length |= getc( str ) << 24;
+ this->compressedLength = ftell( str );
+
+ /* Compute offsets */
+ this->offsets = (unsigned long *)malloc( sizeof( this->offsets[0] )
+ * this->chunkCount );
+ for (offset = this->headerLength + 1, i = 0;
+ i < this->chunkCount;
+ i++)
+ {
+ this->offsets[i] = offset;
+ offset += this->chunks[i];
+ }
+
+ fclose( str );
+ return 0;
+}
+
+bool dictData::open(const std::string& fname, int computeCRC)
+{
+ struct stat sb;
+ int j;
+ int fd;
+
+ this->initialized = 0;
+
+#ifdef Q_OS_UNIX
+ if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode))
+#elif defined(Q_OS_WIN32)
+ if (_stat(fname.c_str(), &sb) || !(sb.stMode & _S_IFREG))
+#endif
+ {
+ //err_warning( __FUNCTION__,
+ // "%s is not a regular file -- ignoring\n", fname );
+ return false;
+ }
+
+ if (read_header(fname, computeCRC))
+ {
+ //err_fatal( __FUNCTION__,
+ // "\"%s\" not in text or dzip format\n", fname );
+ return false;
+ }
+
+ if ((fd = ::open(fname.c_str(), O_RDONLY )) < 0)
+ {
+ //err_fatal_errno( __FUNCTION__,
+ // "Cannot open data file \"%s\"\n", fname );
+ return false;
+ }
+ if (fstat(fd, &sb))
+ {
+ //err_fatal_errno( __FUNCTION__,
+ // "Cannot stat data file \"%s\"\n", fname );
+ return false;
+ }
+
+ this->size = sb.st_size;
+ ::close(fd);
+ if (!mapfile.open(fname.c_str(), size))
+ return false;
+
+ this->start = mapfile.begin();
+ this->end = this->start + this->size;
+
+ for (j = 0; j < DICT_CACHE_SIZE; j++)
+ {
+ cache[j].chunk = -1;
+ cache[j].stamp = -1;
+ cache[j].inBuffer = NULL;
+ cache[j].count = 0;
+ }
+
+ return true;
+}
+
+void dictData::close()
+{
+ int i;
+
+ if (this->chunks)
+ free(this->chunks);
+ if (this->offsets)
+ free(this->offsets);
+
+ if (this->initialized)
+ {
+ if (inflateEnd( &this->zStream ))
+ {
+ //err_internal( __FUNCTION__,
+ // "Cannot shut down inflation engine: %s\n",
+ // this->zStream.msg );
+ }
+ }
+
+ for (i = 0; i < DICT_CACHE_SIZE; ++i)
+ {
+ if (this -> cache [i].inBuffer)
+ free (this -> cache [i].inBuffer);
+ }
+}
+
+void dictData::read(char *buffer, unsigned long start, unsigned long size)
+{
+ char *pt;
+ unsigned long end;
+ int count;
+ char *inBuffer;
+ char outBuffer[OUT_BUFFER_SIZE];
+ int firstChunk, lastChunk;
+ int firstOffset, lastOffset;
+ int i, j;
+ int found, target, lastStamp;
+ static int stamp = 0;
+
+ end = start + size;
+
+ //buffer = malloc( size + 1 );
+
+ //PRINTF(DBG_UNZIP,
+ // ("dict_data_read( %p, %lu, %lu )\n",
+ //h, start, size ));
+
+
+ switch (this->type)
+ {
+ case DICT_GZIP:
+ //err_fatal( __FUNCTION__,
+ // "Cannot seek on pure gzip format files.\n"
+ // "Use plain text (for performance)"
+ // " or dzip format (for space savings).\n" );
+ break;
+ case DICT_TEXT:
+ memcpy( buffer, this->start + start, size );
+ //buffer[size] = '\0';
+ break;
+ case DICT_DZIP:
+ if (!this->initialized)
+ {
+ ++this->initialized;
+ this->zStream.zalloc = NULL;
+ this->zStream.zfree = NULL;
+ this->zStream.opaque = NULL;
+ this->zStream.next_in = 0;
+ this->zStream.avail_in = 0;
+ this->zStream.next_out = NULL;
+ this->zStream.avail_out = 0;
+ if (inflateInit2( &this->zStream, -15 ) != Z_OK)
+ {
+ //err_internal( __FUNCTION__,
+ // "Cannot initialize inflation engine: %s\n",
+ //this->zStream.msg );
+ }
+ }
+ firstChunk = start / this->chunkLength;
+ firstOffset = start - firstChunk * this->chunkLength;
+ lastChunk = end / this->chunkLength;
+ lastOffset = end - lastChunk * this->chunkLength;
+ //PRINTF(DBG_UNZIP,
+ // (" start = %lu, end = %lu\n"
+ //"firstChunk = %d, firstOffset = %d,"
+ //" lastChunk = %d, lastOffset = %d\n",
+ //start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
+ for (pt = buffer, i = firstChunk; i <= lastChunk; i++)
+ {
+
+ /* Access cache */
+ found = 0;
+ target = 0;
+ lastStamp = INT_MAX;
+ for (j = 0; j < DICT_CACHE_SIZE; j++)
+ {
+#if USE_CACHE
+ if (this->cache[j].chunk == i)
+ {
+ found = 1;
+ target = j;
+ break;
+ }
+#endif
+ if (this->cache[j].stamp < lastStamp)
+ {
+ lastStamp = this->cache[j].stamp;
+ target = j;
+ }
+ }
+
+ this->cache[target].stamp = ++stamp;
+ if (found)
+ {
+ count = this->cache[target].count;
+ inBuffer = this->cache[target].inBuffer;
+ }
+ else
+ {
+ this->cache[target].chunk = i;
+ if (!this->cache[target].inBuffer)
+ this->cache[target].inBuffer = (char *)malloc( IN_BUFFER_SIZE );
+ inBuffer = this->cache[target].inBuffer;
+
+ if (this->chunks[i] >= OUT_BUFFER_SIZE )
+ {
+ //err_internal( __FUNCTION__,
+ // "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
+ // i, this->chunks[i], OUT_BUFFER_SIZE );
+ }
+ memcpy( outBuffer, this->start + this->offsets[i], this->chunks[i] );
+
+ this->zStream.next_in = (Bytef *)outBuffer;
+ this->zStream.avail_in = this->chunks[i];
+ this->zStream.next_out = (Bytef *)inBuffer;
+ this->zStream.avail_out = IN_BUFFER_SIZE;
+ if (inflate( &this->zStream, Z_PARTIAL_FLUSH ) != Z_OK)
+ {
+ //err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg );
+ }
+ if (this->zStream.avail_in)
+ {
+ //err_internal( __FUNCTION__,
+ // "inflate did not flush (%d pending, %d avail)\n",
+ // this->zStream.avail_in, this->zStream.avail_out );
+ }
+
+ count = IN_BUFFER_SIZE - this->zStream.avail_out;
+
+ this->cache[target].count = count;
+ }
+
+ if (i == firstChunk)
+ {
+ if (i == lastChunk)
+ {
+ memcpy( pt, inBuffer + firstOffset, lastOffset - firstOffset);
+ pt += lastOffset - firstOffset;
+ }
+ else
+ {
+ if (count != this->chunkLength )
+ {
+ //err_internal( __FUNCTION__,
+ // "Length = %d instead of %d\n",
+ //count, this->chunkLength );
+ }
+ memcpy( pt, inBuffer + firstOffset,
+ this->chunkLength - firstOffset );
+ pt += this->chunkLength - firstOffset;
+ }
+ }
+ else if (i == lastChunk)
+ {
+ memcpy( pt, inBuffer, lastOffset );
+ pt += lastOffset;
+ }
+ else
+ {
+ assert( count == this->chunkLength );
+ memcpy( pt, inBuffer, this->chunkLength );
+ pt += this->chunkLength;
+ }
+ }
+ //*pt = '\0';
+ break;
+ case DICT_UNKNOWN:
+ //err_fatal( __FUNCTION__, "Cannot read unknown file type\n" );
+ break;
+ }
+}