Upload 2.0.2
[physicsfs] / lzma / CPP / 7zip / Compress / LZMA_Alone / LzmaBench.cpp
diff --git a/lzma/CPP/7zip/Compress/LZMA_Alone/LzmaBench.cpp b/lzma/CPP/7zip/Compress/LZMA_Alone/LzmaBench.cpp
new file mode 100644 (file)
index 0000000..4cd2d63
--- /dev/null
@@ -0,0 +1,1024 @@
+// LzmaBench.cpp
+
+#include "StdAfx.h"
+
+#include "LzmaBench.h"
+
+#ifndef _WIN32
+#define USE_POSIX_TIME
+#define USE_POSIX_TIME2
+#endif
+
+#ifdef USE_POSIX_TIME
+#include <time.h>
+#ifdef USE_POSIX_TIME2
+#include <sys/time.h>
+#endif
+#endif
+
+#ifdef _WIN32
+#define USE_ALLOCA
+#endif
+
+#ifdef USE_ALLOCA
+#ifdef _WIN32
+#include <malloc.h>
+#else
+#include <stdlib.h>
+#endif
+#endif
+
+extern "C" 
+{ 
+#include "../../../../C/Alloc.h"
+#include "../../../../C/7zCrc.h"
+}
+#include "../../../Common/MyCom.h"
+#include "../../ICoder.h"
+
+#ifdef BENCH_MT
+#include "../../../Windows/Thread.h"
+#include "../../../Windows/Synchronization.h"
+#endif
+
+#ifdef EXTERNAL_LZMA
+#include "../../../Windows/PropVariant.h"
+#else
+#include "../LZMA/LZMADecoder.h"
+#include "../LZMA/LZMAEncoder.h"
+#endif
+
+static const UInt32 kUncompressMinBlockSize = 1 << 26;
+static const UInt32 kAdditionalSize = (1 << 16);
+static const UInt32 kCompressedAdditionalSize = (1 << 10);
+static const UInt32 kMaxLzmaPropSize = 5;
+
+class CBaseRandomGenerator
+{
+  UInt32 A1;
+  UInt32 A2;
+public:
+  CBaseRandomGenerator() { Init(); }
+  void Init() { A1 = 362436069; A2 = 521288629;}
+  UInt32 GetRnd() 
+  {
+    return 
+      ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
+      ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
+  }
+};
+
+class CBenchBuffer
+{
+public:
+  size_t BufferSize;
+  Byte *Buffer;
+  CBenchBuffer(): Buffer(0) {} 
+  virtual ~CBenchBuffer() { Free(); }
+  void Free() 
+  { 
+    ::MidFree(Buffer);
+    Buffer = 0;
+  }
+  bool Alloc(size_t bufferSize) 
+  {
+    if (Buffer != 0 && BufferSize == bufferSize)
+      return true;
+    Free();
+    Buffer = (Byte *)::MidAlloc(bufferSize);
+    BufferSize = bufferSize;
+    return (Buffer != 0);
+  }
+};
+
+class CBenchRandomGenerator: public CBenchBuffer
+{
+  CBaseRandomGenerator *RG;
+public:
+  void Set(CBaseRandomGenerator *rg) { RG = rg; }
+  UInt32 GetVal(UInt32 &res, int numBits) 
+  {
+    UInt32 val = res & (((UInt32)1 << numBits) - 1);
+    res >>= numBits;
+    return val;
+  }
+  UInt32 GetLen(UInt32 &res) 
+  { 
+    UInt32 len = GetVal(res, 2);
+    return GetVal(res, 1 + len);
+  }
+  void Generate()
+  {
+    UInt32 pos = 0;
+    UInt32 rep0 = 1;
+    while (pos < BufferSize)
+    {
+      UInt32 res = RG->GetRnd();
+      res >>= 1;
+      if (GetVal(res, 1) == 0 || pos < 1024)
+        Buffer[pos++] = (Byte)(res & 0xFF);
+      else
+      {
+        UInt32 len;
+        len = 1 + GetLen(res);
+        if (GetVal(res, 3) != 0)
+        {
+          len += GetLen(res);
+          do
+          {
+            UInt32 ppp = GetVal(res, 5) + 6;
+            res = RG->GetRnd();
+            if (ppp > 30)
+              continue;
+            rep0 = /* (1 << ppp) +*/  GetVal(res, ppp);
+            res = RG->GetRnd();
+          }
+          while (rep0 >= pos);
+          rep0++;
+        }
+
+        for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++)
+          Buffer[pos] = Buffer[pos - rep0];
+      }
+    }
+  }
+};
+
+
+class CBenchmarkInStream: 
+  public ISequentialInStream,
+  public CMyUnknownImp
+{
+  const Byte *Data;
+  size_t Pos;
+  size_t Size;
+public:
+  MY_UNKNOWN_IMP
+  void Init(const Byte *data, size_t size)
+  {
+    Data = data;
+    Size = size;
+    Pos = 0;
+  }
+  STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
+};
+
+STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
+{
+  size_t remain = Size - Pos;
+  UInt32 kMaxBlockSize = (1 << 20);
+  if (size > kMaxBlockSize)
+    size = kMaxBlockSize;
+  if (size > remain)
+    size = (UInt32)remain;
+  for (UInt32 i = 0; i < size; i++)
+    ((Byte *)data)[i] = Data[Pos + i];
+  Pos += size;
+  if(processedSize != NULL)
+    *processedSize = size;
+  return S_OK;
+}
+  
+class CBenchmarkOutStream: 
+  public ISequentialOutStream,
+  public CBenchBuffer,
+  public CMyUnknownImp
+{
+  // bool _overflow;
+public:
+  UInt32 Pos;
+  // CBenchmarkOutStream(): _overflow(false) {} 
+  void Init() 
+  {
+    // _overflow = false;
+    Pos = 0;
+  }
+  MY_UNKNOWN_IMP
+  STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
+};
+
+STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
+{
+  size_t curSize = BufferSize - Pos;
+  if (curSize > size)
+    curSize = size;
+  memcpy(Buffer + Pos, data, curSize);
+  Pos += (UInt32)curSize;
+  if(processedSize != NULL)
+    *processedSize = (UInt32)curSize;
+  if (curSize != size)
+  {
+    // _overflow = true;
+    return E_FAIL;
+  }
+  return S_OK;
+}
+  
+class CCrcOutStream: 
+  public ISequentialOutStream,
+  public CMyUnknownImp
+{
+public:
+  UInt32 Crc;
+  MY_UNKNOWN_IMP
+  void Init() { Crc = CRC_INIT_VAL; }
+  STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
+};
+
+STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
+{
+  Crc = CrcUpdate(Crc, data, size);
+  if (processedSize != NULL)
+    *processedSize = size;
+  return S_OK;
+}
+  
+static UInt64 GetTimeCount()
+{
+  #ifdef USE_POSIX_TIME
+  #ifdef USE_POSIX_TIME2
+  timeval v;
+  if (gettimeofday(&v, 0) == 0)
+    return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
+  return (UInt64)time(NULL) * 1000000;
+  #else
+  return time(NULL);
+  #endif
+  #else
+  /*
+  LARGE_INTEGER value;
+  if (::QueryPerformanceCounter(&value))
+    return value.QuadPart;
+  */
+  return GetTickCount();
+  #endif 
+}
+
+static UInt64 GetFreq()
+{
+  #ifdef USE_POSIX_TIME
+  #ifdef USE_POSIX_TIME2
+  return 1000000;
+  #else
+  return 1;
+  #endif 
+  #else
+  /*
+  LARGE_INTEGER value;
+  if (::QueryPerformanceFrequency(&value))
+    return value.QuadPart;
+  */
+  return 1000;
+  #endif 
+}
+
+#ifndef USE_POSIX_TIME
+static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
+#endif
+static UInt64 GetUserTime()
+{
+  #ifdef USE_POSIX_TIME
+  return clock();
+  #else
+  FILETIME creationTime, exitTime, kernelTime, userTime;
+  if (::GetProcessTimes(::GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime) != 0)
+    return GetTime64(userTime) + GetTime64(kernelTime);
+  return (UInt64)GetTickCount() * 10000;
+  #endif 
+}
+
+static UInt64 GetUserFreq()
+{
+  #ifdef USE_POSIX_TIME
+  return CLOCKS_PER_SEC;
+  #else
+  return 10000000;
+  #endif 
+}
+
+class CBenchProgressStatus
+{
+  #ifdef BENCH_MT
+  NWindows::NSynchronization::CCriticalSection CS;  
+  #endif
+public:
+  HRESULT Res;
+  bool EncodeMode;
+  void SetResult(HRESULT res) 
+  {
+    #ifdef BENCH_MT
+    NWindows::NSynchronization::CCriticalSectionLock lock(CS);
+    #endif
+    Res = res;
+  }
+  HRESULT GetResult()
+  {
+    #ifdef BENCH_MT
+    NWindows::NSynchronization::CCriticalSectionLock lock(CS);
+    #endif
+    return Res;
+  }
+};
+
+class CBenchProgressInfo:
+  public ICompressProgressInfo,
+  public CMyUnknownImp
+{
+public:
+  CBenchProgressStatus *Status;
+  CBenchInfo BenchInfo;
+  HRESULT Res;
+  IBenchCallback *callback;
+  CBenchProgressInfo(): callback(0) {}
+  MY_UNKNOWN_IMP
+  STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
+};
+
+void SetStartTime(CBenchInfo &bi)
+{
+  bi.GlobalFreq = GetFreq();
+  bi.UserFreq = GetUserFreq();
+  bi.GlobalTime = ::GetTimeCount();
+  bi.UserTime = ::GetUserTime();
+}
+
+void SetFinishTime(const CBenchInfo &biStart, CBenchInfo &dest)
+{
+  dest.GlobalFreq = GetFreq();
+  dest.UserFreq = GetUserFreq();
+  dest.GlobalTime = ::GetTimeCount() - biStart.GlobalTime;
+  dest.UserTime = ::GetUserTime() - biStart.UserTime;
+}
+
+STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
+{
+  HRESULT res = Status->GetResult();
+  if (res != S_OK)
+    return res;
+  if (!callback)
+    return res;
+  CBenchInfo info = BenchInfo;
+  SetFinishTime(BenchInfo, info);
+  if (Status->EncodeMode)
+  {
+    info.UnpackSize = *inSize;
+    info.PackSize = *outSize;
+    res = callback->SetEncodeResult(info, false);
+  }
+  else
+  {
+    info.PackSize = BenchInfo.PackSize + *inSize;
+    info.UnpackSize = BenchInfo.UnpackSize + *outSize;
+    res = callback->SetDecodeResult(info, false);
+  }
+  if (res != S_OK)
+    Status->SetResult(res);
+  return res;
+}
+
+static const int kSubBits = 8;
+
+static UInt32 GetLogSize(UInt32 size)
+{
+  for (int i = kSubBits; i < 32; i++)
+    for (UInt32 j = 0; j < (1 << kSubBits); j++)
+      if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
+        return (i << kSubBits) + j;
+  return (32 << kSubBits);
+}
+
+static void NormalizeVals(UInt64 &v1, UInt64 &v2)
+{
+  while (v1 > 1000000)
+  {
+    v1 >>= 1;
+    v2 >>= 1;
+  }
+}
+
+UInt64 GetUsage(const CBenchInfo &info)
+{
+  UInt64 userTime = info.UserTime;
+  UInt64 userFreq = info.UserFreq;
+  UInt64 globalTime = info.GlobalTime;
+  UInt64 globalFreq = info.GlobalFreq;
+  NormalizeVals(userTime, userFreq);
+  NormalizeVals(globalFreq, globalTime);
+  if (userFreq == 0)
+    userFreq = 1;
+  if (globalTime == 0)
+    globalTime = 1;
+  return userTime * globalFreq * 1000000 / userFreq / globalTime;
+}
+
+UInt64 GetRatingPerUsage(const CBenchInfo &info, UInt64 rating)
+{
+  UInt64 userTime = info.UserTime;
+  UInt64 userFreq = info.UserFreq;
+  UInt64 globalTime = info.GlobalTime;
+  UInt64 globalFreq = info.GlobalFreq;
+  NormalizeVals(userFreq, userTime);
+  NormalizeVals(globalTime, globalFreq);
+  if (globalFreq == 0)
+    globalFreq = 1;
+  if (userTime == 0)
+    userTime = 1;
+  return userFreq * globalTime / globalFreq *  rating / userTime;
+}
+
+static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
+{
+  UInt64 elTime = elapsedTime;
+  NormalizeVals(freq, elTime);
+  if (elTime == 0)
+    elTime = 1;
+  return value * freq / elTime;
+}
+
+UInt64 GetCompressRating(UInt32 dictionarySize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
+{
+  UInt64 t = GetLogSize(dictionarySize) - (kBenchMinDicLogSize << kSubBits);
+  // UInt64 numCommandsForOne = 1000 + ((t * t * 7) >> (2 * kSubBits)); // AMD K8
+  UInt64 numCommandsForOne = 870 + ((t * t * 5) >> (2 * kSubBits)); // Intel Core2
+
+  UInt64 numCommands = (UInt64)(size) * numCommandsForOne;
+  return MyMultDiv64(numCommands, elapsedTime, freq);
+}
+
+UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt32 numIterations)
+{
+  // UInt64 numCommands = (inSize * 216 + outSize * 14) * numIterations; // AMD K8
+  UInt64 numCommands = (inSize * 220 + outSize * 8) * numIterations; // Intel Core2
+  return MyMultDiv64(numCommands, elapsedTime, freq);
+}
+
+#ifdef EXTERNAL_LZMA
+typedef UInt32 (WINAPI * CreateObjectPointer)(const GUID *clsID, 
+    const GUID *interfaceID, void **outObject);
+#endif
+
+struct CEncoderInfo;
+
+struct CEncoderInfo
+{
+  #ifdef BENCH_MT
+  NWindows::CThread thread[2];
+  #endif
+  CMyComPtr<ICompressCoder> encoder;
+  CBenchProgressInfo *progressInfoSpec[2];
+  CMyComPtr<ICompressProgressInfo> progressInfo[2];
+  UInt32 NumIterations;
+  #ifdef USE_ALLOCA
+  size_t AllocaSize;
+  #endif
+
+  struct CDecoderInfo
+  {
+    CEncoderInfo *Encoder;
+    UInt32 DecoderIndex;
+    #ifdef USE_ALLOCA
+    size_t AllocaSize;
+    #endif
+    bool CallbackMode;
+  };
+  CDecoderInfo decodersInfo[2];
+
+  CMyComPtr<ICompressCoder> decoders[2];
+  HRESULT Results[2];
+  CBenchmarkOutStream *outStreamSpec;
+  CMyComPtr<ISequentialOutStream> outStream;
+  IBenchCallback *callback;
+  UInt32 crc;
+  UInt32 kBufferSize;
+  UInt32 compressedSize;
+  CBenchRandomGenerator rg;
+  CBenchmarkOutStream *propStreamSpec;
+  CMyComPtr<ISequentialOutStream> propStream;
+  HRESULT Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rg);
+  HRESULT Encode();
+  HRESULT Decode(UInt32 decoderIndex);
+
+  CEncoderInfo(): outStreamSpec(0), callback(0), propStreamSpec(0) {}
+
+  #ifdef BENCH_MT
+  static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
+  {
+    CEncoderInfo *encoder = (CEncoderInfo *)param;
+    #ifdef USE_ALLOCA
+    alloca(encoder->AllocaSize);
+    #endif
+    HRESULT res = encoder->Encode();
+    encoder->Results[0] = res;
+    if (res != S_OK)
+      encoder->progressInfoSpec[0]->Status->SetResult(res);
+
+    return 0;
+  }
+  static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
+  {
+    CDecoderInfo *decoder = (CDecoderInfo *)param;
+    #ifdef USE_ALLOCA
+    alloca(decoder->AllocaSize);
+    #endif
+    CEncoderInfo *encoder = decoder->Encoder;
+    encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
+    return 0;
+  }
+
+  HRESULT CreateEncoderThread()
+  {
+    return thread[0].Create(EncodeThreadFunction, this);
+  }
+
+  HRESULT CreateDecoderThread(int index, bool callbackMode
+      #ifdef USE_ALLOCA
+      , size_t allocaSize
+      #endif
+      )
+  {
+    CDecoderInfo &decoder = decodersInfo[index];
+    decoder.DecoderIndex = index;
+    decoder.Encoder = this;
+    #ifdef USE_ALLOCA
+    decoder.AllocaSize = allocaSize;
+    #endif
+    decoder.CallbackMode = callbackMode;
+    return thread[index].Create(DecodeThreadFunction, &decoder);
+  }
+  #endif
+};
+
+HRESULT CEncoderInfo::Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rgLoc)
+{
+  rg.Set(rgLoc);
+  kBufferSize = dictionarySize + kAdditionalSize;
+  UInt32 kCompressedBufferSize = (kBufferSize / 2) + kCompressedAdditionalSize;
+  if (!rg.Alloc(kBufferSize))
+    return E_OUTOFMEMORY;
+  rg.Generate();
+  crc = CrcCalc(rg.Buffer, rg.BufferSize);
+
+  outStreamSpec = new CBenchmarkOutStream;
+  if (!outStreamSpec->Alloc(kCompressedBufferSize))
+    return E_OUTOFMEMORY;
+
+  outStream = outStreamSpec;
+
+  propStreamSpec = 0;
+  if (!propStream)
+  {
+    propStreamSpec = new CBenchmarkOutStream;
+    propStream = propStreamSpec;
+  }
+  if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
+    return E_OUTOFMEMORY;
+  propStreamSpec->Init();
+  
+  PROPID propIDs[] = 
+  { 
+    NCoderPropID::kDictionarySize, 
+    NCoderPropID::kMultiThread
+  };
+  const int kNumProps = sizeof(propIDs) / sizeof(propIDs[0]);
+  PROPVARIANT properties[kNumProps];
+  properties[0].vt = VT_UI4;
+  properties[0].ulVal = (UInt32)dictionarySize;
+
+  properties[1].vt = VT_BOOL;
+  properties[1].boolVal = (numThreads > 1) ? VARIANT_TRUE : VARIANT_FALSE;
+
+  {
+    CMyComPtr<ICompressSetCoderProperties> setCoderProperties;
+    RINOK(encoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProperties));
+    if (!setCoderProperties)
+      return E_FAIL;
+    RINOK(setCoderProperties->SetCoderProperties(propIDs, properties, kNumProps));
+
+    CMyComPtr<ICompressWriteCoderProperties> writeCoderProperties;
+    encoder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProperties);
+    if (writeCoderProperties)
+    {
+      RINOK(writeCoderProperties->WriteCoderProperties(propStream));
+    }
+  }
+  return S_OK;
+}
+
+HRESULT CEncoderInfo::Encode()
+{
+  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
+  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
+  inStreamSpec->Init(rg.Buffer, rg.BufferSize);
+  outStreamSpec->Init();
+
+  RINOK(encoder->Code(inStream, outStream, 0, 0, progressInfo[0]));
+  compressedSize = outStreamSpec->Pos;
+  encoder.Release();
+  return S_OK;
+}
+
+HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
+{
+  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
+  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
+  CMyComPtr<ICompressCoder> &decoder = decoders[decoderIndex];
+
+  CMyComPtr<ICompressSetDecoderProperties2> compressSetDecoderProperties;
+  decoder.QueryInterface(IID_ICompressSetDecoderProperties2, &compressSetDecoderProperties);
+  if (!compressSetDecoderProperties)
+    return E_FAIL;
+
+  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
+  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
+    
+  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
+  pi->BenchInfo.UnpackSize = 0;
+  pi->BenchInfo.PackSize = 0;
+
+  for (UInt32 j = 0; j < NumIterations; j++)
+  {
+    inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
+    crcOutStreamSpec->Init();
+    
+    RINOK(compressSetDecoderProperties->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos));
+    UInt64 outSize = kBufferSize;
+    RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
+    if (CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
+      return S_FALSE;
+    pi->BenchInfo.UnpackSize += kBufferSize;
+    pi->BenchInfo.PackSize += compressedSize;
+  }
+  decoder.Release();
+  return S_OK;
+}
+
+static const UInt32 kNumThreadsMax = (1 << 16);
+
+struct CBenchEncoders
+{
+  CEncoderInfo *encoders;
+  CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; }
+  ~CBenchEncoders() { delete []encoders; }
+};
+
+HRESULT LzmaBench(
+  #ifdef EXTERNAL_LZMA
+  CCodecs *codecs,
+  #endif
+  UInt32 numThreads, UInt32 dictionarySize, IBenchCallback *callback)
+{
+  UInt32 numEncoderThreads = 
+    #ifdef BENCH_MT
+    (numThreads > 1 ? numThreads / 2 : 1);
+    #else
+    1;
+    #endif
+  UInt32 numSubDecoderThreads = 
+    #ifdef BENCH_MT
+    (numThreads > 1 ? 2 : 1);
+    #else
+    1;
+    #endif
+  if (dictionarySize < (1 << kBenchMinDicLogSize) || numThreads < 1 || numEncoderThreads > kNumThreadsMax)
+  {
+    return E_INVALIDARG;
+  }
+
+  CBenchEncoders encodersSpec(numEncoderThreads);
+  CEncoderInfo *encoders = encodersSpec.encoders;
+
+  #ifdef EXTERNAL_LZMA
+  UString name = L"LZMA";
+  #endif
+
+  UInt32 i;
+  for (i = 0; i < numEncoderThreads; i++)
+  {
+    CEncoderInfo &encoder = encoders[i];
+    encoder.callback = (i == 0) ? callback : 0;
+
+    #ifdef EXTERNAL_LZMA
+    RINOK(codecs->CreateCoder(name, true, encoder.encoder));
+    #else
+    encoder.encoder = new NCompress::NLZMA::CEncoder;
+    #endif
+    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
+    {
+      #ifdef EXTERNAL_LZMA
+      RINOK(codecs->CreateCoder(name, false, encoder.decoders[j]));
+      #else
+      encoder.decoders[j] = new NCompress::NLZMA::CDecoder;
+      #endif
+    }
+  }
+
+  CBaseRandomGenerator rg;
+  rg.Init();
+  for (i = 0; i < numEncoderThreads; i++)
+  {
+    RINOK(encoders[i].Init(dictionarySize, numThreads, &rg));
+  }
+
+  CBenchProgressStatus status;
+  status.Res = S_OK;
+  status.EncodeMode = true;
+
+  for (i = 0; i < numEncoderThreads; i++)
+  {
+    CEncoderInfo &encoder = encoders[i];
+    for (int j = 0; j < 2; j++)
+    {
+      encoder.progressInfo[j] = encoder.progressInfoSpec[j] = new CBenchProgressInfo;
+      encoder.progressInfoSpec[j]->Status = &status;
+    }
+    if (i == 0)
+    {
+      encoder.progressInfoSpec[0]->callback = callback;
+      encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numEncoderThreads;
+      SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
+    }
+
+    #ifdef BENCH_MT
+    if (numEncoderThreads > 1)
+    {
+      #ifdef USE_ALLOCA
+      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
+      #endif
+      RINOK(encoder.CreateEncoderThread())
+    }
+    else
+    #endif
+    {
+      RINOK(encoder.Encode());
+    }
+  }
+  #ifdef BENCH_MT
+  if (numEncoderThreads > 1)
+    for (i = 0; i < numEncoderThreads; i++)
+      encoders[i].thread[0].Wait();
+  #endif
+
+  RINOK(status.Res);
+
+  CBenchInfo info;
+
+  SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
+  info.UnpackSize = 0;
+  info.PackSize = 0;
+  info.NumIterations = 1; // progressInfoSpec->NumIterations;
+  for (i = 0; i < numEncoderThreads; i++)
+  {
+    CEncoderInfo &encoder = encoders[i];
+    info.UnpackSize += encoder.kBufferSize;
+    info.PackSize += encoder.compressedSize;
+  }
+  RINOK(callback->SetEncodeResult(info, true));
+
+
+  status.Res = S_OK;
+  status.EncodeMode = false;
+
+  UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
+  for (i = 0; i < numEncoderThreads; i++)
+  {
+    CEncoderInfo &encoder = encoders[i];
+    encoder.NumIterations = 2 + kUncompressMinBlockSize / encoder.kBufferSize;
+
+    if (i == 0)
+    {
+      encoder.progressInfoSpec[0]->callback = callback;
+      encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numDecoderThreads;
+      SetStartTime(encoder.progressInfoSpec[0]->BenchInfo);
+    }
+
+    #ifdef BENCH_MT
+    if (numDecoderThreads > 1)
+    {
+      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
+      {
+        size_t allocaSize = ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF;
+        HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
+            #ifdef USE_ALLOCA
+            , allocaSize
+            #endif
+            );
+        RINOK(res);
+      }
+    }
+    else
+    #endif
+    {
+      RINOK(encoder.Decode(0));
+    }
+  }
+  #ifdef BENCH_MT
+  HRESULT res = S_OK;
+  if (numDecoderThreads > 1)
+    for (i = 0; i < numEncoderThreads; i++)
+      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
+      {
+        CEncoderInfo &encoder = encoders[i];
+        encoder.thread[j].Wait();
+        if (encoder.Results[j] != S_OK)
+          res = encoder.Results[j];
+      }
+  RINOK(res);
+  #endif
+  RINOK(status.Res);
+  SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info);
+  info.UnpackSize = 0;
+  info.PackSize = 0;
+  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
+  for (i = 0; i < numEncoderThreads; i++)
+  {
+    CEncoderInfo &encoder = encoders[i];
+    info.UnpackSize += encoder.kBufferSize;
+    info.PackSize += encoder.compressedSize;
+  }
+  RINOK(callback->SetDecodeResult(info, false));
+  RINOK(callback->SetDecodeResult(info, true));
+  return S_OK;
+}
+
+
+inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
+{ 
+  UInt32 hs = dictionary - 1;
+  hs |= (hs >> 1);
+  hs |= (hs >> 2);
+  hs |= (hs >> 4);
+  hs |= (hs >> 8);
+  hs >>= 1;
+  hs |= 0xFFFF;
+  if (hs > (1 << 24))
+    hs >>= 1;
+  hs++;
+  return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 + 
+      (1 << 20) + (multiThread ? (6 << 20) : 0);
+}
+
+UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary)
+{
+  const UInt32 kBufferSize = dictionary;
+  const UInt32 kCompressedBufferSize = (kBufferSize / 2);
+  UInt32 numSubThreads = (numThreads > 1) ? 2 : 1;
+  UInt32 numBigThreads = numThreads / numSubThreads;
+  return (kBufferSize + kCompressedBufferSize +
+    GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads;
+}
+
+static bool CrcBig(const void *data, UInt32 size, UInt32 numCycles, UInt32 crcBase)
+{
+  for (UInt32 i = 0; i < numCycles; i++)
+    if (CrcCalc(data, size) != crcBase)
+      return false;
+  return true;
+}
+
+#ifdef BENCH_MT
+struct CCrcInfo
+{
+  NWindows::CThread Thread;
+  const Byte *Data;
+  UInt32 Size;
+  UInt32 NumCycles;
+  UInt32 Crc;
+  bool Res;
+  void Wait()
+  {
+    Thread.Wait();
+    Thread.Close();
+  }
+};
+
+static THREAD_FUNC_DECL CrcThreadFunction(void *param)
+{
+  CCrcInfo *p = (CCrcInfo *)param;
+  p->Res = CrcBig(p->Data, p->Size, p->NumCycles, p->Crc);
+  return 0;
+}
+
+struct CCrcThreads
+{
+  UInt32 NumThreads;
+  CCrcInfo *Items;
+  CCrcThreads(): Items(0), NumThreads(0) {}
+  void WaitAll()
+  {
+    for (UInt32 i = 0; i < NumThreads; i++)
+      Items[i].Wait();
+    NumThreads = 0;
+  }
+  ~CCrcThreads() 
+  { 
+    WaitAll();
+    delete []Items; 
+  }
+};
+#endif
+
+static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
+{
+  UInt32 crc = CRC_INIT_VAL;;
+  for (UInt32 i = 0; i < size; i++)
+    crc = CRC_UPDATE_BYTE(crc, buf[i]);
+  return CRC_GET_DIGEST(crc);
+}
+
+static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
+{
+  for (UInt32 i = 0; i < size; i++)
+    buf[i] = (Byte)RG.GetRnd();
+}
+
+static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
+{
+  RandGen(buf, size, RG);
+  return CrcCalc1(buf, size);
+}
+
+bool CrcInternalTest()
+{
+  CBenchBuffer buffer;
+  const UInt32 kBufferSize0 = (1 << 8);
+  const UInt32 kBufferSize1 = (1 << 10);
+  const UInt32 kCheckSize = (1 << 5);
+  if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
+    return false;
+  Byte *buf = buffer.Buffer;
+  UInt32 i;
+  for (i = 0; i < kBufferSize0; i++)
+    buf[i] = (Byte)i;
+  UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
+  if (crc1 != 0x29058C73)
+    return false;
+  CBaseRandomGenerator RG;
+  RandGen(buf + kBufferSize0, kBufferSize1, RG);
+  for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
+    for (UInt32 j = 0; j < kCheckSize; j++)
+      if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
+        return false;
+  return true;
+}
+
+HRESULT CrcBench(UInt32 numThreads, UInt32 bufferSize, UInt64 &speed)
+{
+  if (numThreads == 0)
+    numThreads = 1;
+
+  CBenchBuffer buffer;
+  size_t totalSize = (size_t)bufferSize * numThreads;
+  if (totalSize / numThreads != bufferSize)
+    return E_OUTOFMEMORY;
+  if (!buffer.Alloc(totalSize))
+    return E_OUTOFMEMORY;
+
+  Byte *buf = buffer.Buffer;
+  CBaseRandomGenerator RG;
+  UInt32 numCycles = ((UInt32)1 << 30) / ((bufferSize >> 2) + 1) + 1;
+
+  UInt64 timeVal;
+  #ifdef BENCH_MT
+  CCrcThreads threads;
+  if (numThreads > 1)
+  {
+    threads.Items = new CCrcInfo[numThreads];
+    UInt32 i;
+    for (i = 0; i < numThreads; i++)
+    {
+      CCrcInfo &info = threads.Items[i];
+      Byte *data = buf + (size_t)bufferSize * i;
+      info.Data = data;
+      info.NumCycles = numCycles;
+      info.Size = bufferSize;
+      info.Crc = RandGenCrc(data, bufferSize, RG);
+    }
+    timeVal = GetTimeCount();
+    for (i = 0; i < numThreads; i++)
+    {
+      CCrcInfo &info = threads.Items[i];
+      RINOK(info.Thread.Create(CrcThreadFunction, &info));
+      threads.NumThreads++;
+    }
+    threads.WaitAll();
+    for (i = 0; i < numThreads; i++)
+      if (!threads.Items[i].Res)
+        return S_FALSE;
+  }
+  else
+  #endif
+  {
+    UInt32 crc = RandGenCrc(buf, bufferSize, RG);
+    timeVal = GetTimeCount();
+    if (!CrcBig(buf, bufferSize, numCycles, crc))
+      return S_FALSE;
+  }
+  timeVal = GetTimeCount() - timeVal;
+  if (timeVal == 0)
+    timeVal = 1;
+
+  UInt64 size = (UInt64)numCycles * totalSize;
+  speed = MyMultDiv64(size, timeVal, GetFreq());
+  return S_OK;
+}
+