Update to 2.0.0 tree from current Fremantle build
[opencv] src/cxcore/cxalloc.cpp
diff --git a/src/cxcore/cxalloc.cpp b/src/cxcore/cxalloc.cpp
new file mode 100644
index 0000000..1d73f94
--- /dev/null
+++ b/src/cxcore/cxalloc.cpp
@@ -0,0 +1,697 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "_cxcore.h"
+
+#define CV_USE_SYSTEM_MALLOC 1
+
+namespace cv
+{
+
+static void* OutOfMemoryError(size_t size)
+{
+    CV_Error_(CV_StsNoMem, ("Failed to allocate %lu bytes", (unsigned long)size));
+    return 0;
+}
+
+#if CV_USE_SYSTEM_MALLOC
+
+void deleteThreadAllocData() {}
+
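+// System-malloc version of fastMalloc: over-allocate by sizeof(void*) +
+// CV_MALLOC_ALIGN, align the user pointer, and stash the original malloc()
+// pointer just before it so that fastFree() can recover and release the block.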
+void* fastMalloc( size_t size )
+{
+    uchar* udata = (uchar*)malloc(size + sizeof(void*) + CV_MALLOC_ALIGN);
+    if(!udata)
+        return OutOfMemoryError(size);
+    uchar** adata = alignPtr((uchar**)udata + 1, CV_MALLOC_ALIGN);
+    adata[-1] = udata;
+    return adata;
+}
+    
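+// fastFree retrieves the original pointer stored at ptr[-1] by fastMalloc
+// and hands it back to free(); a null pointer is silently ignored.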
+void fastFree(void* ptr)
+{
+    if(ptr)
+    {
+        uchar* udata = ((uchar**)ptr)[-1];
+        CV_DbgAssert(udata < (uchar*)ptr &&
+               ((uchar*)ptr - udata) <= (ptrdiff_t)(sizeof(void*)+CV_MALLOC_ALIGN)); 
+        free(udata);
+    }
+}
+
+#else
+
+#if 0
+#define SANITY_CHECK(block) \
+    CV_Assert(((size_t)(block) & (MEM_BLOCK_SIZE-1)) == 0 && \
+        (unsigned)(block)->binIdx <= (unsigned)MAX_BIN && \
+        (block)->signature == MEM_BLOCK_SIGNATURE)
+#else
+#define SANITY_CHECK(block)
+#endif
+
+#define STAT(stmt)
+
+#ifdef WIN32
+struct CriticalSection
+{
+    CriticalSection() { InitializeCriticalSection(&cs); }
+    ~CriticalSection() { DeleteCriticalSection(&cs); }
+    void lock() { EnterCriticalSection(&cs); }
+    void unlock() { LeaveCriticalSection(&cs); }
+    bool trylock() { return TryEnterCriticalSection(&cs) != 0; }
+
+    CRITICAL_SECTION cs;
+};
+
+void* SystemAlloc(size_t size)
+{
+    void* ptr = malloc(size);
+    return ptr ? ptr : OutOfMemoryError(size);
+}
+
+void SystemFree(void* ptr, size_t)
+{
+    free(ptr);
+}
+#else
+struct CriticalSection
+{
+    CriticalSection() { pthread_mutex_init(&mutex, 0); }
+    ~CriticalSection() { pthread_mutex_destroy(&mutex); }
+    void lock() { pthread_mutex_lock(&mutex); }
+    void unlock() { pthread_mutex_unlock(&mutex); }
+    bool trylock() { return pthread_mutex_trylock(&mutex) == 0; }
+
+    pthread_mutex_t mutex;
+};
+
+void* SystemAlloc(size_t size)
+{
+    #ifndef MAP_ANONYMOUS
+    #define MAP_ANONYMOUS MAP_ANON
+    #endif
+    void* ptr = 0;
+    ptr = mmap(ptr, size, (PROT_READ | PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    return ptr != MAP_FAILED ? ptr : OutOfMemoryError(size);
+}
+
+void SystemFree(void* ptr, size_t size)
+{
+    munmap(ptr, size);
+}
+#endif
+
+struct AutoLock
+{
+    AutoLock(CriticalSection& _cs) : cs(&_cs) { cs->lock(); }
+    ~AutoLock() { cs->unlock(); }
+    CriticalSection* cs;
+};
+
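+// Custom pool layout: memory is carved into 16K blocks (MEM_BLOCK_SIZE), each
+// starting with a 128-byte header, and every block serves objects of a single
+// size class taken from binSizeTab below.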
+const size_t MEM_BLOCK_SIGNATURE = 0x01234567;
+const int MEM_BLOCK_SHIFT = 14;
+const size_t MEM_BLOCK_SIZE = 1 << MEM_BLOCK_SHIFT;
+const size_t HDR_SIZE = 128;
+const size_t MAX_BLOCK_SIZE = MEM_BLOCK_SIZE - HDR_SIZE;
+const int MAX_BIN = 28;
+
+static const int binSizeTab[MAX_BIN+1] =
+{ 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 128, 160, 192, 256, 320, 384, 480, 544, 672, 768,
+896, 1056, 1328, 1600, 2688, 4048, 5408, 8128, 16256 };
+
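+// MallocTables::bin() maps a request size to the smallest bin that fits it;
+// binIdx[] is indexed by (size + 7) / 8, so the lookup is a single table load.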
+struct MallocTables
+{
+    void initBinTab()
+    {
+        int i, j = 0, n;
+        for( i = 0; i <= MAX_BIN; i++ )
+        {
+            n = binSizeTab[i]>>3;
+            for( ; j <= n; j++ )
+                binIdx[j] = (uchar)i;
+        }
+    }
+    int bin(size_t size)
+    {
+        assert( size <= MAX_BLOCK_SIZE );
+        return binIdx[(size + 7)>>3];
+    }
+
+    MallocTables()
+    {
+        initBinTab();
+    }
+
+    uchar binIdx[MAX_BLOCK_SIZE/8+1];
+};
+
+MallocTables mallocTables;
+
+struct Node
+{
+    Node* next;
+};
+
+struct ThreadData;
+
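+// Block: one 16K arena dedicated to a single object size. Fresh objects are
+// carved off bumpPtr; objects freed by the owning thread go to
+// privateFreeList, while frees coming from other threads are queued on
+// publicFreeList under the per-block lock.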
+struct Block
+{
+    Block(Block* _next)
+    {
+        signature = MEM_BLOCK_SIGNATURE;
+        prev = 0;
+        next = _next;
+        privateFreeList = publicFreeList = 0;
+        bumpPtr = endPtr = 0;
+        objSize = 0;
+        threadData = 0;
+        data = (uchar*)this + HDR_SIZE;
+    }
+
+    ~Block() {}
+
+    void init(Block* _prev, Block* _next, int _objSize, ThreadData* _threadData)
+    {
+        prev = _prev;
+        if(prev)
+            prev->next = this;
+        next = _next;
+        if(next)
+            next->prev = this;
+        objSize = _objSize;
+        binIdx = mallocTables.bin(objSize);
+        threadData = _threadData;
+        privateFreeList = publicFreeList = 0;
+        bumpPtr = data;
+        int nobjects = MAX_BLOCK_SIZE/objSize;
+        endPtr = bumpPtr + nobjects*objSize;
+        almostEmptyThreshold = (nobjects + 1)/2;
+        allocated = 0;
+    }
+
+    bool isFilled() const { return allocated > almostEmptyThreshold; }
+
+    size_t signature;
+    Block* prev;
+    Block* next;
+    Node* privateFreeList;
+    Node* publicFreeList;
+    uchar* bumpPtr;
+    uchar* endPtr;
+    uchar* data;
+    ThreadData* threadData;
+    int objSize;
+    int binIdx;
+    int allocated;
+    int almostEmptyThreshold;
+    CriticalSection cs;
+};
+
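+// BigBlock: one large system allocation (1MB by default) split into
+// MEM_BLOCK_SIZE-aligned Blocks; that alignment is what lets fastFree()
+// locate a Block header by masking the low bits of any small-object pointer.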
+struct BigBlock
+{
+    BigBlock(int bigBlockSize, BigBlock* _next)
+    {
+        first = alignPtr((Block*)(this+1), MEM_BLOCK_SIZE);
+        next = _next;
+        nblocks = (int)(((char*)this + bigBlockSize - (char*)first)/MEM_BLOCK_SIZE);
+        Block* p = 0;
+        for( int i = nblocks-1; i >= 0; i-- )
+            p = ::new((uchar*)first + i*MEM_BLOCK_SIZE) Block(p);
+    }
+
+    ~BigBlock()
+    {
+        for( int i = nblocks-1; i >= 0; i-- )
+            ((Block*)((uchar*)first+i*MEM_BLOCK_SIZE))->~Block();
+    }
+
+    BigBlock* next;
+    Block* first;
+    int nblocks;
+};
+
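+// BlockPool: a global, mutex-protected free list of 16K blocks, refilled one
+// BigBlock at a time whenever it runs dry.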
+struct BlockPool
+{
+    BlockPool(int _bigBlockSize=1<<20) : freeBlocks(0), pool(0), bigBlockSize(_bigBlockSize)
+    {
+    }
+
+    ~BlockPool()
+    {
+        AutoLock lock(cs);
+        while( pool )
+        {
+            BigBlock* nextBlock = pool->next;
+            pool->~BigBlock();
+            SystemFree(pool, bigBlockSize);
+            pool = nextBlock;
+        }
+    }
+
+    Block* alloc()
+    {
+        AutoLock lock(cs);
+        Block* block;
+        if( !freeBlocks )
+        {
+            BigBlock* bblock = ::new(SystemAlloc(bigBlockSize)) BigBlock(bigBlockSize, pool);
+            assert( bblock != 0 );
+            freeBlocks = bblock->first;
+            pool = bblock;
+        }
+        block = freeBlocks;
+        freeBlocks = freeBlocks->next;
+        if( freeBlocks )
+            freeBlocks->prev = 0;
+        STAT(stat.bruttoBytes += MEM_BLOCK_SIZE);
+        return block;
+    }
+
+    void free(Block* block)
+    {
+        AutoLock lock(cs);
+        block->prev = 0;
+        block->next = freeBlocks;
+        freeBlocks = block;
+        STAT(stat.bruttoBytes -= MEM_BLOCK_SIZE);
+    }
+
+    CriticalSection cs;
+    Block* freeBlocks;
+    BigBlock* pool;
+    int bigBlockSize;
+    int blocksPerBigBlock;
+};
+
+BlockPool mallocPool;
+
+enum { START=0, FREE=1, GC=2 };
+
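+// ThreadData holds, per thread and per size bin, a circular list of Blocks
+// with three cursors: START (list head), FREE (first candidate when looking
+// for space) and GC (scan position for reclaiming publicFreeList entries).
+// The structure itself lives in thread-local storage.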
+struct ThreadData
+{
+    ThreadData() { for(int i = 0; i <= MAX_BIN; i++) bins[i][START] = bins[i][FREE] = bins[i][GC] = 0; }
+    ~ThreadData()
+    {
+        // mark all the thread blocks as abandoned or even release them
+        for( int i = 0; i <= MAX_BIN; i++ )
+        {
+            Block *bin = bins[i][START], *block = bin;
+            bins[i][START] = bins[i][FREE] = bins[i][GC] = 0;
+            if( block )
+            {
+                do
+                {
+                    Block* next = block->next;
+                    int allocated = block->allocated;
+                    {
+                    AutoLock lock(block->cs);
+                    block->next = block->prev = 0;
+                    block->threadData = 0;
+                    Node *node = block->publicFreeList;
+                    for( ; node != 0; node = node->next )
+                        allocated--;
+                    }
+                    if( allocated == 0 )
+                        mallocPool.free(block);
+                    block = next;
+                }
+                while( block != bin );
+            }
+        }
+    }
+
+    void moveBlockToFreeList( Block* block )
+    {
+        int i = block->binIdx;
+        Block*& freePtr = bins[i][FREE];
+        CV_DbgAssert( block->next->prev == block && block->prev->next == block );
+        if( block != freePtr )
+        {
+            Block*& gcPtr = bins[i][GC];
+            if( gcPtr == block )
+                gcPtr = block->next;
+            if( block->next != block )
+            {
+                block->prev->next = block->next;
+                block->next->prev = block->prev;
+            }
+            block->next = freePtr->next;
+            block->prev = freePtr;
+            freePtr = block->next->prev = block->prev->next = block;
+        }
+    }
+
+    Block* bins[MAX_BIN+1][3];
+
+#ifdef WIN32
+#ifdef WINCE
+#      define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
+#endif
+
+    static DWORD tlsKey;
+    static ThreadData* get()
+    {
+        ThreadData* data;
+        if( tlsKey == TLS_OUT_OF_INDEXES )
+            tlsKey = TlsAlloc();
+        data = (ThreadData*)TlsGetValue(tlsKey);
+        if( !data )
+        {
+            data = new ThreadData;
+            TlsSetValue(tlsKey, data);
+        }
+        return data;
+    }
+#else
+    static void deleteData(void* data)
+    {
+        delete (ThreadData*)data;
+    }
+
+    static pthread_key_t tlsKey;
+    static ThreadData* get()
+    {
+        ThreadData* data;
+        if( !tlsKey )
+            pthread_key_create(&tlsKey, deleteData);
+        data = (ThreadData*)pthread_getspecific(tlsKey);
+        if( !data )
+        {
+            data = new ThreadData;
+            pthread_setspecific(tlsKey, data);
+        }
+        return data;
+    }
+#endif
+};
+
+#ifdef WIN32
+DWORD ThreadData::tlsKey = TLS_OUT_OF_INDEXES;
+
+void deleteThreadAllocData()
+{
+    if( ThreadData::tlsKey != TLS_OUT_OF_INDEXES )
+        delete (ThreadData*)TlsGetValue( ThreadData::tlsKey );
+}
+
+#else
+pthread_key_t ThreadData::tlsKey = 0;
+#endif
+
+#if 0
+static void checkList(ThreadData* tls, int idx)
+{
+    Block* block = tls->bins[idx][START];
+    if( !block )
+    {
+        CV_DbgAssert( tls->bins[idx][FREE] == 0 && tls->bins[idx][GC] == 0 );
+    }
+    else
+    {
+        bool gcInside = false;
+        bool freeInside = false;
+        do
+        {
+            if( tls->bins[idx][FREE] == block )
+                freeInside = true;
+            if( tls->bins[idx][GC] == block )
+                gcInside = true;
+            block = block->next;
+        }
+        while( block != tls->bins[idx][START] );
+        CV_DbgAssert( gcInside && freeInside );
+    }
+}
+#else
+#define checkList(tls, idx)
+#endif
+
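+// Pool version of fastMalloc: requests larger than MAX_BLOCK_SIZE go straight
+// to the system allocator, with the result aligned to MEM_BLOCK_SIZE so that
+// fastFree() can tell the two cases apart; smaller requests are rounded up to
+// a bin size and served from the calling thread's block lists.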
+void* fastMalloc( size_t size )
+{
+    if( size > MAX_BLOCK_SIZE )
+    {
+        size_t size1 = size + sizeof(uchar*)*2 + MEM_BLOCK_SIZE;
+        uchar* udata = (uchar*)SystemAlloc(size1);
+        uchar** adata = alignPtr((uchar**)udata + 2, MEM_BLOCK_SIZE);
+        adata[-1] = udata;
+        adata[-2] = (uchar*)size1;
+        return adata;
+    }
+
+    {
+    ThreadData* tls = ThreadData::get();
+    int idx = mallocTables.bin(size);
+    Block*& startPtr = tls->bins[idx][START];
+    Block*& gcPtr = tls->bins[idx][GC];
+    Block*& freePtr = tls->bins[idx][FREE], *block = freePtr;
+    checkList(tls, idx);
+    size = binSizeTab[idx];
+    STAT(
+        stat.nettoBytes += size;
+        stat.mallocCalls++;
+        );
+    uchar* data = 0;
+
+    for(;;)
+    {
+        if( block )
+        {
+            // try to find a non-full block
+            for(;;)
+            {
+                CV_DbgAssert( block->next->prev == block && block->prev->next == block );
+                if( block->bumpPtr )
+                {
+                    data = block->bumpPtr;
+                    if( (block->bumpPtr += size) >= block->endPtr )
+                        block->bumpPtr = 0;
+                    break;
+                }
+
+                if( block->privateFreeList )
+                {
+                    data = (uchar*)block->privateFreeList;
+                    block->privateFreeList = block->privateFreeList->next;
+                    break;
+                }
+
+                if( block == startPtr )
+                    break;
+                block = block->next;
+            }
+#if 0
+            avg_k += _k;
+            avg_nk++;
+            if( avg_nk == 1000 )
+            {
+                printf("avg search iters per 1e3 allocs = %g\n", (double)avg_k/avg_nk );
+                avg_k = avg_nk = 0;
+            }
+#endif
+
+            freePtr = block;
+            if( !data )
+            {
+                block = gcPtr; 
+                for( int k = 0; k < 2; k++ )
+                {
+                    SANITY_CHECK(block);
+                    CV_DbgAssert( block->next->prev == block && block->prev->next == block );
+                    if( block->publicFreeList )
+                    {
+                        {
+                        AutoLock lock(block->cs);
+                        block->privateFreeList = block->publicFreeList;
+                        block->publicFreeList = 0;
+                        }
+                        Node* node = block->privateFreeList;
+                        for(;node != 0; node = node->next)
+                            --block->allocated;
+                        data = (uchar*)block->privateFreeList;
+                        block->privateFreeList = block->privateFreeList->next;
+                        gcPtr = block->next;
+                        if( block->allocated+1 <= block->almostEmptyThreshold )
+                            tls->moveBlockToFreeList(block);
+                        break;
+                    }
+                    block = block->next;
+                }
+                if( !data )
+                    gcPtr = block;
+            }
+        }
+
+        if( data )
+            break;
+        block = mallocPool.alloc();
+        block->init(startPtr ? startPtr->prev : block, startPtr ? startPtr : block, (int)size, tls);
+        if( !startPtr )
+            startPtr = gcPtr = freePtr = block;
+        checkList(tls, block->binIdx);
+        SANITY_CHECK(block);
+    }
+
+    ++block->allocated;
+    return data;
+    }
+}
+
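+// Pool version of fastFree: a pointer aligned to MEM_BLOCK_SIZE is a big
+// allocation and goes back to the system; otherwise the owning Block header
+// is found by masking the low bits, and the freed node is pushed onto the
+// block's private or public free list depending on which thread is freeing.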
+void fastFree( void* ptr )
+{
+    if( ((size_t)ptr & (MEM_BLOCK_SIZE-1)) == 0 )
+    {
+        if( ptr != 0 )
+        {
+            void* origPtr = ((void**)ptr)[-1];
+            size_t sz = (size_t)((void**)ptr)[-2];
+            SystemFree( origPtr, sz );
+        }
+        return;
+    }
+
+    {
+    ThreadData* tls = ThreadData::get();
+    Node* node = (Node*)ptr;
+    Block* block = (Block*)((size_t)ptr & -(int)MEM_BLOCK_SIZE);
+    assert( block->signature == MEM_BLOCK_SIGNATURE );
+
+    if( block->threadData == tls )
+    {
+        STAT(
+        stat.nettoBytes -= block->objSize;
+        stat.freeCalls++;
+        float ratio = (float)stat.nettoBytes/stat.bruttoBytes;
+        if( stat.minUsageRatio > ratio )
+            stat.minUsageRatio = ratio;
+        );
+
+        SANITY_CHECK(block);
+
+        bool prevFilled = block->isFilled();
+        --block->allocated;
+        if( !block->isFilled() && (block->allocated == 0 || prevFilled) )
+        {
+            if( block->allocated == 0 )
+            {
+                int idx = block->binIdx;
+                Block*& startPtr = tls->bins[idx][START];
+                Block*& freePtr = tls->bins[idx][FREE];
+                Block*& gcPtr = tls->bins[idx][GC];
+                
+                if( block == block->next )
+                {
+                    CV_DbgAssert( startPtr == block && freePtr == block && gcPtr == block );
+                    startPtr = freePtr = gcPtr = 0;
+                }
+                else
+                {
+                    if( freePtr == block )
+                        freePtr = block->next;
+                    if( gcPtr == block )
+                        gcPtr = block->next;
+                    if( startPtr == block )
+                        startPtr = block->next;
+                    block->prev->next = block->next;
+                    block->next->prev = block->prev;
+                }
+                mallocPool.free(block);
+                checkList(tls, idx);
+                return;
+            }
+
+            tls->moveBlockToFreeList(block);
+        }
+        node->next = block->privateFreeList;
+        block->privateFreeList = node;
+    }
+    else
+    {
+        AutoLock lock(block->cs);
+        SANITY_CHECK(block);
+
+        node->next = block->publicFreeList;
+        block->publicFreeList = node;
+        if( block->threadData == 0 )
+        {
+            // take ownership of the abandoned block.
+            // note that this can happen concurrently with the owning thread's
+            // ~ThreadData() marking the block as abandoned, so this part of
+            // the algorithm needs to be checked for data races
+            int idx = block->binIdx;
+            block->threadData = tls;
+            Block*& startPtr = tls->bins[idx][START];
+
+            if( startPtr )
+            {
+                block->next = startPtr;
+                block->prev = startPtr->prev;
+                block->next->prev = block->prev->next = block;
+            }
+            else
+                startPtr = tls->bins[idx][FREE] = tls->bins[idx][GC] = block;
+        }
+    }
+    }
+}
+
+#endif
+
+}
+
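+// The legacy C API is kept as thin wrappers around the C++ allocator;
+// installing a custom memory manager is no longer supported.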
+CV_IMPL void cvSetMemoryManager( CvAllocFunc, CvFreeFunc, void * )
+{
+    CV_Error( -1, "Custom memory allocator is not supported" );
+}
+
+CV_IMPL void* cvAlloc( size_t size )
+{
+    return cv::fastMalloc( size );
+}
+
+CV_IMPL void cvFree_( void* ptr )
+{
+    cv::fastFree( ptr );
+}
+
+
+/* End of file. */