Update to 2.0.0 tree from current Fremantle build
[opencv] / src / cxcore / cxalloc.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "_cxcore.h"
44
45 #define CV_USE_SYSTEM_MALLOC 1
46
47 namespace cv
48 {
49
50 static void* OutOfMemoryError(size_t size)
51 {
52     CV_Error_(CV_StsNoMem, ("Failed to allocate %lu bytes", (unsigned long)size));
53     return 0;
54 }
55
56 #if CV_USE_SYSTEM_MALLOC
57
58 void deleteThreadAllocData() {}
59
60 void* fastMalloc( size_t size )
61 {
62     uchar* udata = (uchar*)malloc(size + sizeof(void*) + CV_MALLOC_ALIGN);
63     if(!udata)
64         return OutOfMemoryError(size);
65     uchar** adata = alignPtr((uchar**)udata + 1, CV_MALLOC_ALIGN);
66     adata[-1] = udata;
67     return adata;
68 }
69     
70 void fastFree(void* ptr)
71 {
72     if(ptr)
73     {
74         uchar* udata = ((uchar**)ptr)[-1];
75         CV_DbgAssert(udata < (uchar*)ptr &&
76                ((uchar*)ptr - udata) <= (ptrdiff_t)(sizeof(void*)+CV_MALLOC_ALIGN)); 
77         free(udata);
78     }
79 }
80
81 #else
82
83 #if 0
84 #define SANITY_CHECK(block) \
85     CV_Assert(((size_t)(block) & (MEM_BLOCK_SIZE-1)) == 0 && \
86         (unsigned)(block)->binIdx <= (unsigned)MAX_BIN && \
87         (block)->signature == MEM_BLOCK_SIGNATURE)
88 #else
89 #define SANITY_CHECK(block)
90 #endif
91
92 #define STAT(stmt)
93
94 #ifdef WIN32
95 struct CriticalSection
96 {
97     CriticalSection() { InitializeCriticalSection(&cs); }
98     ~CriticalSection() { DeleteCriticalSection(&cs); }
99     void lock() { EnterCriticalSection(&cs); }
100     void unlock() { LeaveCriticalSection(&cs); }
101     bool trylock() { return TryEnterCriticalSection(&cs) != 0; }
102
103     CRITICAL_SECTION cs;
104 };
105
106 void* SystemAlloc(size_t size)
107 {
108     void* ptr = malloc(size);
109     return ptr ? ptr : OutOfMemoryError(size);
110 }
111
112 void SystemFree(void* ptr, size_t)
113 {
114     free(ptr);
115 }
116 #else
117 struct CriticalSection
118 {
119     CriticalSection() { pthread_mutex_init(&mutex, 0); }
120     ~CriticalSection() { pthread_mutex_destroy(&mutex); }
121     void lock() { pthread_mutex_lock(&mutex); }
122     void unlock() { pthread_mutex_unlock(&mutex); }
123     bool trylock() { return pthread_mutex_trylock(&mutex) == 0; }
124
125     pthread_mutex_t mutex;
126 };
127
128 void* SystemAlloc(size_t size)
129 {
130     #ifndef MAP_ANONYMOUS
131     #define MAP_ANONYMOUS MAP_ANON
132     #endif
133     void* ptr = 0;
134     ptr = mmap(ptr, size, (PROT_READ | PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
135     return ptr != MAP_FAILED ? ptr : OutOfMemoryError(size);
136 }
137
138 void SystemFree(void* ptr, size_t size)
139 {
140     munmap(ptr, size);
141 }
142 #endif
143
144 struct AutoLock
145 {
146     AutoLock(CriticalSection& _cs) : cs(&_cs) { cs->lock(); }
147     ~AutoLock() { cs->unlock(); }
148     CriticalSection* cs;
149 };
150
151 const size_t MEM_BLOCK_SIGNATURE = 0x01234567;
152 const int MEM_BLOCK_SHIFT = 14;
153 const size_t MEM_BLOCK_SIZE = 1 << MEM_BLOCK_SHIFT;
154 const size_t HDR_SIZE = 128;
155 const size_t MAX_BLOCK_SIZE = MEM_BLOCK_SIZE - HDR_SIZE;
156 const int MAX_BIN = 28;
157
158 static const int binSizeTab[MAX_BIN+1] =
159 { 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 128, 160, 192, 256, 320, 384, 480, 544, 672, 768,
160 896, 1056, 1328, 1600, 2688, 4048, 5408, 8128, 16256 };
161
162 struct MallocTables
163 {
164     void initBinTab()
165     {
166         int i, j = 0, n;
167         for( i = 0; i <= MAX_BIN; i++ )
168         {
169             n = binSizeTab[i]>>3;
170             for( ; j <= n; j++ )
171                 binIdx[j] = (uchar)i;
172         }
173     }
174     int bin(size_t size)
175     {
176         assert( size <= MAX_BLOCK_SIZE );
177         return binIdx[(size + 7)>>3];
178     }
179
180     MallocTables()
181     {
182         initBinTab();
183     }
184
185     uchar binIdx[MAX_BLOCK_SIZE/8+1];
186 };
187
188 MallocTables mallocTables;
189
190 struct Node
191 {
192     Node* next;
193 };
194
195 struct ThreadData;
196
197 struct Block
198 {
199     Block(Block* _next)
200     {
201         signature = MEM_BLOCK_SIGNATURE;
202         prev = 0;
203         next = _next;
204         privateFreeList = publicFreeList = 0;
205         bumpPtr = endPtr = 0;
206         objSize = 0;
207         threadData = 0;
208         data = (uchar*)this + HDR_SIZE;
209     }
210
211     ~Block() {}
212
213     void init(Block* _prev, Block* _next, int _objSize, ThreadData* _threadData)
214     {
215         prev = _prev;
216         if(prev)
217             prev->next = this;
218         next = _next;
219         if(next)
220             next->prev = this;
221         objSize = _objSize;
222         binIdx = mallocTables.bin(objSize);
223         threadData = _threadData;
224         privateFreeList = publicFreeList = 0;
225         bumpPtr = data;
226         int nobjects = MAX_BLOCK_SIZE/objSize;
227         endPtr = bumpPtr + nobjects*objSize;
228         almostEmptyThreshold = (nobjects + 1)/2;
229         allocated = 0;
230     }
231
232     bool isFilled() const { return allocated > almostEmptyThreshold; }
233
234     size_t signature;
235     Block* prev;
236     Block* next;
237     Node* privateFreeList;
238     Node* publicFreeList;
239     uchar* bumpPtr;
240     uchar* endPtr;
241     uchar* data;
242     ThreadData* threadData;
243     int objSize;
244     int binIdx;
245     int allocated;
246     int almostEmptyThreshold;
247     CriticalSection cs;
248 };
249
250 struct BigBlock
251 {
252     BigBlock(int bigBlockSize, BigBlock* _next)
253     {
254         first = alignPtr((Block*)(this+1), MEM_BLOCK_SIZE);
255         next = _next;
256         nblocks = (int)(((char*)this + bigBlockSize - (char*)first)/MEM_BLOCK_SIZE);
257         Block* p = 0;
258         for( int i = nblocks-1; i >= 0; i-- )
259             p = ::new((uchar*)first + i*MEM_BLOCK_SIZE) Block(p);
260     }
261
262     ~BigBlock()
263     {
264         for( int i = nblocks-1; i >= 0; i-- )
265             ((Block*)((uchar*)first+i*MEM_BLOCK_SIZE))->~Block();
266     }
267
268     BigBlock* next;
269     Block* first;
270     int nblocks;
271 };
272
273 struct BlockPool
274 {
275     BlockPool(int _bigBlockSize=1<<20) : pool(0), bigBlockSize(_bigBlockSize)
276     {
277     }
278
279     ~BlockPool()
280     {
281         AutoLock lock(cs);
282         while( pool )
283         {
284             BigBlock* nextBlock = pool->next;
285             pool->~BigBlock();
286             SystemFree(pool, bigBlockSize);
287             pool = nextBlock;
288         }
289     }
290
291     Block* alloc()
292     {
293         AutoLock lock(cs);
294         Block* block;
295         if( !freeBlocks )
296         {
297             BigBlock* bblock = ::new(SystemAlloc(bigBlockSize)) BigBlock(bigBlockSize, pool);
298             assert( bblock != 0 );
299             freeBlocks = bblock->first;
300             pool = bblock;
301         }
302         block = freeBlocks;
303         freeBlocks = freeBlocks->next;
304         if( freeBlocks )
305             freeBlocks->prev = 0;
306         STAT(stat.bruttoBytes += MEM_BLOCK_SIZE);
307         return block;
308     }
309
310     void free(Block* block)
311     {
312         AutoLock lock(cs);
313         block->prev = 0;
314         block->next = freeBlocks;
315         freeBlocks = block;
316         STAT(stat.bruttoBytes -= MEM_BLOCK_SIZE);
317     }
318
319     CriticalSection cs;
320     Block* freeBlocks;
321     BigBlock* pool;
322     int bigBlockSize;
323     int blocksPerBigBlock;
324 };
325
326 BlockPool mallocPool;
327
328 enum { START=0, FREE=1, GC=2 };
329
330 struct ThreadData
331 {
332     ThreadData() { for(int i = 0; i <= MAX_BIN; i++) bins[i][START] = bins[i][FREE] = bins[i][GC] = 0; }
333     ~ThreadData()
334     {
335         // mark all the thread blocks as abandoned or even release them
336         for( int i = 0; i <= MAX_BIN; i++ )
337         {
338             Block *bin = bins[i][START], *block = bin;
339             bins[i][START] = bins[i][FREE] = bins[i][GC] = 0;
340             if( block )
341             {
342                 do
343                 {
344                     Block* next = block->next;
345                     int allocated = block->allocated;
346                     {
347                     AutoLock lock(block->cs);
348                     block->next = block->prev = 0;
349                     block->threadData = 0;
350                     Node *node = block->publicFreeList;
351                     for( ; node != 0; node = node->next )
352                         allocated--;
353                     }
354                     if( allocated == 0 )
355                         mallocPool.free(block);
356                     block = next;
357                 }
358                 while( block != bin );
359             }
360         }
361     }
362
363     void moveBlockToFreeList( Block* block )
364     {
365         int i = block->binIdx;
366         Block*& freePtr = bins[i][FREE];
367         CV_DbgAssert( block->next->prev == block && block->prev->next == block );
368         if( block != freePtr )
369         {
370             Block*& gcPtr = bins[i][GC];
371             if( gcPtr == block )
372                 gcPtr = block->next;
373             if( block->next != block )
374             {
375                 block->prev->next = block->next;
376                 block->next->prev = block->prev;
377             }
378             block->next = freePtr->next;
379             block->prev = freePtr;
380             freePtr = block->next->prev = block->prev->next = block;
381         }
382     }
383
384     Block* bins[MAX_BIN+1][3];
385
386 #ifdef WIN32
387 #ifdef WINCE
388 #       define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
389 #endif
390
391     static DWORD tlsKey;
392     static ThreadData* get()
393     {
394         ThreadData* data;
395         if( tlsKey == TLS_OUT_OF_INDEXES )
396             tlsKey = TlsAlloc();
397         data = (ThreadData*)TlsGetValue(tlsKey);
398         if( !data )
399         {
400             data = new ThreadData;
401             TlsSetValue(tlsKey, data);
402         }
403         return data;
404     }
405 #else
406     static void deleteData(void* data)
407     {
408         delete (ThreadData*)data;
409     }
410
411     static pthread_key_t tlsKey;
412     static ThreadData* get()
413     {
414         ThreadData* data;
415         if( !tlsKey )
416             pthread_key_create(&tlsKey, deleteData);
417         data = (ThreadData*)pthread_getspecific(tlsKey);
418         if( !data )
419         {
420             data = new ThreadData;
421             pthread_setspecific(tlsKey, data);
422         }
423         return data;
424     }
425 #endif
426 };
427
428 #ifdef WIN32
429 DWORD ThreadData::tlsKey = TLS_OUT_OF_INDEXES;
430
431 void deleteThreadAllocData()
432 {
433     if( ThreadData::tlsKey != TLS_OUT_OF_INDEXES )
434         delete (ThreadData*)TlsGetValue( ThreadData::tlsKey );
435 }
436
437 #else
438 pthread_key_t ThreadData::tlsKey = 0;
439 #endif
440
441 #if 0
442 static void checkList(ThreadData* tls, int idx)
443 {
444     Block* block = tls->bins[idx][START];
445     if( !block )
446     {
447         CV_DbgAssert( tls->bins[idx][FREE] == 0 && tls->bins[idx][GC] == 0 );
448     }
449     else
450     {
451         bool gcInside = false;
452         bool freeInside = false;
453         do
454         {
455             if( tls->bins[idx][FREE] == block )
456                 freeInside = true;
457             if( tls->bins[idx][GC] == block )
458                 gcInside = true;
459             block = block->next;
460         }
461         while( block != tls->bins[idx][START] );
462         CV_DbgAssert( gcInside && freeInside );
463     }
464 }
465 #else
466 #define checkList(tls, idx)
467 #endif
468
469 void* fastMalloc( size_t size )
470 {
471     if( size > MAX_BLOCK_SIZE )
472     {
473         size_t size1 = size + sizeof(uchar*)*2 + MEM_BLOCK_SIZE;
474         uchar* udata = (uchar*)SystemAlloc(size1);
475         uchar** adata = alignPtr((uchar**)udata + 2, MEM_BLOCK_SIZE);
476         adata[-1] = udata;
477         adata[-2] = (uchar*)size1;
478         return adata;
479     }
480
481     {
482     ThreadData* tls = ThreadData::get();
483     int idx = mallocTables.bin(size);
484     Block*& startPtr = tls->bins[idx][START];
485     Block*& gcPtr = tls->bins[idx][GC];
486     Block*& freePtr = tls->bins[idx][FREE], *block = freePtr;
487     checkList(tls, idx);
488     size = binSizeTab[idx];
489     STAT(
490         stat.nettoBytes += size;
491         stat.mallocCalls++;
492         );
493     uchar* data = 0;
494
495     for(;;)
496     {
497         if( block )
498         {
499             // try to find non-full block
500             for(;;)
501             {
502                 CV_DbgAssert( block->next->prev == block && block->prev->next == block );
503                 if( block->bumpPtr )
504                 {
505                     data = block->bumpPtr;
506                     if( (block->bumpPtr += size) >= block->endPtr )
507                         block->bumpPtr = 0;
508                     break;
509                 }
510
511                 if( block->privateFreeList )
512                 {
513                     data = (uchar*)block->privateFreeList;
514                     block->privateFreeList = block->privateFreeList->next;
515                     break;
516                 }
517
518                 if( block == startPtr )
519                     break;
520                 block = block->next;
521             }
522 #if 0
523             avg_k += _k;
524             avg_nk++;
525             if( avg_nk == 1000 )
526             {
527                 printf("avg search iters per 1e3 allocs = %g\n", (double)avg_k/avg_nk );
528                 avg_k = avg_nk = 0;
529             }
530 #endif
531
532             freePtr = block;
533             if( !data )
534             {
535                 block = gcPtr; 
536                 for( int k = 0; k < 2; k++ )
537                 {
538                     SANITY_CHECK(block);
539                     CV_DbgAssert( block->next->prev == block && block->prev->next == block );
540                     if( block->publicFreeList )
541                     {
542                         {
543                         AutoLock lock(block->cs);
544                         block->privateFreeList = block->publicFreeList;
545                         block->publicFreeList = 0;
546                         }
547                         Node* node = block->privateFreeList;
548                         for(;node != 0; node = node->next)
549                             --block->allocated;
550                         data = (uchar*)block->privateFreeList;
551                         block->privateFreeList = block->privateFreeList->next;
552                         gcPtr = block->next;
553                         if( block->allocated+1 <= block->almostEmptyThreshold )
554                             tls->moveBlockToFreeList(block);
555                         break;
556                     }
557                     block = block->next;
558                 }
559                 if( !data )
560                     gcPtr = block;
561             }
562         }
563
564         if( data )
565             break;
566         block = mallocPool.alloc();
567         block->init(startPtr ? startPtr->prev : block, startPtr ? startPtr : block, (int)size, tls);
568         if( !startPtr )
569             startPtr = gcPtr = freePtr = block;
570         checkList(tls, block->binIdx);
571         SANITY_CHECK(block);
572     }
573
574     ++block->allocated;
575     return data;
576     }
577 }
578
579 void fastFree( void* ptr )
580 {
581     if( ((size_t)ptr & (MEM_BLOCK_SIZE-1)) == 0 )
582     {
583         if( ptr != 0 )
584         {
585             void* origPtr = ((void**)ptr)[-1];
586             size_t sz = (size_t)((void**)ptr)[-2];
587             SystemFree( origPtr, sz );
588         }
589         return;
590     }
591
592     {
593     ThreadData* tls = ThreadData::get();
594     Node* node = (Node*)ptr;
595     Block* block = (Block*)((size_t)ptr & -(int)MEM_BLOCK_SIZE);
596     assert( block->signature == MEM_BLOCK_SIGNATURE );
597
598     if( block->threadData == tls )
599     {
600         STAT(
601         stat.nettoBytes -= block->objSize;
602         stat.freeCalls++;
603         float ratio = (float)stat.nettoBytes/stat.bruttoBytes;
604         if( stat.minUsageRatio > ratio )
605             stat.minUsageRatio = ratio;
606         );
607
608         SANITY_CHECK(block);
609
610         bool prevFilled = block->isFilled();
611         --block->allocated;
612         if( !block->isFilled() && (block->allocated == 0 || prevFilled) )
613         {
614             if( block->allocated == 0 )
615             {
616                 int idx = block->binIdx;
617                 Block*& startPtr = tls->bins[idx][START];
618                 Block*& freePtr = tls->bins[idx][FREE];
619                 Block*& gcPtr = tls->bins[idx][GC];
620                 
621                 if( block == block->next )
622                 {
623                     CV_DbgAssert( startPtr == block && freePtr == block && gcPtr == block );
624                     startPtr = freePtr = gcPtr = 0;
625                 }
626                 else
627                 {
628                     if( freePtr == block )
629                         freePtr = block->next;
630                     if( gcPtr == block )
631                         gcPtr = block->next;
632                     if( startPtr == block )
633                         startPtr = block->next;
634                     block->prev->next = block->next;
635                     block->next->prev = block->prev;
636                 }
637                 mallocPool.free(block);
638                 checkList(tls, idx);
639                 return;
640             }
641
642             tls->moveBlockToFreeList(block);
643         }
644         node->next = block->privateFreeList;
645         block->privateFreeList = node;
646     }
647     else
648     {
649         AutoLock lock(block->cs);
650         SANITY_CHECK(block);
651
652         node->next = block->publicFreeList;
653         block->publicFreeList = node;
654         if( block->threadData == 0 )
655         {
656             // take ownership of the abandoned block.
657             // note that it can happen at the same time as
658             // ThreadData::deleteData() marks the blocks as abandoned,
659             // so this part of the algorithm needs to be checked for data races
660             int idx = block->binIdx;
661             block->threadData = tls;
662             Block*& startPtr = tls->bins[idx][START];
663
664             if( startPtr )
665             {
666                 block->next = startPtr;
667                 block->prev = startPtr->prev;
668                 block->next->prev = block->prev->next = block;
669             }
670             else
671                 startPtr = tls->bins[idx][FREE] = tls->bins[idx][GC] = block;
672         }
673     }
674     }
675 }
676
677 #endif
678
679 }
680
681 CV_IMPL void cvSetMemoryManager( CvAllocFunc, CvFreeFunc, void * )
682 {
683     CV_Error( -1, "Custom memory allocator is not supported" );
684 }
685
686 CV_IMPL void* cvAlloc( size_t size )
687 {
688     return cv::fastMalloc( size );
689 }
690
691 CV_IMPL void cvFree_( void* ptr )
692 {
693     cv::fastFree( ptr );
694 }
695
696
697 /* End of file. */