git.maemo.org Git - opencv/blob - apps/Hawk/lexer.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of Intel Corporation may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include  "stdafx.h"
  43 #include  <stdio.h>
  44 #include  <stdlib.h>
  45 #include  <ctype.h>
  46 #include  <string.h>
  47
  48 #include  "lexer.h"
  49
  50 typedef struct _HashEntry
  51 {
  52     unsigned  hash;
  53     int       len;
  54     struct _HashEntry* next;
  55     const char* str;
  56 }
  57 HashEntry;
  58
  59 HashEntry   keyword_storage[100];
  60
  61 #define  HASHTABLE_SIZE   17
  62 HashEntry*  keyword_table[HASHTABLE_SIZE];
  63 static int  hash_init = 0;
  64
  65 const char* keywords[] =
  66 {
  67     "break",    "case",     "char",     "const",
  68     "continue", "default",  "do",       "double",
  69     "else",     "enum",     "extern",   "float",
  70     "for",      "goto",     "if",       "int",
  71     "long",     "register", "return",   "short",
  72     "signed",   "sizeof",   "static",   "struct",
  73     "switch",   "typedef",  "union",    "unsigned",
  74     "void",     "volatile", "while",    0
  75 };
  76
  77
  78 inline unsigned  calc_hash( const char* text, int len )
  79 {
  80     int j, shift = 0;
  81     unsigned hash = len;
  82     for( j = 0; j < len; j++ )
  83     {
  84         shift += 11;
  85         if( shift >= 32 ) shift -= 32;
  86         hash ^= ((unsigned char*)text)[j] << shift;
  87     }
  88     return hash;
  89 }
  90
  91
  92 void InitLexer( Lexer* lexer, const char* text )
  93 {
  94     lexer->text = text;
  95     lexer->pos = 0;
  96
  97     if( !hash_init )
  98     {
  99         int  i;
 100         int  count[HASHTABLE_SIZE];
 101         memset( count, 0, sizeof(count));
 102         memset( keyword_table, 0, sizeof( keyword_table));
 103
 104         for( i = 0; keywords[i] != 0; i++ )
 105         {
 106             int idx, len;
 107             keyword_storage[i].len = len = strlen( keywords[i] );
 108             keyword_storage[i].hash = calc_hash( keywords[i], len );
 109             keyword_storage[i].str = keywords[i];
 110             idx = keyword_storage[i].hash % HASHTABLE_SIZE;
 111             keyword_storage[i].next = keyword_table[idx];
 112             keyword_table[idx] = keyword_storage + i;
 113             count[idx]++;
 114         }
 115         hash_init = 1;
 116     }
 117 }
 118
 119
 120 HashEntry* find_text( const char* str, int len )
 121 {
 122     unsigned hash = calc_hash( str, len );
 123     int idx = hash % HASHTABLE_SIZE;
 124     HashEntry* entry = keyword_table[idx];
 125
 126     while( entry )
 127     {
 128         if( entry->hash == hash &&
 129             entry->len == len &&
 130             !strncmp( entry->str, str, len )) break;
 131         entry = entry->next;
 132     }
 133     return entry;
 134 }
 135
 136
 137 void  GetToken( Lexer* lexer, Token* token )
 138 {
 139     const char* text = lexer->text;
 140     int pos = lexer->pos;
 141     token->type = TOKEN_NORMAL;
 142
 143     while( isspace(text[pos])) pos++;
 144     token->start = pos;
 145
 146     switch( text[pos] )
 147     {
 148     case '/': pos++;
 149               switch( text[pos] )
 150               {
 151               case '/': /* end-line comment */
 152                   token->type = TOKEN_COMMENT;
 153                   ++pos; while( text[pos] != '\n' && text[pos] != '\0' ) pos++;
 154                   break;
 155               case '*':
 156                   token->type = TOKEN_COMMENT;
 157                   ++pos;
 158                   while( text[pos] != '\0' )
 159                   {
 160                       if( text[pos] == '*' && text[pos+1] == '/')
 161                       {
 162                           pos += 2;
 163                           break;
 164                       }
 165                       pos++;
 166                   }
 167                   break;
 168               }
 169               break;
 170     case '\0':  token->type = TOKEN_END;
 171                 return;
 172
 173     case '\"':  token->type = TOKEN_STRING;
 174                 pos++;
 175                 for(;;)
 176                 {
 177                     if( text[pos] == '\0' || text[pos] == '\"' || text[pos] == '\n' )
 178                         break;
 179                     if( text[pos] == '\\' )
 180                     {
 181                         pos += 2;
 182                         if( text[pos] == '\n' ) pos++;
 183                     }
 184                     else
 185                     {
 186                         pos++;
 187                     }
 188                 }
 189                 if( text[pos] == '\"') pos++;
 190                 break;
 191
 192     case '\'':  token->type = TOKEN_STRING;
 193                 pos++;
 194                 for(;;)
 195                 {
 196                     if( text[pos] == '\0' || text[pos] == '\'' || text[pos] == '\n' )
 197                         break;
 198                     pos += text[pos] == '\\' ? 2 : 1;
 199                 }
 200                 if( text[pos] == '\'') pos++;
 201                 break;
 202     default:
 203         if( isalpha( text[pos] ) || text[pos] == '_' )
 204         {
 205             pos++;
 206             while( isalnum( text[pos] ) || text[pos] == '_' ) pos++;
 207
 208             if( find_text( text + token->start, pos - token->start ))
 209             {
 210                 token->type = TOKEN_KEYWORD;
 211             }
 212         }
 213         else if( isdigit(text[pos]) || (text[pos] == '.' && isdigit(text[pos+1])))
 214         {
 215             int pos1 = pos;
 216             token->type = TOKEN_NUMBER;
 217             pos++; while( isalnum( text[pos])) pos++;
 218             if( (text[pos] == '+' || text[pos] == '-') && text[pos-1] == 'e')
 219             {
 220                 while( isdigit(text[pos1]) || text[pos1] == '.') pos1++;
 221                 if( pos1 == pos - 1 )
 222                 {
 223                     pos++;
 224                     while( isdigit(text[pos])) pos++;
 225                 }
 226             }
 227         }
 228         else
 229         {
 230             pos++;
 231         }
 232     }
 233
 234     lexer->pos = pos;
 235 }