Added CONFIG_CLEAR and CONFIG_RESET to config.maemo
[busybox4maemo] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Size limits */
17 #define MAXVARFMT       240     /* buffer size getvar_s() passes to fmt_num() for g_buf */
18 #define MINNVBLOCK      64      /* minimum number of vars in a block created by nvalloc() */
19
20 /* variable flags (bits stored in var.type) */
21 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
22 #define VF_ARRAY        0x0002  /* 1 = it's an array */
23
24 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
25 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
27 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
31
32 /* these flags are static, don't change them when value is changed:
 * clrvar() keeps only these bits, copyvar() never copies them from the source */
33 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35 /* Variable: a value kept as a number and/or a string, plus VF_* type flags */
36 typedef struct var_s {
37         unsigned type;            /* VF_* flags */
38         double number;            /* numeric value (primary if VF_NUMBER, else cached by getvar_i) */
39         char *string;             /* string value or NULL; freed by clrvar() unless VF_FSTR is set */
40         union {
41                 int aidx;               /* func arg idx (for compilation stage) */
42                 struct xhash_s *array;  /* array ptr */
43                 struct var_s *parent;   /* for func args, ptr to actual parameter */
44                 char **walker;          /* list of array elements (for..in) */
45         } x;
46 } var;
47
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
50         struct node_s *first;           /* first node of the chain */
51         struct node_s *last;            /* last node of the chain */
52         const char *programname;        /* NOTE(review): presumably the source name the chain was parsed from - confirm */
53 } chain;
54
55 /* Function (stored in the functions hash, see hash_item.data.f) */
56 typedef struct func_s {
57         unsigned nargs;         /* number of arguments */
58         struct chain_s body;    /* node chain of the function body */
59 } func;
60
61 /* I/O stream (stored in the redirect streams hash, see hash_item.data.rs) */
62 typedef struct rstream_s {
63         FILE *F;                /* underlying stdio stream */
        /* NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping;
         * their exact meaning is defined by code outside this excerpt - confirm */
64         char *buffer;
65         int adv;
66         int size;
67         int pos;
68         smallint is_pipe;       /* NOTE(review): presumably set for pipe streams - confirm */
69 } rstream;
70
/* One entry of an xhash bucket chain.
 * 'data' must remain the first member: hash_search()/hash_find() hand out
 * &item->data, and hash_find() treats hash_search()'s result as the item itself.
 */
71 typedef struct hash_item_s {
72         union {
73                 struct var_s v;         /* variable/array hash */
74                 struct rstream_s rs;    /* redirect streams hash */
75                 struct func_s f;        /* functions hash */
76         } data;
77         struct hash_item_s *next;       /* next in chain */
78         char name[1];                   /* really it's longer: hash_find() allocates room for the whole key */
79 } hash_item;
80
/* Chained hash table keyed by NUL-terminated names */
81 typedef struct xhash_s {
82         unsigned nel;           /* num of elements */
83         unsigned csize;         /* current hash size (number of buckets) */
84         unsigned nprime;        /* index of the next hash size in PRIMES[] */
85         unsigned glen;          /* total length of all item names, each counted with its NUL */
86         struct hash_item_s **items;     /* bucket array, csize entries */
87 } xhash;
88
89 /* Tree node of the parsed program */
90 typedef struct node_s {
91         uint32_t info;          /* NOTE(review): presumably an info word as in tokeninfo[] - confirm */
92         unsigned lineno;        /* source line number */
        /* operand slots; which union member is valid depends on the node -
         * the selection logic is outside this excerpt */
93         union {
94                 struct node_s *n;
95                 var *v;
96                 int i;
97                 char *s;
98                 regex_t *re;
99         } l;
100         union {
101                 struct node_s *n;
102                 regex_t *ire;
103                 func *f;
104                 int argno;
105         } r;
106         union {
107                 struct node_s *n;
108         } a;
109 } node;
110
111 /* Block of temporary variables; blocks form a doubly linked list (see nvalloc/nvfree) */
112 typedef struct nvblock_s {
113         int size;                       /* capacity of nv[], in vars */
114         var *pos;                       /* first unused var in nv[] */
115         struct nvblock_s *prev;         /* previous block, NULL for the first one */
116         struct nvblock_s *next;         /* next block, NULL for the last one */
117         var nv[0];                      /* storage; allocated together with the header */
118 } nvblock;
119
/* Splitter: a node bundled with storage for two compiled regexes.
 * NOTE(review): re[0]/re[1] presumably back node.l.re / node.r.ire - confirm.
 */
120 typedef struct tsplitter_s {
121         node n;
122         regex_t re[2];
123 } tsplitter;
124
125 /* simple token classes */
126 /* Order and hex values are very important!!!  See next_token() */
/* Bits 0..23 (TC_SEQSTART..TC_END) follow the order of the groups in
 * tokenlist[], where every NTC marker switches to the next class (tc<<1). */
127 #define TC_SEQSTART      1                              /* ( */
128 #define TC_SEQTERM      (1 << 1)                /* ) */
129 #define TC_REGEXP       (1 << 2)                /* /.../ */
130 #define TC_OUTRDR       (1 << 3)                /* | > >> */
131 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
132 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
133 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
134 #define TC_IN           (1 << 7)
135 #define TC_COMMA        (1 << 8)
136 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
137 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
138 #define TC_ARRTERM      (1 << 11)               /* ] */
139 #define TC_GRPSTART     (1 << 12)               /* { */
140 #define TC_GRPTERM      (1 << 13)               /* } */
141 #define TC_SEMICOL      (1 << 14)
142 #define TC_NEWLINE      (1 << 15)
143 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
144 #define TC_WHILE        (1 << 17)
145 #define TC_ELSE         (1 << 18)
146 #define TC_BUILTIN      (1 << 19)
147 #define TC_GETLINE      (1 << 20)
148 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
149 #define TC_BEGIN        (1 << 22)
150 #define TC_END          (1 << 23)
/* the classes below have no group in tokenlist[] */
151 #define TC_EOF          (1 << 24)
152 #define TC_VARIABLE     (1 << 25)
153 #define TC_ARRAY        (1 << 26)
154 #define TC_FUNCTION     (1 << 27)
155 #define TC_STRING       (1 << 28)
156 #define TC_NUMBER       (1 << 29)
157
/* any unary prefix operator */
158 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
159
160 /* combined token classes (bitwise unions of the simple classes) */
161 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
165
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
/* operation terminators: ';' or newline */
167 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
168
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
172
173 /* discard newlines after these */
174 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175                    | TC_BINOP | TC_OPTERM)
176
177 /* what can expression begin with */
178 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
181
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operand flags OR-ed into the info words of tokeninfo[].
 * NOTE(review): the 1/2 suffix presumably selects the left/right operand and
 * RES/STR/NUM the way it is pre-evaluated - confirm against evaluate().
 */
188 #define OF_RES1    0x010000
189 #define OF_RES2    0x020000
190 #define OF_STR1    0x040000
191 #define OF_STR2    0x080000
192 #define OF_NUM1    0x100000
193 #define OF_CHECKED 0x200000
194
195 /* combined operator flags: first letter describes operand 1, second operand 2
 * (x = no flag, V = RES, S = RES|STR, N = RES|NUM) */
196 #define xx      0
197 #define xV      OF_RES2
198 #define xS      (OF_RES2 | OF_STR2)
199 #define Vx      OF_RES1
200 #define VV      (OF_RES1 | OF_RES2)
201 #define Nx      (OF_RES1 | OF_NUM1)
202 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx      (OF_RES1 | OF_STR1)
204 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
206
/* OPCLSMASK covers the OC_xx / ST_xx operation class bits of an info word;
 * OPNMASK covers the low 7 bits, where tokeninfo[] stores a character such
 * as '+' or an F_xx / B_xx index */
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK   0x007F
209
/* Operator priority is the highest byte (even: r->l, odd: l->r grouping).
 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string.
 *
 * The argument is parenthesized and shifted as uint32_t: several builtin
 * entries pass values >= 0x80 (e.g. P(0x83)), and left-shifting those into
 * bit 31 of a signed int would be undefined behavior.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
217
218 /* Operation classes (the OPCLSMASK bits of an info word) */
219
/* Range markers inside the enum below: SHIFT_TIL_THIS equals OC_WALKINIT and
 * RECUR_FROM_THIS equals OC_BINARY.
 * NOTE(review): how the evaluator uses these boundaries is not visible here. */
220 #define SHIFT_TIL_THIS  0x0600
221 #define RECUR_FROM_THIS 0x1000
222
223 enum {
224         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
225         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
226
227         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
228         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
229         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
230
231         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
232         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
233         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
234         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
235         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
236         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
237         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
238         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
239         OC_DONE = 0x2800,
240
        /* statement keywords (if/do/for/while) as they appear in tokeninfo[] */
241         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
242         ST_WHILE = 0x3300
243 };
244
245 /* simple builtins: indices combined with OC_FBLTIN in tokeninfo[]
 * (int, rand, cos, exp, log, sin, sqrt, srand, systime, length, system,
 * fflush, close) */
246 enum {
247         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
248         F_ti,   F_le,   F_sy,   F_ff,   F_cl
249 };
250
251 /* builtins: indices combined with OC_BUILTIN (OC_B) in tokeninfo[]
 * (atan2, index, match, split, substr, strftime, tolower, toupper,
 * gensub, gsub, sub, and, compl, lshift, or, rshift, xor) */
252 enum {
253         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
254         B_ge,   B_gs,   B_su,
255         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
256 };
257
258 /* tokens and their corresponding info values */
259
260 #define NTC     "\377"  /* switch to next token class (tc<<1) */
261 #define NTCC    '\377'
262
263 #define OC_B    OC_BUILTIN
264
/* Token spellings grouped by class, in TC_SEQSTART..TC_END bit order.
 * Each token is preceded by one byte holding its length, written in octal
 * ("\10" is 8, as in "continue"); NTC ends a class.
 * The N-th token here pairs with the N-th entry of tokeninfo[], so the two
 * tables must be edited together.
 */
265 static const char tokenlist[] ALIGN1 =
266         "\1("       NTC
267         "\1)"       NTC
268         "\1/"       NTC                                 /* REGEXP */
269         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
270         "\2++"      "\2--"      NTC                     /* UOPPOST */
271         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
272         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
273         "\2*="      "\2/="      "\2%="      "\2^="
274         "\1+"       "\1-"       "\3**="     "\2**"
275         "\1/"       "\1%"       "\1^"       "\1*"
276         "\2!="      "\2>="      "\2<="      "\1>"
277         "\1<"       "\2!~"      "\1~"       "\2&&"
278         "\2||"      "\1?"       "\1:"       NTC
279         "\2in"      NTC
280         "\1,"       NTC
281         "\1|"       NTC
282         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
283         "\1]"       NTC
284         "\1{"       NTC
285         "\1}"       NTC
286         "\1;"       NTC
287         "\1\n"      NTC
288         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
289         "\10continue"           "\6delete"  "\5print"
290         "\6printf"  "\4next"    "\10nextfile"
291         "\6return"  "\4exit"    NTC
292         "\5while"   NTC
293         "\4else"    NTC
294
295         "\3and"     "\5compl"   "\6lshift"  "\2or"
296         "\6rshift"  "\3xor"
297         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
298         "\3cos"     "\3exp"     "\3int"     "\3log"
299         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
300         "\6gensub"  "\4gsub"    "\5index"   "\6length"
301         "\5match"   "\5split"   "\7sprintf" "\3sub"
302         "\6substr"  "\7systime" "\10strftime"
303         "\7tolower" "\7toupper" NTC
304         "\7getline" NTC
305         "\4func"    "\10function"   NTC
306         "\5BEGIN"   NTC
307         "\3END"     "\0"
308         ;
309
/* Info word for every token of tokenlist[], in the same order (one entry per
 * token; class separators have no entry). A word combines an OC_xx / ST_xx
 * operation class, operand flags (xx, xV, ... SS), a P() byte and a low
 * value such as an operator character or an F_xx / B_xx index.
 * Rows indented deeper are continuations of the same token group.
 */
310 static const uint32_t tokeninfo[] = {
311         0,
312         0,
313         OC_REGEXP,
314         xS|'a',     xS|'w',     xS|'|',
315         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
316         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
317             OC_FIELD|xV|P(5),
318         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
319             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
320         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
321             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
322         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
323             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
324         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
325             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
326         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
327             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
328         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
329             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
330         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
331             OC_COLON|xx|P(67)|':',
332         OC_IN|SV|P(49),
333         OC_COMMA|SS|P(80),
334         OC_PGETLINE|SV|P(37),
335         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
336             OC_UNARY|xV|P(19)|'!',
337         0,
338         0,
339         0,
340         0,
341         0,
342         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
343         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
344         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
345         OC_RETURN|Vx,   OC_EXIT|Nx,
346         ST_WHILE,
347         0,
348
        /* builtins: for OC_B entries P() carries the argument descriptor byte */
349         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
356         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
357         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358         OC_GETLINE|SV|P(0),
359         0,      0,
360         0,
361         0
362 };
363
364 /* internal variable names and their initial values       */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum gives the index of each variable in intvar[]; its order matches
 * the order of the names in vNames[]. */
366 enum {
367         CONVFMT,    OFMT,       FS,         OFS,
368         ORS,        RS,         RT,         FILENAME,
369         SUBSEP,     ARGIND,     ARGC,       ARGV,
370         ERRNO,      FNR,
371         NR,         NF,         IGNORECASE,
372         ENVIRON,    F0,         NUM_INTERNAL_VARS
373 };
374
/* NUL-separated names; a '*' following a name's NUL is the SPECIAL marker */
375 static const char vNames[] ALIGN1 =
376         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
377         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
378         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
379         "ERRNO\0"   "FNR\0"
380         "NR\0"      "NF\0*"     "IGNORECASE\0*"
381         "ENVIRON\0" "$\0*"      "\0";
382
/* NUL-separated initial string values: nine entries, ended by "\377".
 * NOTE(review): presumably they pair positionally with CONVFMT..SUBSEP and
 * the remaining variables get no string value - confirm in the init code. */
383 static const char vValues[] ALIGN1 =
384         "%.6g\0"    "%.6g\0"    " \0"       " \0"
385         "\n\0"      "\n\0"      "\0"        "\0"
386         "\034\0"
387         "\377";
388
389 /* hash size may grow to these values: hash_init() starts with FIRST_PRIME
 * buckets and every hash_rebuild() moves to the next entry of PRIMES[] */
390 #define FIRST_PRIME 61
391 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
392
393
394 /* Globals. Split in two parts so that first one is addressed
395  * with (mostly short) negative offsets */
/* This first part sits immediately before ptr_to_globals (see G1 / INIT_G) */
396 struct globals {
397         chain beginseq, mainseq, endseq;
398         chain *seq;
399         node *break_ptr, *continue_ptr;
400         rstream *iF;
        /* vhash: variables (newvar), fdhash: streams (newfile), fnhash: functions (newfunc) */
401         xhash *vhash, *ahash, *fdhash, *fnhash;
402         const char *g_progname; /* printed by syntax_error() */
403         int g_lineno;           /* printed by syntax_error() */
404         int nfields;
405         int maxfields; /* used in fsrealloc() only */
406         var *Fields;
407         nvblock *g_cb;          /* current block of the temporary-variable allocator */
408         char *g_pos;            /* position in the program text where next_token() resumes */
409         char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
410         smallint icase;
411         smallint exiting;
412         smallint nextrec;
413         smallint nextfile;
414         smallint is_f0_split;
415 };
/* Second part of the globals; ptr_to_globals points at its start (see G) */
416 struct globals2 {
        /* state of the current token, filled in by next_token() */
417         uint32_t t_info; /* often used */
418         uint32_t t_tclass;
419         char *t_string;
420         int t_lineno;
421         int t_rollback;
422
        /* internal variables, indexed by CONVFMT ... F0 */
423         var *intvar[NUM_INTERNAL_VARS]; /* often used */
424
425         /* former statics from various functions (named function__variable) */
426         char *split_f0__fstrings;
427
428         uint32_t next_token__save_tclass;
429         uint32_t next_token__save_info;
430         uint32_t next_token__ltclass;   /* INIT_G() sets this to TC_OPTERM */
431         smallint next_token__concat_inserted;
432
433         smallint next_input_file__files_happen;
434         rstream next_input_file__rsm;
435
436         var *evaluate__fnargs;
437         unsigned evaluate__seed;        /* INIT_G() sets this to 1 */
438         regex_t evaluate__sreg;
439
440         var ptest__v;
441
442         tsplitter exec_builtin__tspl;
443
444         /* biggest and least used members go last */
445         double t_double;
446         tsplitter fsplitter, rsplitter;
447 };
/* G1 is the struct globals placed right before ptr_to_globals;
 * G is the struct globals2 that ptr_to_globals points to. */
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /* char G1size[sizeof(G1)]; - 0x6c */
452 /* char Gsize[sizeof(G)]; - 0x1cc */
453 /* Trying to keep most of members accessible with short offsets: */
454 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthand names, so the rest of the file can use the globals like plain variables */
455 #define beginseq     (G1.beginseq    )
456 #define mainseq      (G1.mainseq     )
457 #define endseq       (G1.endseq      )
458 #define seq          (G1.seq         )
459 #define break_ptr    (G1.break_ptr   )
460 #define continue_ptr (G1.continue_ptr)
461 #define iF           (G1.iF          )
462 #define vhash        (G1.vhash       )
463 #define ahash        (G1.ahash       )
464 #define fdhash       (G1.fdhash      )
465 #define fnhash       (G1.fnhash      )
466 #define g_progname   (G1.g_progname  )
467 #define g_lineno     (G1.g_lineno    )
468 #define nfields      (G1.nfields     )
469 #define maxfields    (G1.maxfields   )
470 #define Fields       (G1.Fields      )
471 #define g_cb         (G1.g_cb        )
472 #define g_pos        (G1.g_pos       )
473 #define g_buf        (G1.g_buf       )
474 #define icase        (G1.icase       )
475 #define exiting      (G1.exiting     )
476 #define nextrec      (G1.nextrec     )
477 #define nextfile     (G1.nextfile    )
478 #define is_f0_split  (G1.is_f0_split )
479 #define t_info       (G.t_info      )
480 #define t_tclass     (G.t_tclass    )
481 #define t_string     (G.t_string    )
482 #define t_double     (G.t_double    )
483 #define t_lineno     (G.t_lineno    )
484 #define t_rollback   (G.t_rollback  )
485 #define intvar       (G.intvar      )
486 #define fsplitter    (G.fsplitter   )
487 #define rsplitter    (G.rsplitter   )
/* Allocate both global structs as one zeroed chunk, point ptr_to_globals
 * just past the first one, then set the two members whose initial value
 * is not zero. */
488 #define INIT_G() do { \
489         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
490         G.next_token__ltclass = TC_OPTERM; \
491         G.evaluate__seed = 1; \
492 } while (0)
493
494
495 /* function prototypes: forward declarations for functions that are called
 * in this file before they are defined */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) ATTRIBUTE_NORETURN;
503
504 /* ---- error handling ---- */
505
/* Message texts for syntax_error(), kept in named arrays */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only defined when math support is compiled out */
515 #if !ENABLE_FEATURE_AWK_MATH
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517 #endif
518
519 static void zero_out_var(var * vp)
520 {
521         memset(vp, 0, sizeof(*vp));
522 }
523
524 static void syntax_error(const char *const message) ATTRIBUTE_NORETURN;
525 static void syntax_error(const char *const message)
526 {
527         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 }
529
530 /* ---- hash stuff ---- */
531
/* Hash a NUL-terminated name: for every character c, h = c + h*63
 * (written as c + (h << 6) - h), in unsigned arithmetic.
 * Returns 0 for the empty string. */
static unsigned hashidx(const char *name)
{
        unsigned h;

        for (h = 0; *name != '\0'; name++)
                h = *name + (h << 6) - h;

        return h;
}
539
540 /* create new hash */
541 static xhash *hash_init(void)
542 {
543         xhash *newhash;
544
545         newhash = xzalloc(sizeof(xhash));
546         newhash->csize = FIRST_PRIME;
547         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
548
549         return newhash;
550 }
551
552 /* find item in hash, return ptr to data, NULL if not found */
553 static void *hash_search(xhash *hash, const char *name)
554 {
555         hash_item *hi;
556
557         hi = hash->items [ hashidx(name) % hash->csize ];
558         while (hi) {
559                 if (strcmp(hi->name, name) == 0)
560                         return &(hi->data);
561                 hi = hi->next;
562         }
563         return NULL;
564 }
565
566 /* grow hash if it becomes too big */
567 static void hash_rebuild(xhash *hash)
568 {
569         unsigned newsize, i, idx;
570         hash_item **newitems, *hi, *thi;
571
572         if (hash->nprime == ARRAY_SIZE(PRIMES))
573                 return;
574
575         newsize = PRIMES[hash->nprime++];
576         newitems = xzalloc(newsize * sizeof(hash_item *));
577
578         for (i = 0; i < hash->csize; i++) {
579                 hi = hash->items[i];
580                 while (hi) {
581                         thi = hi;
582                         hi = thi->next;
583                         idx = hashidx(thi->name) % newsize;
584                         thi->next = newitems[idx];
585                         newitems[idx] = thi;
586                 }
587         }
588
589         free(hash->items);
590         hash->csize = newsize;
591         hash->items = newitems;
592 }
593
594 /* find item in hash, add it if necessary. Return ptr to data */
595 static void *hash_find(xhash *hash, const char *name)
596 {
597         hash_item *hi;
598         unsigned idx;
599         int l;
600
601         hi = hash_search(hash, name);
602         if (!hi) {
603                 if (++hash->nel / hash->csize > 10)
604                         hash_rebuild(hash);
605
606                 l = strlen(name) + 1;
607                 hi = xzalloc(sizeof(hash_item) + l);
608                 memcpy(hi->name, name, l);
609
610                 idx = hashidx(name) % hash->csize;
611                 hi->next = hash->items[idx];
612                 hash->items[idx] = hi;
613                 hash->glen += l;
614         }
615         return &(hi->data);
616 }
617
/* Typed wrappers around hash_find(): return the entry for NAME, creating it
 * when missing. newvar/newfile/newfunc use the global vhash/fdhash/fnhash. */
618 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
619 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
620 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
622
623 static void hash_remove(xhash *hash, const char *name)
624 {
625         hash_item *hi, **phi;
626
627         phi = &(hash->items[hashidx(name) % hash->csize]);
628         while (*phi) {
629                 hi = *phi;
630                 if (strcmp(hi->name, name) == 0) {
631                         hash->glen -= (strlen(name) + 1);
632                         hash->nel--;
633                         *phi = hi->next;
634                         free(hi);
635                         break;
636                 }
637                 phi = &(hi->next);
638         }
639 }
640
641 /* ------ some useful functions ------ */
642
643 static void skip_spaces(char **s)
644 {
645         char *p = *s;
646
647         while (1) {
648                 if (*p == '\\' && p[1] == '\n') {
649                         p++;
650                         t_lineno++;
651                 } else if (*p != ' ' && *p != '\t') {
652                         break;
653                 }
654                 p++;
655         }
656         *s = p;
657 }
658
/* Return the NUL-terminated word at *s and move *s just past its NUL */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
667
/* Fetch the next character from *s and advance *s.
 * After a backslash, bb_process_escape_sequence() is given the rest; if it
 * hands back a backslash without moving *s, the character following the
 * backslash is returned as is. */
static char nextchar(char **s)
{
        char *after;
        char c = **s;

        (*s)++;
        if (c != '\\')
                return c;

        after = *s;
        c = bb_process_escape_sequence((const char**)s);
        if (c == '\\' && *s == after) {
                c = **s;
                (*s)++;
        }
        return c;
}
678
679 static int ALWAYS_INLINE isalnum_(int c)
680 {
681         return (isalnum(c) || c == '_');
682 }
683
/* Open PATH with MODE through xfopen(); the path "-" yields stdin instead */
static FILE *afopen(const char *path, const char *mode)
{
        if (path[0] == '-' && path[1] == '\0')
                return stdin;

        return xfopen(path, mode);
}
688
689 /* -------- working with variables (set/get/copy/etc) -------- */
690
691 static xhash *iamarray(var *v)
692 {
693         var *a = v;
694
695         while (a->type & VF_CHILD)
696                 a = a->x.parent;
697
698         if (!(a->type & VF_ARRAY)) {
699                 a->type |= VF_ARRAY;
700                 a->x.array = hash_init();
701         }
702         return a->x.array;
703 }
704
705 static void clear_array(xhash *array)
706 {
707         unsigned i;
708         hash_item *hi, *thi;
709
710         for (i = 0; i < array->csize; i++) {
711                 hi = array->items[i];
712                 while (hi) {
713                         thi = hi;
714                         hi = hi->next;
715                         free(thi->data.v.string);
716                         free(thi);
717                 }
718                 array->items[i] = NULL;
719         }
720         array->glen = array->nel = 0;
721 }
722
723 /* clear a variable */
724 static var *clrvar(var *v)
725 {
726         if (!(v->type & VF_FSTR))
727                 free(v->string);
728
729         v->type &= VF_DONTTOUCH;
730         v->type |= VF_DIRTY;
731         v->string = NULL;
732         return v;
733 }
734
735 /* assign string value to variable */
736 static var *setvar_p(var *v, char *value)
737 {
738         clrvar(v);
739         v->string = value;
740         handle_special(v);
741         return v;
742 }
743
744 /* same as setvar_p but make a copy of string */
745 static var *setvar_s(var *v, const char *value)
746 {
747         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
748 }
749
750 /* same as setvar_s but set USER flag */
751 static var *setvar_u(var *v, const char *value)
752 {
753         setvar_s(v, value);
754         v->type |= VF_USER;
755         return v;
756 }
757
758 /* set array element to user string */
759 static void setari_u(var *a, int idx, const char *s)
760 {
761         char sidx[sizeof(int)*3 + 1];
762         var *v;
763
764         sprintf(sidx, "%d", idx);
765         v = findvar(iamarray(a), sidx);
766         setvar_u(v, s);
767 }
768
769 /* assign numeric value to variable */
770 static var *setvar_i(var *v, double value)
771 {
772         clrvar(v);
773         v->type |= VF_NUMBER;
774         v->number = value;
775         handle_special(v);
776         return v;
777 }
778
779 static const char *getvar_s(var *v)
780 {
781         /* if v is numeric and has no cached string, convert it to string */
782         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
783                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
784                 v->string = xstrdup(g_buf);
785                 v->type |= VF_CACHED;
786         }
787         return (v->string == NULL) ? "" : v->string;
788 }
789
790 static double getvar_i(var *v)
791 {
792         char *s;
793
794         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
795                 v->number = 0;
796                 s = v->string;
797                 if (s && *s) {
798                         v->number = strtod(s, &s);
799                         if (v->type & VF_USER) {
800                                 skip_spaces(&s);
801                                 if (*s != '\0')
802                                         v->type &= ~VF_USER;
803                         }
804                 } else {
805                         v->type &= ~VF_USER;
806                 }
807                 v->type |= VF_CACHED;
808         }
809         return v->number;
810 }
811
812 static var *copyvar(var *dest, const var *src)
813 {
814         if (dest != src) {
815                 clrvar(dest);
816                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
817                 dest->number = src->number;
818                 if (src->string)
819                         dest->string = xstrdup(src->string);
820         }
821         handle_special(dest);
822         return dest;
823 }
824
825 static var *incvar(var *v)
826 {
827         return setvar_i(v, getvar_i(v) + 1.);
828 }
829
830 /* return true if v is number or numeric string */
831 static int is_numeric(var *v)
832 {
833         getvar_i(v);
834         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
835 }
836
837 /* return 1 when value of v corresponds to true, 0 otherwise */
838 static int istrue(var *v)
839 {
840         if (is_numeric(v))
841                 return (v->number == 0) ? 0 : 1;
842         return (v->string && *(v->string)) ? 1 : 0;
843 }
844
845 /* temporary variables allocator. Last allocated should be first freed */
846 static var *nvalloc(int n)
847 {
848         nvblock *pb = NULL;
849         var *v, *r;
850         int size;
851
852         while (g_cb) {
853                 pb = g_cb;
854                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
855                 g_cb = g_cb->next;
856         }
857
858         if (!g_cb) {
859                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
860                 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
861                 g_cb->size = size;
862                 g_cb->pos = g_cb->nv;
863                 g_cb->prev = pb;
864                 g_cb->next = NULL;
865                 if (pb) pb->next = g_cb;
866         }
867
868         v = r = g_cb->pos;
869         g_cb->pos += n;
870
871         while (v < g_cb->pos) {
872                 v->type = 0;
873                 v->string = NULL;
874                 v++;
875         }
876
877         return r;
878 }
879
880 static void nvfree(var *v)
881 {
882         var *p;
883
884         if (v < g_cb->nv || v >= g_cb->pos)
885                 syntax_error(EMSG_INTERNAL_ERROR);
886
887         for (p = v; p < g_cb->pos; p++) {
888                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
889                         clear_array(iamarray(p));
890                         free(p->x.array->items);
891                         free(p->x.array);
892                 }
893                 if (p->type & VF_WALK)
894                         free(p->x.walker);
895
896                 clrvar(p);
897         }
898
899         g_cb->pos = v;
900         while (g_cb->prev && g_cb->pos == g_cb->nv) {
901                 g_cb = g_cb->prev;
902         }
903 }
904
905 /* ------- awk program text parsing ------- */
906
907 /* Parse the next token at global g_pos; place results into the t_* globals.
908  * If the token isn't expected, give up. Return the token class.
909  */
910 static uint32_t next_token(uint32_t expected)
911 {
912 #define concat_inserted (G.next_token__concat_inserted)
913 #define save_tclass     (G.next_token__save_tclass)
914 #define save_info       (G.next_token__save_info)
915 /* Initialized to TC_OPTERM: */
916 #define ltclass         (G.next_token__ltclass)
917
918         char *p, *pp, *s;
919         const char *tl;
920         uint32_t tc;
921         const uint32_t *ti;
922         int l;
923
924         if (t_rollback) {
925                 t_rollback = FALSE;
926
927         } else if (concat_inserted) {
928                 concat_inserted = FALSE;
929                 t_tclass = save_tclass;
930                 t_info = save_info;
931
932         } else {
933                 p = g_pos;
934  readnext:
935                 skip_spaces(&p);
936                 g_lineno = t_lineno;
937                 if (*p == '#')
938                         while (*p != '\n' && *p != '\0')
939                                 p++;
940
941                 if (*p == '\n')
942                         t_lineno++;
943
944                 if (*p == '\0') {
945                         tc = TC_EOF;
946
947                 } else if (*p == '\"') {
948                         /* it's a string */
949                         t_string = s = ++p;
950                         while (*p != '\"') {
951                                 if (*p == '\0' || *p == '\n')
952                                         syntax_error(EMSG_UNEXP_EOS);
953                                 *(s++) = nextchar(&p);
954                         }
955                         p++;
956                         *s = '\0';
957                         tc = TC_STRING;
958
959                 } else if ((expected & TC_REGEXP) && *p == '/') {
960                         /* it's regexp */
961                         t_string = s = ++p;
962                         while (*p != '/') {
963                                 if (*p == '\0' || *p == '\n')
964                                         syntax_error(EMSG_UNEXP_EOS);
965                                 *s = *p++;
966                                 if (*s++ == '\\') {
967                                         pp = p;
968                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
969                                         if (*pp == '\\')
970                                                 *s++ = '\\';
971                                         if (p == pp)
972                                                 *s++ = *p++;
973                                 }
974                         }
975                         p++;
976                         *s = '\0';
977                         tc = TC_REGEXP;
978
979                 } else if (*p == '.' || isdigit(*p)) {
980                         /* it's a number */
981                         t_double = strtod(p, &p);
982                         if (*p == '.')
983                                 syntax_error(EMSG_UNEXP_TOKEN);
984                         tc = TC_NUMBER;
985
986                 } else {
987                         /* search for something known */
988                         tl = tokenlist;
989                         tc = 0x00000001;
990                         ti = tokeninfo;
991                         while (*tl) {
992                                 l = *(tl++);
993                                 if (l == NTCC) {
994                                         tc <<= 1;
995                                         continue;
996                                 }
997                                 /* if token class is expected, token
998                                  * matches and it's not a longer word,
999                                  * then this is what we are looking for
1000                                  */
1001                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1002                                  && *tl == *p && strncmp(p, tl, l) == 0
1003                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1004                                 ) {
1005                                         t_info = *ti;
1006                                         p += l;
1007                                         break;
1008                                 }
1009                                 ti++;
1010                                 tl += l;
1011                         }
1012
1013                         if (!*tl) {
1014                                 /* it's a name (var/array/function),
1015                                  * otherwise it's something wrong
1016                                  */
1017                                 if (!isalnum_(*p))
1018                                         syntax_error(EMSG_UNEXP_TOKEN);
1019
1020                                 t_string = --p;
1021                                 while (isalnum_(*(++p))) {
1022                                         *(p-1) = *p;
1023                                 }
1024                                 *(p-1) = '\0';
1025                                 tc = TC_VARIABLE;
1026                                 /* also consume whitespace between functionname and bracket */
1027                                 if (!(expected & TC_VARIABLE))
1028                                         skip_spaces(&p);
1029                                 if (*p == '(') {
1030                                         tc = TC_FUNCTION;
1031                                 } else {
1032                                         if (*p == '[') {
1033                                                 p++;
1034                                                 tc = TC_ARRAY;
1035                                         }
1036                                 }
1037                         }
1038                 }
1039                 g_pos = p;
1040
1041                 /* skipping newlines in some cases */
1042                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1043                         goto readnext;
1044
1045                 /* insert concatenation operator when needed */
1046                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1047                         concat_inserted = TRUE;
1048                         save_tclass = tc;
1049                         save_info = t_info;
1050                         tc = TC_BINOP;
1051                         t_info = OC_CONCAT | SS | P(35);
1052                 }
1053
1054                 t_tclass = tc;
1055         }
1056         ltclass = t_tclass;
1057
1058         /* Are we ready for this? */
1059         if (!(ltclass & expected))
1060                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1061                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1062
1063         return ltclass;
1064 #undef concat_inserted
1065 #undef save_tclass
1066 #undef save_info
1067 #undef ltclass
1068 }
1069
1070 static void rollback_token(void)
1071 {
1072         t_rollback = TRUE;
1073 }
1074
1075 static node *new_node(uint32_t info)
1076 {
1077         node *n;
1078
1079         n = xzalloc(sizeof(node));
1080         n->info = info;
1081         n->lineno = g_lineno;
1082         return n;
1083 }
1084
1085 static node *mk_re_node(const char *s, node *n, regex_t *re)
1086 {
1087         n->info = OC_REGEXP;
1088         n->l.re = re;
1089         n->r.ire = re + 1;
1090         xregcomp(re, s, REG_EXTENDED);
1091         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1092
1093         return n;
1094 }
1095
1096 static node *condition(void)
1097 {
1098         next_token(TC_SEQSTART);
1099         return parse_expr(TC_SEQTERM);
1100 }
1101
1102 /* parse expression terminated by given argument, return ptr
1103  * to built subtree. Terminator is eaten by parse_expr */
/* iexp is a bitmask of token classes that end the expression.
 * Working state:
 *   sn    - dummy root living on the stack; the finished tree hangs off sn.r.n
 *   cn    - node most recently added
 *   xtc   - token classes acceptable for the next token
 *   glptr - last getline node seen, so that a following '<' can be attached
 *           to it as input redirection
 * Each node's a.n field is used as a link back towards the root while the
 * tree is being built.
 */
1104 static node *parse_expr(uint32_t iexp)
1105 {
1106         node sn;
1107         node *cn = &sn;
1108         node *vn, *glptr;
1109         uint32_t tc, xtc;
1110         var *v;
1111
1112         sn.info = PRIMASK;
1113         sn.r.n = glptr = NULL;
1114         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1115
             /* read tokens until one of the terminators in iexp shows up */
1116         while (!((tc = next_token(xtc)) & iexp)) {
1117                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1118                         /* input redirection (<) attached to glptr node */
1119                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1120                         cn->a.n = glptr;
1121                         xtc = TC_OPERAND | TC_UOPPRE;
1122                         glptr = NULL;
1123
1124                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1125                         /* for binary and postfix-unary operators, jump back over
1126                          * previous operators with higher priority */
1127                         vn = cn;
1128                         while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1129                          || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1130                                 vn = vn->a.n;
1131                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1132                                 t_info += P(6);
                             /* splice the new operator node in between vn and vn's parent */
1133                         cn = vn->a.n->r.n = new_node(t_info);
1134                         cn->a.n = vn->a.n;
1135                         if (tc & TC_BINOP) {
1136                                 cn->l.n = vn;
1137                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1138                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1139                                         /* it's a pipe */
1140                                         next_token(TC_GETLINE);
1141                                         /* give maximum priority to this pipe */
1142                                         cn->info &= ~PRIMASK;
1143                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1144                                 }
1145                         } else {
                                     /* postfix operator: the operand becomes the right child */
1146                                 cn->r.n = vn;
1147                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1148                         }
1149                         vn->a.n = cn;
1150
1151                 } else {
1152                         /* for operands and prefix-unary operators, attach them
1153                          * to last node */
1154                         vn = cn;
1155                         cn = vn->r.n = new_node(t_info);
1156                         cn->a.n = vn;
1157                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1159                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1160                                 /* one should be very careful with switch on tclass -
1161                                  * only simple tclasses should be used! */
1162                                 switch (tc) {
1163                                 case TC_VARIABLE:
1164                                 case TC_ARRAY:
                                             /* a name found in ahash is a function argument
                                              * (referenced by index), anything else is looked
                                              * up / created through newvar() */
1165                                         cn->info = OC_VAR;
1166                                         v = hash_search(ahash, t_string);
1167                                         if (v != NULL) {
1168                                                 cn->info = OC_FNARG;
1169                                                 cn->l.i = v->x.aidx;
1170                                         } else {
1171                                                 cn->l.v = newvar(t_string);
1172                                         }
1173                                         if (tc & TC_ARRAY) {
                                                     /* subscript expression, up to TC_ARRTERM */
1174                                                 cn->info |= xS;
1175                                                 cn->r.n = parse_expr(TC_ARRTERM);
1176                                         }
1177                                         break;
1178
1179                                 case TC_NUMBER:
1180                                 case TC_STRING:
                                             /* constants get their own freshly allocated var */
1181                                         cn->info = OC_VAR;
1182                                         v = cn->l.v = xzalloc(sizeof(var));
1183                                         if (tc & TC_NUMBER)
1184                                                 setvar_i(v, t_double);
1185                                         else
1186                                                 setvar_s(v, t_string);
1187                                         break;
1188
1189                                 case TC_REGEXP:
                                             /* storage for the regex_t pair used by mk_re_node() */
1190                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1191                                         break;
1192
1193                                 case TC_FUNCTION:
1194                                         cn->info = OC_FUNC;
1195                                         cn->r.f = newfunc(t_string);
1196                                         cn->l.n = condition();
1197                                         break;
1198
1199                                 case TC_SEQSTART:
                                             /* nested sub-expression replaces the node just created */
1200                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1201                                         cn->a.n = vn;
1202                                         break;
1203
1204                                 case TC_GETLINE:
1205                                         glptr = cn;
1206                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1207                                         break;
1208
1209                                 case TC_BUILTIN:
1210                                         cn->l.n = condition();
1211                                         break;
1212                                 }
1213                         }
1214                 }
1215         }
1216         return sn.r.n;
1217 }
1218
1219 /* add node to chain. Return ptr to alloc'd node */
1220 static node *chain_node(uint32_t info)
1221 {
1222         node *n;
1223
1224         if (!seq->first)
1225                 seq->first = seq->last = new_node(0);
1226
1227         if (seq->programname != g_progname) {
1228                 seq->programname = g_progname;
1229                 n = chain_node(OC_NEWSOURCE);
1230                 n->l.s = xstrdup(g_progname);
1231         }
1232
1233         n = seq->last;
1234         n->info = info;
1235         seq->last = n->a.n = new_node(OC_DONE);
1236
1237         return n;
1238 }
1239
1240 static void chain_expr(uint32_t info)
1241 {
1242         node *n;
1243
1244         n = chain_node(info);
1245         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1246         if (t_tclass & TC_GRPTERM)
1247                 rollback_token();
1248 }
1249
1250 static node *chain_loop(node *nn)
1251 {
1252         node *n, *n2, *save_brk, *save_cont;
1253
1254         save_brk = break_ptr;
1255         save_cont = continue_ptr;
1256
1257         n = chain_node(OC_BR | Vx);
1258         continue_ptr = new_node(OC_EXEC);
1259         break_ptr = new_node(OC_EXEC);
1260         chain_group();
1261         n2 = chain_node(OC_EXEC | Vx);
1262         n2->l.n = nn;
1263         n2->a.n = n;
1264         continue_ptr->a.n = n2;
1265         break_ptr->a.n = n->r.n = seq->last;
1266
1267         continue_ptr = save_cont;
1268         break_ptr = save_brk;
1269
1270         return n;
1271 }
1272
1273 /* parse group and attach it to chain */
/* Leading newlines are skipped. What follows is one of:
 *  - a TC_GRPSTART ... TC_GRPTERM group, whose members are parsed recursively;
 *  - an expression statement (TC_OPSEQ / TC_OPTERM), chained as OC_EXEC | Vx;
 *  - a statement keyword, dispatched on (t_info & OPCLSMASK).
 * All nodes are appended to the current chain (seq) through chain_node().
 */
1274 static void chain_group(void)
1275 {
1276         uint32_t c;
1277         node *n, *n2, *n3;
1278
1279         do {
1280                 c = next_token(TC_GRPSEQ);
1281         } while (c & TC_NEWLINE);
1282
1283         if (c & TC_GRPSTART) {
                     /* parse members until the group terminator, ignoring newlines */
1284                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1285                         if (t_tclass & TC_NEWLINE) continue;
1286                         rollback_token();
1287                         chain_group();
1288                 }
1289         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1290                 rollback_token();
1291                 chain_expr(OC_EXEC | Vx);
1292         } else {                                                /* TC_STATEMNT */
1293                 switch (t_info & OPCLSMASK) {
1294                 case ST_IF:
                             /* n: OC_BR node holding the condition; n2: OC_EXEC node
                              * placed right after the 'if' body */
1295                         n = chain_node(OC_BR | Vx);
1296                         n->l.n = condition();
1297                         chain_group();
1298                         n2 = chain_node(OC_EXEC);
1299                         n->r.n = seq->last;
1300                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
                                     /* with an else part, n2 is redirected past the else body */
1301                                 chain_group();
1302                                 n2->a.n = seq->last;
1303                         } else {
1304                                 rollback_token();
1305                         }
1306                         break;
1307
1308                 case ST_WHILE:
                             /* condition is parsed first, then attached to the loop head */
1309                         n2 = condition();
1310                         n = chain_loop(NULL);
1311                         n->l.n = n2;
1312                         break;
1313
1314                 case ST_DO:
                             /* n2 makes the first pass enter the body without going through
                              * the loop head's test; the condition follows the body */
1315                         n2 = chain_node(OC_EXEC);
1316                         n = chain_loop(NULL);
1317                         n2->a.n = n->a.n;
1318                         next_token(TC_WHILE);
1319                         n->l.n = condition();
1320                         break;
1321
1322                 case ST_FOR:
1323                         next_token(TC_SEQSTART);
1324                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1325                         if (t_tclass & TC_SEQTERM) {    /* for-in */
                                     /* the single expression must be an OC_IN node; its operands
                                      * feed OC_WALKINIT, and the loop head becomes OC_WALKNEXT */
1326                                 if ((n2->info & OPCLSMASK) != OC_IN)
1327                                         syntax_error(EMSG_UNEXP_TOKEN);
1328                                 n = chain_node(OC_WALKINIT | VV);
1329                                 n->l.n = n2->l.n;
1330                                 n->r.n = n2->r.n;
1331                                 n = chain_loop(NULL);
1332                                 n->info = OC_WALKNEXT | Vx;
1333                                 n->l.n = n2->l.n;
1334                         } else {                        /* for (;;) */
                                     /* init expression is chained as a statement; n2 is then the
                                      * condition and n3 the step expression passed to chain_loop() */
1335                                 n = chain_node(OC_EXEC | Vx);
1336                                 n->l.n = n2;
1337                                 n2 = parse_expr(TC_SEMICOL);
1338                                 n3 = parse_expr(TC_SEQTERM);
1339                                 n = chain_loop(n3);
1340                                 n->l.n = n2;
                                     /* no condition: the head degrades to a plain OC_EXEC */
1341                                 if (!n2)
1342                                         n->info = OC_EXEC;
1343                         }
1344                         break;
1345
1346                 case OC_PRINT:
1347                 case OC_PRINTF:
1348                         n = chain_node(t_info);
1349                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1350                         if (t_tclass & TC_OUTRDR) {
                                     /* output redirection: merge its info bits and parse the target */
1351                                 n->info |= t_info;
1352                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1353                         }
1354                         if (t_tclass & TC_GRPTERM)
1355                                 rollback_token();
1356                         break;
1357
1358                 case OC_BREAK:
                             /* jump node whose successor is the enclosing loop's break_ptr */
1359                         n = chain_node(OC_EXEC);
1360                         n->a.n = break_ptr;
1361                         break;
1362
1363                 case OC_CONTINUE:
                             /* jump node whose successor is the enclosing loop's continue_ptr */
1364                         n = chain_node(OC_EXEC);
1365                         n->a.n = continue_ptr;
1366                         break;
1367
1368                 /* delete, next, nextfile, return, exit */
1369                 default:
1370                         chain_expr(t_info);
1371                 }
1372         }
1373 }
1374
/* Parse the awk program text starting at p and append the resulting nodes to
 * the begin/end/main chains or to function bodies. Tokens are read from
 * the top level until TC_EOF; the text is consumed through g_pos by
 * next_token(), which rewrites parts of the buffer in place.
 */
1375 static void parse_program(char *p)
1376 {
1377         uint32_t tclass;
1378         node *cn;
1379         func *f;
1380         var *v;
1381
1382         g_pos = p;
1383         t_lineno = 1;
1384         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1385                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1386
                     /* stray terminators between top-level items are ignored */
1387                 if (tclass & TC_OPTERM)
1388                         continue;
1389
1390                 seq = &mainseq;
1391                 if (tclass & TC_BEGIN) {
1392                         seq = &beginseq;
1393                         chain_group();
1394
1395                 } else if (tclass & TC_END) {
1396                         seq = &endseq;
1397                         chain_group();
1398
1399                 } else if (tclass & TC_FUNCDECL) {
                             /* function definition: name, argument list, body */
1400                         next_token(TC_FUNCTION);
                             /* next_token() leaves g_pos on the '(' after a function name;
                              * step over it */
1401                         g_pos++;
1402                         f = newfunc(t_string);
1403                         f->body.first = NULL;
1404                         f->nargs = 0;
                             /* each argument name is entered into ahash with its index */
1405                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1406                                 v = findvar(ahash, t_string);
1407                                 v->x.aidx = (f->nargs)++;
1408
1409                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1410                                         break;
1411                         }
1412                         seq = &(f->body);
1413                         chain_group();
                             /* argument names are only valid inside this function body */
1414                         clear_array(ahash);
1415
1416                 } else if (tclass & TC_OPSEQ) {
                             /* pattern [group]: an OC_TEST node guards the action; a pattern
                              * without a group gets an OC_PRINT node as its action */
1417                         rollback_token();
1418                         cn = chain_node(OC_TEST);
1419                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1420                         if (t_tclass & TC_GRPSTART) {
1421                                 rollback_token();
1422                                 chain_group();
1423                         } else {
1424                                 chain_node(OC_PRINT);
1425                         }
                             /* r.n points at the node following the action */
1426                         cn->r.n = mainseq.last;
1427
1428                 } else /* if (tclass & TC_GRPSTART) */ {
1429                         rollback_token();
1430                         chain_group();
1431                 }
1432         }
1433 }
1434
1435
1436 /* -------- program execution part -------- */
1437
1438 static node *mk_splitter(const char *s, tsplitter *spl)
1439 {
1440         regex_t *re, *ire;
1441         node *n;
1442
1443         re = &spl->re[0];
1444         ire = &spl->re[1];
1445         n = &spl->n;
1446         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1447                 regfree(re);
1448                 regfree(ire); // TODO: nuke ire, use re+1?
1449         }
1450         if (strlen(s) > 1) {
1451                 mk_re_node(s, n, re);
1452         } else {
1453                 n->info = (uint32_t) *s;
1454         }
1455
1456         return n;
1457 }
1458
1459 /* use node as a regular expression. Supplied with node ptr and regex_t
1460  * storage space. Return ptr to regex (if result points to preg, it should
1461  * be later regfree'd manually
1462  */
1463 static regex_t *as_regex(node *op, regex_t *preg)
1464 {
1465         var *v;
1466         const char *s;
1467
1468         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1469                 return icase ? op->r.ire : op->l.re;
1470         }
1471         v = nvalloc(1);
1472         s = getvar_s(evaluate(op, v));
1473         xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1474         nvfree(v);
1475         return preg;
1476 }
1477
/*
 * Gradually increasing buffer.
 * When *b is not allocated yet, or n does not fit below the recorded *size,
 * *b is reallocated to n + (n >> 1) + 80 bytes and *size is updated.
 * Otherwise nothing happens.
 */
static void qrealloc(char **b, int n, int *size)
{
        if (*b != NULL && n < *size)
                return;

        *size = n + (n >> 1) + 80;
        *b = xrealloc(*b, *size);
}
1484
1485 /* resize field storage space */
1486 static void fsrealloc(int size)
1487 {
1488         int i;
1489
1490         if (size >= maxfields) {
1491                 i = maxfields;
1492                 maxfields = size + 16;
1493                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1494                 for (; i < maxfields; i++) {
1495                         Fields[i].type = VF_SPECIAL;
1496                         Fields[i].string = NULL;
1497                 }
1498         }
1499
1500         if (size < nfields) {
1501                 for (i = size; i < nfields; i++) {
1502                         clrvar(Fields + i);
1503                 }
1504         }
1505         nfields = size;
1506 }
1507
/* Split string s into fields according to splitter node spl.
 * *slist receives a newly allocated buffer (xzalloc) into which the fields
 * are written; the return value is the number of fields.
 * Four modes are selected from spl->info:
 *   - OC_REGEXP:           split on matches of the compiled regex
 *   - separator char '\0': every character becomes a field
 *   - any char but ' ':    split on that single character
 *   - ' ':                 split on runs of whitespace
 */
1508 static int awk_split(const char *s, node *spl, char **slist)
1509 {
1510         int l, n = 0;
1511         char c[4];
1512         char *s1;
1513         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1514
1515         /* in worst case, each char would be a separate field */
1516         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1517         strcpy(s1, s);
1518
             /* c[0..1]: separator character (two slots so that the icase variant
              * can hold upper and lower case); c+2: extra delimiter string, which
              * is "\n" when RS is empty and "" otherwise */
1519         c[0] = c[1] = (char)spl->info;
1520         c[2] = c[3] = '\0';
1521         if (*getvar_s(intvar[RS]) == '\0')
1522                 c[2] = '\n';
1523
1524         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1525                 if (!*s)
1526                         return n; /* "": zero fields */
1527                 n++; /* at least one field will be there */
1528                 do {
1529                         l = strcspn(s, c+2); /* len till next NUL or \n */
1530                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1531                          && pmatch[0].rm_so <= l
1532                         ) {
1533                                 l = pmatch[0].rm_so;
                                     /* match ending at offset 0: take one character as the
                                      * field and advance by one */
1534                                 if (pmatch[0].rm_eo == 0) {
1535                                         l++;
1536                                         pmatch[0].rm_eo++;
1537                                 }
1538                                 n++; /* we saw yet another delimiter */
1539                         } else {
                                     /* no usable match: the field runs up to NUL or '\n';
                                      * rm_eo is reused as the amount to advance s by */
1540                                 pmatch[0].rm_eo = l;
1541                                 if (s[l]) pmatch[0].rm_eo++;
1542                         }
1543                         memcpy(s1, s, l);
1544                         s1[l] = '\0';
1545                         nextword(&s1);
1546                         s += pmatch[0].rm_eo;
1547                 } while (*s);
1548                 return n;
1549         }
1550         if (c[0] == '\0') {  /* null split */
1551                 while (*s) {
1552                         *s1++ = *s++;
1553                         *s1++ = '\0';
1554                         n++;
1555                 }
1556                 return n;
1557         }
1558         if (c[0] != ' ') {  /* single-character split */
                     /* works on the copy made above; separators (and c[2], if set) are
                      * overwritten with NUL in place */
1559                 if (icase) {
1560                         c[0] = toupper(c[0]);
1561                         c[1] = tolower(c[1]);
1562                 }
1563                 if (*s1) n++;
1564                 while ((s1 = strpbrk(s1, c))) {
1565                         *s1++ = '\0';
1566                         n++;
1567                 }
1568                 return n;
1569         }
1570         /* space split */
1571         while (*s) {
1572                 s = skip_whitespace(s);
1573                 if (!*s) break;
1574                 n++;
1575                 while (*s && !isspace(*s))
1576                         *s1++ = *s++;
1577                 *s1++ = '\0';
1578         }
1579         return n;
1580 }
1581
1582 static void split_f0(void)
1583 {
1584 /* static char *fstrings; */
1585 #define fstrings (G.split_f0__fstrings)
1586
1587         int i, n;
1588         char *s;
1589
1590         if (is_f0_split)
1591                 return;
1592
1593         is_f0_split = TRUE;
1594         free(fstrings);
1595         fsrealloc(0);
1596         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1597         fsrealloc(n);
1598         s = fstrings;
1599         for (i = 0; i < n; i++) {
1600                 Fields[i].string = nextword(&s);
1601                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1602         }
1603
1604         /* set NF manually to avoid side effects */
1605         clrvar(intvar[NF]);
1606         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1607         intvar[NF]->number = nfields;
1608 #undef fstrings
1609 }
1610
1611 /* perform additional actions when some internal variables changed */
1612 static void handle_special(var *v)
1613 {
1614         int n;
1615         char *b;
1616         const char *sep, *s;
1617         int sl, l, len, i, bsize;
1618
1619         if (!(v->type & VF_SPECIAL))
1620                 return;
1621
1622         if (v == intvar[NF]) {
1623                 n = (int)getvar_i(v);
1624                 fsrealloc(n);
1625
1626                 /* recalculate $0 */
1627                 sep = getvar_s(intvar[OFS]);
1628                 sl = strlen(sep);
1629                 b = NULL;
1630                 len = 0;
1631                 for (i = 0; i < n; i++) {
1632                         s = getvar_s(&Fields[i]);
1633                         l = strlen(s);
1634                         if (b) {
1635                                 memcpy(b+len, sep, sl);
1636                                 len += sl;
1637                         }
1638                         qrealloc(&b, len+l+sl, &bsize);
1639                         memcpy(b+len, s, l);
1640                         len += l;
1641                 }
1642                 if (b)
1643                         b[len] = '\0';
1644                 setvar_p(intvar[F0], b);
1645                 is_f0_split = TRUE;
1646
1647         } else if (v == intvar[F0]) {
1648                 is_f0_split = FALSE;
1649
1650         } else if (v == intvar[FS]) {
1651                 mk_splitter(getvar_s(v), &fsplitter);
1652
1653         } else if (v == intvar[RS]) {
1654                 mk_splitter(getvar_s(v), &rsplitter);
1655
1656         } else if (v == intvar[IGNORECASE]) {
1657                 icase = istrue(v);
1658
1659         } else {                                /* $n */
1660                 n = getvar_i(intvar[NF]);
1661                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1662                 /* right here v is invalid. Just to note... */
1663         }
1664 }
1665
1666 /* step through func/builtin/etc arguments */
1667 static node *nextarg(node **pn)
1668 {
1669         node *n;
1670
1671         n = *pn;
1672         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1673                 *pn = n->r.n;
1674                 n = n->l.n;
1675         } else {
1676                 *pn = NULL;
1677         }
1678         return n;
1679 }
1680
1681 static void hashwalk_init(var *v, xhash *array)
1682 {
1683         char **w;
1684         hash_item *hi;
1685         int i;
1686
1687         if (v->type & VF_WALK)
1688                 free(v->x.walker);
1689
1690         v->type |= VF_WALK;
1691         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1692         w[0] = w[1] = (char *)(w + 2);
1693         for (i = 0; i < array->csize; i++) {
1694                 hi = array->items[i];
1695                 while (hi) {
1696                         strcpy(*w, hi->name);
1697                         nextword(w);
1698                         hi = hi->next;
1699                 }
1700         }
1701 }
1702
1703 static int hashwalk_next(var *v)
1704 {
1705         char **w;
1706
1707         w = v->x.walker;
1708         if (w[1] == w[0])
1709                 return FALSE;
1710
1711         setvar_s(v, nextword(w+1));
1712         return TRUE;
1713 }
1714
1715 /* evaluate node, return 1 when result is true, 0 otherwise */
1716 static int ptest(node *pattern)
1717 {
1718         /* ptest__v is "static": to save stack space? */
1719         return istrue(evaluate(pattern, &G.ptest__v));
1720 }
1721
1722 /* read next record from stream rsm into a variable v */
1723 static int awk_getline(rstream *rsm, var *v)
1724 {
1725         char *b;
1726         regmatch_t pmatch[2];
1727         int a, p, pp=0, size;
1728         int fd, so, eo, r, rp;
1729         char c, *m, *s;
1730
1731         /* we're using our own buffer since we need access to accumulating
1732          * characters
1733          */
1734         fd = fileno(rsm->F);
1735         m = rsm->buffer;
1736         a = rsm->adv;
1737         p = rsm->pos;
1738         size = rsm->size;
1739         c = (char) rsplitter.n.info;
1740         rp = 0;
1741
1742         if (!m) qrealloc(&m, 256, &size);
1743         do {
1744                 b = m + a;
1745                 so = eo = p;
1746                 r = 1;
1747                 if (p > 0) {
1748                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1749                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1750                                                         b, 1, pmatch, 0) == 0) {
1751                                         so = pmatch[0].rm_so;
1752                                         eo = pmatch[0].rm_eo;
1753                                         if (b[eo] != '\0')
1754                                                 break;
1755                                 }
1756                         } else if (c != '\0') {
1757                                 s = strchr(b+pp, c);
1758                                 if (!s) s = memchr(b+pp, '\0', p - pp);
1759                                 if (s) {
1760                                         so = eo = s-b;
1761                                         eo++;
1762                                         break;
1763                                 }
1764                         } else {
1765                                 while (b[rp] == '\n')
1766                                         rp++;
1767                                 s = strstr(b+rp, "\n\n");
1768                                 if (s) {
1769                                         so = eo = s-b;
1770                                         while (b[eo] == '\n') eo++;
1771                                         if (b[eo] != '\0')
1772                                                 break;
1773                                 }
1774                         }
1775                 }
1776
1777                 if (a > 0) {
1778                         memmove(m, (const void *)(m+a), p+1);
1779                         b = m;
1780                         a = 0;
1781                 }
1782
1783                 qrealloc(&m, a+p+128, &size);
1784                 b = m + a;
1785                 pp = p;
1786                 p += safe_read(fd, b+p, size-p-1);
1787                 if (p < pp) {
1788                         p = 0;
1789                         r = 0;
1790                         setvar_i(intvar[ERRNO], errno);
1791                 }
1792                 b[p] = '\0';
1793
1794         } while (p > pp);
1795
1796         if (p == 0) {
1797                 r--;
1798         } else {
1799                 c = b[so]; b[so] = '\0';
1800                 setvar_s(v, b+rp);
1801                 v->type |= VF_USER;
1802                 b[so] = c;
1803                 c = b[eo]; b[eo] = '\0';
1804                 setvar_s(intvar[RT], b+so);
1805                 b[eo] = c;
1806         }
1807
1808         rsm->buffer = m;
1809         rsm->adv = a + eo;
1810         rsm->pos = p - eo;
1811         rsm->size = size;
1812
1813         return r;
1814 }
1815
1816 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1817 {
1818         int r = 0;
1819         char c;
1820         const char *s = format;
1821
1822         if (int_as_int && n == (int)n) {
1823                 r = snprintf(b, size, "%d", (int)n);
1824         } else {
1825                 do { c = *s; } while (c && *++s);
1826                 if (strchr("diouxX", c)) {
1827                         r = snprintf(b, size, format, (int)n);
1828                 } else if (strchr("eEfgG", c)) {
1829                         r = snprintf(b, size, format, n);
1830                 } else {
1831                         syntax_error(EMSG_INV_FMT);
1832                 }
1833         }
1834         return r;
1835 }
1836
1837
1838 /* formatted output into an allocated buffer, return ptr to buffer */
1839 static char *awk_printf(node *n)
1840 {
1841         char *b = NULL;
1842         char *fmt, *s, *f;
1843         const char *s1;
1844         int i, j, incr, bsize;
1845         char c, c1;
1846         var *v, *arg;
1847
1848         v = nvalloc(1);
1849         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1850
1851         i = 0;
1852         while (*f) {
1853                 s = f;
1854                 while (*f && (*f != '%' || *(++f) == '%'))
1855                         f++;
1856                 while (*f && !isalpha(*f)) {
1857                         if (*f == '*')
1858                                 syntax_error("%*x formats are not supported");
1859                         f++;
1860                 }
1861
1862                 incr = (f - s) + MAXVARFMT;
1863                 qrealloc(&b, incr + i, &bsize);
1864                 c = *f;
1865                 if (c != '\0') f++;
1866                 c1 = *f;
1867                 *f = '\0';
1868                 arg = evaluate(nextarg(&n), v);
1869
1870                 j = i;
1871                 if (c == 'c' || !c) {
1872                         i += sprintf(b+i, s, is_numeric(arg) ?
1873                                         (char)getvar_i(arg) : *getvar_s(arg));
1874                 } else if (c == 's') {
1875                         s1 = getvar_s(arg);
1876                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1877                         i += sprintf(b+i, s, s1);
1878                 } else {
1879                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1880                 }
1881                 *f = c1;
1882
1883                 /* if there was an error while sprintf, return value is negative */
1884                 if (i < j) i = j;
1885         }
1886
1887         b = xrealloc(b, i + 1);
1888         free(fmt);
1889         nvfree(v);
1890         b[i] = '\0';
1891         return b;
1892 }
1893
1894 /* common substitution routine
1895  * replace (nm) substring of (src) that match (n) with (repl), store
1896  * result into (dest), return number of substitutions. If nm=0, replace
1897  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1898  * subexpression matching (\1-\9)
1899  */
1900 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1901 {
1902         char *ds = NULL;
1903         const char *s;
1904         const char *sp;
1905         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1906         regmatch_t pmatch[10];
1907         regex_t sreg, *re;
1908
1909         re = as_regex(rn, &sreg);
1910         if (!src) src = intvar[F0];
1911         if (!dest) dest = intvar[F0];
1912
1913         i = di = 0;
1914         sp = getvar_s(src);
1915         rl = strlen(repl);
1916         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1917                 so = pmatch[0].rm_so;
1918                 eo = pmatch[0].rm_eo;
1919
1920                 qrealloc(&ds, di + eo + rl, &dssize);
1921                 memcpy(ds + di, sp, eo);
1922                 di += eo;
1923                 if (++i >= nm) {
1924                         /* replace */
1925                         di -= (eo - so);
1926                         nbs = 0;
1927                         for (s = repl; *s; s++) {
1928                                 ds[di++] = c = *s;
1929                                 if (c == '\\') {
1930                                         nbs++;
1931                                         continue;
1932                                 }
1933                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1934                                         di -= ((nbs + 3) >> 1);
1935                                         j = 0;
1936                                         if (c != '&') {
1937                                                 j = c - '0';
1938                                                 nbs++;
1939                                         }
1940                                         if (nbs % 2) {
1941                                                 ds[di++] = c;
1942                                         } else {
1943                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1944                                                 qrealloc(&ds, di + rl + n, &dssize);
1945                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1946                                                 di += n;
1947                                         }
1948                                 }
1949                                 nbs = 0;
1950                         }
1951                 }
1952
1953                 sp += eo;
1954                 if (i == nm) break;
1955                 if (eo == so) {
1956                         ds[di] = *sp++;
1957                         if (!ds[di++]) break;
1958                 }
1959         }
1960
1961         qrealloc(&ds, di + strlen(sp), &dssize);
1962         strcpy(ds + di, sp);
1963         setvar_p(dest, ds);
1964         if (re == &sreg) regfree(re);
1965         return i;
1966 }
1967
1968 static var *exec_builtin(node *op, var *res)
1969 {
1970 #define tspl (G.exec_builtin__tspl)
1971
1972         int (*to_xxx)(int);
1973         var *tv;
1974         node *an[4];
1975         var *av[4];
1976         const char *as[4];
1977         regmatch_t pmatch[2];
1978         regex_t sreg, *re;
1979         node *spl;
1980         uint32_t isr, info;
1981         int nargs;
1982         time_t tt;
1983         char *s, *s1;
1984         int i, l, ll, n;
1985
1986         tv = nvalloc(4);
1987         isr = info = op->info;
1988         op = op->l.n;
1989
1990         av[2] = av[3] = NULL;
1991         for (i = 0; i < 4 && op; i++) {
1992                 an[i] = nextarg(&op);
1993                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1994                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1995                 isr >>= 1;
1996         }
1997
1998         nargs = i;
1999         if (nargs < (info >> 30))
2000                 syntax_error(EMSG_TOO_FEW_ARGS);
2001
2002         switch (info & OPNMASK) {
2003
2004         case B_a2:
2005 #if ENABLE_FEATURE_AWK_MATH
2006                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2007 #else
2008                 syntax_error(EMSG_NO_MATH);
2009 #endif
2010                 break;
2011
2012         case B_sp:
2013                 if (nargs > 2) {
2014                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2015                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2016                 } else {
2017                         spl = &fsplitter.n;
2018                 }
2019
2020                 n = awk_split(as[0], spl, &s);
2021                 s1 = s;
2022                 clear_array(iamarray(av[1]));
2023                 for (i=1; i<=n; i++)
2024                         setari_u(av[1], i, nextword(&s1));
2025                 free(s);
2026                 setvar_i(res, n);
2027                 break;
2028
2029         case B_ss:
2030                 l = strlen(as[0]);
2031                 i = getvar_i(av[1]) - 1;
2032                 if (i > l) i = l;
2033                 if (i < 0) i = 0;
2034                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2035                 if (n < 0) n = 0;
2036                 s = xmalloc(n+1);
2037                 strncpy(s, as[0]+i, n);
2038                 s[n] = '\0';
2039                 setvar_p(res, s);
2040                 break;
2041
2042         case B_an:
2043                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2044                 break;
2045
2046         case B_co:
2047                 setvar_i(res, ~(long)getvar_i(av[0]));
2048                 break;
2049
2050         case B_ls:
2051                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2052                 break;
2053
2054         case B_or:
2055                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2056                 break;
2057
2058         case B_rs:
2059                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2060                 break;
2061
2062         case B_xo:
2063                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
2064                 break;
2065
2066         case B_lo:
2067                 to_xxx = tolower;
2068                 goto lo_cont;
2069
2070         case B_up:
2071                 to_xxx = toupper;
2072  lo_cont:
2073                 s1 = s = xstrdup(as[0]);
2074                 while (*s1) {
2075                         *s1 = (*to_xxx)(*s1);
2076                         s1++;
2077                 }
2078                 setvar_p(res, s);
2079                 break;
2080
2081         case B_ix:
2082                 n = 0;
2083                 ll = strlen(as[1]);
2084                 l = strlen(as[0]) - ll;
2085                 if (ll > 0 && l >= 0) {
2086                         if (!icase) {
2087                                 s = strstr(as[0], as[1]);
2088                                 if (s) n = (s - as[0]) + 1;
2089                         } else {
2090                                 /* this piece of code is terribly slow and
2091                                  * really should be rewritten
2092                                  */
2093                                 for (i=0; i<=l; i++) {
2094                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2095                                                 n = i+1;
2096                                                 break;
2097                                         }
2098                                 }
2099                         }
2100                 }
2101                 setvar_i(res, n);
2102                 break;
2103
2104         case B_ti:
2105                 if (nargs > 1)
2106                         tt = getvar_i(av[1]);
2107                 else
2108                         time(&tt);
2109                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2110                 i = strftime(g_buf, MAXVARFMT,
2111                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2112                         localtime(&tt));
2113                 g_buf[i] = '\0';
2114                 setvar_s(res, g_buf);
2115                 break;
2116
2117         case B_ma:
2118                 re = as_regex(an[1], &sreg);
2119                 n = regexec(re, as[0], 1, pmatch, 0);
2120                 if (n == 0) {
2121                         pmatch[0].rm_so++;
2122                         pmatch[0].rm_eo++;
2123                 } else {
2124                         pmatch[0].rm_so = 0;
2125                         pmatch[0].rm_eo = -1;
2126                 }
2127                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2128                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2129                 setvar_i(res, pmatch[0].rm_so);
2130                 if (re == &sreg) regfree(re);
2131                 break;
2132
2133         case B_ge:
2134                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2135                 break;
2136
2137         case B_gs:
2138                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2139                 break;
2140
2141         case B_su:
2142                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2143                 break;
2144         }
2145
2146         nvfree(tv);
2147         return res;
2148 #undef tspl
2149 }
2150
2151 /*
2152  * Evaluate node - the heart of the program. Supplied with subtree
2153  * and place where to store result. returns ptr to result.
2154  */
2155 #define XC(n) ((n) >> 8)
2156
2157 static var *evaluate(node *op, var *res)
2158 {
2159 /* This procedure is recursive so we should count every byte */
2160 #define fnargs (G.evaluate__fnargs)
2161 /* seed is initialized to 1 */
2162 #define seed   (G.evaluate__seed)
2163 #define sreg   (G.evaluate__sreg)
2164
2165         node *op1;
2166         var *v1;
2167         union {
2168                 var *v;
2169                 const char *s;
2170                 double d;
2171                 int i;
2172         } L, R;
2173         uint32_t opinfo;
2174         int opn;
2175         union {
2176                 char *s;
2177                 rstream *rsm;
2178                 FILE *F;
2179                 var *v;
2180                 regex_t *re;
2181                 uint32_t info;
2182         } X;
2183
2184         if (!op)
2185                 return setvar_s(res, NULL);
2186
2187         v1 = nvalloc(2);
2188
2189         while (op) {
2190                 opinfo = op->info;
2191                 opn = (opinfo & OPNMASK);
2192                 g_lineno = op->lineno;
2193
2194                 /* execute inevitable things */
2195                 op1 = op->l.n;
2196                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2197                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2198                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2199                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2200                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2201
2202                 switch (XC(opinfo & OPCLSMASK)) {
2203
2204                 /* -- iterative node type -- */
2205
2206                 /* test pattern */
2207                 case XC( OC_TEST ):
2208                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2209                                 /* it's range pattern */
2210                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2211                                         op->info |= OF_CHECKED;
2212                                         if (ptest(op1->r.n))
2213                                                 op->info &= ~OF_CHECKED;
2214
2215                                         op = op->a.n;
2216                                 } else {
2217                                         op = op->r.n;
2218                                 }
2219                         } else {
2220                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2221                         }
2222                         break;
2223
2224                 /* just evaluate an expression, also used as unconditional jump */
2225                 case XC( OC_EXEC ):
2226                         break;
2227
2228                 /* branch, used in if-else and various loops */
2229                 case XC( OC_BR ):
2230                         op = istrue(L.v) ? op->a.n : op->r.n;
2231                         break;
2232
2233                 /* initialize for-in loop */
2234                 case XC( OC_WALKINIT ):
2235                         hashwalk_init(L.v, iamarray(R.v));
2236                         break;
2237
2238                 /* get next array item */
2239                 case XC( OC_WALKNEXT ):
2240                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2241                         break;
2242
2243                 case XC( OC_PRINT ):
2244                 case XC( OC_PRINTF ):
2245                         X.F = stdout;
2246                         if (op->r.n) {
2247                                 X.rsm = newfile(R.s);
2248                                 if (!X.rsm->F) {
2249                                         if (opn == '|') {
2250                                                 X.rsm->F = popen(R.s, "w");
2251                                                 if (X.rsm->F == NULL)
2252                                                         bb_perror_msg_and_die("popen");
2253                                                 X.rsm->is_pipe = 1;
2254                                         } else {
2255                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2256                                         }
2257                                 }
2258                                 X.F = X.rsm->F;
2259                         }
2260
2261                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2262                                 if (!op1) {
2263                                         fputs(getvar_s(intvar[F0]), X.F);
2264                                 } else {
2265                                         while (op1) {
2266                                                 L.v = evaluate(nextarg(&op1), v1);
2267                                                 if (L.v->type & VF_NUMBER) {
2268                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2269                                                                         getvar_i(L.v), TRUE);
2270                                                         fputs(g_buf, X.F);
2271                                                 } else {
2272                                                         fputs(getvar_s(L.v), X.F);
2273                                                 }
2274
2275                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2276                                         }
2277                                 }
2278                                 fputs(getvar_s(intvar[ORS]), X.F);
2279
2280                         } else {        /* OC_PRINTF */
2281                                 L.s = awk_printf(op1);
2282                                 fputs(L.s, X.F);
2283                                 free((char*)L.s);
2284                         }
2285                         fflush(X.F);
2286                         break;
2287
2288                 case XC( OC_DELETE ):
2289                         X.info = op1->info & OPCLSMASK;
2290                         if (X.info == OC_VAR) {
2291                                 R.v = op1->l.v;
2292                         } else if (X.info == OC_FNARG) {
2293                                 R.v = &fnargs[op1->l.i];
2294                         } else {
2295                                 syntax_error(EMSG_NOT_ARRAY);
2296                         }
2297
2298                         if (op1->r.n) {
2299                                 clrvar(L.v);
2300                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2301                                 hash_remove(iamarray(R.v), L.s);
2302                         } else {
2303                                 clear_array(iamarray(R.v));
2304                         }
2305                         break;
2306
2307                 case XC( OC_NEWSOURCE ):
2308                         g_progname = op->l.s;
2309                         break;
2310
2311                 case XC( OC_RETURN ):
2312                         copyvar(res, L.v);
2313                         break;
2314
2315                 case XC( OC_NEXTFILE ):
2316                         nextfile = TRUE;
2317                 case XC( OC_NEXT ):
2318                         nextrec = TRUE;
2319                 case XC( OC_DONE ):
2320                         clrvar(res);
2321                         break;
2322
2323                 case XC( OC_EXIT ):
2324                         awk_exit(L.d);
2325
2326                 /* -- recursive node type -- */
2327
2328                 case XC( OC_VAR ):
2329                         L.v = op->l.v;
2330                         if (L.v == intvar[NF])
2331                                 split_f0();
2332                         goto v_cont;
2333
2334                 case XC( OC_FNARG ):
2335                         L.v = &fnargs[op->l.i];
2336  v_cont:
2337                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2338                         break;
2339
2340                 case XC( OC_IN ):
2341                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2342                         break;
2343
2344                 case XC( OC_REGEXP ):
2345                         op1 = op;
2346                         L.s = getvar_s(intvar[F0]);
2347                         goto re_cont;
2348
2349                 case XC( OC_MATCH ):
2350                         op1 = op->r.n;
2351  re_cont:
2352                         X.re = as_regex(op1, &sreg);
2353                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2354                         if (X.re == &sreg) regfree(X.re);
2355                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2356                         break;
2357
2358                 case XC( OC_MOVE ):
2359                         /* if source is a temporary string, jusk relink it to dest */
2360                         if (R.v == v1+1 && R.v->string) {
2361                                 res = setvar_p(L.v, R.v->string);
2362                                 R.v->string = NULL;
2363                         } else {
2364                                 res = copyvar(L.v, R.v);
2365                         }
2366                         break;
2367
2368                 case XC( OC_TERNARY ):
2369                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2370                                 syntax_error(EMSG_POSSIBLE_ERROR);
2371                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2372                         break;
2373
2374                 case XC( OC_FUNC ):
2375                         if (!op->r.f->body.first)
2376                                 syntax_error(EMSG_UNDEF_FUNC);
2377
2378                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2379                         while (op1) {
2380                                 L.v = evaluate(nextarg(&op1), v1);
2381                                 copyvar(R.v, L.v);
2382                                 R.v->type |= VF_CHILD;
2383                                 R.v->x.parent = L.v;
2384                                 if (++R.v - X.v >= op->r.f->nargs)
2385                                         break;
2386                         }
2387
2388                         R.v = fnargs;
2389                         fnargs = X.v;
2390
2391                         L.s = g_progname;
2392                         res = evaluate(op->r.f->body.first, res);
2393                         g_progname = L.s;
2394
2395                         nvfree(fnargs);
2396                         fnargs = R.v;
2397                         break;
2398
2399                 case XC( OC_GETLINE ):
2400                 case XC( OC_PGETLINE ):
2401                         if (op1) {
2402                                 X.rsm = newfile(L.s);
2403                                 if (!X.rsm->F) {
2404                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2405                                                 X.rsm->F = popen(L.s, "r");
2406                                                 X.rsm->is_pipe = TRUE;
2407                                         } else {
2408                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2409                                         }
2410                                 }
2411                         } else {
2412                                 if (!iF) iF = next_input_file();
2413                                 X.rsm = iF;
2414                         }
2415
2416                         if (!X.rsm->F) {
2417                                 setvar_i(intvar[ERRNO], errno);
2418                                 setvar_i(res, -1);
2419                                 break;
2420                         }
2421
2422                         if (!op->r.n)
2423                                 R.v = intvar[F0];
2424
2425                         L.i = awk_getline(X.rsm, R.v);
2426                         if (L.i > 0) {
2427                                 if (!op1) {
2428                                         incvar(intvar[FNR]);
2429                                         incvar(intvar[NR]);
2430                                 }
2431                         }
2432                         setvar_i(res, L.i);
2433                         break;
2434
2435                 /* simple builtins */
2436                 case XC( OC_FBLTIN ):
2437                         switch (opn) {
2438
2439                         case F_in:
2440                                 R.d = (int)L.d;
2441                                 break;
2442
2443                         case F_rn:
2444                                 R.d = (double)rand() / (double)RAND_MAX;
2445                                 break;
2446 #if ENABLE_FEATURE_AWK_MATH
2447                         case F_co:
2448                                 R.d = cos(L.d);
2449                                 break;
2450
2451                         case F_ex:
2452                                 R.d = exp(L.d);
2453                                 break;
2454
2455                         case F_lg:
2456                                 R.d = log(L.d);
2457                                 break;
2458
2459                         case F_si:
2460                                 R.d = sin(L.d);
2461                                 break;
2462
2463                         case F_sq:
2464                                 R.d = sqrt(L.d);
2465                                 break;
2466 #else
2467                         case F_co:
2468                         case F_ex:
2469                         case F_lg:
2470                         case F_si:
2471                         case F_sq:
2472                                 syntax_error(EMSG_NO_MATH);
2473                                 break;
2474 #endif
2475                         case F_sr:
2476                                 R.d = (double)seed;
2477                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2478                                 srand(seed);
2479                                 break;
2480
2481                         case F_ti:
2482                                 R.d = time(NULL);
2483                                 break;
2484
2485                         case F_le:
2486                                 if (!op1)
2487                                         L.s = getvar_s(intvar[F0]);
2488                                 R.d = strlen(L.s);
2489                                 break;
2490
2491                         case F_sy:
2492                                 fflush(NULL);
2493                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2494                                                 ? (system(L.s) >> 8) : 0;
2495                                 break;
2496
2497                         case F_ff:
2498                                 if (!op1)
2499                                         fflush(stdout);
2500                                 else {
2501                                         if (L.s && *L.s) {
2502                                                 X.rsm = newfile(L.s);
2503                                                 fflush(X.rsm->F);
2504                                         } else {
2505                                                 fflush(NULL);
2506                                         }
2507                                 }
2508                                 break;
2509
2510                         case F_cl:
2511                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2512                                 if (X.rsm) {
2513                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2514                                         free(X.rsm->buffer);
2515                                         hash_remove(fdhash, L.s);
2516                                 }
2517                                 if (R.i != 0)
2518                                         setvar_i(intvar[ERRNO], errno);
2519                                 R.d = (double)R.i;
2520                                 break;
2521                         }
2522                         setvar_i(res, R.d);
2523                         break;
2524
2525                 case XC( OC_BUILTIN ):
2526                         res = exec_builtin(op, res);
2527                         break;
2528
2529                 case XC( OC_SPRINTF ):
2530                         setvar_p(res, awk_printf(op1));
2531                         break;
2532
2533                 case XC( OC_UNARY ):
2534                         X.v = R.v;
2535                         L.d = R.d = getvar_i(R.v);
2536                         switch (opn) {
2537                         case 'P':
2538                                 L.d = ++R.d;
2539                                 goto r_op_change;
2540                         case 'p':
2541                                 R.d++;
2542                                 goto r_op_change;
2543                         case 'M':
2544                                 L.d = --R.d;
2545                                 goto r_op_change;
2546                         case 'm':
2547                                 R.d--;
2548                                 goto r_op_change;
2549                         case '!':
2550                                 L.d = istrue(X.v) ? 0 : 1;
2551                                 break;
2552                         case '-':
2553                                 L.d = -R.d;
2554                                 break;
2555  r_op_change:
2556                                 setvar_i(X.v, R.d);
2557                         }
2558                         setvar_i(res, L.d);
2559                         break;
2560
2561                 case XC( OC_FIELD ):
2562                         R.i = (int)getvar_i(R.v);
2563                         if (R.i == 0) {
2564                                 res = intvar[F0];
2565                         } else {
2566                                 split_f0();
2567                                 if (R.i > nfields)
2568                                         fsrealloc(R.i);
2569                                 res = &Fields[R.i - 1];
2570                         }
2571                         break;
2572
2573                 /* concatenation (" ") and index joining (",") */
2574                 case XC( OC_CONCAT ):
2575                 case XC( OC_COMMA ):
2576                         opn = strlen(L.s) + strlen(R.s) + 2;
2577                         X.s = xmalloc(opn);
2578                         strcpy(X.s, L.s);
2579                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2580                                 L.s = getvar_s(intvar[SUBSEP]);
2581                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2582                                 strcat(X.s, L.s);
2583                         }
2584                         strcat(X.s, R.s);
2585                         setvar_p(res, X.s);
2586                         break;
2587
2588                 case XC( OC_LAND ):
2589                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2590                         break;
2591
2592                 case XC( OC_LOR ):
2593                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2594                         break;
2595
2596                 case XC( OC_BINARY ):
2597                 case XC( OC_REPLACE ):
2598                         R.d = getvar_i(R.v);
2599                         switch (opn) {
2600                         case '+':
2601                                 L.d += R.d;
2602                                 break;
2603                         case '-':
2604                                 L.d -= R.d;
2605                                 break;
2606                         case '*':
2607                                 L.d *= R.d;
2608                                 break;
2609                         case '/':
2610                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2611                                 L.d /= R.d;
2612                                 break;
2613                         case '&':
2614 #if ENABLE_FEATURE_AWK_MATH
2615                                 L.d = pow(L.d, R.d);
2616 #else
2617                                 syntax_error(EMSG_NO_MATH);
2618 #endif
2619                                 break;
2620                         case '%':
2621                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2622                                 L.d -= (int)(L.d / R.d) * R.d;
2623                                 break;
2624                         }
2625                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2626                         break;
2627
2628                 case XC( OC_COMPARE ):
2629                         if (is_numeric(L.v) && is_numeric(R.v)) {
2630                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2631                         } else {
2632                                 L.s = getvar_s(L.v);
2633                                 R.s = getvar_s(R.v);
2634                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2635                         }
2636                         switch (opn & 0xfe) {
2637                         case 0:
2638                                 R.i = (L.d > 0);
2639                                 break;
2640                         case 2:
2641                                 R.i = (L.d >= 0);
2642                                 break;
2643                         case 4:
2644                                 R.i = (L.d == 0);
2645                                 break;
2646                         }
2647                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2648                         break;
2649
2650                 default:
2651                         syntax_error(EMSG_POSSIBLE_ERROR);
2652                 }
2653                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2654                         op = op->a.n;
2655                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2656                         break;
2657                 if (nextrec)
2658                         break;
2659         }
2660         nvfree(v1);
2661         return res;
2662 #undef fnargs
2663 #undef seed
2664 #undef sreg
2665 }
2666
2667
2668 /* -------- main & co. -------- */
2669
2670 static int awk_exit(int r)
2671 {
2672         var tv;
2673         unsigned i;
2674         hash_item *hi;
2675
2676         zero_out_var(&tv);
2677
2678         if (!exiting) {
2679                 exiting = TRUE;
2680                 nextrec = FALSE;
2681                 evaluate(endseq.first, &tv);
2682         }
2683
2684         /* waiting for children */
2685         for (i = 0; i < fdhash->csize; i++) {
2686                 hi = fdhash->items[i];
2687                 while (hi) {
2688                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2689                                 pclose(hi->data.rs.F);
2690                         hi = hi->next;
2691                 }
2692         }
2693
2694         exit(r);
2695 }
2696
2697 /* if expr looks like "var=value", perform assignment and return 1,
2698  * otherwise return 0 */
2699 static int is_assignment(const char *expr)
2700 {
2701         char *exprc, *s, *s0, *s1;
2702
2703         exprc = xstrdup(expr);
2704         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2705                 free(exprc);
2706                 return FALSE;
2707         }
2708
2709         *(s++) = '\0';
2710         s0 = s1 = s;
2711         while (*s)
2712                 *(s1++) = nextchar(&s);
2713
2714         *s1 = '\0';
2715         setvar_u(newvar(exprc), s0);
2716         free(exprc);
2717         return TRUE;
2718 }
2719
2720 /* switch to next input file */
2721 static rstream *next_input_file(void)
2722 {
2723 #define rsm          (G.next_input_file__rsm)
2724 #define files_happen (G.next_input_file__files_happen)
2725
2726         FILE *F = NULL;
2727         const char *fname, *ind;
2728
2729         if (rsm.F) fclose(rsm.F);
2730         rsm.F = NULL;
2731         rsm.pos = rsm.adv = 0;
2732
2733         do {
2734                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2735                         if (files_happen)
2736                                 return NULL;
2737                         fname = "-";
2738                         F = stdin;
2739                 } else {
2740                         ind = getvar_s(incvar(intvar[ARGIND]));
2741                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2742                         if (fname && *fname && !is_assignment(fname))
2743                                 F = afopen(fname, "r");
2744                 }
2745         } while (!F);
2746
2747         files_happen = TRUE;
2748         setvar_s(intvar[FILENAME], fname);
2749         rsm.F = F;
2750         return &rsm;
2751 #undef rsm
2752 #undef files_happen
2753 }
2754
2755 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2756 int awk_main(int argc, char **argv)
2757 {
2758         unsigned opt;
2759         char *opt_F, *opt_W;
2760         llist_t *opt_v = NULL;
2761         int i, j, flen;
2762         var *v;
2763         var tv;
2764         char **envp;
2765         char *vnames = (char *)vNames; /* cheat */
2766         char *vvalues = (char *)vValues;
2767
2768         INIT_G();
2769
2770         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2771          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2772         if (ENABLE_LOCALE_SUPPORT)
2773                 setlocale(LC_NUMERIC, "C");
2774
2775         zero_out_var(&tv);
2776
2777         /* allocate global buffer */
2778         g_buf = xmalloc(MAXVARFMT + 1);
2779
2780         vhash = hash_init();
2781         ahash = hash_init();
2782         fdhash = hash_init();
2783         fnhash = hash_init();
2784
2785         /* initialize variables */
2786         for (i = 0; *vnames; i++) {
2787                 intvar[i] = v = newvar(nextword(&vnames));
2788                 if (*vvalues != '\377')
2789                         setvar_s(v, nextword(&vvalues));
2790                 else
2791                         setvar_i(v, 0);
2792
2793                 if (*vnames == '*') {
2794                         v->type |= VF_SPECIAL;
2795                         vnames++;
2796                 }
2797         }
2798
2799         handle_special(intvar[FS]);
2800         handle_special(intvar[RS]);
2801
2802         newfile("/dev/stdin")->F = stdin;
2803         newfile("/dev/stdout")->F = stdout;
2804         newfile("/dev/stderr")->F = stderr;
2805
2806         /* Huh, people report that sometimes environ is NULL. Oh well. */
2807         if (environ) for (envp = environ; *envp; envp++) {
2808                 /* environ is writable, thus we don't strdup it needlessly */
2809                 char *s = *envp;
2810                 char *s1 = strchr(s, '=');
2811                 if (s1) {
2812                         *s1 = '\0';
2813                         /* Both findvar and setvar_u take const char*
2814                          * as 2nd arg -> environment is not trashed */
2815                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2816                         *s1 = '=';
2817                 }
2818         }
2819         opt_complementary = "v::";
2820         opt = getopt32(argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2821         argv += optind;
2822         argc -= optind;
2823         if (opt & 0x1)
2824                 setvar_s(intvar[FS], opt_F); // -F
2825         while (opt_v) { /* -v */
2826                 if (!is_assignment(llist_pop(&opt_v)))
2827                         bb_show_usage();
2828         }
2829         if (opt & 0x4) { // -f
2830                 char *s = s; /* die, gcc, die */
2831                 FILE *from_file = afopen(g_progname, "r");
2832                 /* one byte is reserved for some trick in next_token */
2833                 if (fseek(from_file, 0, SEEK_END) == 0) {
2834                         flen = ftell(from_file);
2835                         s = xmalloc(flen + 4);
2836                         fseek(from_file, 0, SEEK_SET);
2837                         i = 1 + fread(s + 1, 1, flen, from_file);
2838                 } else {
2839                         for (i = j = 1; j > 0; i += j) {
2840                                 s = xrealloc(s, i + 4096);
2841                                 j = fread(s + i, 1, 4094, from_file);
2842                         }
2843                 }
2844                 s[i] = '\0';
2845                 fclose(from_file);
2846                 parse_program(s + 1);
2847                 free(s);
2848         } else { // no -f: take program from 1st parameter
2849                 if (!argc)
2850                         bb_show_usage();
2851                 g_progname = "cmd. line";
2852                 parse_program(*argv++);
2853                 argc--;
2854         }
2855         if (opt & 0x8) // -W
2856                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2857
2858         /* fill in ARGV array */
2859         setvar_i(intvar[ARGC], argc + 1);
2860         setari_u(intvar[ARGV], 0, "awk");
2861         i = 0;
2862         while (*argv)
2863                 setari_u(intvar[ARGV], ++i, *argv++);
2864
2865         evaluate(beginseq.first, &tv);
2866         if (!mainseq.first && !endseq.first)
2867                 awk_exit(EXIT_SUCCESS);
2868
2869         /* input file could already be opened in BEGIN block */
2870         if (!iF) iF = next_input_file();
2871
2872         /* passing through input files */
2873         while (iF) {
2874                 nextfile = FALSE;
2875                 setvar_i(intvar[FNR], 0);
2876
2877                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2878                         nextrec = FALSE;
2879                         incvar(intvar[NR]);
2880                         incvar(intvar[FNR]);
2881                         evaluate(mainseq.first, &tv);
2882
2883                         if (nextfile)
2884                                 break;
2885                 }
2886
2887                 if (i < 0)
2888                         syntax_error(strerror(errno));
2889
2890                 iF = next_input_file();
2891         }
2892
2893         awk_exit(EXIT_SUCCESS);
2894         /*return 0;*/
2895 }