blob: 0f8e606f37850e35fb9efbc00040ccdde81aaf5e [file] [log] [blame]
Jack Palevichae54f1f2009-05-08 14:54:15 -07001/*
Jack Palevich21a15a22009-05-11 14:49:29 -07002 Obfuscated Tiny C Compiler
Jack Palevich88311482009-05-08 13:57:37 -07003
Jack Palevich21a15a22009-05-11 14:49:29 -07004 Copyright (C) 2001-2003 Fabrice Bellard
Jack Palevichae54f1f2009-05-08 14:54:15 -07005
Jack Palevich21a15a22009-05-11 14:49:29 -07006 This software is provided 'as-is', without any express or implied
7 warranty. In no event will the authors be held liable for any damages
8 arising from the use of this software.
Jack Paleviche27bf3e2009-05-10 14:09:03 -07009
Jack Palevich21a15a22009-05-11 14:49:29 -070010 Permission is granted to anyone to use this software for any purpose,
11 including commercial applications, and to alter it and redistribute it
12 freely, subject to the following restrictions:
Jack Paleviche27bf3e2009-05-10 14:09:03 -070013
Jack Palevich21a15a22009-05-11 14:49:29 -070014 1. The origin of this software must not be misrepresented; you must not
15 claim that you wrote the original software. If you use this software
16 in a product, an acknowledgment in the product and its documentation
17 *is* required.
18 2. Altered source versions must be plainly marked as such, and must not be
19 misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
21 */
Jack Paleviche27bf3e2009-05-10 14:09:03 -070022
Jack Palevich77ae76e2009-05-10 19:59:24 -070023#include <ctype.h>
24#include <dlfcn.h>
Jack Paleviche27bf3e2009-05-10 14:09:03 -070025#include <stdarg.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070026#include <stdio.h>
Jack Palevichf6b5a532009-05-10 19:16:42 -070027#include <stdlib.h>
28#include <string.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070029
Jack Palevichbbf8ab52009-05-11 11:54:30 -070030namespace acc {
31
Jack Palevich77ae76e2009-05-10 19:59:24 -070032class compiler {
Jack Palevich7448a2e2009-05-08 18:33:45 -070033
Jack Palevich21a15a22009-05-11 14:49:29 -070034 class CodeBuf {
35 char* ind;
36 char* pProgramBase;
Jack Palevichf0cbc922009-05-08 16:35:13 -070037
Jack Palevich21a15a22009-05-11 14:49:29 -070038 void release() {
39 if (pProgramBase != 0) {
40 free(pProgramBase);
41 pProgramBase = 0;
Jack Palevichae54f1f2009-05-08 14:54:15 -070042 }
Jack Palevich21a15a22009-05-11 14:49:29 -070043 }
44
45 public:
46 CodeBuf() {
47 pProgramBase = 0;
48 ind = 0;
49 }
50
51 ~CodeBuf() {
52 release();
53 }
54
55 void init(int size) {
56 release();
57 pProgramBase = (char*) calloc(1, size);
58 ind = pProgramBase;
59 }
60
61 void o(int n) {
62 /* cannot use unsigned, so we must do a hack */
63 while (n && n != -1) {
64 *ind++ = n;
65 n = n >> 8;
66 }
67 }
68
69 /*
70 * Output a byte. Handles all values, 0..ff.
71 */
72 void ob(int n) {
73 *ind++ = n;
74 }
75
76 /* output a symbol and patch all calls to it */
77 void gsym(int t) {
78 int n;
79 while (t) {
80 n = *(int *) t; /* next value */
81 *(int *) t = ((int) ind) - t - 4;
82 t = n;
83 }
84 }
85
86 /* psym is used to put an instruction with a data field which is a
87 reference to a symbol. It is in fact the same as oad ! */
88 int psym(int n, int t) {
89 return oad(n, t);
90 }
91
92 /* instruction + address */
93 int oad(int n, int t) {
94 o(n);
95 *(int *) ind = t;
96 t = (int) ind;
97 ind = ind + 4;
98 return t;
99 }
100
101 inline void* getBase() {
102 return (void*) pProgramBase;
103 }
104
105 int getSize() {
106 return ind - pProgramBase;
107 }
108
109 int getPC() {
110 return (int) ind;
111 }
112 };
113
114 class CodeGenerator {
115 public:
116 CodeGenerator() {}
117 virtual ~CodeGenerator() {}
118
119 void init(CodeBuf* pCodeBuf) {
120 this->pCodeBuf = pCodeBuf;
121 }
122
123 /* output a symbol and patch all calls to it */
124 void gsym(int t) {
125 pCodeBuf->gsym(t);
126 }
127
128 protected:
129 void o(int n) {
130 pCodeBuf->o(n);
131 }
132
133 /*
134 * Output a byte. Handles all values, 0..ff.
135 */
136 void ob(int n) {
137 pCodeBuf->ob(n);
138 }
139
140 /* psym is used to put an instruction with a data field which is a
141 reference to a symbol. It is in fact the same as oad ! */
142 int psym(int n, int t) {
143 return oad(n, t);
144 }
145
146 /* instruction + address */
147 int oad(int n, int t) {
148 return pCodeBuf->oad(n,t);
149 }
150
151 int getPC() {
152 return pCodeBuf->getPC();
153 }
154
155 private:
156 CodeBuf* pCodeBuf;
157 };
158
159 class X86CodeGenerator : public CodeGenerator {
160 public:
161 X86CodeGenerator() {}
162 virtual ~X86CodeGenerator() {}
163
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700164 /* returns address to patch with local variable size
165 */
166 int functionEntry() {
167 o(0xe58955); /* push %ebp, mov %esp, %ebp */
168 return oad(0xec81, 0); /* sub $xxx, %esp */
169 }
170
171 void functionExit() {
172 o(0xc3c9); /* leave, ret */
173 }
174
Jack Palevich21a15a22009-05-11 14:49:29 -0700175 /* load immediate value */
176 int li(int t) {
177 oad(0xb8, t); /* mov $xx, %eax */
178 }
179
180 int gjmp(int t) {
181 return psym(0xe9, t);
182 }
183
184 /* l = 0: je, l == 1: jne */
185 int gtst(int l, int t) {
186 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
187 return psym(0x84 + l, t);
188 }
189
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700190 int gcmp(int op) {
191 int t = decodeOp(op);
Jack Palevich21a15a22009-05-11 14:49:29 -0700192 o(0xc139); /* cmp %eax,%ecx */
193 li(0);
194 o(0x0f); /* setxx %al */
195 o(t + 0x90);
196 o(0xc0);
197 }
198
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700199 int genOp(int op) {
200 o(decodeOp(op));
201 if (op == OP_MOD)
202 o(0x92); /* xchg %edx, %eax */
203 }
204
Jack Palevich21a15a22009-05-11 14:49:29 -0700205 void clearECX() {
206 oad(0xb9, 0); /* movl $0, %ecx */
207 }
208
209 void pushEAX() {
210 o(0x50); /* push %eax */
211 }
212
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700213 void popECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700214 o(0x59); /* pop %ecx */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700215 }
216
217 void storeEAXToAddressECX(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700218 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
219 }
220
221 void loadEAXIndirect(bool isInt) {
222 if (isInt)
223 o(0x8b); /* mov (%eax), %eax */
224 else
225 o(0xbe0f); /* movsbl (%eax), %eax */
226 ob(0); /* add zero in code */
227 }
228
229 void leaEAX(int ea) {
230 gmov(10, ea); /* leal EA, %eax */
231 }
232
233 void storeEAX(int ea) {
234 gmov(6, ea); /* mov %eax, EA */
235 }
236
237 void loadEAX(int ea) {
238 gmov(8, ea); /* mov EA, %eax */
239 }
240
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700241 void postIncrementOrDecrement(int n, int op) {
242 /* Implement post-increment or post decrement.
Jack Palevich21a15a22009-05-11 14:49:29 -0700243 */
244 gmov(0, n); /* 83 ADD */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700245 o(decodeOp(op));
Jack Palevich21a15a22009-05-11 14:49:29 -0700246 }
247
248 int allocStackSpaceForArgs() {
249 return oad(0xec81, 0); /* sub $xxx, %esp */
250 }
251
252 void storeEAToArg(int l) {
253 oad(0x248489, l); /* movl %eax, xxx(%esp) */
254 }
255
256 int callForward(int symbol) {
257 return psym(0xe8, symbol); /* call xxx */
258 }
259
260 void callRelative(int t) {
261 psym(0xe8, t); /* call xxx */
262 }
263
264 void callIndirect(int l) {
265 oad(0x2494ff, l); /* call *xxx(%esp) */
266 }
267
268 void adjustStackAfterCall(int l) {
269 oad(0xc481, l); /* add $xxx, %esp */
270 }
271
Jack Palevich21a15a22009-05-11 14:49:29 -0700272 private:
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700273 static const int operatorHelper[];
274
275 int decodeOp(int op) {
276 if (op < 0 || op > OP_COUNT) {
277 fprintf(stderr, "Out-of-range operator: %d\n", op);
278 exit(1);
279 }
280 return operatorHelper[op];
281 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700282
283 int gmov(int l, int t) {
284 o(l + 0x83);
285 oad((t < LOCAL) << 7 | 5, t);
286 }
287 };
288
289 /* vars: value of variables
290 loc : local variable index
291 glo : global variable index
292 ind : output code ptr
293 rsym: return symbol
294 prog: output code
295 dstk: define stack
296 dptr, dch: macro state
297 */
298 int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
299 dptr, dch, last_id;
300 void* pSymbolBase;
301 void* pGlobalBase;
302 void* pVarsBase;
303 FILE* file;
304
305 CodeBuf codeBuf;
306 X86CodeGenerator* pGen;
307
308 static const int ALLOC_SIZE = 99999;
309
310 /* depends on the init string */
311 static const int TOK_STR_SIZE = 48;
312 static const int TOK_IDENT = 0x100;
313 static const int TOK_INT = 0x100;
314 static const int TOK_IF = 0x120;
315 static const int TOK_ELSE = 0x138;
316 static const int TOK_WHILE = 0x160;
317 static const int TOK_BREAK = 0x190;
318 static const int TOK_RETURN = 0x1c0;
319 static const int TOK_FOR = 0x1f8;
320 static const int TOK_DEFINE = 0x218;
321 static const int TOK_MAIN = 0x250;
322
323 static const int TOK_DUMMY = 1;
324 static const int TOK_NUM = 2;
325
326 static const int LOCAL = 0x200;
327
328 static const int SYM_FORWARD = 0;
329 static const int SYM_DEFINE = 1;
330
331 /* tokens in string heap */
332 static const int TAG_TOK = ' ';
333 static const int TAG_MACRO = 2;
334
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700335 static const int OP_INCREMENT = 0;
336 static const int OP_DECREMENT = 1;
337 static const int OP_MUL = 2;
338 static const int OP_DIV = 3;
339 static const int OP_MOD = 4;
340 static const int OP_PLUS = 5;
341 static const int OP_MINUS = 6;
342 static const int OP_SHIFT_LEFT = 7;
343 static const int OP_SHIFT_RIGHT = 8;
344 static const int OP_LESS_EQUAL = 9;
345 static const int OP_GREATER_EQUAL = 10;
346 static const int OP_LESS = 11;
347 static const int OP_GREATER = 12;
348 static const int OP_EQUALS = 13;
349 static const int OP_NOT_EQUALS = 14;
350 static const int OP_LOGICAL_AND = 15;
351 static const int OP_LOGICAL_OR = 16;
352 static const int OP_BIT_AND = 17;
353 static const int OP_BIT_XOR = 18;
354 static const int OP_BIT_OR = 19;
355 static const int OP_BIT_NOT = 20;
356 static const int OP_LOGICAL_NOT = 21;
357 static const int OP_COUNT = 22;
358
359 /* Operators are searched from front, the two-character operators appear
360 * before the single-character operators with the same first character.
361 * @ is used to pad out single-character operators.
362 */
363 static const char* operatorChars;
364 static const char operatorLevel[];
365
Jack Palevich21a15a22009-05-11 14:49:29 -0700366 void pdef(int t) {
367 *(char *) dstk++ = t;
368 }
369
370 void inp() {
371 if (dptr) {
372 ch = *(char *) dptr++;
373 if (ch == TAG_MACRO) {
374 dptr = 0;
375 ch = dch;
376 }
377 } else
378 ch = fgetc(file);
379 /* printf("ch=%c 0x%x\n", ch, ch); */
380 }
381
382 int isid() {
383 return isalnum(ch) | ch == '_';
384 }
385
386 /* read a character constant */
387 void getq() {
388 if (ch == '\\') {
389 inp();
390 if (ch == 'n')
391 ch = '\n';
392 }
393 }
394
395 void next() {
396 int l, a;
397
398 while (isspace(ch) | ch == '#') {
399 if (ch == '#') {
400 inp();
401 next();
402 if (tok == TOK_DEFINE) {
403 next();
404 pdef(TAG_TOK); /* fill last ident tag */
405 *(int *) tok = SYM_DEFINE;
406 *(int *) (tok + 4) = dstk; /* define stack */
407 }
408 /* well we always save the values ! */
409 while (ch != '\n') {
410 pdef(ch);
411 inp();
412 }
413 pdef(ch);
414 pdef(TAG_MACRO);
415 }
416 inp();
417 }
418 tokl = 0;
419 tok = ch;
420 /* encode identifiers & numbers */
421 if (isid()) {
422 pdef(TAG_TOK);
423 last_id = dstk;
424 while (isid()) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700425 pdef(ch);
426 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700427 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700428 if (isdigit(tok)) {
429 tokc = strtol((char*) last_id, 0, 0);
430 tok = TOK_NUM;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700431 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700432 *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
433 suppose data is initialized to zero by calloc) */
434 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
435 - sym_stk);
436 *(char *) dstk = 0; /* mark real end of ident for dlsym() */
437 tok = tok * 8 + TOK_IDENT;
438 if (tok > TOK_DEFINE) {
439 tok = vars + tok;
440 /* printf("tok=%s %x\n", last_id, tok); */
441 /* define handling */
442 if (*(int *) tok == SYM_DEFINE) {
443 dptr = *(int *) (tok + 4);
444 dch = ch;
445 inp();
446 next();
447 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700448 }
449 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700450 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700451 inp();
452 if (tok == '\'') {
453 tok = TOK_NUM;
454 getq();
455 tokc = ch;
456 inp();
457 inp();
458 } else if (tok == '/' & ch == '*') {
459 inp();
460 while (ch) {
461 while (ch != '*')
462 inp();
463 inp();
464 if (ch == '/')
465 ch = 0;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700466 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700467 inp();
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700468 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700469 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700470 const char* t = operatorChars;
471 int opIndex = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700472 while (l = *t++) {
473 a = *t++;
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700474 tokl = operatorLevel[opIndex];
475 tokc = opIndex;
Jack Palevich21a15a22009-05-11 14:49:29 -0700476 if (l == tok & (a == ch | a == '@')) {
477#if 0
478 printf("%c%c -> tokl=%d tokc=0x%x\n",
479 l, a, tokl, tokc);
480#endif
481 if (a == ch) {
482 inp();
483 tok = TOK_DUMMY; /* dummy token for double tokens */
484 }
485 break;
486 }
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700487 opIndex++;
488 }
489 if (l == 0) {
490 tokl = 0;
491 tokc = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700492 }
493 }
494 }
495#if 0
496 {
497 int p;
498
499 printf("tok=0x%x ", tok);
500 if (tok >= TOK_IDENT) {
501 printf("'");
502 if (tok> TOK_DEFINE)
503 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
504 else
505 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
506 while (*(char *)p != TAG_TOK && *(char *)p)
507 printf("%c", *(char *)p++);
508 printf("'\n");
509 } else if (tok == TOK_NUM) {
510 printf("%d\n", tokc);
511 } else {
512 printf("'%c'\n", tok);
513 }
514 }
515#endif
516 }
517
518 void error(const char *fmt, ...) {
519 va_list ap;
520
521 va_start(ap, fmt);
522 fprintf(stderr, "%ld: ", ftell((FILE *) file));
523 vfprintf(stderr, fmt, ap);
524 fprintf(stderr, "\n");
525 va_end(ap);
526 exit(1);
527 }
528
529 void skip(int c) {
530 if (tok != c) {
531 error("'%c' expected", c);
532 }
533 next();
534 }
535
Jack Palevich21a15a22009-05-11 14:49:29 -0700536 /* l is one if '=' parsing wanted (quick hack) */
537 void unary(int l) {
538 int n, t, a, c;
539
540 n = 1; /* type of expression 0 = forward, 1 = value, other =
541 lvalue */
542 if (tok == '\"') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700543 pGen->li(glo);
Jack Palevich21a15a22009-05-11 14:49:29 -0700544 while (ch != '\"') {
545 getq();
546 *(char *) glo++ = ch;
547 inp();
548 }
549 *(char *) glo = 0;
550 glo = glo + 4 & -4; /* align heap */
551 inp();
552 next();
553 } else {
554 c = tokl;
555 a = tokc;
556 t = tok;
557 next();
558 if (t == TOK_NUM) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700559 pGen->li(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700560 } else if (c == 2) {
561 /* -, +, !, ~ */
562 unary(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700563 pGen->clearECX();
Jack Palevich21a15a22009-05-11 14:49:29 -0700564 if (t == '!')
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700565 pGen->gcmp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700566 else
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700567 pGen->genOp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700568 } else if (t == '(') {
569 expr();
570 skip(')');
571 } else if (t == '*') {
572 /* parse cast */
573 skip('(');
574 t = tok; /* get type */
575 next(); /* skip int/char/void */
576 next(); /* skip '*' or '(' */
577 if (tok == '*') {
578 /* function type */
579 skip('*');
580 skip(')');
581 skip('(');
582 skip(')');
583 t = 0;
584 }
585 skip(')');
586 unary(0);
587 if (tok == '=') {
588 next();
589 pGen->pushEAX();
590 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700591 pGen->popECX();
592 pGen->storeEAXToAddressECX(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -0700593 } else if (t) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700594 pGen->loadEAXIndirect(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -0700595 }
596 } else if (t == '&') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700597 pGen->leaEAX(*(int *) tok);
Jack Palevich21a15a22009-05-11 14:49:29 -0700598 next();
599 } else {
600 n = *(int *) t;
601 /* forward reference: try dlsym */
602 if (!n)
603 n = (int) dlsym(0, (char*) last_id);
604 if (tok == '=' & l) {
605 /* assignment */
606 next();
607 expr();
608 pGen->storeEAX(n);
609 } else if (tok != '(') {
610 /* variable */
611 pGen->loadEAX(n);
612 if (tokl == 11) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700613 pGen->postIncrementOrDecrement(n, tokc);
Jack Palevich21a15a22009-05-11 14:49:29 -0700614 next();
615 }
616 }
617 }
618 }
619
620 /* function call */
621 if (tok == '(') {
622 if (n == 1)
623 pGen->pushEAX();
624
625 /* push args and invert order */
626 a = pGen->allocStackSpaceForArgs();
627 next();
628 l = 0;
629 while (tok != ')') {
630 expr();
631 pGen->storeEAToArg(l);
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700632 if (tok == ',')
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700633 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700634 l = l + 4;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700635 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700636 *(int *) a = l;
637 next();
638 if (!n) {
639 /* forward reference */
640 t = t + 4;
641 *(int *) t = pGen->callForward(*(int *) t);
642 } else if (n == 1) {
643 pGen->callIndirect(l);
644 l = l + 4;
645 } else {
646 pGen->callRelative(n - codeBuf.getPC() - 5); /* call xxx */
647 }
648 if (l)
649 pGen->adjustStackAfterCall(l);
650 }
651 }
652
653 void sum(int l) {
654 int t, n, a;
655
656 if (l-- == 1)
657 unary(1);
658 else {
659 sum(l);
660 a = 0;
661 while (l == tokl) {
662 n = tok;
663 t = tokc;
664 next();
665
666 if (l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700667 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
Jack Palevich21a15a22009-05-11 14:49:29 -0700668 sum(l);
669 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700670 pGen->pushEAX();
Jack Palevich21a15a22009-05-11 14:49:29 -0700671 sum(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700672 pGen->popECX();
Jack Palevich21a15a22009-05-11 14:49:29 -0700673
674 if (l == 4 | l == 5) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700675 pGen->gcmp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -0700676 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700677 pGen->genOp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -0700678 }
679 }
680 }
681 /* && and || output code generation */
682 if (a && l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700683 a = pGen->gtst(t == OP_LOGICAL_OR, a);
684 pGen->li(t != OP_LOGICAL_OR);
685 pGen->gjmp(5); /* jmp $ + 5 */
686 pGen->gsym(a);
687 pGen->li(t == OP_LOGICAL_OR);
Jack Palevich21a15a22009-05-11 14:49:29 -0700688 }
689 }
690 }
691
692 void expr() {
693 sum(11);
694 }
695
696 int test_expr() {
697 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700698 return pGen->gtst(0, 0);
Jack Palevich21a15a22009-05-11 14:49:29 -0700699 }
700
701 void block(int l) {
702 int a, n, t;
703
704 if (tok == TOK_IF) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700705 next();
706 skip('(');
Jack Palevich21a15a22009-05-11 14:49:29 -0700707 a = test_expr();
708 skip(')');
709 block(l);
710 if (tok == TOK_ELSE) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700711 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700712 n = pGen->gjmp(0); /* jmp */
713 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700714 block(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700715 pGen->gsym(n); /* patch else jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -0700716 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700717 pGen->gsym(a); /* patch if test */
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700718 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700719 } else if (tok == TOK_WHILE | tok == TOK_FOR) {
720 t = tok;
721 next();
722 skip('(');
723 if (t == TOK_WHILE) {
724 n = codeBuf.getPC();
725 a = test_expr();
726 } else {
727 if (tok != ';')
728 expr();
729 skip(';');
730 n = codeBuf.getPC();
731 a = 0;
732 if (tok != ';')
733 a = test_expr();
734 skip(';');
735 if (tok != ')') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700736 t = pGen->gjmp(0);
Jack Palevich21a15a22009-05-11 14:49:29 -0700737 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700738 pGen->gjmp(n - codeBuf.getPC() - 5);
739 pGen->gsym(t);
Jack Palevich21a15a22009-05-11 14:49:29 -0700740 n = t + 4;
741 }
742 }
743 skip(')');
744 block((int) &a);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700745 pGen->gjmp(n - codeBuf.getPC() - 5); /* jmp */
746 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700747 } else if (tok == '{') {
748 next();
749 /* declarations */
750 decl(1);
751 while (tok != '}')
752 block(l);
753 next();
754 } else {
755 if (tok == TOK_RETURN) {
756 next();
757 if (tok != ';')
758 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700759 rsym = pGen->gjmp(rsym); /* jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -0700760 } else if (tok == TOK_BREAK) {
761 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700762 *(int *) l = pGen->gjmp(*(int *) l);
Jack Palevich21a15a22009-05-11 14:49:29 -0700763 } else if (tok != ';')
764 expr();
765 skip(';');
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700766 }
767 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700768
769 /* 'l' is true if local declarations */
770 void decl(int l) {
771 int a;
772
773 while (tok == TOK_INT | tok != -1 & !l) {
774 if (tok == TOK_INT) {
775 next();
776 while (tok != ';') {
777 if (l) {
778 loc = loc + 4;
779 *(int *) tok = -loc;
780 } else {
781 *(int *) tok = glo;
782 glo = glo + 4;
783 }
784 next();
785 if (tok == ',')
786 next();
787 }
788 skip(';');
789 } else {
790 /* patch forward references (XXX: do not work for function
791 pointers) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700792 pGen->gsym(*(int *) (tok + 4));
Jack Palevich21a15a22009-05-11 14:49:29 -0700793 /* put function address */
794 *(int *) tok = codeBuf.getPC();
795 next();
796 skip('(');
797 a = 8;
798 while (tok != ')') {
799 /* read param name and compute offset */
800 *(int *) tok = a;
801 a = a + 4;
802 next();
803 if (tok == ',')
804 next();
805 }
806 next(); /* skip ')' */
807 rsym = loc = 0;
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700808 a = pGen->functionEntry();
Jack Palevich21a15a22009-05-11 14:49:29 -0700809 block(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700810 pGen->gsym(rsym);
811 pGen->functionExit();
Jack Palevich21a15a22009-05-11 14:49:29 -0700812 *(int *) a = loc; /* save local variables */
813 }
814 }
815 }
816
817 void cleanup() {
818 if (sym_stk != 0) {
819 free((void*) sym_stk);
820 sym_stk = 0;
821 }
822 if (pGlobalBase != 0) {
823 free((void*) pGlobalBase);
824 pGlobalBase = 0;
825 }
826 if (pVarsBase != 0) {
827 free(pVarsBase);
828 pVarsBase = 0;
829 }
830 if (pGen) {
831 delete pGen;
832 pGen = 0;
833 }
834 }
835
836 void clear() {
837 tok = 0;
838 tokc = 0;
839 tokl = 0;
840 ch = 0;
841 vars = 0;
842 rsym = 0;
843 loc = 0;
844 glo = 0;
845 sym_stk = 0;
846 dstk = 0;
847 dptr = 0;
848 dch = 0;
849 last_id = 0;
850 file = 0;
851 pGlobalBase = 0;
852 pVarsBase = 0;
853 pGen = 0;
854 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700855
Jack Palevich77ae76e2009-05-10 19:59:24 -0700856public:
Jack Palevich21a15a22009-05-11 14:49:29 -0700857 compiler() {
858 clear();
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700859 }
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700860
Jack Palevich21a15a22009-05-11 14:49:29 -0700861 ~compiler() {
862 cleanup();
863 }
864
865 int compile(FILE* in) {
866 cleanup();
867 clear();
868 codeBuf.init(ALLOC_SIZE);
869 pGen = new X86CodeGenerator();
870 pGen->init(&codeBuf);
871 file = in;
872 sym_stk = (int) calloc(1, ALLOC_SIZE);
873 dstk = (int) strcpy((char*) sym_stk,
874 " int if else while break return for define main ")
875 + TOK_STR_SIZE;
876 pGlobalBase = calloc(1, ALLOC_SIZE);
877 glo = (int) pGlobalBase;
878 pVarsBase = calloc(1, ALLOC_SIZE);
879 vars = (int) pVarsBase;
880 inp();
881 next();
882 decl(0);
883 return 0;
884 }
885
886 int run(int argc, char** argv) {
887 typedef int (*mainPtr)(int argc, char** argv);
888 mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
889 if (!aMain) {
890 fprintf(stderr, "Could not find function \"main\".\n");
891 return -1;
892 }
893 return aMain(argc, argv);
894 }
895
896 int dump(FILE* out) {
897 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
898 return 0;
899 }
Jack Palevich77ae76e2009-05-10 19:59:24 -0700900
901};
902
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700903const char* compiler::operatorChars =
904 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
905
906const char compiler::operatorLevel[] =
907 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
908 5, 5, /* ==, != */
909 9, 10, /* &&, || */
910 6, 7, 8, /* & ^ | */
911 2, 2 /* ~ ! */
912 };
913
914const int compiler::X86CodeGenerator::operatorHelper[] = {
915 0x1, // ++
916 0xff, // --
917 0xc1af0f, // *
918 0xf9f79991, // /
919 0xf9f79991, // % (With manual assist to swap results)
920 0xc801, // +
921 0xd8f7c829, // -
922 0xe0d391, // <<
923 0xf8d391, // >>
924 0xe, // <=
925 0xd, // >=
926 0xc, // <
927 0xf, // >
928 0x4, // ==
929 0x5, // !=
930 0x0, // &&
931 0x1, // ||
932 0xc821, // &
933 0xc831, // ^
934 0xc809, // |
935 0xd0f7, // ~
936 0x4 // !
937};
938
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700939} // namespace acc
940
Jack Palevich77ae76e2009-05-10 19:59:24 -0700941int main(int argc, char** argv) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700942 bool doTest = false;
943 const char* inFile = NULL;
944 const char* outFile = NULL;
945 int i;
Jack Palevich21a15a22009-05-11 14:49:29 -0700946 for (i = 1; i < argc; i++) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700947 char* arg = argv[i];
948 if (arg[0] == '-') {
949 switch (arg[1]) {
950 case 'T':
951 if (i + 1 >= argc) {
952 fprintf(stderr, "Expected filename after -T\n");
953 return 2;
954 }
955 doTest = true;
956 outFile = argv[i + 1];
957 i += 1;
958 break;
959 default:
960 fprintf(stderr, "Unrecognized flag %s\n", arg);
961 return 3;
962 }
963 } else if (inFile == NULL) {
964 inFile = arg;
965 } else {
966 break;
967 }
968 }
969
970 FILE* in = stdin;
971 if (inFile) {
972 in = fopen(inFile, "r");
Jack Palevich21a15a22009-05-11 14:49:29 -0700973 if (!in) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700974 fprintf(stderr, "Could not open input file %s\n", inFile);
975 return 1;
976 }
977 }
978 acc::compiler compiler;
979 int compileResult = compiler.compile(in);
980 if (in != stdin) {
981 fclose(in);
982 }
983 if (compileResult) {
984 fprintf(stderr, "Compile failed: %d\n", compileResult);
985 return 6;
986 }
987 if (doTest) {
988 FILE* save = fopen(outFile, "w");
Jack Palevich21a15a22009-05-11 14:49:29 -0700989 if (!save) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700990 fprintf(stderr, "Could not open output file %s\n", outFile);
991 return 5;
992 }
993 compiler.dump(save);
994 fclose(save);
995 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700996 fprintf(stderr, "Executing compiled code:\n");
Jack Palevich21a15a22009-05-11 14:49:29 -0700997 int codeArgc = argc - i + 1;
998 char** codeArgv = argv + i - 1;
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700999 codeArgv[0] = (char*) (inFile ? inFile : "stdin");
1000 return compiler.run(codeArgc, codeArgv);
1001 }
1002
1003 return 0;
1004}