blob: 13a863f53cc7227edc321c55f6ce2fe0d5f23429 [file] [log] [blame]
/*
 Obfuscated Tiny C Compiler

 Copyright (C) 2001-2003 Fabrice Bellard

 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
 arising from the use of this software.

 Permission is granted to anyone to use this software for any purpose,
 including commercial applications, and to alter it and redistribute it
 freely, subject to the following restrictions:

 1. The origin of this software must not be misrepresented; you must not
 claim that you wrote the original software. If you use this software
 in a product, an acknowledgment in the product and its documentation
 *is* required.
 2. Altered source versions must be plainly marked as such, and must not be
 misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
Jack Paleviche27bf3e2009-05-10 14:09:03 -070022
Jack Palevich77ae76e2009-05-10 19:59:24 -070023#include <ctype.h>
24#include <dlfcn.h>
Jack Paleviche27bf3e2009-05-10 14:09:03 -070025#include <stdarg.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070026#include <stdio.h>
Jack Palevichf6b5a532009-05-10 19:16:42 -070027#include <stdlib.h>
28#include <string.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070029
Jack Palevichbbf8ab52009-05-11 11:54:30 -070030namespace acc {
31
Jack Palevich77ae76e2009-05-10 19:59:24 -070032class compiler {
Jack Palevich21a15a22009-05-11 14:49:29 -070033 class CodeBuf {
34 char* ind;
35 char* pProgramBase;
Jack Palevichf0cbc922009-05-08 16:35:13 -070036
Jack Palevich21a15a22009-05-11 14:49:29 -070037 void release() {
38 if (pProgramBase != 0) {
39 free(pProgramBase);
40 pProgramBase = 0;
Jack Palevichae54f1f2009-05-08 14:54:15 -070041 }
Jack Palevich21a15a22009-05-11 14:49:29 -070042 }
43
44 public:
45 CodeBuf() {
46 pProgramBase = 0;
47 ind = 0;
48 }
49
50 ~CodeBuf() {
51 release();
52 }
53
54 void init(int size) {
55 release();
56 pProgramBase = (char*) calloc(1, size);
57 ind = pProgramBase;
58 }
59
60 void o(int n) {
61 /* cannot use unsigned, so we must do a hack */
62 while (n && n != -1) {
63 *ind++ = n;
64 n = n >> 8;
65 }
66 }
67
68 /*
69 * Output a byte. Handles all values, 0..ff.
70 */
71 void ob(int n) {
72 *ind++ = n;
73 }
74
75 /* output a symbol and patch all calls to it */
76 void gsym(int t) {
77 int n;
78 while (t) {
79 n = *(int *) t; /* next value */
80 *(int *) t = ((int) ind) - t - 4;
81 t = n;
82 }
83 }
84
85 /* psym is used to put an instruction with a data field which is a
86 reference to a symbol. It is in fact the same as oad ! */
87 int psym(int n, int t) {
88 return oad(n, t);
89 }
90
91 /* instruction + address */
92 int oad(int n, int t) {
93 o(n);
94 *(int *) ind = t;
95 t = (int) ind;
96 ind = ind + 4;
97 return t;
98 }
99
100 inline void* getBase() {
101 return (void*) pProgramBase;
102 }
103
104 int getSize() {
105 return ind - pProgramBase;
106 }
107
108 int getPC() {
109 return (int) ind;
110 }
111 };
112
113 class CodeGenerator {
114 public:
115 CodeGenerator() {}
116 virtual ~CodeGenerator() {}
117
Jack Palevich22305132009-05-13 10:58:45 -0700118 virtual void init(CodeBuf* pCodeBuf) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700119 this->pCodeBuf = pCodeBuf;
120 }
121
Jack Palevich22305132009-05-13 10:58:45 -0700122 /* returns address to patch with local variable size
123 */
124 virtual int functionEntry() = 0;
125
126 virtual void functionExit() = 0;
127
128 /* load immediate value */
129 virtual int li(int t) = 0;
130
131 virtual int gjmp(int t) = 0;
132
133 /* l = 0: je, l == 1: jne */
134 virtual int gtst(bool l, int t) = 0;
135
136 virtual void gcmp(int op) = 0;
137
138 virtual int genOp(int op) = 0;
139
140 virtual void clearECX() = 0;
141
142 virtual void pushEAX() = 0;
143
144 virtual void popECX() = 0;
145
146 virtual void storeEAXToAddressECX(bool isInt) = 0;
147
148 virtual void loadEAXIndirect(bool isInt) = 0;
149
150 virtual void leaEAX(int ea) = 0;
151
152 virtual void storeEAX(int ea) = 0;
153
154 virtual void loadEAX(int ea) = 0;
155
156 virtual void postIncrementOrDecrement(int n, int op) = 0;
157
158 virtual int allocStackSpaceForArgs() = 0;
159
160 virtual void storeEAToArg(int l) = 0;
161
162 virtual int callForward(int symbol) = 0;
163
164 virtual void callRelative(int t) = 0;
165
166 virtual void callIndirect(int l) = 0;
167
168 virtual void adjustStackAfterCall(int l) = 0;
169
Jack Palevich21a15a22009-05-11 14:49:29 -0700170 /* output a symbol and patch all calls to it */
Jack Palevich22305132009-05-13 10:58:45 -0700171 virtual void gsym(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700172 pCodeBuf->gsym(t);
173 }
174
175 protected:
176 void o(int n) {
177 pCodeBuf->o(n);
178 }
179
180 /*
181 * Output a byte. Handles all values, 0..ff.
182 */
183 void ob(int n) {
184 pCodeBuf->ob(n);
185 }
186
187 /* psym is used to put an instruction with a data field which is a
188 reference to a symbol. It is in fact the same as oad ! */
189 int psym(int n, int t) {
190 return oad(n, t);
191 }
192
193 /* instruction + address */
194 int oad(int n, int t) {
195 return pCodeBuf->oad(n,t);
196 }
197
198 int getPC() {
199 return pCodeBuf->getPC();
200 }
201
202 private:
203 CodeBuf* pCodeBuf;
204 };
205
Jack Palevich22305132009-05-13 10:58:45 -0700206 class ARMCodeGenerator : public CodeGenerator {
207 public:
208 ARMCodeGenerator() {}
209 virtual ~ARMCodeGenerator() {}
210
211 /* returns address to patch with local variable size
212 */
213 virtual int functionEntry() {
214 fprintf(stderr, "functionEntry();\n");
215 o(0xe58955); /* push %ebp, mov %esp, %ebp */
216 return oad(0xec81, 0); /* sub $xxx, %esp */
217 }
218
219 virtual void functionExit() {
220 fprintf(stderr, "functionExit();\n");
221 o(0xc3c9); /* leave, ret */
222 }
223
224 /* load immediate value */
225 virtual int li(int t) {
226 fprintf(stderr, "li(%d);\n", t);
227 oad(0xb8, t); /* mov $xx, %eax */
228 }
229
230 virtual int gjmp(int t) {
231 fprintf(stderr, "gjmp(%d);\n", t);
232 return psym(0xe9, t);
233 }
234
235 /* l = 0: je, l == 1: jne */
236 virtual int gtst(bool l, int t) {
237 fprintf(stderr, "gtst(%d, %d);\n", l, t);
238 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
239 return psym(0x84 + l, t);
240 }
241
242 virtual void gcmp(int op) {
243 fprintf(stderr, "gcmp(%d);\n", op);
244#if 0
245 int t = decodeOp(op);
246 o(0xc139); /* cmp %eax,%ecx */
247 li(0);
248 o(0x0f); /* setxx %al */
249 o(t + 0x90);
250 o(0xc0);
251#endif
252 }
253
254 virtual int genOp(int op) {
255 fprintf(stderr, "genOp(%d);\n", op);
256#if 0
257 o(decodeOp(op));
258 if (op == OP_MOD)
259 o(0x92); /* xchg %edx, %eax */
260#endif
261 }
262
263 virtual void clearECX() {
264 fprintf(stderr, "clearECX();\n");
265 oad(0xb9, 0); /* movl $0, %ecx */
266 }
267
268 virtual void pushEAX() {
269 fprintf(stderr, "pushEAX();\n");
270 o(0x50); /* push %eax */
271 }
272
273 virtual void popECX() {
274 fprintf(stderr, "popECX();\n");
275 o(0x59); /* pop %ecx */
276 }
277
278 virtual void storeEAXToAddressECX(bool isInt) {
279 fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt);
280 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
281 }
282
283 virtual void loadEAXIndirect(bool isInt) {
284 fprintf(stderr, "loadEAXIndirect(%d);\n", isInt);
285 if (isInt)
286 o(0x8b); /* mov (%eax), %eax */
287 else
288 o(0xbe0f); /* movsbl (%eax), %eax */
289 ob(0); /* add zero in code */
290 }
291
292 virtual void leaEAX(int ea) {
293 fprintf(stderr, "leaEAX(%d);\n", ea);
294#if 0
295 gmov(10, ea); /* leal EA, %eax */
296#endif
297 }
298
299 virtual void storeEAX(int ea) {
300 fprintf(stderr, "storeEAX(%d);\n", ea);
301#if 0
302 gmov(6, ea); /* mov %eax, EA */
303#endif
304 }
305
306 virtual void loadEAX(int ea) {
307 fprintf(stderr, "loadEAX(%d);\n", ea);
308#if 0
309 gmov(8, ea); /* mov EA, %eax */
310#endif
311 }
312
313 virtual void postIncrementOrDecrement(int n, int op) {
314 fprintf(stderr, "postIncrementOrDecrement(%d, %d);\n", n, op);
315 /* Implement post-increment or post decrement.
316 */
317#if 0
318 gmov(0, n); /* 83 ADD */
319 o(decodeOp(op));
320#endif
321 }
322
323 virtual int allocStackSpaceForArgs() {
324 fprintf(stderr, "allocStackSpaceForArgs();\n");
325 return oad(0xec81, 0); /* sub $xxx, %esp */
326 }
327
328 virtual void storeEAToArg(int l) {
329 fprintf(stderr, "storeEAToArg(%d);\n", l);
330 oad(0x248489, l); /* movl %eax, xxx(%esp) */
331 }
332
333 virtual int callForward(int symbol) {
334 fprintf(stderr, "callForward(%d);\n", symbol);
335 return psym(0xe8, symbol); /* call xxx */
336 }
337
338 virtual void callRelative(int t) {
339 fprintf(stderr, "callRelative(%d);\n", t);
340 psym(0xe8, t); /* call xxx */
341 }
342
343 virtual void callIndirect(int l) {
344 fprintf(stderr, "callIndirect(%d);\n", l);
345 oad(0x2494ff, l); /* call *xxx(%esp) */
346 }
347
348 virtual void adjustStackAfterCall(int l) {
349 fprintf(stderr, "adjustStackAfterCall(%d);\n", l);
350 oad(0xc481, l); /* add $xxx, %esp */
351 }
352
353 private:
354
355 };
356
Jack Palevich21a15a22009-05-11 14:49:29 -0700357 class X86CodeGenerator : public CodeGenerator {
358 public:
359 X86CodeGenerator() {}
360 virtual ~X86CodeGenerator() {}
361
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700362 /* returns address to patch with local variable size
363 */
Jack Palevich22305132009-05-13 10:58:45 -0700364 virtual int functionEntry() {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700365 o(0xe58955); /* push %ebp, mov %esp, %ebp */
366 return oad(0xec81, 0); /* sub $xxx, %esp */
367 }
368
Jack Palevich22305132009-05-13 10:58:45 -0700369 virtual void functionExit() {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700370 o(0xc3c9); /* leave, ret */
371 }
372
Jack Palevich21a15a22009-05-11 14:49:29 -0700373 /* load immediate value */
Jack Palevich22305132009-05-13 10:58:45 -0700374 virtual int li(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700375 oad(0xb8, t); /* mov $xx, %eax */
376 }
377
Jack Palevich22305132009-05-13 10:58:45 -0700378 virtual int gjmp(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700379 return psym(0xe9, t);
380 }
381
382 /* l = 0: je, l == 1: jne */
Jack Palevich22305132009-05-13 10:58:45 -0700383 virtual int gtst(bool l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700384 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
385 return psym(0x84 + l, t);
386 }
387
Jack Palevich22305132009-05-13 10:58:45 -0700388 virtual void gcmp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700389 int t = decodeOp(op);
Jack Palevich21a15a22009-05-11 14:49:29 -0700390 o(0xc139); /* cmp %eax,%ecx */
391 li(0);
392 o(0x0f); /* setxx %al */
393 o(t + 0x90);
394 o(0xc0);
395 }
396
Jack Palevich22305132009-05-13 10:58:45 -0700397 virtual int genOp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700398 o(decodeOp(op));
399 if (op == OP_MOD)
400 o(0x92); /* xchg %edx, %eax */
401 }
402
Jack Palevich22305132009-05-13 10:58:45 -0700403 virtual void clearECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700404 oad(0xb9, 0); /* movl $0, %ecx */
405 }
406
Jack Palevich22305132009-05-13 10:58:45 -0700407 virtual void pushEAX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700408 o(0x50); /* push %eax */
409 }
410
Jack Palevich22305132009-05-13 10:58:45 -0700411 virtual void popECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700412 o(0x59); /* pop %ecx */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700413 }
414
Jack Palevich22305132009-05-13 10:58:45 -0700415 virtual void storeEAXToAddressECX(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700416 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
417 }
418
Jack Palevich22305132009-05-13 10:58:45 -0700419 virtual void loadEAXIndirect(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700420 if (isInt)
421 o(0x8b); /* mov (%eax), %eax */
422 else
423 o(0xbe0f); /* movsbl (%eax), %eax */
424 ob(0); /* add zero in code */
425 }
426
Jack Palevich22305132009-05-13 10:58:45 -0700427 virtual void leaEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700428 gmov(10, ea); /* leal EA, %eax */
429 }
430
Jack Palevich22305132009-05-13 10:58:45 -0700431 virtual void storeEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700432 gmov(6, ea); /* mov %eax, EA */
433 }
434
Jack Palevich22305132009-05-13 10:58:45 -0700435 virtual void loadEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700436 gmov(8, ea); /* mov EA, %eax */
437 }
438
Jack Palevich22305132009-05-13 10:58:45 -0700439 virtual void postIncrementOrDecrement(int n, int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700440 /* Implement post-increment or post decrement.
Jack Palevich21a15a22009-05-11 14:49:29 -0700441 */
442 gmov(0, n); /* 83 ADD */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700443 o(decodeOp(op));
Jack Palevich21a15a22009-05-11 14:49:29 -0700444 }
445
Jack Palevich22305132009-05-13 10:58:45 -0700446 virtual int allocStackSpaceForArgs() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700447 return oad(0xec81, 0); /* sub $xxx, %esp */
448 }
449
Jack Palevich22305132009-05-13 10:58:45 -0700450 virtual void storeEAToArg(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700451 oad(0x248489, l); /* movl %eax, xxx(%esp) */
452 }
453
Jack Palevich22305132009-05-13 10:58:45 -0700454 virtual int callForward(int symbol) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700455 return psym(0xe8, symbol); /* call xxx */
456 }
457
Jack Palevich22305132009-05-13 10:58:45 -0700458 virtual void callRelative(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700459 psym(0xe8, t); /* call xxx */
460 }
461
Jack Palevich22305132009-05-13 10:58:45 -0700462 virtual void callIndirect(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700463 oad(0x2494ff, l); /* call *xxx(%esp) */
464 }
465
Jack Palevich22305132009-05-13 10:58:45 -0700466 virtual void adjustStackAfterCall(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700467 oad(0xc481, l); /* add $xxx, %esp */
468 }
469
Jack Palevich21a15a22009-05-11 14:49:29 -0700470 private:
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700471 static const int operatorHelper[];
472
473 int decodeOp(int op) {
474 if (op < 0 || op > OP_COUNT) {
475 fprintf(stderr, "Out-of-range operator: %d\n", op);
476 exit(1);
477 }
478 return operatorHelper[op];
479 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700480
481 int gmov(int l, int t) {
482 o(l + 0x83);
483 oad((t < LOCAL) << 7 | 5, t);
484 }
485 };
486
487 /* vars: value of variables
488 loc : local variable index
489 glo : global variable index
490 ind : output code ptr
491 rsym: return symbol
492 prog: output code
493 dstk: define stack
494 dptr, dch: macro state
495 */
496 int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
497 dptr, dch, last_id;
498 void* pSymbolBase;
499 void* pGlobalBase;
500 void* pVarsBase;
501 FILE* file;
502
503 CodeBuf codeBuf;
Jack Palevich22305132009-05-13 10:58:45 -0700504 CodeGenerator* pGen;
Jack Palevich21a15a22009-05-11 14:49:29 -0700505
506 static const int ALLOC_SIZE = 99999;
507
508 /* depends on the init string */
509 static const int TOK_STR_SIZE = 48;
510 static const int TOK_IDENT = 0x100;
511 static const int TOK_INT = 0x100;
512 static const int TOK_IF = 0x120;
513 static const int TOK_ELSE = 0x138;
514 static const int TOK_WHILE = 0x160;
515 static const int TOK_BREAK = 0x190;
516 static const int TOK_RETURN = 0x1c0;
517 static const int TOK_FOR = 0x1f8;
518 static const int TOK_DEFINE = 0x218;
519 static const int TOK_MAIN = 0x250;
520
521 static const int TOK_DUMMY = 1;
522 static const int TOK_NUM = 2;
523
524 static const int LOCAL = 0x200;
525
526 static const int SYM_FORWARD = 0;
527 static const int SYM_DEFINE = 1;
528
529 /* tokens in string heap */
530 static const int TAG_TOK = ' ';
531 static const int TAG_MACRO = 2;
532
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700533 static const int OP_INCREMENT = 0;
534 static const int OP_DECREMENT = 1;
535 static const int OP_MUL = 2;
536 static const int OP_DIV = 3;
537 static const int OP_MOD = 4;
538 static const int OP_PLUS = 5;
539 static const int OP_MINUS = 6;
540 static const int OP_SHIFT_LEFT = 7;
541 static const int OP_SHIFT_RIGHT = 8;
542 static const int OP_LESS_EQUAL = 9;
543 static const int OP_GREATER_EQUAL = 10;
544 static const int OP_LESS = 11;
545 static const int OP_GREATER = 12;
546 static const int OP_EQUALS = 13;
547 static const int OP_NOT_EQUALS = 14;
548 static const int OP_LOGICAL_AND = 15;
549 static const int OP_LOGICAL_OR = 16;
550 static const int OP_BIT_AND = 17;
551 static const int OP_BIT_XOR = 18;
552 static const int OP_BIT_OR = 19;
553 static const int OP_BIT_NOT = 20;
554 static const int OP_LOGICAL_NOT = 21;
555 static const int OP_COUNT = 22;
556
557 /* Operators are searched from front, the two-character operators appear
558 * before the single-character operators with the same first character.
559 * @ is used to pad out single-character operators.
560 */
561 static const char* operatorChars;
562 static const char operatorLevel[];
563
Jack Palevich21a15a22009-05-11 14:49:29 -0700564 void pdef(int t) {
565 *(char *) dstk++ = t;
566 }
567
568 void inp() {
569 if (dptr) {
570 ch = *(char *) dptr++;
571 if (ch == TAG_MACRO) {
572 dptr = 0;
573 ch = dch;
574 }
575 } else
576 ch = fgetc(file);
577 /* printf("ch=%c 0x%x\n", ch, ch); */
578 }
579
580 int isid() {
581 return isalnum(ch) | ch == '_';
582 }
583
584 /* read a character constant */
585 void getq() {
586 if (ch == '\\') {
587 inp();
588 if (ch == 'n')
589 ch = '\n';
590 }
591 }
592
593 void next() {
594 int l, a;
595
596 while (isspace(ch) | ch == '#') {
597 if (ch == '#') {
598 inp();
599 next();
600 if (tok == TOK_DEFINE) {
601 next();
602 pdef(TAG_TOK); /* fill last ident tag */
603 *(int *) tok = SYM_DEFINE;
604 *(int *) (tok + 4) = dstk; /* define stack */
605 }
606 /* well we always save the values ! */
607 while (ch != '\n') {
608 pdef(ch);
609 inp();
610 }
611 pdef(ch);
612 pdef(TAG_MACRO);
613 }
614 inp();
615 }
616 tokl = 0;
617 tok = ch;
618 /* encode identifiers & numbers */
619 if (isid()) {
620 pdef(TAG_TOK);
621 last_id = dstk;
622 while (isid()) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700623 pdef(ch);
624 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700625 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700626 if (isdigit(tok)) {
627 tokc = strtol((char*) last_id, 0, 0);
628 tok = TOK_NUM;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700629 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700630 *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
631 suppose data is initialized to zero by calloc) */
632 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
633 - sym_stk);
634 *(char *) dstk = 0; /* mark real end of ident for dlsym() */
635 tok = tok * 8 + TOK_IDENT;
636 if (tok > TOK_DEFINE) {
637 tok = vars + tok;
638 /* printf("tok=%s %x\n", last_id, tok); */
639 /* define handling */
640 if (*(int *) tok == SYM_DEFINE) {
641 dptr = *(int *) (tok + 4);
642 dch = ch;
643 inp();
644 next();
645 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700646 }
647 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700648 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700649 inp();
650 if (tok == '\'') {
651 tok = TOK_NUM;
652 getq();
653 tokc = ch;
654 inp();
655 inp();
656 } else if (tok == '/' & ch == '*') {
657 inp();
658 while (ch) {
659 while (ch != '*')
660 inp();
661 inp();
662 if (ch == '/')
663 ch = 0;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700664 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700665 inp();
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700666 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700667 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700668 const char* t = operatorChars;
669 int opIndex = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700670 while (l = *t++) {
671 a = *t++;
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700672 tokl = operatorLevel[opIndex];
673 tokc = opIndex;
Jack Palevich21a15a22009-05-11 14:49:29 -0700674 if (l == tok & (a == ch | a == '@')) {
675#if 0
676 printf("%c%c -> tokl=%d tokc=0x%x\n",
677 l, a, tokl, tokc);
678#endif
679 if (a == ch) {
680 inp();
681 tok = TOK_DUMMY; /* dummy token for double tokens */
682 }
683 break;
684 }
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700685 opIndex++;
686 }
687 if (l == 0) {
688 tokl = 0;
689 tokc = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700690 }
691 }
692 }
693#if 0
694 {
695 int p;
696
697 printf("tok=0x%x ", tok);
698 if (tok >= TOK_IDENT) {
699 printf("'");
700 if (tok> TOK_DEFINE)
701 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
702 else
703 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
704 while (*(char *)p != TAG_TOK && *(char *)p)
705 printf("%c", *(char *)p++);
706 printf("'\n");
707 } else if (tok == TOK_NUM) {
708 printf("%d\n", tokc);
709 } else {
710 printf("'%c'\n", tok);
711 }
712 }
713#endif
714 }
715
716 void error(const char *fmt, ...) {
717 va_list ap;
718
719 va_start(ap, fmt);
720 fprintf(stderr, "%ld: ", ftell((FILE *) file));
721 vfprintf(stderr, fmt, ap);
722 fprintf(stderr, "\n");
723 va_end(ap);
724 exit(1);
725 }
726
727 void skip(int c) {
728 if (tok != c) {
729 error("'%c' expected", c);
730 }
731 next();
732 }
733
Jack Palevich21a15a22009-05-11 14:49:29 -0700734 /* l is one if '=' parsing wanted (quick hack) */
735 void unary(int l) {
736 int n, t, a, c;
737
738 n = 1; /* type of expression 0 = forward, 1 = value, other =
739 lvalue */
740 if (tok == '\"') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700741 pGen->li(glo);
Jack Palevich21a15a22009-05-11 14:49:29 -0700742 while (ch != '\"') {
743 getq();
744 *(char *) glo++ = ch;
745 inp();
746 }
747 *(char *) glo = 0;
748 glo = glo + 4 & -4; /* align heap */
749 inp();
750 next();
751 } else {
752 c = tokl;
753 a = tokc;
754 t = tok;
755 next();
756 if (t == TOK_NUM) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700757 pGen->li(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700758 } else if (c == 2) {
759 /* -, +, !, ~ */
760 unary(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700761 pGen->clearECX();
Jack Palevich21a15a22009-05-11 14:49:29 -0700762 if (t == '!')
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700763 pGen->gcmp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700764 else
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700765 pGen->genOp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700766 } else if (t == '(') {
767 expr();
768 skip(')');
769 } else if (t == '*') {
770 /* parse cast */
771 skip('(');
772 t = tok; /* get type */
773 next(); /* skip int/char/void */
774 next(); /* skip '*' or '(' */
775 if (tok == '*') {
776 /* function type */
777 skip('*');
778 skip(')');
779 skip('(');
780 skip(')');
781 t = 0;
782 }
783 skip(')');
784 unary(0);
785 if (tok == '=') {
786 next();
787 pGen->pushEAX();
788 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700789 pGen->popECX();
790 pGen->storeEAXToAddressECX(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -0700791 } else if (t) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700792 pGen->loadEAXIndirect(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -0700793 }
794 } else if (t == '&') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700795 pGen->leaEAX(*(int *) tok);
Jack Palevich21a15a22009-05-11 14:49:29 -0700796 next();
797 } else {
798 n = *(int *) t;
799 /* forward reference: try dlsym */
800 if (!n)
801 n = (int) dlsym(0, (char*) last_id);
802 if (tok == '=' & l) {
803 /* assignment */
804 next();
805 expr();
806 pGen->storeEAX(n);
807 } else if (tok != '(') {
808 /* variable */
809 pGen->loadEAX(n);
810 if (tokl == 11) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700811 pGen->postIncrementOrDecrement(n, tokc);
Jack Palevich21a15a22009-05-11 14:49:29 -0700812 next();
813 }
814 }
815 }
816 }
817
818 /* function call */
819 if (tok == '(') {
820 if (n == 1)
821 pGen->pushEAX();
822
823 /* push args and invert order */
824 a = pGen->allocStackSpaceForArgs();
825 next();
826 l = 0;
827 while (tok != ')') {
828 expr();
829 pGen->storeEAToArg(l);
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700830 if (tok == ',')
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700831 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700832 l = l + 4;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700833 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700834 *(int *) a = l;
835 next();
836 if (!n) {
837 /* forward reference */
838 t = t + 4;
839 *(int *) t = pGen->callForward(*(int *) t);
840 } else if (n == 1) {
841 pGen->callIndirect(l);
842 l = l + 4;
843 } else {
844 pGen->callRelative(n - codeBuf.getPC() - 5); /* call xxx */
845 }
846 if (l)
847 pGen->adjustStackAfterCall(l);
848 }
849 }
850
851 void sum(int l) {
852 int t, n, a;
853
854 if (l-- == 1)
855 unary(1);
856 else {
857 sum(l);
858 a = 0;
859 while (l == tokl) {
860 n = tok;
861 t = tokc;
862 next();
863
864 if (l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700865 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
Jack Palevich21a15a22009-05-11 14:49:29 -0700866 sum(l);
867 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700868 pGen->pushEAX();
Jack Palevich21a15a22009-05-11 14:49:29 -0700869 sum(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700870 pGen->popECX();
Jack Palevich21a15a22009-05-11 14:49:29 -0700871
872 if (l == 4 | l == 5) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700873 pGen->gcmp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -0700874 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700875 pGen->genOp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -0700876 }
877 }
878 }
879 /* && and || output code generation */
880 if (a && l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700881 a = pGen->gtst(t == OP_LOGICAL_OR, a);
882 pGen->li(t != OP_LOGICAL_OR);
883 pGen->gjmp(5); /* jmp $ + 5 */
884 pGen->gsym(a);
885 pGen->li(t == OP_LOGICAL_OR);
Jack Palevich21a15a22009-05-11 14:49:29 -0700886 }
887 }
888 }
889
890 void expr() {
891 sum(11);
892 }
893
894 int test_expr() {
895 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700896 return pGen->gtst(0, 0);
Jack Palevich21a15a22009-05-11 14:49:29 -0700897 }
898
899 void block(int l) {
900 int a, n, t;
901
902 if (tok == TOK_IF) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700903 next();
904 skip('(');
Jack Palevich21a15a22009-05-11 14:49:29 -0700905 a = test_expr();
906 skip(')');
907 block(l);
908 if (tok == TOK_ELSE) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700909 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700910 n = pGen->gjmp(0); /* jmp */
911 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700912 block(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700913 pGen->gsym(n); /* patch else jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -0700914 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700915 pGen->gsym(a); /* patch if test */
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700916 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700917 } else if (tok == TOK_WHILE | tok == TOK_FOR) {
918 t = tok;
919 next();
920 skip('(');
921 if (t == TOK_WHILE) {
922 n = codeBuf.getPC();
923 a = test_expr();
924 } else {
925 if (tok != ';')
926 expr();
927 skip(';');
928 n = codeBuf.getPC();
929 a = 0;
930 if (tok != ';')
931 a = test_expr();
932 skip(';');
933 if (tok != ')') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700934 t = pGen->gjmp(0);
Jack Palevich21a15a22009-05-11 14:49:29 -0700935 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700936 pGen->gjmp(n - codeBuf.getPC() - 5);
937 pGen->gsym(t);
Jack Palevich21a15a22009-05-11 14:49:29 -0700938 n = t + 4;
939 }
940 }
941 skip(')');
942 block((int) &a);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700943 pGen->gjmp(n - codeBuf.getPC() - 5); /* jmp */
944 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700945 } else if (tok == '{') {
946 next();
947 /* declarations */
948 decl(1);
949 while (tok != '}')
950 block(l);
951 next();
952 } else {
953 if (tok == TOK_RETURN) {
954 next();
955 if (tok != ';')
956 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700957 rsym = pGen->gjmp(rsym); /* jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -0700958 } else if (tok == TOK_BREAK) {
959 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700960 *(int *) l = pGen->gjmp(*(int *) l);
Jack Palevich21a15a22009-05-11 14:49:29 -0700961 } else if (tok != ';')
962 expr();
963 skip(';');
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700964 }
965 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700966
967 /* 'l' is true if local declarations */
968 void decl(int l) {
969 int a;
970
971 while (tok == TOK_INT | tok != -1 & !l) {
972 if (tok == TOK_INT) {
973 next();
974 while (tok != ';') {
975 if (l) {
976 loc = loc + 4;
977 *(int *) tok = -loc;
978 } else {
979 *(int *) tok = glo;
980 glo = glo + 4;
981 }
982 next();
983 if (tok == ',')
984 next();
985 }
986 skip(';');
987 } else {
988 /* patch forward references (XXX: do not work for function
989 pointers) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700990 pGen->gsym(*(int *) (tok + 4));
Jack Palevich21a15a22009-05-11 14:49:29 -0700991 /* put function address */
992 *(int *) tok = codeBuf.getPC();
993 next();
994 skip('(');
995 a = 8;
996 while (tok != ')') {
997 /* read param name and compute offset */
998 *(int *) tok = a;
999 a = a + 4;
1000 next();
1001 if (tok == ',')
1002 next();
1003 }
1004 next(); /* skip ')' */
1005 rsym = loc = 0;
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001006 a = pGen->functionEntry();
Jack Palevich21a15a22009-05-11 14:49:29 -07001007 block(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001008 pGen->gsym(rsym);
1009 pGen->functionExit();
Jack Palevich21a15a22009-05-11 14:49:29 -07001010 *(int *) a = loc; /* save local variables */
1011 }
1012 }
1013 }
1014
1015 void cleanup() {
1016 if (sym_stk != 0) {
1017 free((void*) sym_stk);
1018 sym_stk = 0;
1019 }
1020 if (pGlobalBase != 0) {
1021 free((void*) pGlobalBase);
1022 pGlobalBase = 0;
1023 }
1024 if (pVarsBase != 0) {
1025 free(pVarsBase);
1026 pVarsBase = 0;
1027 }
1028 if (pGen) {
1029 delete pGen;
1030 pGen = 0;
1031 }
1032 }
1033
1034 void clear() {
1035 tok = 0;
1036 tokc = 0;
1037 tokl = 0;
1038 ch = 0;
1039 vars = 0;
1040 rsym = 0;
1041 loc = 0;
1042 glo = 0;
1043 sym_stk = 0;
1044 dstk = 0;
1045 dptr = 0;
1046 dch = 0;
1047 last_id = 0;
1048 file = 0;
1049 pGlobalBase = 0;
1050 pVarsBase = 0;
1051 pGen = 0;
1052 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001053
Jack Palevich22305132009-05-13 10:58:45 -07001054 void setArchitecture(const char* architecture) {
1055 delete pGen;
1056 pGen = 0;
1057
1058 if (architecture != NULL) {
1059 if (strcmp(architecture, "arm") == 0) {
1060 pGen = new ARMCodeGenerator();
1061 } else if (strcmp(architecture, "x86") == 0) {
1062 pGen = new X86CodeGenerator();
1063 } else {
1064 fprintf(stderr, "Unknown architecture %s", architecture);
1065 }
1066 }
1067
1068 if (pGen == NULL) {
1069 pGen = new ARMCodeGenerator();
1070 }
1071 }
1072
Jack Palevich77ae76e2009-05-10 19:59:24 -07001073public:
/* Options for compile().  architecture names the target code generator;
 * it defaults to null. */
struct args {
    args() : architecture(0) {
    }
    const char* architecture;
};
1080
Jack Palevich21a15a22009-05-11 14:49:29 -07001081 compiler() {
1082 clear();
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001083 }
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001084
Jack Palevich21a15a22009-05-11 14:49:29 -07001085 ~compiler() {
1086 cleanup();
1087 }
1088
Jack Palevich22305132009-05-13 10:58:45 -07001089 int compile(FILE* in, args& args) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001090 cleanup();
1091 clear();
1092 codeBuf.init(ALLOC_SIZE);
Jack Palevich22305132009-05-13 10:58:45 -07001093 setArchitecture(args.architecture);
Jack Palevich21a15a22009-05-11 14:49:29 -07001094 pGen->init(&codeBuf);
1095 file = in;
1096 sym_stk = (int) calloc(1, ALLOC_SIZE);
1097 dstk = (int) strcpy((char*) sym_stk,
1098 " int if else while break return for define main ")
1099 + TOK_STR_SIZE;
1100 pGlobalBase = calloc(1, ALLOC_SIZE);
1101 glo = (int) pGlobalBase;
1102 pVarsBase = calloc(1, ALLOC_SIZE);
1103 vars = (int) pVarsBase;
1104 inp();
1105 next();
1106 decl(0);
1107 return 0;
1108 }
1109
1110 int run(int argc, char** argv) {
1111 typedef int (*mainPtr)(int argc, char** argv);
1112 mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
1113 if (!aMain) {
1114 fprintf(stderr, "Could not find function \"main\".\n");
1115 return -1;
1116 }
1117 return aMain(argc, argv);
1118 }
1119
1120 int dump(FILE* out) {
1121 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
1122 return 0;
1123 }
Jack Palevich77ae76e2009-05-10 19:59:24 -07001124
1125};
1126
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001127const char* compiler::operatorChars =
1128 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
1129
1130const char compiler::operatorLevel[] =
1131 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
1132 5, 5, /* ==, != */
1133 9, 10, /* &&, || */
1134 6, 7, 8, /* & ^ | */
1135 2, 2 /* ~ ! */
1136 };
1137
1138const int compiler::X86CodeGenerator::operatorHelper[] = {
1139 0x1, // ++
1140 0xff, // --
1141 0xc1af0f, // *
1142 0xf9f79991, // /
1143 0xf9f79991, // % (With manual assist to swap results)
1144 0xc801, // +
1145 0xd8f7c829, // -
1146 0xe0d391, // <<
1147 0xf8d391, // >>
1148 0xe, // <=
1149 0xd, // >=
1150 0xc, // <
1151 0xf, // >
1152 0x4, // ==
1153 0x5, // !=
1154 0x0, // &&
1155 0x1, // ||
1156 0xc821, // &
1157 0xc831, // ^
1158 0xc809, // |
1159 0xd0f7, // ~
1160 0x4 // !
1161};
1162
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001163} // namespace acc
1164
Jack Palevich77ae76e2009-05-10 19:59:24 -07001165int main(int argc, char** argv) {
Jack Palevich22305132009-05-13 10:58:45 -07001166 bool doDump = false;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001167 const char* inFile = NULL;
1168 const char* outFile = NULL;
Jack Palevich22305132009-05-13 10:58:45 -07001169 const char* architecture = "arm";
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001170 int i;
Jack Palevich21a15a22009-05-11 14:49:29 -07001171 for (i = 1; i < argc; i++) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001172 char* arg = argv[i];
1173 if (arg[0] == '-') {
1174 switch (arg[1]) {
Jack Palevich22305132009-05-13 10:58:45 -07001175 case 'a':
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001176 if (i + 1 >= argc) {
Jack Palevich22305132009-05-13 10:58:45 -07001177 fprintf(stderr, "Expected architecture after -a\n");
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001178 return 2;
1179 }
Jack Palevich22305132009-05-13 10:58:45 -07001180 architecture = argv[i+1];
1181 i += 1;
1182 break;
1183 case 'd':
1184 if (i + 1 >= argc) {
1185 fprintf(stderr, "Expected filename after -d\n");
1186 return 2;
1187 }
1188 doDump = true;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001189 outFile = argv[i + 1];
1190 i += 1;
1191 break;
1192 default:
1193 fprintf(stderr, "Unrecognized flag %s\n", arg);
1194 return 3;
1195 }
1196 } else if (inFile == NULL) {
1197 inFile = arg;
1198 } else {
1199 break;
1200 }
1201 }
1202
1203 FILE* in = stdin;
1204 if (inFile) {
1205 in = fopen(inFile, "r");
Jack Palevich21a15a22009-05-11 14:49:29 -07001206 if (!in) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001207 fprintf(stderr, "Could not open input file %s\n", inFile);
1208 return 1;
1209 }
1210 }
1211 acc::compiler compiler;
Jack Palevich22305132009-05-13 10:58:45 -07001212 acc::compiler::args args;
1213 args.architecture = architecture;
1214 int compileResult = compiler.compile(in, args);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001215 if (in != stdin) {
1216 fclose(in);
1217 }
1218 if (compileResult) {
1219 fprintf(stderr, "Compile failed: %d\n", compileResult);
1220 return 6;
1221 }
Jack Palevich22305132009-05-13 10:58:45 -07001222 if (doDump) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001223 FILE* save = fopen(outFile, "w");
Jack Palevich21a15a22009-05-11 14:49:29 -07001224 if (!save) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001225 fprintf(stderr, "Could not open output file %s\n", outFile);
1226 return 5;
1227 }
1228 compiler.dump(save);
1229 fclose(save);
1230 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001231 fprintf(stderr, "Executing compiled code:\n");
Jack Palevich21a15a22009-05-11 14:49:29 -07001232 int codeArgc = argc - i + 1;
1233 char** codeArgv = argv + i - 1;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001234 codeArgv[0] = (char*) (inFile ? inFile : "stdin");
Jack Palevich22305132009-05-13 10:58:45 -07001235 int result = compiler.run(codeArgc, codeArgv);
1236 fprintf(stderr, "result: %d\n", result);
1237 return result;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001238 }
1239
1240 return 0;
1241}