blob: 04994e212a3cccb936216a8e2527c9b1da984071 [file] [log] [blame]
/*
 Obfuscated Tiny C Compiler

 Copyright (C) 2001-2003 Fabrice Bellard

 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
 arising from the use of this software.

 Permission is granted to anyone to use this software for any purpose,
 including commercial applications, and to alter it and redistribute it
 freely, subject to the following restrictions:

 1. The origin of this software must not be misrepresented; you must not
 claim that you wrote the original software. If you use this software
 in a product, an acknowledgment in the product and its documentation
 *is* required.
 2. Altered source versions must be plainly marked as such, and must not be
 misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
Jack Paleviche27bf3e2009-05-10 14:09:03 -070022
Jack Palevich77ae76e2009-05-10 19:59:24 -070023#include <ctype.h>
24#include <dlfcn.h>
Jack Paleviche27bf3e2009-05-10 14:09:03 -070025#include <stdarg.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070026#include <stdio.h>
Jack Palevichf6b5a532009-05-10 19:16:42 -070027#include <stdlib.h>
28#include <string.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070029
Jack Palevich546b2242009-05-13 15:10:04 -070030#if defined(__arm__)
31#include <unistd.h>
32#endif
33
Jack Palevicha6535612009-05-13 16:24:17 -070034#include "disassem.h"
35
Jack Palevichbbf8ab52009-05-11 11:54:30 -070036namespace acc {
37
Jack Palevich77ae76e2009-05-10 19:59:24 -070038class compiler {
Jack Palevich21a15a22009-05-11 14:49:29 -070039 class CodeBuf {
40 char* ind;
41 char* pProgramBase;
Jack Palevichf0cbc922009-05-08 16:35:13 -070042
Jack Palevich21a15a22009-05-11 14:49:29 -070043 void release() {
44 if (pProgramBase != 0) {
45 free(pProgramBase);
46 pProgramBase = 0;
Jack Palevichae54f1f2009-05-08 14:54:15 -070047 }
Jack Palevich21a15a22009-05-11 14:49:29 -070048 }
49
50 public:
51 CodeBuf() {
52 pProgramBase = 0;
53 ind = 0;
54 }
55
56 ~CodeBuf() {
57 release();
58 }
59
60 void init(int size) {
61 release();
62 pProgramBase = (char*) calloc(1, size);
63 ind = pProgramBase;
64 }
65
66 void o(int n) {
67 /* cannot use unsigned, so we must do a hack */
68 while (n && n != -1) {
69 *ind++ = n;
70 n = n >> 8;
71 }
72 }
73
Jack Palevich546b2242009-05-13 15:10:04 -070074 int o4(int n) {
75 int result = (int) ind;
76 * (int*) ind = n;
77 ind += 4;
78 return result;
79 }
80
Jack Palevich21a15a22009-05-11 14:49:29 -070081 /*
82 * Output a byte. Handles all values, 0..ff.
83 */
84 void ob(int n) {
85 *ind++ = n;
86 }
87
88 /* output a symbol and patch all calls to it */
89 void gsym(int t) {
90 int n;
91 while (t) {
92 n = *(int *) t; /* next value */
93 *(int *) t = ((int) ind) - t - 4;
94 t = n;
95 }
96 }
97
98 /* psym is used to put an instruction with a data field which is a
99 reference to a symbol. It is in fact the same as oad ! */
100 int psym(int n, int t) {
101 return oad(n, t);
102 }
103
104 /* instruction + address */
105 int oad(int n, int t) {
106 o(n);
107 *(int *) ind = t;
108 t = (int) ind;
109 ind = ind + 4;
110 return t;
111 }
112
113 inline void* getBase() {
114 return (void*) pProgramBase;
115 }
116
117 int getSize() {
118 return ind - pProgramBase;
119 }
120
121 int getPC() {
122 return (int) ind;
123 }
124 };
125
126 class CodeGenerator {
127 public:
128 CodeGenerator() {}
129 virtual ~CodeGenerator() {}
130
Jack Palevich22305132009-05-13 10:58:45 -0700131 virtual void init(CodeBuf* pCodeBuf) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700132 this->pCodeBuf = pCodeBuf;
133 }
134
Jack Palevich22305132009-05-13 10:58:45 -0700135 /* returns address to patch with local variable size
136 */
Jack Palevich546b2242009-05-13 15:10:04 -0700137 virtual int functionEntry(int argCount) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700138
Jack Palevich546b2242009-05-13 15:10:04 -0700139 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700140
141 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700142 virtual void li(int t) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700143
144 virtual int gjmp(int t) = 0;
145
146 /* l = 0: je, l == 1: jne */
147 virtual int gtst(bool l, int t) = 0;
148
149 virtual void gcmp(int op) = 0;
150
Jack Palevich546b2242009-05-13 15:10:04 -0700151 virtual void genOp(int op) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700152
153 virtual void clearECX() = 0;
154
155 virtual void pushEAX() = 0;
156
157 virtual void popECX() = 0;
158
159 virtual void storeEAXToAddressECX(bool isInt) = 0;
160
161 virtual void loadEAXIndirect(bool isInt) = 0;
162
163 virtual void leaEAX(int ea) = 0;
164
165 virtual void storeEAX(int ea) = 0;
166
167 virtual void loadEAX(int ea) = 0;
168
169 virtual void postIncrementOrDecrement(int n, int op) = 0;
170
171 virtual int allocStackSpaceForArgs() = 0;
172
173 virtual void storeEAToArg(int l) = 0;
174
175 virtual int callForward(int symbol) = 0;
176
177 virtual void callRelative(int t) = 0;
178
179 virtual void callIndirect(int l) = 0;
180
181 virtual void adjustStackAfterCall(int l) = 0;
182
Jack Palevicha6535612009-05-13 16:24:17 -0700183 virtual int disassemble(FILE* out) = 0;
184
Jack Palevich21a15a22009-05-11 14:49:29 -0700185 /* output a symbol and patch all calls to it */
Jack Palevich22305132009-05-13 10:58:45 -0700186 virtual void gsym(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700187 pCodeBuf->gsym(t);
188 }
189
Jack Palevich546b2242009-05-13 15:10:04 -0700190 virtual int finishCompile() {
191#if defined(__arm__)
Jack Palevicha6535612009-05-13 16:24:17 -0700192 const long base = long(pCodeBuf->getBase());
193 const long curr = base + long(pCodeBuf->getSize());
194 int err = cacheflush(base, curr, 0);
195 return err;
Jack Palevich546b2242009-05-13 15:10:04 -0700196#else
Jack Palevicha6535612009-05-13 16:24:17 -0700197 return 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700198#endif
199 }
200
Jack Palevicha6535612009-05-13 16:24:17 -0700201 /**
202 * Adjust relative branches by this amount.
203 */
204 virtual int jumpOffset() = 0;
205
Jack Palevich21a15a22009-05-11 14:49:29 -0700206 protected:
207 void o(int n) {
208 pCodeBuf->o(n);
209 }
210
211 /*
212 * Output a byte. Handles all values, 0..ff.
213 */
214 void ob(int n) {
215 pCodeBuf->ob(n);
216 }
217
218 /* psym is used to put an instruction with a data field which is a
219 reference to a symbol. It is in fact the same as oad ! */
220 int psym(int n, int t) {
221 return oad(n, t);
222 }
223
224 /* instruction + address */
225 int oad(int n, int t) {
226 return pCodeBuf->oad(n,t);
227 }
228
Jack Palevicha6535612009-05-13 16:24:17 -0700229 int getBase() {
230 return (int) pCodeBuf->getBase();
231 }
232
Jack Palevich21a15a22009-05-11 14:49:29 -0700233 int getPC() {
234 return pCodeBuf->getPC();
235 }
236
Jack Palevich546b2242009-05-13 15:10:04 -0700237 int o4(int data) {
238 return pCodeBuf->o4(data);
239 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700240 private:
241 CodeBuf* pCodeBuf;
242 };
243
Jack Palevich22305132009-05-13 10:58:45 -0700244 class ARMCodeGenerator : public CodeGenerator {
245 public:
246 ARMCodeGenerator() {}
247 virtual ~ARMCodeGenerator() {}
248
Jack Palevich546b2242009-05-13 15:10:04 -0700249 // The gnu ARM assembler prints the constants in little-endian,
250 // but C hexidecimal constants are big-endian. We trick the
251 // gnu assembler into putting out big-endian constants by
252 // using the -mbig-endian flag when assembling.
253
Jack Palevich22305132009-05-13 10:58:45 -0700254 /* returns address to patch with local variable size
255 */
Jack Palevich546b2242009-05-13 15:10:04 -0700256 virtual int functionEntry(int argCount) {
257 fprintf(stderr, "functionEntry(%d);\n", argCount);
258 /*
259 19 0000 E1A0C00D mov ip, sp
260 20 0004 E92DD800 stmfd sp!, {fp, ip, lr, pc}
261 21 0008 E24CB004 sub fp, ip, #4
262 22 000c E24DD008 sub sp, sp, #8
263 */
264 o4(0xE1A0C00D);
265 o4(0xE92DD800);
266 o4(0xE24CB004);
267 return o4(0xE24DD008);
Jack Palevich22305132009-05-13 10:58:45 -0700268 }
269
Jack Palevich546b2242009-05-13 15:10:04 -0700270 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
271 fprintf(stderr, "functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize);
272 /*
273 23 0010 E24BD00C sub sp, fp, #12
274 24 0014 E89DA800 ldmfd sp, {fp, sp, pc}
275 */
276 o4(0xE24BD00C);
277 o4(0xE89DA800);
278 if (localVariableSize < 0 || localVariableSize > 255-8) {
279 error("LocalVariableSize");
280 }
281 *(char*) (localVariableAddress) = localVariableSize + 8;
Jack Palevich22305132009-05-13 10:58:45 -0700282 }
283
284 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700285 virtual void li(int t) {
Jack Palevich22305132009-05-13 10:58:45 -0700286 fprintf(stderr, "li(%d);\n", t);
Jack Palevicha6535612009-05-13 16:24:17 -0700287 if (t >= 0 && t < 255) {
288 o4(0xE3A00000 + t); // E3A00000 mov r0, #0
289 } else if (t >= -256 && t < 0) {
290 // mvn means move constant ^ ~0
291 o4(0xE3E00001 - t); // E3E00000 mvn r0, #0
292 } else {
293 error("immediate constant out of range -256..255: %d", t);
294 }
Jack Palevich22305132009-05-13 10:58:45 -0700295 }
296
297 virtual int gjmp(int t) {
298 fprintf(stderr, "gjmp(%d);\n", t);
Jack Palevicha6535612009-05-13 16:24:17 -0700299 return o4(0xEA000000 + encodeAddress(t));
Jack Palevich22305132009-05-13 10:58:45 -0700300 }
301
302 /* l = 0: je, l == 1: jne */
303 virtual int gtst(bool l, int t) {
304 fprintf(stderr, "gtst(%d, %d);\n", l, t);
305 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
306 return psym(0x84 + l, t);
307 }
308
309 virtual void gcmp(int op) {
310 fprintf(stderr, "gcmp(%d);\n", op);
311#if 0
312 int t = decodeOp(op);
313 o(0xc139); /* cmp %eax,%ecx */
314 li(0);
315 o(0x0f); /* setxx %al */
316 o(t + 0x90);
317 o(0xc0);
318#endif
319 }
320
Jack Palevich546b2242009-05-13 15:10:04 -0700321 virtual void genOp(int op) {
Jack Palevich22305132009-05-13 10:58:45 -0700322 fprintf(stderr, "genOp(%d);\n", op);
323#if 0
324 o(decodeOp(op));
325 if (op == OP_MOD)
326 o(0x92); /* xchg %edx, %eax */
327#endif
328 }
329
330 virtual void clearECX() {
331 fprintf(stderr, "clearECX();\n");
332 oad(0xb9, 0); /* movl $0, %ecx */
333 }
334
335 virtual void pushEAX() {
336 fprintf(stderr, "pushEAX();\n");
337 o(0x50); /* push %eax */
338 }
339
340 virtual void popECX() {
341 fprintf(stderr, "popECX();\n");
342 o(0x59); /* pop %ecx */
343 }
344
345 virtual void storeEAXToAddressECX(bool isInt) {
346 fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt);
347 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
348 }
349
350 virtual void loadEAXIndirect(bool isInt) {
351 fprintf(stderr, "loadEAXIndirect(%d);\n", isInt);
352 if (isInt)
353 o(0x8b); /* mov (%eax), %eax */
354 else
355 o(0xbe0f); /* movsbl (%eax), %eax */
356 ob(0); /* add zero in code */
357 }
358
359 virtual void leaEAX(int ea) {
360 fprintf(stderr, "leaEAX(%d);\n", ea);
361#if 0
362 gmov(10, ea); /* leal EA, %eax */
363#endif
364 }
365
366 virtual void storeEAX(int ea) {
367 fprintf(stderr, "storeEAX(%d);\n", ea);
368#if 0
369 gmov(6, ea); /* mov %eax, EA */
370#endif
371 }
372
373 virtual void loadEAX(int ea) {
374 fprintf(stderr, "loadEAX(%d);\n", ea);
375#if 0
376 gmov(8, ea); /* mov EA, %eax */
377#endif
378 }
379
380 virtual void postIncrementOrDecrement(int n, int op) {
381 fprintf(stderr, "postIncrementOrDecrement(%d, %d);\n", n, op);
382 /* Implement post-increment or post decrement.
383 */
384#if 0
385 gmov(0, n); /* 83 ADD */
386 o(decodeOp(op));
387#endif
388 }
389
390 virtual int allocStackSpaceForArgs() {
391 fprintf(stderr, "allocStackSpaceForArgs();\n");
392 return oad(0xec81, 0); /* sub $xxx, %esp */
393 }
394
395 virtual void storeEAToArg(int l) {
396 fprintf(stderr, "storeEAToArg(%d);\n", l);
397 oad(0x248489, l); /* movl %eax, xxx(%esp) */
398 }
399
400 virtual int callForward(int symbol) {
401 fprintf(stderr, "callForward(%d);\n", symbol);
402 return psym(0xe8, symbol); /* call xxx */
403 }
404
405 virtual void callRelative(int t) {
406 fprintf(stderr, "callRelative(%d);\n", t);
407 psym(0xe8, t); /* call xxx */
408 }
409
410 virtual void callIndirect(int l) {
411 fprintf(stderr, "callIndirect(%d);\n", l);
412 oad(0x2494ff, l); /* call *xxx(%esp) */
413 }
414
415 virtual void adjustStackAfterCall(int l) {
416 fprintf(stderr, "adjustStackAfterCall(%d);\n", l);
417 oad(0xc481, l); /* add $xxx, %esp */
418 }
419
Jack Palevicha6535612009-05-13 16:24:17 -0700420 virtual int jumpOffset() {
421 return 4;
422 }
423
424 /* output a symbol and patch all calls to it */
425 virtual void gsym(int t) {
426 fprintf(stderr, "gsym(0x%x)\n", t);
427 int n;
428 int base = getBase();
429 int pc = getPC();
430 fprintf(stderr, "pc = 0x%x\n", pc);
431 while (t) {
432 int data = * (int*) t;
433 int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2);
434 if (decodedOffset == 0) {
435 n = 0;
436 } else {
437 n = base + decodedOffset; /* next value */
438 }
439 *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK)
440 | encodeRelAddress(pc - t - 8);
441 t = n;
442 }
443 }
444
445 virtual int disassemble(FILE* out) {
446 disasmOut = out;
447 disasm_interface_t di;
448 di.di_readword = disassemble_readword;
449 di.di_printaddr = disassemble_printaddr;
450 di.di_printf = disassemble_printf;
451
452 int base = getBase();
453 int pc = getPC();
454 for(int i = base; i < pc; i += 4) {
455 fprintf(out, "%08x: %08x ", i, *(int*) i);
456 ::disasm(&di, i, 0);
457 }
458 return 0;
459 }
Jack Palevich22305132009-05-13 10:58:45 -0700460 private:
Jack Palevicha6535612009-05-13 16:24:17 -0700461 static FILE* disasmOut;
462
463 static u_int
464 disassemble_readword(u_int address)
465 {
466 return(*((u_int *)address));
467 }
468
469 static void
470 disassemble_printaddr(u_int address)
471 {
472 fprintf(disasmOut, "0x%08x", address);
473 }
474
475 static void
476 disassemble_printf(const char *fmt, ...) {
477 va_list ap;
478 va_start(ap, fmt);
479 vfprintf(disasmOut, fmt, ap);
480 va_end(ap);
481 }
482
483 static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff;
484
485 /** Encode a relative address that might also be
486 * a label.
487 */
488 int encodeAddress(int value) {
489 int base = getBase();
490 if (value >= base && value <= getPC() ) {
491 // This is a label, encode it relative to the base.
492 value = value - base;
493 }
494 return encodeRelAddress(value);
495 }
496
497 int encodeRelAddress(int value) {
498 return BRANCH_REL_ADDRESS_MASK & (value >> 2);
499 }
Jack Palevich22305132009-05-13 10:58:45 -0700500
Jack Palevich546b2242009-05-13 15:10:04 -0700501 void error(const char* fmt,...) {
502 va_list ap;
503 va_start(ap, fmt);
504 vfprintf(stderr, fmt, ap);
505 va_end(ap);
506 exit(12);
507 }
Jack Palevich22305132009-05-13 10:58:45 -0700508 };
509
Jack Palevich21a15a22009-05-11 14:49:29 -0700510 class X86CodeGenerator : public CodeGenerator {
511 public:
512 X86CodeGenerator() {}
513 virtual ~X86CodeGenerator() {}
514
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700515 /* returns address to patch with local variable size
516 */
Jack Palevich546b2242009-05-13 15:10:04 -0700517 virtual int functionEntry(int argCount) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700518 o(0xe58955); /* push %ebp, mov %esp, %ebp */
519 return oad(0xec81, 0); /* sub $xxx, %esp */
520 }
521
Jack Palevich546b2242009-05-13 15:10:04 -0700522 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700523 o(0xc3c9); /* leave, ret */
Jack Palevich546b2242009-05-13 15:10:04 -0700524 *(int *) localVariableAddress = localVariableSize; /* save local variables */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700525 }
526
Jack Palevich21a15a22009-05-11 14:49:29 -0700527 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700528 virtual void li(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700529 oad(0xb8, t); /* mov $xx, %eax */
530 }
531
Jack Palevich22305132009-05-13 10:58:45 -0700532 virtual int gjmp(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700533 return psym(0xe9, t);
534 }
535
536 /* l = 0: je, l == 1: jne */
Jack Palevich22305132009-05-13 10:58:45 -0700537 virtual int gtst(bool l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700538 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
539 return psym(0x84 + l, t);
540 }
541
Jack Palevich22305132009-05-13 10:58:45 -0700542 virtual void gcmp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700543 int t = decodeOp(op);
Jack Palevich21a15a22009-05-11 14:49:29 -0700544 o(0xc139); /* cmp %eax,%ecx */
545 li(0);
546 o(0x0f); /* setxx %al */
547 o(t + 0x90);
548 o(0xc0);
549 }
550
Jack Palevich546b2242009-05-13 15:10:04 -0700551 virtual void genOp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700552 o(decodeOp(op));
553 if (op == OP_MOD)
554 o(0x92); /* xchg %edx, %eax */
555 }
556
Jack Palevich22305132009-05-13 10:58:45 -0700557 virtual void clearECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700558 oad(0xb9, 0); /* movl $0, %ecx */
559 }
560
Jack Palevich22305132009-05-13 10:58:45 -0700561 virtual void pushEAX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700562 o(0x50); /* push %eax */
563 }
564
Jack Palevich22305132009-05-13 10:58:45 -0700565 virtual void popECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700566 o(0x59); /* pop %ecx */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700567 }
568
Jack Palevich22305132009-05-13 10:58:45 -0700569 virtual void storeEAXToAddressECX(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700570 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
571 }
572
Jack Palevich22305132009-05-13 10:58:45 -0700573 virtual void loadEAXIndirect(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700574 if (isInt)
575 o(0x8b); /* mov (%eax), %eax */
576 else
577 o(0xbe0f); /* movsbl (%eax), %eax */
578 ob(0); /* add zero in code */
579 }
580
Jack Palevich22305132009-05-13 10:58:45 -0700581 virtual void leaEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700582 gmov(10, ea); /* leal EA, %eax */
583 }
584
Jack Palevich22305132009-05-13 10:58:45 -0700585 virtual void storeEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700586 gmov(6, ea); /* mov %eax, EA */
587 }
588
Jack Palevich22305132009-05-13 10:58:45 -0700589 virtual void loadEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700590 gmov(8, ea); /* mov EA, %eax */
591 }
592
Jack Palevich22305132009-05-13 10:58:45 -0700593 virtual void postIncrementOrDecrement(int n, int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700594 /* Implement post-increment or post decrement.
Jack Palevich21a15a22009-05-11 14:49:29 -0700595 */
596 gmov(0, n); /* 83 ADD */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700597 o(decodeOp(op));
Jack Palevich21a15a22009-05-11 14:49:29 -0700598 }
599
Jack Palevich22305132009-05-13 10:58:45 -0700600 virtual int allocStackSpaceForArgs() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700601 return oad(0xec81, 0); /* sub $xxx, %esp */
602 }
603
Jack Palevich22305132009-05-13 10:58:45 -0700604 virtual void storeEAToArg(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700605 oad(0x248489, l); /* movl %eax, xxx(%esp) */
606 }
607
Jack Palevich22305132009-05-13 10:58:45 -0700608 virtual int callForward(int symbol) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700609 return psym(0xe8, symbol); /* call xxx */
610 }
611
Jack Palevich22305132009-05-13 10:58:45 -0700612 virtual void callRelative(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700613 psym(0xe8, t); /* call xxx */
614 }
615
Jack Palevich22305132009-05-13 10:58:45 -0700616 virtual void callIndirect(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700617 oad(0x2494ff, l); /* call *xxx(%esp) */
618 }
619
Jack Palevich22305132009-05-13 10:58:45 -0700620 virtual void adjustStackAfterCall(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700621 oad(0xc481, l); /* add $xxx, %esp */
622 }
623
Jack Palevicha6535612009-05-13 16:24:17 -0700624 virtual int jumpOffset() {
625 return 5;
626 }
627
628 virtual int disassemble(FILE* out) {
629 return 1;
630 }
631
Jack Palevich21a15a22009-05-11 14:49:29 -0700632 private:
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700633 static const int operatorHelper[];
634
635 int decodeOp(int op) {
636 if (op < 0 || op > OP_COUNT) {
637 fprintf(stderr, "Out-of-range operator: %d\n", op);
638 exit(1);
639 }
640 return operatorHelper[op];
641 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700642
Jack Palevich546b2242009-05-13 15:10:04 -0700643 void gmov(int l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700644 o(l + 0x83);
645 oad((t < LOCAL) << 7 | 5, t);
646 }
647 };
648
649 /* vars: value of variables
650 loc : local variable index
651 glo : global variable index
652 ind : output code ptr
653 rsym: return symbol
654 prog: output code
655 dstk: define stack
656 dptr, dch: macro state
657 */
658 int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
659 dptr, dch, last_id;
660 void* pSymbolBase;
661 void* pGlobalBase;
662 void* pVarsBase;
663 FILE* file;
664
665 CodeBuf codeBuf;
Jack Palevich22305132009-05-13 10:58:45 -0700666 CodeGenerator* pGen;
Jack Palevich21a15a22009-05-11 14:49:29 -0700667
668 static const int ALLOC_SIZE = 99999;
669
670 /* depends on the init string */
671 static const int TOK_STR_SIZE = 48;
672 static const int TOK_IDENT = 0x100;
673 static const int TOK_INT = 0x100;
674 static const int TOK_IF = 0x120;
675 static const int TOK_ELSE = 0x138;
676 static const int TOK_WHILE = 0x160;
677 static const int TOK_BREAK = 0x190;
678 static const int TOK_RETURN = 0x1c0;
679 static const int TOK_FOR = 0x1f8;
680 static const int TOK_DEFINE = 0x218;
681 static const int TOK_MAIN = 0x250;
682
683 static const int TOK_DUMMY = 1;
684 static const int TOK_NUM = 2;
685
686 static const int LOCAL = 0x200;
687
688 static const int SYM_FORWARD = 0;
689 static const int SYM_DEFINE = 1;
690
691 /* tokens in string heap */
692 static const int TAG_TOK = ' ';
693 static const int TAG_MACRO = 2;
694
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700695 static const int OP_INCREMENT = 0;
696 static const int OP_DECREMENT = 1;
697 static const int OP_MUL = 2;
698 static const int OP_DIV = 3;
699 static const int OP_MOD = 4;
700 static const int OP_PLUS = 5;
701 static const int OP_MINUS = 6;
702 static const int OP_SHIFT_LEFT = 7;
703 static const int OP_SHIFT_RIGHT = 8;
704 static const int OP_LESS_EQUAL = 9;
705 static const int OP_GREATER_EQUAL = 10;
706 static const int OP_LESS = 11;
707 static const int OP_GREATER = 12;
708 static const int OP_EQUALS = 13;
709 static const int OP_NOT_EQUALS = 14;
710 static const int OP_LOGICAL_AND = 15;
711 static const int OP_LOGICAL_OR = 16;
712 static const int OP_BIT_AND = 17;
713 static const int OP_BIT_XOR = 18;
714 static const int OP_BIT_OR = 19;
715 static const int OP_BIT_NOT = 20;
716 static const int OP_LOGICAL_NOT = 21;
717 static const int OP_COUNT = 22;
718
719 /* Operators are searched from front, the two-character operators appear
720 * before the single-character operators with the same first character.
721 * @ is used to pad out single-character operators.
722 */
723 static const char* operatorChars;
724 static const char operatorLevel[];
725
Jack Palevich21a15a22009-05-11 14:49:29 -0700726 void pdef(int t) {
727 *(char *) dstk++ = t;
728 }
729
730 void inp() {
731 if (dptr) {
732 ch = *(char *) dptr++;
733 if (ch == TAG_MACRO) {
734 dptr = 0;
735 ch = dch;
736 }
737 } else
738 ch = fgetc(file);
739 /* printf("ch=%c 0x%x\n", ch, ch); */
740 }
741
742 int isid() {
Jack Palevich546b2242009-05-13 15:10:04 -0700743 return isalnum(ch) | (ch == '_');
Jack Palevich21a15a22009-05-11 14:49:29 -0700744 }
745
746 /* read a character constant */
747 void getq() {
748 if (ch == '\\') {
749 inp();
750 if (ch == 'n')
751 ch = '\n';
752 }
753 }
754
755 void next() {
756 int l, a;
757
Jack Palevich546b2242009-05-13 15:10:04 -0700758 while (isspace(ch) | (ch == '#')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700759 if (ch == '#') {
760 inp();
761 next();
762 if (tok == TOK_DEFINE) {
763 next();
764 pdef(TAG_TOK); /* fill last ident tag */
765 *(int *) tok = SYM_DEFINE;
766 *(int *) (tok + 4) = dstk; /* define stack */
767 }
768 /* well we always save the values ! */
769 while (ch != '\n') {
770 pdef(ch);
771 inp();
772 }
773 pdef(ch);
774 pdef(TAG_MACRO);
775 }
776 inp();
777 }
778 tokl = 0;
779 tok = ch;
780 /* encode identifiers & numbers */
781 if (isid()) {
782 pdef(TAG_TOK);
783 last_id = dstk;
784 while (isid()) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700785 pdef(ch);
786 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700787 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700788 if (isdigit(tok)) {
789 tokc = strtol((char*) last_id, 0, 0);
790 tok = TOK_NUM;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700791 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700792 *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
793 suppose data is initialized to zero by calloc) */
794 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
795 - sym_stk);
796 *(char *) dstk = 0; /* mark real end of ident for dlsym() */
797 tok = tok * 8 + TOK_IDENT;
798 if (tok > TOK_DEFINE) {
799 tok = vars + tok;
800 /* printf("tok=%s %x\n", last_id, tok); */
801 /* define handling */
802 if (*(int *) tok == SYM_DEFINE) {
803 dptr = *(int *) (tok + 4);
804 dch = ch;
805 inp();
806 next();
807 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700808 }
809 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700810 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700811 inp();
812 if (tok == '\'') {
813 tok = TOK_NUM;
814 getq();
815 tokc = ch;
816 inp();
817 inp();
Jack Palevich546b2242009-05-13 15:10:04 -0700818 } else if ((tok == '/') & (ch == '*')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700819 inp();
820 while (ch) {
821 while (ch != '*')
822 inp();
823 inp();
824 if (ch == '/')
825 ch = 0;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700826 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700827 inp();
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700828 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700829 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700830 const char* t = operatorChars;
831 int opIndex = 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700832 while ((l = *t++) != 0) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700833 a = *t++;
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700834 tokl = operatorLevel[opIndex];
835 tokc = opIndex;
Jack Palevich546b2242009-05-13 15:10:04 -0700836 if ((l == tok) & ((a == ch) | (a == '@'))) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700837#if 0
838 printf("%c%c -> tokl=%d tokc=0x%x\n",
839 l, a, tokl, tokc);
840#endif
841 if (a == ch) {
842 inp();
843 tok = TOK_DUMMY; /* dummy token for double tokens */
844 }
845 break;
846 }
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700847 opIndex++;
848 }
849 if (l == 0) {
850 tokl = 0;
851 tokc = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700852 }
853 }
854 }
855#if 0
856 {
857 int p;
858
859 printf("tok=0x%x ", tok);
860 if (tok >= TOK_IDENT) {
861 printf("'");
862 if (tok> TOK_DEFINE)
863 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
864 else
865 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
866 while (*(char *)p != TAG_TOK && *(char *)p)
867 printf("%c", *(char *)p++);
868 printf("'\n");
869 } else if (tok == TOK_NUM) {
870 printf("%d\n", tokc);
871 } else {
872 printf("'%c'\n", tok);
873 }
874 }
875#endif
876 }
877
878 void error(const char *fmt, ...) {
879 va_list ap;
880
881 va_start(ap, fmt);
882 fprintf(stderr, "%ld: ", ftell((FILE *) file));
883 vfprintf(stderr, fmt, ap);
884 fprintf(stderr, "\n");
885 va_end(ap);
886 exit(1);
887 }
888
889 void skip(int c) {
890 if (tok != c) {
891 error("'%c' expected", c);
892 }
893 next();
894 }
895
Jack Palevich21a15a22009-05-11 14:49:29 -0700896 /* l is one if '=' parsing wanted (quick hack) */
897 void unary(int l) {
898 int n, t, a, c;
Jack Palevich546b2242009-05-13 15:10:04 -0700899 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700900 n = 1; /* type of expression 0 = forward, 1 = value, other =
901 lvalue */
902 if (tok == '\"') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700903 pGen->li(glo);
Jack Palevich21a15a22009-05-11 14:49:29 -0700904 while (ch != '\"') {
905 getq();
906 *(char *) glo++ = ch;
907 inp();
908 }
909 *(char *) glo = 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700910 glo = (glo + 4) & -4; /* align heap */
Jack Palevich21a15a22009-05-11 14:49:29 -0700911 inp();
912 next();
913 } else {
914 c = tokl;
915 a = tokc;
916 t = tok;
917 next();
918 if (t == TOK_NUM) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700919 pGen->li(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700920 } else if (c == 2) {
921 /* -, +, !, ~ */
922 unary(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700923 pGen->clearECX();
Jack Palevich21a15a22009-05-11 14:49:29 -0700924 if (t == '!')
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700925 pGen->gcmp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700926 else
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700927 pGen->genOp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700928 } else if (t == '(') {
929 expr();
930 skip(')');
931 } else if (t == '*') {
932 /* parse cast */
933 skip('(');
934 t = tok; /* get type */
935 next(); /* skip int/char/void */
936 next(); /* skip '*' or '(' */
937 if (tok == '*') {
938 /* function type */
939 skip('*');
940 skip(')');
941 skip('(');
942 skip(')');
943 t = 0;
944 }
945 skip(')');
946 unary(0);
947 if (tok == '=') {
948 next();
949 pGen->pushEAX();
950 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700951 pGen->popECX();
952 pGen->storeEAXToAddressECX(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -0700953 } else if (t) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700954 pGen->loadEAXIndirect(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -0700955 }
956 } else if (t == '&') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700957 pGen->leaEAX(*(int *) tok);
Jack Palevich21a15a22009-05-11 14:49:29 -0700958 next();
959 } else {
960 n = *(int *) t;
961 /* forward reference: try dlsym */
962 if (!n)
963 n = (int) dlsym(0, (char*) last_id);
Jack Palevich546b2242009-05-13 15:10:04 -0700964 if ((tok == '=') & l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700965 /* assignment */
966 next();
967 expr();
968 pGen->storeEAX(n);
969 } else if (tok != '(') {
970 /* variable */
971 pGen->loadEAX(n);
972 if (tokl == 11) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700973 pGen->postIncrementOrDecrement(n, tokc);
Jack Palevich21a15a22009-05-11 14:49:29 -0700974 next();
975 }
976 }
977 }
978 }
979
980 /* function call */
981 if (tok == '(') {
982 if (n == 1)
983 pGen->pushEAX();
984
985 /* push args and invert order */
986 a = pGen->allocStackSpaceForArgs();
987 next();
988 l = 0;
989 while (tok != ')') {
990 expr();
991 pGen->storeEAToArg(l);
Jack Palevichbbf8ab52009-05-11 11:54:30 -0700992 if (tok == ',')
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700993 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700994 l = l + 4;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700995 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700996 *(int *) a = l;
997 next();
998 if (!n) {
999 /* forward reference */
1000 t = t + 4;
1001 *(int *) t = pGen->callForward(*(int *) t);
1002 } else if (n == 1) {
1003 pGen->callIndirect(l);
1004 l = l + 4;
1005 } else {
Jack Palevicha6535612009-05-13 16:24:17 -07001006 pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); /* call xxx */
Jack Palevich21a15a22009-05-11 14:49:29 -07001007 }
1008 if (l)
1009 pGen->adjustStackAfterCall(l);
1010 }
1011 }
1012
1013 void sum(int l) {
1014 int t, n, a;
Jack Palevich546b2242009-05-13 15:10:04 -07001015 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001016 if (l-- == 1)
1017 unary(1);
1018 else {
1019 sum(l);
1020 a = 0;
1021 while (l == tokl) {
1022 n = tok;
1023 t = tokc;
1024 next();
1025
1026 if (l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001027 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
Jack Palevich21a15a22009-05-11 14:49:29 -07001028 sum(l);
1029 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001030 pGen->pushEAX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001031 sum(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001032 pGen->popECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001033
Jack Palevich546b2242009-05-13 15:10:04 -07001034 if ((l == 4) | (l == 5)) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001035 pGen->gcmp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001036 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001037 pGen->genOp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001038 }
1039 }
1040 }
1041 /* && and || output code generation */
1042 if (a && l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001043 a = pGen->gtst(t == OP_LOGICAL_OR, a);
1044 pGen->li(t != OP_LOGICAL_OR);
Jack Palevicha6535612009-05-13 16:24:17 -07001045 pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001046 pGen->gsym(a);
1047 pGen->li(t == OP_LOGICAL_OR);
Jack Palevich21a15a22009-05-11 14:49:29 -07001048 }
1049 }
1050 }
1051
1052 void expr() {
1053 sum(11);
1054 }
1055
1056 int test_expr() {
1057 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001058 return pGen->gtst(0, 0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001059 }
1060
1061 void block(int l) {
1062 int a, n, t;
1063
1064 if (tok == TOK_IF) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001065 next();
1066 skip('(');
Jack Palevich21a15a22009-05-11 14:49:29 -07001067 a = test_expr();
1068 skip(')');
1069 block(l);
1070 if (tok == TOK_ELSE) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001071 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001072 n = pGen->gjmp(0); /* jmp */
1073 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001074 block(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001075 pGen->gsym(n); /* patch else jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001076 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001077 pGen->gsym(a); /* patch if test */
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001078 }
Jack Palevich546b2242009-05-13 15:10:04 -07001079 } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001080 t = tok;
1081 next();
1082 skip('(');
1083 if (t == TOK_WHILE) {
Jack Palevicha6535612009-05-13 16:24:17 -07001084 n = codeBuf.getPC(); // top of loop, target of "next" iteration
Jack Palevich21a15a22009-05-11 14:49:29 -07001085 a = test_expr();
1086 } else {
1087 if (tok != ';')
1088 expr();
1089 skip(';');
1090 n = codeBuf.getPC();
1091 a = 0;
1092 if (tok != ';')
1093 a = test_expr();
1094 skip(';');
1095 if (tok != ')') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001096 t = pGen->gjmp(0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001097 expr();
Jack Palevicha6535612009-05-13 16:24:17 -07001098 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset());
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001099 pGen->gsym(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001100 n = t + 4;
1101 }
1102 }
1103 skip(')');
1104 block((int) &a);
Jack Palevicha6535612009-05-13 16:24:17 -07001105 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001106 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001107 } else if (tok == '{') {
1108 next();
1109 /* declarations */
1110 decl(1);
1111 while (tok != '}')
1112 block(l);
1113 next();
1114 } else {
1115 if (tok == TOK_RETURN) {
1116 next();
1117 if (tok != ';')
1118 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001119 rsym = pGen->gjmp(rsym); /* jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001120 } else if (tok == TOK_BREAK) {
1121 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001122 *(int *) l = pGen->gjmp(*(int *) l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001123 } else if (tok != ';')
1124 expr();
1125 skip(';');
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001126 }
1127 }
Jack Palevich21a15a22009-05-11 14:49:29 -07001128
1129 /* 'l' is true if local declarations */
1130 void decl(int l) {
1131 int a;
1132
Jack Palevich546b2242009-05-13 15:10:04 -07001133 while ((tok == TOK_INT) | ((tok != -1) & (!l))) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001134 if (tok == TOK_INT) {
1135 next();
1136 while (tok != ';') {
1137 if (l) {
1138 loc = loc + 4;
1139 *(int *) tok = -loc;
1140 } else {
1141 *(int *) tok = glo;
1142 glo = glo + 4;
1143 }
1144 next();
1145 if (tok == ',')
1146 next();
1147 }
1148 skip(';');
1149 } else {
1150 /* patch forward references (XXX: do not work for function
1151 pointers) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001152 pGen->gsym(*(int *) (tok + 4));
Jack Palevich21a15a22009-05-11 14:49:29 -07001153 /* put function address */
1154 *(int *) tok = codeBuf.getPC();
1155 next();
1156 skip('(');
1157 a = 8;
Jack Palevich546b2242009-05-13 15:10:04 -07001158 int argCount = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001159 while (tok != ')') {
1160 /* read param name and compute offset */
1161 *(int *) tok = a;
1162 a = a + 4;
1163 next();
1164 if (tok == ',')
1165 next();
Jack Palevich546b2242009-05-13 15:10:04 -07001166 argCount++;
Jack Palevich21a15a22009-05-11 14:49:29 -07001167 }
1168 next(); /* skip ')' */
1169 rsym = loc = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001170 a = pGen->functionEntry(argCount);
Jack Palevich21a15a22009-05-11 14:49:29 -07001171 block(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001172 pGen->gsym(rsym);
Jack Palevich546b2242009-05-13 15:10:04 -07001173 pGen->functionExit(argCount, a, loc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001174 }
1175 }
1176 }
1177
1178 void cleanup() {
1179 if (sym_stk != 0) {
1180 free((void*) sym_stk);
1181 sym_stk = 0;
1182 }
1183 if (pGlobalBase != 0) {
1184 free((void*) pGlobalBase);
1185 pGlobalBase = 0;
1186 }
1187 if (pVarsBase != 0) {
1188 free(pVarsBase);
1189 pVarsBase = 0;
1190 }
1191 if (pGen) {
1192 delete pGen;
1193 pGen = 0;
1194 }
1195 }
1196
1197 void clear() {
1198 tok = 0;
1199 tokc = 0;
1200 tokl = 0;
1201 ch = 0;
1202 vars = 0;
1203 rsym = 0;
1204 loc = 0;
1205 glo = 0;
1206 sym_stk = 0;
1207 dstk = 0;
1208 dptr = 0;
1209 dch = 0;
1210 last_id = 0;
1211 file = 0;
1212 pGlobalBase = 0;
1213 pVarsBase = 0;
1214 pGen = 0;
1215 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001216
Jack Palevich22305132009-05-13 10:58:45 -07001217 void setArchitecture(const char* architecture) {
1218 delete pGen;
1219 pGen = 0;
1220
1221 if (architecture != NULL) {
1222 if (strcmp(architecture, "arm") == 0) {
1223 pGen = new ARMCodeGenerator();
1224 } else if (strcmp(architecture, "x86") == 0) {
1225 pGen = new X86CodeGenerator();
1226 } else {
1227 fprintf(stderr, "Unknown architecture %s", architecture);
1228 }
1229 }
1230
1231 if (pGen == NULL) {
1232 pGen = new ARMCodeGenerator();
1233 }
1234 }
1235
Jack Palevich77ae76e2009-05-10 19:59:24 -07001236public:
// Options handed to compile().
struct args {
    // Architecture name that compile() forwards to setArchitecture();
    // null until the caller sets it.
    const char* architecture;

    args() : architecture(0) {}
};
1243
Jack Palevich21a15a22009-05-11 14:49:29 -07001244 compiler() {
1245 clear();
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001246 }
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001247
Jack Palevich21a15a22009-05-11 14:49:29 -07001248 ~compiler() {
1249 cleanup();
1250 }
1251
Jack Palevich22305132009-05-13 10:58:45 -07001252 int compile(FILE* in, args& args) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001253 cleanup();
1254 clear();
1255 codeBuf.init(ALLOC_SIZE);
Jack Palevich22305132009-05-13 10:58:45 -07001256 setArchitecture(args.architecture);
Jack Palevich21a15a22009-05-11 14:49:29 -07001257 pGen->init(&codeBuf);
1258 file = in;
1259 sym_stk = (int) calloc(1, ALLOC_SIZE);
1260 dstk = (int) strcpy((char*) sym_stk,
1261 " int if else while break return for define main ")
1262 + TOK_STR_SIZE;
1263 pGlobalBase = calloc(1, ALLOC_SIZE);
1264 glo = (int) pGlobalBase;
1265 pVarsBase = calloc(1, ALLOC_SIZE);
1266 vars = (int) pVarsBase;
1267 inp();
1268 next();
1269 decl(0);
Jack Palevich546b2242009-05-13 15:10:04 -07001270 pGen->finishCompile();
Jack Palevich21a15a22009-05-11 14:49:29 -07001271 return 0;
1272 }
1273
1274 int run(int argc, char** argv) {
1275 typedef int (*mainPtr)(int argc, char** argv);
1276 mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
1277 if (!aMain) {
1278 fprintf(stderr, "Could not find function \"main\".\n");
1279 return -1;
1280 }
1281 return aMain(argc, argv);
1282 }
1283
1284 int dump(FILE* out) {
1285 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
1286 return 0;
1287 }
Jack Palevich77ae76e2009-05-10 19:59:24 -07001288
Jack Palevicha6535612009-05-13 16:24:17 -07001289 int disassemble(FILE* out) {
1290 return pGen->disassemble(out);
1291 }
1292
Jack Palevich77ae76e2009-05-10 19:59:24 -07001293};
1294
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001295const char* compiler::operatorChars =
1296 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
1297
1298const char compiler::operatorLevel[] =
1299 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
1300 5, 5, /* ==, != */
1301 9, 10, /* &&, || */
1302 6, 7, 8, /* & ^ | */
1303 2, 2 /* ~ ! */
1304 };
1305
Jack Palevicha6535612009-05-13 16:24:17 -07001306FILE* compiler::ARMCodeGenerator::disasmOut;
1307
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001308const int compiler::X86CodeGenerator::operatorHelper[] = {
1309 0x1, // ++
1310 0xff, // --
1311 0xc1af0f, // *
1312 0xf9f79991, // /
1313 0xf9f79991, // % (With manual assist to swap results)
1314 0xc801, // +
1315 0xd8f7c829, // -
1316 0xe0d391, // <<
1317 0xf8d391, // >>
1318 0xe, // <=
1319 0xd, // >=
1320 0xc, // <
1321 0xf, // >
1322 0x4, // ==
1323 0x5, // !=
1324 0x0, // &&
1325 0x1, // ||
1326 0xc821, // &
1327 0xc831, // ^
1328 0xc809, // |
1329 0xd0f7, // ~
1330 0x4 // !
1331};
1332
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001333} // namespace acc
1334
Jack Palevich546b2242009-05-13 15:10:04 -07001335// This is a separate function so it can easily be set by breakpoint in gdb.
1336int run(acc::compiler& c, int argc, char** argv) {
1337 return c.run(argc, argv);
1338}
1339
Jack Palevich77ae76e2009-05-10 19:59:24 -07001340int main(int argc, char** argv) {
Jack Palevich22305132009-05-13 10:58:45 -07001341 bool doDump = false;
Jack Palevicha6535612009-05-13 16:24:17 -07001342 bool doDisassemble = false;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001343 const char* inFile = NULL;
1344 const char* outFile = NULL;
Jack Palevich22305132009-05-13 10:58:45 -07001345 const char* architecture = "arm";
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001346 int i;
Jack Palevich21a15a22009-05-11 14:49:29 -07001347 for (i = 1; i < argc; i++) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001348 char* arg = argv[i];
1349 if (arg[0] == '-') {
1350 switch (arg[1]) {
Jack Palevich22305132009-05-13 10:58:45 -07001351 case 'a':
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001352 if (i + 1 >= argc) {
Jack Palevich22305132009-05-13 10:58:45 -07001353 fprintf(stderr, "Expected architecture after -a\n");
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001354 return 2;
1355 }
Jack Palevich22305132009-05-13 10:58:45 -07001356 architecture = argv[i+1];
1357 i += 1;
1358 break;
1359 case 'd':
1360 if (i + 1 >= argc) {
1361 fprintf(stderr, "Expected filename after -d\n");
1362 return 2;
1363 }
1364 doDump = true;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001365 outFile = argv[i + 1];
1366 i += 1;
1367 break;
Jack Palevicha6535612009-05-13 16:24:17 -07001368 case 'S':
1369 doDisassemble = true;
1370 break;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001371 default:
1372 fprintf(stderr, "Unrecognized flag %s\n", arg);
1373 return 3;
1374 }
1375 } else if (inFile == NULL) {
1376 inFile = arg;
1377 } else {
1378 break;
1379 }
1380 }
1381
1382 FILE* in = stdin;
1383 if (inFile) {
1384 in = fopen(inFile, "r");
Jack Palevich21a15a22009-05-11 14:49:29 -07001385 if (!in) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001386 fprintf(stderr, "Could not open input file %s\n", inFile);
1387 return 1;
1388 }
1389 }
1390 acc::compiler compiler;
Jack Palevich22305132009-05-13 10:58:45 -07001391 acc::compiler::args args;
1392 args.architecture = architecture;
1393 int compileResult = compiler.compile(in, args);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001394 if (in != stdin) {
1395 fclose(in);
1396 }
1397 if (compileResult) {
1398 fprintf(stderr, "Compile failed: %d\n", compileResult);
1399 return 6;
1400 }
Jack Palevicha6535612009-05-13 16:24:17 -07001401 if (doDisassemble) {
1402 compiler.disassemble(stderr);
1403 }
Jack Palevich22305132009-05-13 10:58:45 -07001404 if (doDump) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001405 FILE* save = fopen(outFile, "w");
Jack Palevich21a15a22009-05-11 14:49:29 -07001406 if (!save) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001407 fprintf(stderr, "Could not open output file %s\n", outFile);
1408 return 5;
1409 }
1410 compiler.dump(save);
1411 fclose(save);
1412 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001413 fprintf(stderr, "Executing compiled code:\n");
Jack Palevich21a15a22009-05-11 14:49:29 -07001414 int codeArgc = argc - i + 1;
1415 char** codeArgv = argv + i - 1;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001416 codeArgv[0] = (char*) (inFile ? inFile : "stdin");
Jack Palevich546b2242009-05-13 15:10:04 -07001417 int result = run(compiler, codeArgc, codeArgv);
Jack Palevich22305132009-05-13 10:58:45 -07001418 fprintf(stderr, "result: %d\n", result);
1419 return result;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001420 }
1421
1422 return 0;
1423}