blob: b27fce49fed33a7c9e5e42633c60dc2bb3de0bee [file] [log] [blame]
/*
 Obfuscated Tiny C Compiler

 Copyright (C) 2001-2003 Fabrice Bellard

 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
 arising from the use of this software.

 Permission is granted to anyone to use this software for any purpose,
 including commercial applications, and to alter it and redistribute it
 freely, subject to the following restrictions:

 1. The origin of this software must not be misrepresented; you must not
 claim that you wrote the original software. If you use this software
 in a product, an acknowledgment in the product and its documentation
 *is* required.
 2. Altered source versions must be plainly marked as such, and must not be
 misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
Jack Paleviche27bf3e2009-05-10 14:09:03 -070022
Jack Palevich77ae76e2009-05-10 19:59:24 -070023#include <ctype.h>
24#include <dlfcn.h>
Jack Paleviche27bf3e2009-05-10 14:09:03 -070025#include <stdarg.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070026#include <stdio.h>
Jack Palevichf6b5a532009-05-10 19:16:42 -070027#include <stdlib.h>
28#include <string.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070029
Jack Palevich546b2242009-05-13 15:10:04 -070030#if defined(__arm__)
31#include <unistd.h>
32#endif
33
Jack Palevicha6535612009-05-13 16:24:17 -070034#include "disassem.h"
35
Jack Palevichbbf8ab52009-05-11 11:54:30 -070036namespace acc {
37
Jack Palevich77ae76e2009-05-10 19:59:24 -070038class compiler {
Jack Palevich21a15a22009-05-11 14:49:29 -070039 class CodeBuf {
40 char* ind;
41 char* pProgramBase;
Jack Palevichf0cbc922009-05-08 16:35:13 -070042
Jack Palevich21a15a22009-05-11 14:49:29 -070043 void release() {
44 if (pProgramBase != 0) {
45 free(pProgramBase);
46 pProgramBase = 0;
Jack Palevichae54f1f2009-05-08 14:54:15 -070047 }
Jack Palevich21a15a22009-05-11 14:49:29 -070048 }
49
50 public:
51 CodeBuf() {
52 pProgramBase = 0;
53 ind = 0;
54 }
55
56 ~CodeBuf() {
57 release();
58 }
59
60 void init(int size) {
61 release();
62 pProgramBase = (char*) calloc(1, size);
63 ind = pProgramBase;
64 }
65
66 void o(int n) {
67 /* cannot use unsigned, so we must do a hack */
68 while (n && n != -1) {
69 *ind++ = n;
70 n = n >> 8;
71 }
72 }
73
Jack Palevich546b2242009-05-13 15:10:04 -070074 int o4(int n) {
75 int result = (int) ind;
76 * (int*) ind = n;
77 ind += 4;
78 return result;
79 }
80
Jack Palevich21a15a22009-05-11 14:49:29 -070081 /*
82 * Output a byte. Handles all values, 0..ff.
83 */
84 void ob(int n) {
85 *ind++ = n;
86 }
87
88 /* output a symbol and patch all calls to it */
89 void gsym(int t) {
90 int n;
91 while (t) {
92 n = *(int *) t; /* next value */
93 *(int *) t = ((int) ind) - t - 4;
94 t = n;
95 }
96 }
97
98 /* psym is used to put an instruction with a data field which is a
99 reference to a symbol. It is in fact the same as oad ! */
100 int psym(int n, int t) {
101 return oad(n, t);
102 }
103
104 /* instruction + address */
105 int oad(int n, int t) {
106 o(n);
107 *(int *) ind = t;
108 t = (int) ind;
109 ind = ind + 4;
110 return t;
111 }
112
113 inline void* getBase() {
114 return (void*) pProgramBase;
115 }
116
117 int getSize() {
118 return ind - pProgramBase;
119 }
120
121 int getPC() {
122 return (int) ind;
123 }
124 };
125
126 class CodeGenerator {
127 public:
128 CodeGenerator() {}
129 virtual ~CodeGenerator() {}
130
Jack Palevich22305132009-05-13 10:58:45 -0700131 virtual void init(CodeBuf* pCodeBuf) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700132 this->pCodeBuf = pCodeBuf;
133 }
134
Jack Palevich22305132009-05-13 10:58:45 -0700135 /* returns address to patch with local variable size
136 */
Jack Palevich546b2242009-05-13 15:10:04 -0700137 virtual int functionEntry(int argCount) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700138
Jack Palevich546b2242009-05-13 15:10:04 -0700139 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700140
141 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700142 virtual void li(int t) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700143
144 virtual int gjmp(int t) = 0;
145
146 /* l = 0: je, l == 1: jne */
147 virtual int gtst(bool l, int t) = 0;
148
149 virtual void gcmp(int op) = 0;
150
Jack Palevich546b2242009-05-13 15:10:04 -0700151 virtual void genOp(int op) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700152
153 virtual void clearECX() = 0;
154
155 virtual void pushEAX() = 0;
156
157 virtual void popECX() = 0;
158
159 virtual void storeEAXToAddressECX(bool isInt) = 0;
160
161 virtual void loadEAXIndirect(bool isInt) = 0;
162
163 virtual void leaEAX(int ea) = 0;
164
165 virtual void storeEAX(int ea) = 0;
166
167 virtual void loadEAX(int ea) = 0;
168
169 virtual void postIncrementOrDecrement(int n, int op) = 0;
170
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700171 virtual int beginFunctionCallArguments() = 0;
172
173 virtual void endFunctionCallArguments(int a, int l) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700174
175 virtual void storeEAToArg(int l) = 0;
176
177 virtual int callForward(int symbol) = 0;
178
179 virtual void callRelative(int t) = 0;
180
181 virtual void callIndirect(int l) = 0;
182
183 virtual void adjustStackAfterCall(int l) = 0;
184
Jack Palevicha6535612009-05-13 16:24:17 -0700185 virtual int disassemble(FILE* out) = 0;
186
Jack Palevich21a15a22009-05-11 14:49:29 -0700187 /* output a symbol and patch all calls to it */
Jack Palevich22305132009-05-13 10:58:45 -0700188 virtual void gsym(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700189 pCodeBuf->gsym(t);
190 }
191
Jack Palevich546b2242009-05-13 15:10:04 -0700192 virtual int finishCompile() {
193#if defined(__arm__)
Jack Palevicha6535612009-05-13 16:24:17 -0700194 const long base = long(pCodeBuf->getBase());
195 const long curr = base + long(pCodeBuf->getSize());
196 int err = cacheflush(base, curr, 0);
197 return err;
Jack Palevich546b2242009-05-13 15:10:04 -0700198#else
Jack Palevicha6535612009-05-13 16:24:17 -0700199 return 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700200#endif
201 }
202
Jack Palevicha6535612009-05-13 16:24:17 -0700203 /**
204 * Adjust relative branches by this amount.
205 */
206 virtual int jumpOffset() = 0;
207
Jack Palevich21a15a22009-05-11 14:49:29 -0700208 protected:
209 void o(int n) {
210 pCodeBuf->o(n);
211 }
212
213 /*
214 * Output a byte. Handles all values, 0..ff.
215 */
216 void ob(int n) {
217 pCodeBuf->ob(n);
218 }
219
220 /* psym is used to put an instruction with a data field which is a
221 reference to a symbol. It is in fact the same as oad ! */
222 int psym(int n, int t) {
223 return oad(n, t);
224 }
225
226 /* instruction + address */
227 int oad(int n, int t) {
228 return pCodeBuf->oad(n,t);
229 }
230
Jack Palevicha6535612009-05-13 16:24:17 -0700231 int getBase() {
232 return (int) pCodeBuf->getBase();
233 }
234
Jack Palevich21a15a22009-05-11 14:49:29 -0700235 int getPC() {
236 return pCodeBuf->getPC();
237 }
238
Jack Palevich546b2242009-05-13 15:10:04 -0700239 int o4(int data) {
240 return pCodeBuf->o4(data);
241 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700242 private:
243 CodeBuf* pCodeBuf;
244 };
245
Jack Palevich22305132009-05-13 10:58:45 -0700246 class ARMCodeGenerator : public CodeGenerator {
247 public:
248 ARMCodeGenerator() {}
249 virtual ~ARMCodeGenerator() {}
250
Jack Palevich546b2242009-05-13 15:10:04 -0700251 // The gnu ARM assembler prints the constants in little-endian,
252 // but C hexidecimal constants are big-endian. We trick the
253 // gnu assembler into putting out big-endian constants by
254 // using the -mbig-endian flag when assembling.
255
Jack Palevich22305132009-05-13 10:58:45 -0700256 /* returns address to patch with local variable size
257 */
Jack Palevich546b2242009-05-13 15:10:04 -0700258 virtual int functionEntry(int argCount) {
259 fprintf(stderr, "functionEntry(%d);\n", argCount);
260 /*
261 19 0000 E1A0C00D mov ip, sp
262 20 0004 E92DD800 stmfd sp!, {fp, ip, lr, pc}
263 21 0008 E24CB004 sub fp, ip, #4
264 22 000c E24DD008 sub sp, sp, #8
265 */
266 o4(0xE1A0C00D);
267 o4(0xE92DD800);
268 o4(0xE24CB004);
269 return o4(0xE24DD008);
Jack Palevich22305132009-05-13 10:58:45 -0700270 }
271
Jack Palevich546b2242009-05-13 15:10:04 -0700272 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
273 fprintf(stderr, "functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize);
274 /*
275 23 0010 E24BD00C sub sp, fp, #12
276 24 0014 E89DA800 ldmfd sp, {fp, sp, pc}
277 */
278 o4(0xE24BD00C);
279 o4(0xE89DA800);
280 if (localVariableSize < 0 || localVariableSize > 255-8) {
281 error("LocalVariableSize");
282 }
283 *(char*) (localVariableAddress) = localVariableSize + 8;
Jack Palevich22305132009-05-13 10:58:45 -0700284 }
285
286 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700287 virtual void li(int t) {
Jack Palevich22305132009-05-13 10:58:45 -0700288 fprintf(stderr, "li(%d);\n", t);
Jack Palevicha6535612009-05-13 16:24:17 -0700289 if (t >= 0 && t < 255) {
290 o4(0xE3A00000 + t); // E3A00000 mov r0, #0
291 } else if (t >= -256 && t < 0) {
292 // mvn means move constant ^ ~0
293 o4(0xE3E00001 - t); // E3E00000 mvn r0, #0
294 } else {
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700295 o4(0xE51F0000); // ldr r0, .L3
296 o4(0xEA000000); // b .L99
297 o4(t); // .L3: .word 0
298 // .L99:
Jack Palevicha6535612009-05-13 16:24:17 -0700299 }
Jack Palevich22305132009-05-13 10:58:45 -0700300 }
301
302 virtual int gjmp(int t) {
303 fprintf(stderr, "gjmp(%d);\n", t);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700304 return o4(0xEA000000 + encodeAddress(t)); // b .L33
Jack Palevich22305132009-05-13 10:58:45 -0700305 }
306
307 /* l = 0: je, l == 1: jne */
308 virtual int gtst(bool l, int t) {
309 fprintf(stderr, "gtst(%d, %d);\n", l, t);
310 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
311 return psym(0x84 + l, t);
312 }
313
314 virtual void gcmp(int op) {
315 fprintf(stderr, "gcmp(%d);\n", op);
316#if 0
317 int t = decodeOp(op);
318 o(0xc139); /* cmp %eax,%ecx */
319 li(0);
320 o(0x0f); /* setxx %al */
321 o(t + 0x90);
322 o(0xc0);
323#endif
324 }
325
Jack Palevich546b2242009-05-13 15:10:04 -0700326 virtual void genOp(int op) {
Jack Palevich22305132009-05-13 10:58:45 -0700327 fprintf(stderr, "genOp(%d);\n", op);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700328 switch(op) {
329 case OP_MUL:
330 o4(0x0E0000091); // mul r0,r1,r0
331 break;
332 case OP_PLUS:
333 o4(0xE0810000); // add r0,r1,r0
334 break;
335 case OP_MINUS:
336 o4(0xE0410000); // sub r0,r1,r0
337 break;
338 case OP_SHIFT_LEFT:
339 o4(0xE1A00011); // lsl r0,r1,r0
340 break;
341 case OP_SHIFT_RIGHT:
342 o4(0xE1A00051); // asr r0,r1,r0
343 break;
344 case OP_BIT_AND:
345 o4(0xE0010000); // and r0,r1,r0
346 break;
347 case OP_BIT_XOR:
348 o4(0xE0210000); // eor r0,r1,r0
349 break;
350 case OP_BIT_OR:
351 o4(0xE1810000); // orr r0,r1,r0
352 break;
353 case OP_BIT_NOT:
354 o4(0xE1E00000); // mvn r0, r0
355 break;
356 default:
357 error("Unhandled op %d\n", op);
358 break;
359 }
Jack Palevich22305132009-05-13 10:58:45 -0700360#if 0
361 o(decodeOp(op));
362 if (op == OP_MOD)
363 o(0x92); /* xchg %edx, %eax */
364#endif
365 }
366
367 virtual void clearECX() {
368 fprintf(stderr, "clearECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700369 o4(0xE3A01000); // mov r1, #0
Jack Palevich22305132009-05-13 10:58:45 -0700370 }
371
372 virtual void pushEAX() {
373 fprintf(stderr, "pushEAX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700374 o4(0xE92D0001); // stmfd sp!,{r0}
Jack Palevich22305132009-05-13 10:58:45 -0700375 }
376
377 virtual void popECX() {
378 fprintf(stderr, "popECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700379 o4(0xE8BD0002); // ldmfd sp!,{r1}
Jack Palevich22305132009-05-13 10:58:45 -0700380 }
381
382 virtual void storeEAXToAddressECX(bool isInt) {
383 fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700384 o4(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
Jack Palevich22305132009-05-13 10:58:45 -0700385 }
386
387 virtual void loadEAXIndirect(bool isInt) {
388 fprintf(stderr, "loadEAXIndirect(%d);\n", isInt);
389 if (isInt)
390 o(0x8b); /* mov (%eax), %eax */
391 else
392 o(0xbe0f); /* movsbl (%eax), %eax */
393 ob(0); /* add zero in code */
394 }
395
396 virtual void leaEAX(int ea) {
397 fprintf(stderr, "leaEAX(%d);\n", ea);
398#if 0
399 gmov(10, ea); /* leal EA, %eax */
400#endif
401 }
402
403 virtual void storeEAX(int ea) {
404 fprintf(stderr, "storeEAX(%d);\n", ea);
405#if 0
406 gmov(6, ea); /* mov %eax, EA */
407#endif
408 }
409
410 virtual void loadEAX(int ea) {
411 fprintf(stderr, "loadEAX(%d);\n", ea);
412#if 0
413 gmov(8, ea); /* mov EA, %eax */
414#endif
415 }
416
417 virtual void postIncrementOrDecrement(int n, int op) {
418 fprintf(stderr, "postIncrementOrDecrement(%d, %d);\n", n, op);
419 /* Implement post-increment or post decrement.
420 */
421#if 0
422 gmov(0, n); /* 83 ADD */
423 o(decodeOp(op));
424#endif
425 }
426
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700427 virtual int beginFunctionCallArguments() {
428 fprintf(stderr, "beginFunctionCallArguments();\n");
429 return o4(0xE24DDF00); // Placeholder
430 }
431
432 virtual void endFunctionCallArguments(int a, int l) {
433 fprintf(stderr, "endFunctionCallArguments(0x%08x, %d);\n", a, l);
434 if (l < 0 || l > 0x3FC) {
435 error("L out of range for stack adjustment: 0x%08x", l);
436 }
437 * (int*) a = 0xE24DDF00 | (l >> 2); // sub sp, sp, #0 << 2
438 int argCount = l >> 2;
439 if (argCount > 0) {
440 int regArgCount = argCount > 4 ? 4 : argCount;
441 o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd sp!,{}
442 }
Jack Palevich22305132009-05-13 10:58:45 -0700443 }
444
445 virtual void storeEAToArg(int l) {
446 fprintf(stderr, "storeEAToArg(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700447 if (l < 0 || l > 4096-4) {
448 error("l out of range for stack offset: 0x%08x", l);
449 }
450 o4(0xE58D0000 + l); // str r0, [sp, #4]
Jack Palevich22305132009-05-13 10:58:45 -0700451 }
452
453 virtual int callForward(int symbol) {
454 fprintf(stderr, "callForward(%d);\n", symbol);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700455 // Forward calls are always short (local)
456 return o4(0xEB000000 | encodeAddress(symbol));
Jack Palevich22305132009-05-13 10:58:45 -0700457 }
458
459 virtual void callRelative(int t) {
460 fprintf(stderr, "callRelative(%d);\n", t);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700461 int abs = t + getPC() + jumpOffset();
462 fprintf(stderr, "abs=%d\n", abs);
463 if (t >= - (1 << 25) && t < (1 << 25)) {
464 o4(0xEB000000 | encodeAddress(t));
465 } else {
466 // Long call.
467 o4(0xE59FC000); // ldr r12, .L1
468 o4(0xEA000000); // b .L99
469 o4(t - 16); // .L1: .word 0
470 o4(0xE08CC00F); // .L99: add r12,pc
471 o4(0xE12FFF3C); // blx r12
472 }
Jack Palevich22305132009-05-13 10:58:45 -0700473 }
474
475 virtual void callIndirect(int l) {
476 fprintf(stderr, "callIndirect(%d);\n", l);
477 oad(0x2494ff, l); /* call *xxx(%esp) */
478 }
479
480 virtual void adjustStackAfterCall(int l) {
481 fprintf(stderr, "adjustStackAfterCall(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700482 if (l < 0 || l > 0x3FC) {
483 error("L out of range for stack adjustment: 0x%08x", l);
484 }
485 int argCount = l >> 2;
486 if (argCount > 4) {
487 int remainingArgs = argCount - 4;
488 o4(0xE28DDF00 | remainingArgs); // add sp, sp, #0x3fc
489 }
490
Jack Palevich22305132009-05-13 10:58:45 -0700491 }
492
Jack Palevicha6535612009-05-13 16:24:17 -0700493 virtual int jumpOffset() {
494 return 4;
495 }
496
497 /* output a symbol and patch all calls to it */
498 virtual void gsym(int t) {
499 fprintf(stderr, "gsym(0x%x)\n", t);
500 int n;
501 int base = getBase();
502 int pc = getPC();
503 fprintf(stderr, "pc = 0x%x\n", pc);
504 while (t) {
505 int data = * (int*) t;
506 int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2);
507 if (decodedOffset == 0) {
508 n = 0;
509 } else {
510 n = base + decodedOffset; /* next value */
511 }
512 *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK)
513 | encodeRelAddress(pc - t - 8);
514 t = n;
515 }
516 }
517
518 virtual int disassemble(FILE* out) {
519 disasmOut = out;
520 disasm_interface_t di;
521 di.di_readword = disassemble_readword;
522 di.di_printaddr = disassemble_printaddr;
523 di.di_printf = disassemble_printf;
524
525 int base = getBase();
526 int pc = getPC();
527 for(int i = base; i < pc; i += 4) {
528 fprintf(out, "%08x: %08x ", i, *(int*) i);
529 ::disasm(&di, i, 0);
530 }
531 return 0;
532 }
Jack Palevich22305132009-05-13 10:58:45 -0700533 private:
Jack Palevicha6535612009-05-13 16:24:17 -0700534 static FILE* disasmOut;
535
536 static u_int
537 disassemble_readword(u_int address)
538 {
539 return(*((u_int *)address));
540 }
541
542 static void
543 disassemble_printaddr(u_int address)
544 {
545 fprintf(disasmOut, "0x%08x", address);
546 }
547
548 static void
549 disassemble_printf(const char *fmt, ...) {
550 va_list ap;
551 va_start(ap, fmt);
552 vfprintf(disasmOut, fmt, ap);
553 va_end(ap);
554 }
555
556 static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff;
557
558 /** Encode a relative address that might also be
559 * a label.
560 */
561 int encodeAddress(int value) {
562 int base = getBase();
563 if (value >= base && value <= getPC() ) {
564 // This is a label, encode it relative to the base.
565 value = value - base;
566 }
567 return encodeRelAddress(value);
568 }
569
570 int encodeRelAddress(int value) {
571 return BRANCH_REL_ADDRESS_MASK & (value >> 2);
572 }
Jack Palevich22305132009-05-13 10:58:45 -0700573
Jack Palevich546b2242009-05-13 15:10:04 -0700574 void error(const char* fmt,...) {
575 va_list ap;
576 va_start(ap, fmt);
577 vfprintf(stderr, fmt, ap);
578 va_end(ap);
579 exit(12);
580 }
Jack Palevich22305132009-05-13 10:58:45 -0700581 };
582
Jack Palevich21a15a22009-05-11 14:49:29 -0700583 class X86CodeGenerator : public CodeGenerator {
584 public:
585 X86CodeGenerator() {}
586 virtual ~X86CodeGenerator() {}
587
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700588 /* returns address to patch with local variable size
589 */
Jack Palevich546b2242009-05-13 15:10:04 -0700590 virtual int functionEntry(int argCount) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700591 o(0xe58955); /* push %ebp, mov %esp, %ebp */
592 return oad(0xec81, 0); /* sub $xxx, %esp */
593 }
594
Jack Palevich546b2242009-05-13 15:10:04 -0700595 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700596 o(0xc3c9); /* leave, ret */
Jack Palevich546b2242009-05-13 15:10:04 -0700597 *(int *) localVariableAddress = localVariableSize; /* save local variables */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700598 }
599
Jack Palevich21a15a22009-05-11 14:49:29 -0700600 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700601 virtual void li(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700602 oad(0xb8, t); /* mov $xx, %eax */
603 }
604
Jack Palevich22305132009-05-13 10:58:45 -0700605 virtual int gjmp(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700606 return psym(0xe9, t);
607 }
608
609 /* l = 0: je, l == 1: jne */
Jack Palevich22305132009-05-13 10:58:45 -0700610 virtual int gtst(bool l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700611 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
612 return psym(0x84 + l, t);
613 }
614
Jack Palevich22305132009-05-13 10:58:45 -0700615 virtual void gcmp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700616 int t = decodeOp(op);
Jack Palevich21a15a22009-05-11 14:49:29 -0700617 o(0xc139); /* cmp %eax,%ecx */
618 li(0);
619 o(0x0f); /* setxx %al */
620 o(t + 0x90);
621 o(0xc0);
622 }
623
Jack Palevich546b2242009-05-13 15:10:04 -0700624 virtual void genOp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700625 o(decodeOp(op));
626 if (op == OP_MOD)
627 o(0x92); /* xchg %edx, %eax */
628 }
629
Jack Palevich22305132009-05-13 10:58:45 -0700630 virtual void clearECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700631 oad(0xb9, 0); /* movl $0, %ecx */
632 }
633
Jack Palevich22305132009-05-13 10:58:45 -0700634 virtual void pushEAX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700635 o(0x50); /* push %eax */
636 }
637
Jack Palevich22305132009-05-13 10:58:45 -0700638 virtual void popECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700639 o(0x59); /* pop %ecx */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700640 }
641
Jack Palevich22305132009-05-13 10:58:45 -0700642 virtual void storeEAXToAddressECX(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700643 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
644 }
645
Jack Palevich22305132009-05-13 10:58:45 -0700646 virtual void loadEAXIndirect(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700647 if (isInt)
648 o(0x8b); /* mov (%eax), %eax */
649 else
650 o(0xbe0f); /* movsbl (%eax), %eax */
651 ob(0); /* add zero in code */
652 }
653
Jack Palevich22305132009-05-13 10:58:45 -0700654 virtual void leaEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700655 gmov(10, ea); /* leal EA, %eax */
656 }
657
Jack Palevich22305132009-05-13 10:58:45 -0700658 virtual void storeEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700659 gmov(6, ea); /* mov %eax, EA */
660 }
661
Jack Palevich22305132009-05-13 10:58:45 -0700662 virtual void loadEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700663 gmov(8, ea); /* mov EA, %eax */
664 }
665
Jack Palevich22305132009-05-13 10:58:45 -0700666 virtual void postIncrementOrDecrement(int n, int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700667 /* Implement post-increment or post decrement.
Jack Palevich21a15a22009-05-11 14:49:29 -0700668 */
669 gmov(0, n); /* 83 ADD */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700670 o(decodeOp(op));
Jack Palevich21a15a22009-05-11 14:49:29 -0700671 }
672
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700673 virtual int beginFunctionCallArguments() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700674 return oad(0xec81, 0); /* sub $xxx, %esp */
675 }
676
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700677 virtual void endFunctionCallArguments(int a, int l) {
678 * (int*) a = l;
679 }
680
Jack Palevich22305132009-05-13 10:58:45 -0700681 virtual void storeEAToArg(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700682 oad(0x248489, l); /* movl %eax, xxx(%esp) */
683 }
684
Jack Palevich22305132009-05-13 10:58:45 -0700685 virtual int callForward(int symbol) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700686 return psym(0xe8, symbol); /* call xxx */
687 }
688
Jack Palevich22305132009-05-13 10:58:45 -0700689 virtual void callRelative(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700690 psym(0xe8, t); /* call xxx */
691 }
692
Jack Palevich22305132009-05-13 10:58:45 -0700693 virtual void callIndirect(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700694 oad(0x2494ff, l); /* call *xxx(%esp) */
695 }
696
Jack Palevich22305132009-05-13 10:58:45 -0700697 virtual void adjustStackAfterCall(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700698 oad(0xc481, l); /* add $xxx, %esp */
699 }
700
Jack Palevicha6535612009-05-13 16:24:17 -0700701 virtual int jumpOffset() {
702 return 5;
703 }
704
705 virtual int disassemble(FILE* out) {
706 return 1;
707 }
708
Jack Palevich21a15a22009-05-11 14:49:29 -0700709 private:
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700710 static const int operatorHelper[];
711
712 int decodeOp(int op) {
713 if (op < 0 || op > OP_COUNT) {
714 fprintf(stderr, "Out-of-range operator: %d\n", op);
715 exit(1);
716 }
717 return operatorHelper[op];
718 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700719
Jack Palevich546b2242009-05-13 15:10:04 -0700720 void gmov(int l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700721 o(l + 0x83);
722 oad((t < LOCAL) << 7 | 5, t);
723 }
724 };
725
726 /* vars: value of variables
727 loc : local variable index
728 glo : global variable index
729 ind : output code ptr
730 rsym: return symbol
731 prog: output code
732 dstk: define stack
733 dptr, dch: macro state
734 */
735 int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
736 dptr, dch, last_id;
737 void* pSymbolBase;
738 void* pGlobalBase;
739 void* pVarsBase;
740 FILE* file;
741
742 CodeBuf codeBuf;
Jack Palevich22305132009-05-13 10:58:45 -0700743 CodeGenerator* pGen;
Jack Palevich21a15a22009-05-11 14:49:29 -0700744
745 static const int ALLOC_SIZE = 99999;
746
747 /* depends on the init string */
748 static const int TOK_STR_SIZE = 48;
749 static const int TOK_IDENT = 0x100;
750 static const int TOK_INT = 0x100;
751 static const int TOK_IF = 0x120;
752 static const int TOK_ELSE = 0x138;
753 static const int TOK_WHILE = 0x160;
754 static const int TOK_BREAK = 0x190;
755 static const int TOK_RETURN = 0x1c0;
756 static const int TOK_FOR = 0x1f8;
757 static const int TOK_DEFINE = 0x218;
758 static const int TOK_MAIN = 0x250;
759
760 static const int TOK_DUMMY = 1;
761 static const int TOK_NUM = 2;
762
763 static const int LOCAL = 0x200;
764
765 static const int SYM_FORWARD = 0;
766 static const int SYM_DEFINE = 1;
767
768 /* tokens in string heap */
769 static const int TAG_TOK = ' ';
770 static const int TAG_MACRO = 2;
771
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700772 static const int OP_INCREMENT = 0;
773 static const int OP_DECREMENT = 1;
774 static const int OP_MUL = 2;
775 static const int OP_DIV = 3;
776 static const int OP_MOD = 4;
777 static const int OP_PLUS = 5;
778 static const int OP_MINUS = 6;
779 static const int OP_SHIFT_LEFT = 7;
780 static const int OP_SHIFT_RIGHT = 8;
781 static const int OP_LESS_EQUAL = 9;
782 static const int OP_GREATER_EQUAL = 10;
783 static const int OP_LESS = 11;
784 static const int OP_GREATER = 12;
785 static const int OP_EQUALS = 13;
786 static const int OP_NOT_EQUALS = 14;
787 static const int OP_LOGICAL_AND = 15;
788 static const int OP_LOGICAL_OR = 16;
789 static const int OP_BIT_AND = 17;
790 static const int OP_BIT_XOR = 18;
791 static const int OP_BIT_OR = 19;
792 static const int OP_BIT_NOT = 20;
793 static const int OP_LOGICAL_NOT = 21;
794 static const int OP_COUNT = 22;
795
796 /* Operators are searched from front, the two-character operators appear
797 * before the single-character operators with the same first character.
798 * @ is used to pad out single-character operators.
799 */
800 static const char* operatorChars;
801 static const char operatorLevel[];
802
Jack Palevich21a15a22009-05-11 14:49:29 -0700803 void pdef(int t) {
804 *(char *) dstk++ = t;
805 }
806
807 void inp() {
808 if (dptr) {
809 ch = *(char *) dptr++;
810 if (ch == TAG_MACRO) {
811 dptr = 0;
812 ch = dch;
813 }
814 } else
815 ch = fgetc(file);
816 /* printf("ch=%c 0x%x\n", ch, ch); */
817 }
818
819 int isid() {
Jack Palevich546b2242009-05-13 15:10:04 -0700820 return isalnum(ch) | (ch == '_');
Jack Palevich21a15a22009-05-11 14:49:29 -0700821 }
822
823 /* read a character constant */
824 void getq() {
825 if (ch == '\\') {
826 inp();
827 if (ch == 'n')
828 ch = '\n';
829 }
830 }
831
832 void next() {
833 int l, a;
834
Jack Palevich546b2242009-05-13 15:10:04 -0700835 while (isspace(ch) | (ch == '#')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700836 if (ch == '#') {
837 inp();
838 next();
839 if (tok == TOK_DEFINE) {
840 next();
841 pdef(TAG_TOK); /* fill last ident tag */
842 *(int *) tok = SYM_DEFINE;
843 *(int *) (tok + 4) = dstk; /* define stack */
844 }
845 /* well we always save the values ! */
846 while (ch != '\n') {
847 pdef(ch);
848 inp();
849 }
850 pdef(ch);
851 pdef(TAG_MACRO);
852 }
853 inp();
854 }
855 tokl = 0;
856 tok = ch;
857 /* encode identifiers & numbers */
858 if (isid()) {
859 pdef(TAG_TOK);
860 last_id = dstk;
861 while (isid()) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700862 pdef(ch);
863 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700864 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700865 if (isdigit(tok)) {
866 tokc = strtol((char*) last_id, 0, 0);
867 tok = TOK_NUM;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700868 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700869 *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
870 suppose data is initialized to zero by calloc) */
871 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
872 - sym_stk);
873 *(char *) dstk = 0; /* mark real end of ident for dlsym() */
874 tok = tok * 8 + TOK_IDENT;
875 if (tok > TOK_DEFINE) {
876 tok = vars + tok;
877 /* printf("tok=%s %x\n", last_id, tok); */
878 /* define handling */
879 if (*(int *) tok == SYM_DEFINE) {
880 dptr = *(int *) (tok + 4);
881 dch = ch;
882 inp();
883 next();
884 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700885 }
886 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700887 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700888 inp();
889 if (tok == '\'') {
890 tok = TOK_NUM;
891 getq();
892 tokc = ch;
893 inp();
894 inp();
Jack Palevich546b2242009-05-13 15:10:04 -0700895 } else if ((tok == '/') & (ch == '*')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700896 inp();
897 while (ch) {
898 while (ch != '*')
899 inp();
900 inp();
901 if (ch == '/')
902 ch = 0;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700903 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700904 inp();
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700905 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700906 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700907 const char* t = operatorChars;
908 int opIndex = 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700909 while ((l = *t++) != 0) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700910 a = *t++;
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700911 tokl = operatorLevel[opIndex];
912 tokc = opIndex;
Jack Palevich546b2242009-05-13 15:10:04 -0700913 if ((l == tok) & ((a == ch) | (a == '@'))) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700914#if 0
915 printf("%c%c -> tokl=%d tokc=0x%x\n",
916 l, a, tokl, tokc);
917#endif
918 if (a == ch) {
919 inp();
920 tok = TOK_DUMMY; /* dummy token for double tokens */
921 }
922 break;
923 }
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700924 opIndex++;
925 }
926 if (l == 0) {
927 tokl = 0;
928 tokc = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700929 }
930 }
931 }
932#if 0
933 {
934 int p;
935
936 printf("tok=0x%x ", tok);
937 if (tok >= TOK_IDENT) {
938 printf("'");
939 if (tok> TOK_DEFINE)
940 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
941 else
942 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
943 while (*(char *)p != TAG_TOK && *(char *)p)
944 printf("%c", *(char *)p++);
945 printf("'\n");
946 } else if (tok == TOK_NUM) {
947 printf("%d\n", tokc);
948 } else {
949 printf("'%c'\n", tok);
950 }
951 }
952#endif
953 }
954
955 void error(const char *fmt, ...) {
956 va_list ap;
957
958 va_start(ap, fmt);
959 fprintf(stderr, "%ld: ", ftell((FILE *) file));
960 vfprintf(stderr, fmt, ap);
961 fprintf(stderr, "\n");
962 va_end(ap);
963 exit(1);
964 }
965
966 void skip(int c) {
967 if (tok != c) {
968 error("'%c' expected", c);
969 }
970 next();
971 }
972
Jack Palevich21a15a22009-05-11 14:49:29 -0700973 /* l is one if '=' parsing wanted (quick hack) */
974 void unary(int l) {
975 int n, t, a, c;
Jack Palevich546b2242009-05-13 15:10:04 -0700976 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700977 n = 1; /* type of expression 0 = forward, 1 = value, other =
978 lvalue */
979 if (tok == '\"') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700980 pGen->li(glo);
Jack Palevich21a15a22009-05-11 14:49:29 -0700981 while (ch != '\"') {
982 getq();
983 *(char *) glo++ = ch;
984 inp();
985 }
986 *(char *) glo = 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700987 glo = (glo + 4) & -4; /* align heap */
Jack Palevich21a15a22009-05-11 14:49:29 -0700988 inp();
989 next();
990 } else {
991 c = tokl;
992 a = tokc;
993 t = tok;
994 next();
995 if (t == TOK_NUM) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700996 pGen->li(a);
Jack Palevich21a15a22009-05-11 14:49:29 -0700997 } else if (c == 2) {
998 /* -, +, !, ~ */
999 unary(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001000 pGen->clearECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001001 if (t == '!')
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001002 pGen->gcmp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001003 else
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001004 pGen->genOp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001005 } else if (t == '(') {
1006 expr();
1007 skip(')');
1008 } else if (t == '*') {
1009 /* parse cast */
1010 skip('(');
1011 t = tok; /* get type */
1012 next(); /* skip int/char/void */
1013 next(); /* skip '*' or '(' */
1014 if (tok == '*') {
1015 /* function type */
1016 skip('*');
1017 skip(')');
1018 skip('(');
1019 skip(')');
1020 t = 0;
1021 }
1022 skip(')');
1023 unary(0);
1024 if (tok == '=') {
1025 next();
1026 pGen->pushEAX();
1027 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001028 pGen->popECX();
1029 pGen->storeEAXToAddressECX(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001030 } else if (t) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001031 pGen->loadEAXIndirect(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001032 }
1033 } else if (t == '&') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001034 pGen->leaEAX(*(int *) tok);
Jack Palevich21a15a22009-05-11 14:49:29 -07001035 next();
1036 } else {
1037 n = *(int *) t;
1038 /* forward reference: try dlsym */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001039 if (!n) {
1040 n = (int) dlsym(RTLD_DEFAULT, (char*) last_id);
1041 }
Jack Palevich546b2242009-05-13 15:10:04 -07001042 if ((tok == '=') & l) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001043 /* assignment */
1044 next();
1045 expr();
1046 pGen->storeEAX(n);
1047 } else if (tok != '(') {
1048 /* variable */
1049 pGen->loadEAX(n);
1050 if (tokl == 11) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001051 pGen->postIncrementOrDecrement(n, tokc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001052 next();
1053 }
1054 }
1055 }
1056 }
1057
1058 /* function call */
1059 if (tok == '(') {
1060 if (n == 1)
1061 pGen->pushEAX();
1062
1063 /* push args and invert order */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001064 a = pGen->beginFunctionCallArguments();
Jack Palevich21a15a22009-05-11 14:49:29 -07001065 next();
1066 l = 0;
1067 while (tok != ')') {
1068 expr();
1069 pGen->storeEAToArg(l);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001070 if (tok == ',')
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001071 next();
Jack Palevich21a15a22009-05-11 14:49:29 -07001072 l = l + 4;
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001073 }
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001074 pGen->endFunctionCallArguments(a, l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001075 next();
1076 if (!n) {
1077 /* forward reference */
1078 t = t + 4;
1079 *(int *) t = pGen->callForward(*(int *) t);
1080 } else if (n == 1) {
1081 pGen->callIndirect(l);
1082 l = l + 4;
1083 } else {
Jack Palevicha6535612009-05-13 16:24:17 -07001084 pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); /* call xxx */
Jack Palevich21a15a22009-05-11 14:49:29 -07001085 }
1086 if (l)
1087 pGen->adjustStackAfterCall(l);
1088 }
1089 }
1090
1091 void sum(int l) {
1092 int t, n, a;
Jack Palevich546b2242009-05-13 15:10:04 -07001093 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001094 if (l-- == 1)
1095 unary(1);
1096 else {
1097 sum(l);
1098 a = 0;
1099 while (l == tokl) {
1100 n = tok;
1101 t = tokc;
1102 next();
1103
1104 if (l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001105 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
Jack Palevich21a15a22009-05-11 14:49:29 -07001106 sum(l);
1107 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001108 pGen->pushEAX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001109 sum(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001110 pGen->popECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001111
Jack Palevich546b2242009-05-13 15:10:04 -07001112 if ((l == 4) | (l == 5)) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001113 pGen->gcmp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001114 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001115 pGen->genOp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001116 }
1117 }
1118 }
1119 /* && and || output code generation */
1120 if (a && l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001121 a = pGen->gtst(t == OP_LOGICAL_OR, a);
1122 pGen->li(t != OP_LOGICAL_OR);
Jack Palevicha6535612009-05-13 16:24:17 -07001123 pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001124 pGen->gsym(a);
1125 pGen->li(t == OP_LOGICAL_OR);
Jack Palevich21a15a22009-05-11 14:49:29 -07001126 }
1127 }
1128 }
1129
1130 void expr() {
1131 sum(11);
1132 }
1133
1134 int test_expr() {
1135 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001136 return pGen->gtst(0, 0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001137 }
1138
1139 void block(int l) {
1140 int a, n, t;
1141
1142 if (tok == TOK_IF) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001143 next();
1144 skip('(');
Jack Palevich21a15a22009-05-11 14:49:29 -07001145 a = test_expr();
1146 skip(')');
1147 block(l);
1148 if (tok == TOK_ELSE) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001149 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001150 n = pGen->gjmp(0); /* jmp */
1151 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001152 block(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001153 pGen->gsym(n); /* patch else jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001154 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001155 pGen->gsym(a); /* patch if test */
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001156 }
Jack Palevich546b2242009-05-13 15:10:04 -07001157 } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001158 t = tok;
1159 next();
1160 skip('(');
1161 if (t == TOK_WHILE) {
Jack Palevicha6535612009-05-13 16:24:17 -07001162 n = codeBuf.getPC(); // top of loop, target of "next" iteration
Jack Palevich21a15a22009-05-11 14:49:29 -07001163 a = test_expr();
1164 } else {
1165 if (tok != ';')
1166 expr();
1167 skip(';');
1168 n = codeBuf.getPC();
1169 a = 0;
1170 if (tok != ';')
1171 a = test_expr();
1172 skip(';');
1173 if (tok != ')') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001174 t = pGen->gjmp(0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001175 expr();
Jack Palevicha6535612009-05-13 16:24:17 -07001176 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset());
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001177 pGen->gsym(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001178 n = t + 4;
1179 }
1180 }
1181 skip(')');
1182 block((int) &a);
Jack Palevicha6535612009-05-13 16:24:17 -07001183 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001184 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001185 } else if (tok == '{') {
1186 next();
1187 /* declarations */
1188 decl(1);
1189 while (tok != '}')
1190 block(l);
1191 next();
1192 } else {
1193 if (tok == TOK_RETURN) {
1194 next();
1195 if (tok != ';')
1196 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001197 rsym = pGen->gjmp(rsym); /* jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001198 } else if (tok == TOK_BREAK) {
1199 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001200 *(int *) l = pGen->gjmp(*(int *) l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001201 } else if (tok != ';')
1202 expr();
1203 skip(';');
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001204 }
1205 }
Jack Palevich21a15a22009-05-11 14:49:29 -07001206
1207 /* 'l' is true if local declarations */
1208 void decl(int l) {
1209 int a;
1210
Jack Palevich546b2242009-05-13 15:10:04 -07001211 while ((tok == TOK_INT) | ((tok != -1) & (!l))) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001212 if (tok == TOK_INT) {
1213 next();
1214 while (tok != ';') {
1215 if (l) {
1216 loc = loc + 4;
1217 *(int *) tok = -loc;
1218 } else {
1219 *(int *) tok = glo;
1220 glo = glo + 4;
1221 }
1222 next();
1223 if (tok == ',')
1224 next();
1225 }
1226 skip(';');
1227 } else {
1228 /* patch forward references (XXX: do not work for function
1229 pointers) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001230 pGen->gsym(*(int *) (tok + 4));
Jack Palevich21a15a22009-05-11 14:49:29 -07001231 /* put function address */
1232 *(int *) tok = codeBuf.getPC();
1233 next();
1234 skip('(');
1235 a = 8;
Jack Palevich546b2242009-05-13 15:10:04 -07001236 int argCount = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001237 while (tok != ')') {
1238 /* read param name and compute offset */
1239 *(int *) tok = a;
1240 a = a + 4;
1241 next();
1242 if (tok == ',')
1243 next();
Jack Palevich546b2242009-05-13 15:10:04 -07001244 argCount++;
Jack Palevich21a15a22009-05-11 14:49:29 -07001245 }
1246 next(); /* skip ')' */
1247 rsym = loc = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001248 a = pGen->functionEntry(argCount);
Jack Palevich21a15a22009-05-11 14:49:29 -07001249 block(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001250 pGen->gsym(rsym);
Jack Palevich546b2242009-05-13 15:10:04 -07001251 pGen->functionExit(argCount, a, loc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001252 }
1253 }
1254 }
1255
1256 void cleanup() {
1257 if (sym_stk != 0) {
1258 free((void*) sym_stk);
1259 sym_stk = 0;
1260 }
1261 if (pGlobalBase != 0) {
1262 free((void*) pGlobalBase);
1263 pGlobalBase = 0;
1264 }
1265 if (pVarsBase != 0) {
1266 free(pVarsBase);
1267 pVarsBase = 0;
1268 }
1269 if (pGen) {
1270 delete pGen;
1271 pGen = 0;
1272 }
1273 }
1274
1275 void clear() {
1276 tok = 0;
1277 tokc = 0;
1278 tokl = 0;
1279 ch = 0;
1280 vars = 0;
1281 rsym = 0;
1282 loc = 0;
1283 glo = 0;
1284 sym_stk = 0;
1285 dstk = 0;
1286 dptr = 0;
1287 dch = 0;
1288 last_id = 0;
1289 file = 0;
1290 pGlobalBase = 0;
1291 pVarsBase = 0;
1292 pGen = 0;
1293 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001294
Jack Palevich22305132009-05-13 10:58:45 -07001295 void setArchitecture(const char* architecture) {
1296 delete pGen;
1297 pGen = 0;
1298
1299 if (architecture != NULL) {
1300 if (strcmp(architecture, "arm") == 0) {
1301 pGen = new ARMCodeGenerator();
1302 } else if (strcmp(architecture, "x86") == 0) {
1303 pGen = new X86CodeGenerator();
1304 } else {
1305 fprintf(stderr, "Unknown architecture %s", architecture);
1306 }
1307 }
1308
1309 if (pGen == NULL) {
1310 pGen = new ARMCodeGenerator();
1311 }
1312 }
1313
Jack Palevich77ae76e2009-05-10 19:59:24 -07001314public:
/* Options passed to compile(). */
struct args {
    /* Start with no architecture selected. */
    args() : architecture(0) {}

    /* Name of the target architecture, or null if none was requested. */
    const char* architecture;
};
1321
Jack Palevich21a15a22009-05-11 14:49:29 -07001322 compiler() {
1323 clear();
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001324 }
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001325
Jack Palevich21a15a22009-05-11 14:49:29 -07001326 ~compiler() {
1327 cleanup();
1328 }
1329
Jack Palevich22305132009-05-13 10:58:45 -07001330 int compile(FILE* in, args& args) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001331 cleanup();
1332 clear();
1333 codeBuf.init(ALLOC_SIZE);
Jack Palevich22305132009-05-13 10:58:45 -07001334 setArchitecture(args.architecture);
Jack Palevich21a15a22009-05-11 14:49:29 -07001335 pGen->init(&codeBuf);
1336 file = in;
1337 sym_stk = (int) calloc(1, ALLOC_SIZE);
1338 dstk = (int) strcpy((char*) sym_stk,
1339 " int if else while break return for define main ")
1340 + TOK_STR_SIZE;
1341 pGlobalBase = calloc(1, ALLOC_SIZE);
1342 glo = (int) pGlobalBase;
1343 pVarsBase = calloc(1, ALLOC_SIZE);
1344 vars = (int) pVarsBase;
1345 inp();
1346 next();
1347 decl(0);
Jack Palevich546b2242009-05-13 15:10:04 -07001348 pGen->finishCompile();
Jack Palevich21a15a22009-05-11 14:49:29 -07001349 return 0;
1350 }
1351
1352 int run(int argc, char** argv) {
1353 typedef int (*mainPtr)(int argc, char** argv);
1354 mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
1355 if (!aMain) {
1356 fprintf(stderr, "Could not find function \"main\".\n");
1357 return -1;
1358 }
1359 return aMain(argc, argv);
1360 }
1361
1362 int dump(FILE* out) {
1363 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
1364 return 0;
1365 }
Jack Palevich77ae76e2009-05-10 19:59:24 -07001366
Jack Palevicha6535612009-05-13 16:24:17 -07001367 int disassemble(FILE* out) {
1368 return pGen->disassemble(out);
1369 }
1370
Jack Palevich77ae76e2009-05-10 19:59:24 -07001371};
1372
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001373const char* compiler::operatorChars =
1374 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
1375
1376const char compiler::operatorLevel[] =
1377 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
1378 5, 5, /* ==, != */
1379 9, 10, /* &&, || */
1380 6, 7, 8, /* & ^ | */
1381 2, 2 /* ~ ! */
1382 };
1383
Jack Palevicha6535612009-05-13 16:24:17 -07001384FILE* compiler::ARMCodeGenerator::disasmOut;
1385
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001386const int compiler::X86CodeGenerator::operatorHelper[] = {
1387 0x1, // ++
1388 0xff, // --
1389 0xc1af0f, // *
1390 0xf9f79991, // /
1391 0xf9f79991, // % (With manual assist to swap results)
1392 0xc801, // +
1393 0xd8f7c829, // -
1394 0xe0d391, // <<
1395 0xf8d391, // >>
1396 0xe, // <=
1397 0xd, // >=
1398 0xc, // <
1399 0xf, // >
1400 0x4, // ==
1401 0x5, // !=
1402 0x0, // &&
1403 0x1, // ||
1404 0xc821, // &
1405 0xc831, // ^
1406 0xc809, // |
1407 0xd0f7, // ~
1408 0x4 // !
1409};
1410
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001411} // namespace acc
1412
Jack Palevich546b2242009-05-13 15:10:04 -07001413// This is a separate function so it can easily be set by breakpoint in gdb.
1414int run(acc::compiler& c, int argc, char** argv) {
1415 return c.run(argc, argv);
1416}
1417
Jack Palevich77ae76e2009-05-10 19:59:24 -07001418int main(int argc, char** argv) {
Jack Palevich22305132009-05-13 10:58:45 -07001419 bool doDump = false;
Jack Palevicha6535612009-05-13 16:24:17 -07001420 bool doDisassemble = false;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001421 const char* inFile = NULL;
1422 const char* outFile = NULL;
Jack Palevich22305132009-05-13 10:58:45 -07001423 const char* architecture = "arm";
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001424 int i;
Jack Palevich21a15a22009-05-11 14:49:29 -07001425 for (i = 1; i < argc; i++) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001426 char* arg = argv[i];
1427 if (arg[0] == '-') {
1428 switch (arg[1]) {
Jack Palevich22305132009-05-13 10:58:45 -07001429 case 'a':
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001430 if (i + 1 >= argc) {
Jack Palevich22305132009-05-13 10:58:45 -07001431 fprintf(stderr, "Expected architecture after -a\n");
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001432 return 2;
1433 }
Jack Palevich22305132009-05-13 10:58:45 -07001434 architecture = argv[i+1];
1435 i += 1;
1436 break;
1437 case 'd':
1438 if (i + 1 >= argc) {
1439 fprintf(stderr, "Expected filename after -d\n");
1440 return 2;
1441 }
1442 doDump = true;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001443 outFile = argv[i + 1];
1444 i += 1;
1445 break;
Jack Palevicha6535612009-05-13 16:24:17 -07001446 case 'S':
1447 doDisassemble = true;
1448 break;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001449 default:
1450 fprintf(stderr, "Unrecognized flag %s\n", arg);
1451 return 3;
1452 }
1453 } else if (inFile == NULL) {
1454 inFile = arg;
1455 } else {
1456 break;
1457 }
1458 }
1459
1460 FILE* in = stdin;
1461 if (inFile) {
1462 in = fopen(inFile, "r");
Jack Palevich21a15a22009-05-11 14:49:29 -07001463 if (!in) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001464 fprintf(stderr, "Could not open input file %s\n", inFile);
1465 return 1;
1466 }
1467 }
1468 acc::compiler compiler;
Jack Palevich22305132009-05-13 10:58:45 -07001469 acc::compiler::args args;
1470 args.architecture = architecture;
1471 int compileResult = compiler.compile(in, args);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001472 if (in != stdin) {
1473 fclose(in);
1474 }
1475 if (compileResult) {
1476 fprintf(stderr, "Compile failed: %d\n", compileResult);
1477 return 6;
1478 }
Jack Palevicha6535612009-05-13 16:24:17 -07001479 if (doDisassemble) {
1480 compiler.disassemble(stderr);
1481 }
Jack Palevich22305132009-05-13 10:58:45 -07001482 if (doDump) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001483 FILE* save = fopen(outFile, "w");
Jack Palevich21a15a22009-05-11 14:49:29 -07001484 if (!save) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001485 fprintf(stderr, "Could not open output file %s\n", outFile);
1486 return 5;
1487 }
1488 compiler.dump(save);
1489 fclose(save);
1490 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001491 fprintf(stderr, "Executing compiled code:\n");
Jack Palevich21a15a22009-05-11 14:49:29 -07001492 int codeArgc = argc - i + 1;
1493 char** codeArgv = argv + i - 1;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001494 codeArgv[0] = (char*) (inFile ? inFile : "stdin");
Jack Palevich546b2242009-05-13 15:10:04 -07001495 int result = run(compiler, codeArgc, codeArgv);
Jack Palevich22305132009-05-13 10:58:45 -07001496 fprintf(stderr, "result: %d\n", result);
1497 return result;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001498 }
1499
1500 return 0;
1501}