blob: c8c08262de15b06ffba0d71bd3c14e38c6ddd548 [file] [log] [blame]
/*
 Obfuscated Tiny C Compiler

 Copyright (C) 2001-2003 Fabrice Bellard

 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
 arising from the use of this software.

 Permission is granted to anyone to use this software for any purpose,
 including commercial applications, and to alter it and redistribute it
 freely, subject to the following restrictions:

 1. The origin of this software must not be misrepresented; you must not
 claim that you wrote the original software. If you use this software
 in a product, an acknowledgment in the product and its documentation
 *is* required.
 2. Altered source versions must be plainly marked as such, and must not be
 misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
Jack Paleviche27bf3e2009-05-10 14:09:03 -070022
Jack Palevich77ae76e2009-05-10 19:59:24 -070023#include <ctype.h>
24#include <dlfcn.h>
Jack Paleviche27bf3e2009-05-10 14:09:03 -070025#include <stdarg.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070026#include <stdio.h>
Jack Palevichf6b5a532009-05-10 19:16:42 -070027#include <stdlib.h>
28#include <string.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070029
Jack Palevich546b2242009-05-13 15:10:04 -070030#if defined(__arm__)
31#include <unistd.h>
32#endif
33
Jack Palevicha6535612009-05-13 16:24:17 -070034#include "disassem.h"
35
Jack Palevichbbf8ab52009-05-11 11:54:30 -070036namespace acc {
37
Jack Palevich77ae76e2009-05-10 19:59:24 -070038class compiler {
/*
 * Growable-by-reinit output buffer for generated machine code.
 *
 * The buffer is allocated with calloc() in init() and released in the
 * destructor. Code addresses are handed out as plain int values, so this
 * class is only meaningful on targets where a pointer fits in an int.
 * No bounds checking is performed on writes.
 */
class CodeBuf {
    char* ind;           /* next byte to be written */
    char* pProgramBase;  /* start of the allocated buffer, or 0 */

    /* The buffer is owned and free()d by this object; a copy would free it
       twice. Copying is therefore declared private and never defined. */
    CodeBuf(const CodeBuf&);
    CodeBuf& operator=(const CodeBuf&);

    /* Narrow a buffer address to the int form used by the code generators.
       Only lossless where pointers are no wider than int. */
    static int toAddress(const void* p) {
        return (int) (long) p;
    }

    /* Free the buffer, if any, and forget it. */
    void release() {
        if (pProgramBase != 0) {
            free(pProgramBase);
            pProgramBase = 0;
        }
    }

public:
    CodeBuf() : ind(0), pProgramBase(0) {
    }

    ~CodeBuf() {
        release();
    }

    /* Discard any previous buffer and allocate a zero-filled one of
       `size` bytes; output restarts at its beginning. */
    void init(int size) {
        release();
        pProgramBase = (char*) calloc(1, size);
        ind = pProgramBase;
    }

    /* Output the bytes of n, least significant first, stopping as soon as
       the remaining value is 0 or -1 (so o(0) and o(-1) emit nothing). */
    void o(int n) {
        /* cannot use unsigned, so we must do a hack */
        while (n && n != -1) {
            *ind++ = n;
            n = n >> 8;
        }
    }

    /* Output a 4-byte word and return the address it was written at. */
    int o4(int n) {
        int result = toAddress(ind);
        * (int*) ind = n;
        ind += 4;
        return result;
    }

    /*
     * Output a byte. Handles all values, 0..ff.
     */
    void ob(int n) {
        *ind++ = n;
    }

    /* output a symbol and patch all calls to it:
       t heads a chain of 4-byte fields, each holding the address of the
       next one; every field is replaced by the distance from the end of
       that field to the current output position. */
    void gsym(int t) {
        int n;
        while (t) {
            n = *(int *) t; /* next value */
            *(int *) t = toAddress(ind) - t - 4;
            t = n;
        }
    }

    /* psym is used to put an instruction with a data field which is a
       reference to a symbol. It is in fact the same as oad ! */
    int psym(int n, int t) {
        return oad(n, t);
    }

    /* instruction + address: emit n with o(), then the 4-byte value t;
       returns the address of that 4-byte field. */
    int oad(int n, int t) {
        o(n);
        *(int *) ind = t;
        t = toAddress(ind);
        ind = ind + 4;
        return t;
    }

    /* Start of the buffer (0 before init()). */
    inline void* getBase() {
        return (void*) pProgramBase;
    }

    /* Number of bytes emitted so far. */
    int getSize() {
        return ind - pProgramBase;
    }

    /* Address at which the next byte will be written. */
    int getPC() {
        return toAddress(ind);
    }
};
125
126 class CodeGenerator {
127 public:
128 CodeGenerator() {}
129 virtual ~CodeGenerator() {}
130
Jack Palevich22305132009-05-13 10:58:45 -0700131 virtual void init(CodeBuf* pCodeBuf) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700132 this->pCodeBuf = pCodeBuf;
133 }
134
Jack Palevich22305132009-05-13 10:58:45 -0700135 /* returns address to patch with local variable size
136 */
Jack Palevich546b2242009-05-13 15:10:04 -0700137 virtual int functionEntry(int argCount) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700138
Jack Palevich546b2242009-05-13 15:10:04 -0700139 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700140
141 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700142 virtual void li(int t) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700143
144 virtual int gjmp(int t) = 0;
145
146 /* l = 0: je, l == 1: jne */
147 virtual int gtst(bool l, int t) = 0;
148
149 virtual void gcmp(int op) = 0;
150
Jack Palevich546b2242009-05-13 15:10:04 -0700151 virtual void genOp(int op) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700152
153 virtual void clearECX() = 0;
154
155 virtual void pushEAX() = 0;
156
157 virtual void popECX() = 0;
158
159 virtual void storeEAXToAddressECX(bool isInt) = 0;
160
161 virtual void loadEAXIndirect(bool isInt) = 0;
162
163 virtual void leaEAX(int ea) = 0;
164
165 virtual void storeEAX(int ea) = 0;
166
167 virtual void loadEAX(int ea) = 0;
168
169 virtual void postIncrementOrDecrement(int n, int op) = 0;
170
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700171 virtual int beginFunctionCallArguments() = 0;
172
173 virtual void endFunctionCallArguments(int a, int l) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700174
175 virtual void storeEAToArg(int l) = 0;
176
177 virtual int callForward(int symbol) = 0;
178
179 virtual void callRelative(int t) = 0;
180
181 virtual void callIndirect(int l) = 0;
182
183 virtual void adjustStackAfterCall(int l) = 0;
184
Jack Palevicha6535612009-05-13 16:24:17 -0700185 virtual int disassemble(FILE* out) = 0;
186
Jack Palevich21a15a22009-05-11 14:49:29 -0700187 /* output a symbol and patch all calls to it */
Jack Palevich22305132009-05-13 10:58:45 -0700188 virtual void gsym(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700189 pCodeBuf->gsym(t);
190 }
191
Jack Palevich546b2242009-05-13 15:10:04 -0700192 virtual int finishCompile() {
193#if defined(__arm__)
Jack Palevicha6535612009-05-13 16:24:17 -0700194 const long base = long(pCodeBuf->getBase());
195 const long curr = base + long(pCodeBuf->getSize());
196 int err = cacheflush(base, curr, 0);
197 return err;
Jack Palevich546b2242009-05-13 15:10:04 -0700198#else
Jack Palevicha6535612009-05-13 16:24:17 -0700199 return 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700200#endif
201 }
202
Jack Palevicha6535612009-05-13 16:24:17 -0700203 /**
204 * Adjust relative branches by this amount.
205 */
206 virtual int jumpOffset() = 0;
207
Jack Palevich21a15a22009-05-11 14:49:29 -0700208 protected:
209 void o(int n) {
210 pCodeBuf->o(n);
211 }
212
213 /*
214 * Output a byte. Handles all values, 0..ff.
215 */
216 void ob(int n) {
217 pCodeBuf->ob(n);
218 }
219
220 /* psym is used to put an instruction with a data field which is a
221 reference to a symbol. It is in fact the same as oad ! */
222 int psym(int n, int t) {
223 return oad(n, t);
224 }
225
226 /* instruction + address */
227 int oad(int n, int t) {
228 return pCodeBuf->oad(n,t);
229 }
230
Jack Palevicha6535612009-05-13 16:24:17 -0700231 int getBase() {
232 return (int) pCodeBuf->getBase();
233 }
234
Jack Palevich21a15a22009-05-11 14:49:29 -0700235 int getPC() {
236 return pCodeBuf->getPC();
237 }
238
Jack Palevich546b2242009-05-13 15:10:04 -0700239 int o4(int data) {
240 return pCodeBuf->o4(data);
241 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700242 private:
243 CodeBuf* pCodeBuf;
244 };
245
Jack Palevich22305132009-05-13 10:58:45 -0700246 class ARMCodeGenerator : public CodeGenerator {
247 public:
248 ARMCodeGenerator() {}
249 virtual ~ARMCodeGenerator() {}
250
251 /* returns address to patch with local variable size
252 */
Jack Palevich546b2242009-05-13 15:10:04 -0700253 virtual int functionEntry(int argCount) {
254 fprintf(stderr, "functionEntry(%d);\n", argCount);
Jack Palevich69796b62009-05-14 15:42:26 -0700255 // sp -> arg4 arg5 ...
256 // Push our register-based arguments back on the stack
257 if (argCount > 0) {
258 int regArgCount = argCount <= 4 ? argCount : 4;
259 o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd sp!, {}
260 }
261 // sp -> arg0 arg1 ...
262 o4(0xE92D4800); // stmfd sp!, {fp, lr}
263 // sp, fp -> oldfp, retadr, arg0 arg1 ....
264 o4(0xE1A0B00D); // mov fp, sp
265 return o4(0xE24DD000); // sub sp, sp, # <local variables>
Jack Palevich22305132009-05-13 10:58:45 -0700266 }
267
Jack Palevich546b2242009-05-13 15:10:04 -0700268 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
269 fprintf(stderr, "functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize);
Jack Palevich69796b62009-05-14 15:42:26 -0700270 // Patch local variable allocation code:
271 if (localVariableSize < 0 || localVariableSize > 255) {
Jack Palevich546b2242009-05-13 15:10:04 -0700272 error("LocalVariableSize");
273 }
Jack Palevich69796b62009-05-14 15:42:26 -0700274 *(char*) (localVariableAddress) = localVariableSize;
275
276 // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ...
277 o4(0xE1A0E00B); // mov lr, fp
278 o4(0xE59BB000); // ldr fp, [fp]
279 o4(0xE28ED004); // add sp, lr, #4
280 // sp -> retadr, arg0, ...
281 o4(0xE8BD4000); // ldmfd sp!, {lr}
282 // sp -> arg0 ....
283 if (argCount > 0) {
284 // We store the PC into the lr so we can adjust the sp before
285 // returning. (We need to pull off the registers we pushed
286 // earlier. We don't need to actually store them anywhere,
287 // just adjust the stack.
288 int regArgCount = argCount <= 4 ? argCount : 4;
289 o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2
290 }
291 o4(0xE12FFF1E); // bx lr
Jack Palevich22305132009-05-13 10:58:45 -0700292 }
293
294 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700295 virtual void li(int t) {
Jack Palevich22305132009-05-13 10:58:45 -0700296 fprintf(stderr, "li(%d);\n", t);
Jack Palevicha6535612009-05-13 16:24:17 -0700297 if (t >= 0 && t < 255) {
Jack Palevich69796b62009-05-14 15:42:26 -0700298 o4(0xE3A00000 + t); // mov r0, #0
Jack Palevicha6535612009-05-13 16:24:17 -0700299 } else if (t >= -256 && t < 0) {
300 // mvn means move constant ^ ~0
Jack Palevich69796b62009-05-14 15:42:26 -0700301 o4(0xE3E00001 - t); // mvn r0, #0
Jack Palevicha6535612009-05-13 16:24:17 -0700302 } else {
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700303 o4(0xE51F0000); // ldr r0, .L3
304 o4(0xEA000000); // b .L99
305 o4(t); // .L3: .word 0
306 // .L99:
Jack Palevicha6535612009-05-13 16:24:17 -0700307 }
Jack Palevich22305132009-05-13 10:58:45 -0700308 }
309
310 virtual int gjmp(int t) {
311 fprintf(stderr, "gjmp(%d);\n", t);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700312 return o4(0xEA000000 + encodeAddress(t)); // b .L33
Jack Palevich22305132009-05-13 10:58:45 -0700313 }
314
315 /* l = 0: je, l == 1: jne */
316 virtual int gtst(bool l, int t) {
317 fprintf(stderr, "gtst(%d, %d);\n", l, t);
Jack Palevich69796b62009-05-14 15:42:26 -0700318 error("Unimplemented");
Jack Palevich22305132009-05-13 10:58:45 -0700319 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
320 return psym(0x84 + l, t);
321 }
322
323 virtual void gcmp(int op) {
324 fprintf(stderr, "gcmp(%d);\n", op);
Jack Palevich69796b62009-05-14 15:42:26 -0700325 error("Unimplemented");
Jack Palevich22305132009-05-13 10:58:45 -0700326#if 0
327 int t = decodeOp(op);
328 o(0xc139); /* cmp %eax,%ecx */
329 li(0);
330 o(0x0f); /* setxx %al */
331 o(t + 0x90);
332 o(0xc0);
333#endif
334 }
335
Jack Palevich546b2242009-05-13 15:10:04 -0700336 virtual void genOp(int op) {
Jack Palevich22305132009-05-13 10:58:45 -0700337 fprintf(stderr, "genOp(%d);\n", op);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700338 switch(op) {
339 case OP_MUL:
340 o4(0x0E0000091); // mul r0,r1,r0
341 break;
342 case OP_PLUS:
343 o4(0xE0810000); // add r0,r1,r0
344 break;
345 case OP_MINUS:
346 o4(0xE0410000); // sub r0,r1,r0
347 break;
348 case OP_SHIFT_LEFT:
349 o4(0xE1A00011); // lsl r0,r1,r0
350 break;
351 case OP_SHIFT_RIGHT:
352 o4(0xE1A00051); // asr r0,r1,r0
353 break;
354 case OP_BIT_AND:
355 o4(0xE0010000); // and r0,r1,r0
356 break;
357 case OP_BIT_XOR:
358 o4(0xE0210000); // eor r0,r1,r0
359 break;
360 case OP_BIT_OR:
361 o4(0xE1810000); // orr r0,r1,r0
362 break;
363 case OP_BIT_NOT:
364 o4(0xE1E00000); // mvn r0, r0
365 break;
366 default:
Jack Palevich69796b62009-05-14 15:42:26 -0700367 error("Unimplemented op %d\n", op);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700368 break;
369 }
Jack Palevich22305132009-05-13 10:58:45 -0700370#if 0
371 o(decodeOp(op));
372 if (op == OP_MOD)
373 o(0x92); /* xchg %edx, %eax */
374#endif
375 }
376
377 virtual void clearECX() {
378 fprintf(stderr, "clearECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700379 o4(0xE3A01000); // mov r1, #0
Jack Palevich22305132009-05-13 10:58:45 -0700380 }
381
382 virtual void pushEAX() {
383 fprintf(stderr, "pushEAX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700384 o4(0xE92D0001); // stmfd sp!,{r0}
Jack Palevich22305132009-05-13 10:58:45 -0700385 }
386
387 virtual void popECX() {
388 fprintf(stderr, "popECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700389 o4(0xE8BD0002); // ldmfd sp!,{r1}
Jack Palevich22305132009-05-13 10:58:45 -0700390 }
391
392 virtual void storeEAXToAddressECX(bool isInt) {
393 fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700394 o4(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
Jack Palevich22305132009-05-13 10:58:45 -0700395 }
396
397 virtual void loadEAXIndirect(bool isInt) {
398 fprintf(stderr, "loadEAXIndirect(%d);\n", isInt);
399 if (isInt)
Jack Palevich69796b62009-05-14 15:42:26 -0700400 o4(0xE5900000); // ldr r0, [r0]
Jack Palevich22305132009-05-13 10:58:45 -0700401 else
Jack Palevich69796b62009-05-14 15:42:26 -0700402 o4(0xE5D00000); // ldrb r0, [r0]
Jack Palevich22305132009-05-13 10:58:45 -0700403 }
404
405 virtual void leaEAX(int ea) {
Jack Palevich69796b62009-05-14 15:42:26 -0700406 fprintf(stderr, "[!!! fixme !!!] leaEAX(%d);\n", ea);
407 error("Unimplemented");
408 if (ea < -4095 || ea > 4095) {
409 error("Offset out of range: %08x", ea);
410 }
411 o4(0xE59B0000 | (0x1fff & ea)); //ldr r0, [fp,#ea]
Jack Palevich22305132009-05-13 10:58:45 -0700412 }
413
414 virtual void storeEAX(int ea) {
415 fprintf(stderr, "storeEAX(%d);\n", ea);
Jack Palevich69796b62009-05-14 15:42:26 -0700416 int fpOffset = ea;
417 if (fpOffset < -4095 || fpOffset > 4095) {
418 error("Offset out of range: %08x", ea);
419 }
420 if (fpOffset < 0) {
421 o4(0xE50B0000 | (0xfff & (-fpOffset))); // str r0, [fp,#-ea]
422 } else {
423 o4(0xE58B0000 | (0xfff & fpOffset)); // str r0, [fp,#ea]
424 }
Jack Palevich22305132009-05-13 10:58:45 -0700425 }
426
427 virtual void loadEAX(int ea) {
428 fprintf(stderr, "loadEAX(%d);\n", ea);
Jack Palevich69796b62009-05-14 15:42:26 -0700429 int fpOffset = ea;
430 if (fpOffset < -4095 || fpOffset > 4095) {
431 error("Offset out of range: %08x", ea);
432 }
433 if (fpOffset < 0) {
434 o4(0xE51B0000 | (0xfff & (-fpOffset))); // ldr r0, [fp,#-ea]
435 } else {
436 o4(0xE59B0000 | (0xfff & fpOffset)); //ldr r0, [fp,#ea]
437 }
Jack Palevich22305132009-05-13 10:58:45 -0700438 }
439
440 virtual void postIncrementOrDecrement(int n, int op) {
441 fprintf(stderr, "postIncrementOrDecrement(%d, %d);\n", n, op);
442 /* Implement post-increment or post decrement.
443 */
Jack Palevich69796b62009-05-14 15:42:26 -0700444
445 error("Unimplemented");
Jack Palevich22305132009-05-13 10:58:45 -0700446#if 0
447 gmov(0, n); /* 83 ADD */
448 o(decodeOp(op));
449#endif
450 }
451
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700452 virtual int beginFunctionCallArguments() {
453 fprintf(stderr, "beginFunctionCallArguments();\n");
454 return o4(0xE24DDF00); // Placeholder
455 }
456
457 virtual void endFunctionCallArguments(int a, int l) {
458 fprintf(stderr, "endFunctionCallArguments(0x%08x, %d);\n", a, l);
459 if (l < 0 || l > 0x3FC) {
460 error("L out of range for stack adjustment: 0x%08x", l);
461 }
462 * (int*) a = 0xE24DDF00 | (l >> 2); // sub sp, sp, #0 << 2
463 int argCount = l >> 2;
464 if (argCount > 0) {
465 int regArgCount = argCount > 4 ? 4 : argCount;
466 o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd sp!,{}
467 }
Jack Palevich22305132009-05-13 10:58:45 -0700468 }
469
470 virtual void storeEAToArg(int l) {
471 fprintf(stderr, "storeEAToArg(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700472 if (l < 0 || l > 4096-4) {
473 error("l out of range for stack offset: 0x%08x", l);
474 }
475 o4(0xE58D0000 + l); // str r0, [sp, #4]
Jack Palevich22305132009-05-13 10:58:45 -0700476 }
477
478 virtual int callForward(int symbol) {
479 fprintf(stderr, "callForward(%d);\n", symbol);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700480 // Forward calls are always short (local)
481 return o4(0xEB000000 | encodeAddress(symbol));
Jack Palevich22305132009-05-13 10:58:45 -0700482 }
483
484 virtual void callRelative(int t) {
485 fprintf(stderr, "callRelative(%d);\n", t);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700486 int abs = t + getPC() + jumpOffset();
Jack Palevich69796b62009-05-14 15:42:26 -0700487 fprintf(stderr, "abs=%d (0x08%x)\n", abs, abs);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700488 if (t >= - (1 << 25) && t < (1 << 25)) {
489 o4(0xEB000000 | encodeAddress(t));
490 } else {
491 // Long call.
492 o4(0xE59FC000); // ldr r12, .L1
493 o4(0xEA000000); // b .L99
494 o4(t - 16); // .L1: .word 0
495 o4(0xE08CC00F); // .L99: add r12,pc
496 o4(0xE12FFF3C); // blx r12
497 }
Jack Palevich22305132009-05-13 10:58:45 -0700498 }
499
500 virtual void callIndirect(int l) {
501 fprintf(stderr, "callIndirect(%d);\n", l);
502 oad(0x2494ff, l); /* call *xxx(%esp) */
503 }
504
505 virtual void adjustStackAfterCall(int l) {
506 fprintf(stderr, "adjustStackAfterCall(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700507 if (l < 0 || l > 0x3FC) {
508 error("L out of range for stack adjustment: 0x%08x", l);
509 }
510 int argCount = l >> 2;
511 if (argCount > 4) {
512 int remainingArgs = argCount - 4;
513 o4(0xE28DDF00 | remainingArgs); // add sp, sp, #0x3fc
514 }
515
Jack Palevich22305132009-05-13 10:58:45 -0700516 }
517
Jack Palevicha6535612009-05-13 16:24:17 -0700518 virtual int jumpOffset() {
519 return 4;
520 }
521
522 /* output a symbol and patch all calls to it */
523 virtual void gsym(int t) {
524 fprintf(stderr, "gsym(0x%x)\n", t);
525 int n;
526 int base = getBase();
527 int pc = getPC();
528 fprintf(stderr, "pc = 0x%x\n", pc);
529 while (t) {
530 int data = * (int*) t;
531 int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2);
532 if (decodedOffset == 0) {
533 n = 0;
534 } else {
535 n = base + decodedOffset; /* next value */
536 }
537 *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK)
538 | encodeRelAddress(pc - t - 8);
539 t = n;
540 }
541 }
542
543 virtual int disassemble(FILE* out) {
544 disasmOut = out;
545 disasm_interface_t di;
546 di.di_readword = disassemble_readword;
547 di.di_printaddr = disassemble_printaddr;
548 di.di_printf = disassemble_printf;
549
550 int base = getBase();
551 int pc = getPC();
552 for(int i = base; i < pc; i += 4) {
553 fprintf(out, "%08x: %08x ", i, *(int*) i);
554 ::disasm(&di, i, 0);
555 }
556 return 0;
557 }
Jack Palevich22305132009-05-13 10:58:45 -0700558 private:
Jack Palevicha6535612009-05-13 16:24:17 -0700559 static FILE* disasmOut;
560
561 static u_int
562 disassemble_readword(u_int address)
563 {
564 return(*((u_int *)address));
565 }
566
567 static void
568 disassemble_printaddr(u_int address)
569 {
570 fprintf(disasmOut, "0x%08x", address);
571 }
572
573 static void
574 disassemble_printf(const char *fmt, ...) {
575 va_list ap;
576 va_start(ap, fmt);
577 vfprintf(disasmOut, fmt, ap);
578 va_end(ap);
579 }
580
581 static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff;
582
583 /** Encode a relative address that might also be
584 * a label.
585 */
586 int encodeAddress(int value) {
587 int base = getBase();
588 if (value >= base && value <= getPC() ) {
589 // This is a label, encode it relative to the base.
590 value = value - base;
591 }
592 return encodeRelAddress(value);
593 }
594
595 int encodeRelAddress(int value) {
596 return BRANCH_REL_ADDRESS_MASK & (value >> 2);
597 }
Jack Palevich22305132009-05-13 10:58:45 -0700598
Jack Palevich546b2242009-05-13 15:10:04 -0700599 void error(const char* fmt,...) {
600 va_list ap;
601 va_start(ap, fmt);
602 vfprintf(stderr, fmt, ap);
603 va_end(ap);
604 exit(12);
605 }
Jack Palevich22305132009-05-13 10:58:45 -0700606 };
607
Jack Palevich21a15a22009-05-11 14:49:29 -0700608 class X86CodeGenerator : public CodeGenerator {
609 public:
610 X86CodeGenerator() {}
611 virtual ~X86CodeGenerator() {}
612
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700613 /* returns address to patch with local variable size
614 */
Jack Palevich546b2242009-05-13 15:10:04 -0700615 virtual int functionEntry(int argCount) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700616 o(0xe58955); /* push %ebp, mov %esp, %ebp */
617 return oad(0xec81, 0); /* sub $xxx, %esp */
618 }
619
Jack Palevich546b2242009-05-13 15:10:04 -0700620 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700621 o(0xc3c9); /* leave, ret */
Jack Palevich546b2242009-05-13 15:10:04 -0700622 *(int *) localVariableAddress = localVariableSize; /* save local variables */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700623 }
624
Jack Palevich21a15a22009-05-11 14:49:29 -0700625 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700626 virtual void li(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700627 oad(0xb8, t); /* mov $xx, %eax */
628 }
629
Jack Palevich22305132009-05-13 10:58:45 -0700630 virtual int gjmp(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700631 return psym(0xe9, t);
632 }
633
634 /* l = 0: je, l == 1: jne */
Jack Palevich22305132009-05-13 10:58:45 -0700635 virtual int gtst(bool l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700636 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
637 return psym(0x84 + l, t);
638 }
639
Jack Palevich22305132009-05-13 10:58:45 -0700640 virtual void gcmp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700641 int t = decodeOp(op);
Jack Palevich21a15a22009-05-11 14:49:29 -0700642 o(0xc139); /* cmp %eax,%ecx */
643 li(0);
644 o(0x0f); /* setxx %al */
645 o(t + 0x90);
646 o(0xc0);
647 }
648
Jack Palevich546b2242009-05-13 15:10:04 -0700649 virtual void genOp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700650 o(decodeOp(op));
651 if (op == OP_MOD)
652 o(0x92); /* xchg %edx, %eax */
653 }
654
Jack Palevich22305132009-05-13 10:58:45 -0700655 virtual void clearECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700656 oad(0xb9, 0); /* movl $0, %ecx */
657 }
658
Jack Palevich22305132009-05-13 10:58:45 -0700659 virtual void pushEAX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700660 o(0x50); /* push %eax */
661 }
662
Jack Palevich22305132009-05-13 10:58:45 -0700663 virtual void popECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700664 o(0x59); /* pop %ecx */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700665 }
666
Jack Palevich22305132009-05-13 10:58:45 -0700667 virtual void storeEAXToAddressECX(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700668 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
669 }
670
Jack Palevich22305132009-05-13 10:58:45 -0700671 virtual void loadEAXIndirect(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700672 if (isInt)
673 o(0x8b); /* mov (%eax), %eax */
674 else
675 o(0xbe0f); /* movsbl (%eax), %eax */
676 ob(0); /* add zero in code */
677 }
678
Jack Palevich22305132009-05-13 10:58:45 -0700679 virtual void leaEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700680 gmov(10, ea); /* leal EA, %eax */
681 }
682
Jack Palevich22305132009-05-13 10:58:45 -0700683 virtual void storeEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700684 gmov(6, ea); /* mov %eax, EA */
685 }
686
Jack Palevich22305132009-05-13 10:58:45 -0700687 virtual void loadEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700688 gmov(8, ea); /* mov EA, %eax */
689 }
690
Jack Palevich22305132009-05-13 10:58:45 -0700691 virtual void postIncrementOrDecrement(int n, int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700692 /* Implement post-increment or post decrement.
Jack Palevich21a15a22009-05-11 14:49:29 -0700693 */
694 gmov(0, n); /* 83 ADD */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700695 o(decodeOp(op));
Jack Palevich21a15a22009-05-11 14:49:29 -0700696 }
697
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700698 virtual int beginFunctionCallArguments() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700699 return oad(0xec81, 0); /* sub $xxx, %esp */
700 }
701
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700702 virtual void endFunctionCallArguments(int a, int l) {
703 * (int*) a = l;
704 }
705
Jack Palevich22305132009-05-13 10:58:45 -0700706 virtual void storeEAToArg(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700707 oad(0x248489, l); /* movl %eax, xxx(%esp) */
708 }
709
Jack Palevich22305132009-05-13 10:58:45 -0700710 virtual int callForward(int symbol) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700711 return psym(0xe8, symbol); /* call xxx */
712 }
713
Jack Palevich22305132009-05-13 10:58:45 -0700714 virtual void callRelative(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700715 psym(0xe8, t); /* call xxx */
716 }
717
Jack Palevich22305132009-05-13 10:58:45 -0700718 virtual void callIndirect(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700719 oad(0x2494ff, l); /* call *xxx(%esp) */
720 }
721
Jack Palevich22305132009-05-13 10:58:45 -0700722 virtual void adjustStackAfterCall(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700723 oad(0xc481, l); /* add $xxx, %esp */
724 }
725
Jack Palevicha6535612009-05-13 16:24:17 -0700726 virtual int jumpOffset() {
727 return 5;
728 }
729
730 virtual int disassemble(FILE* out) {
731 return 1;
732 }
733
Jack Palevich21a15a22009-05-11 14:49:29 -0700734 private:
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700735 static const int operatorHelper[];
736
737 int decodeOp(int op) {
738 if (op < 0 || op > OP_COUNT) {
739 fprintf(stderr, "Out-of-range operator: %d\n", op);
740 exit(1);
741 }
742 return operatorHelper[op];
743 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700744
Jack Palevich546b2242009-05-13 15:10:04 -0700745 void gmov(int l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700746 o(l + 0x83);
747 oad((t < LOCAL) << 7 | 5, t);
748 }
749 };
750
751 /* vars: value of variables
752 loc : local variable index
753 glo : global variable index
754 ind : output code ptr
755 rsym: return symbol
756 prog: output code
757 dstk: define stack
758 dptr, dch: macro state
759 */
760 int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
761 dptr, dch, last_id;
762 void* pSymbolBase;
763 void* pGlobalBase;
764 void* pVarsBase;
765 FILE* file;
766
767 CodeBuf codeBuf;
Jack Palevich22305132009-05-13 10:58:45 -0700768 CodeGenerator* pGen;
Jack Palevich21a15a22009-05-11 14:49:29 -0700769
770 static const int ALLOC_SIZE = 99999;
771
772 /* depends on the init string */
773 static const int TOK_STR_SIZE = 48;
774 static const int TOK_IDENT = 0x100;
775 static const int TOK_INT = 0x100;
776 static const int TOK_IF = 0x120;
777 static const int TOK_ELSE = 0x138;
778 static const int TOK_WHILE = 0x160;
779 static const int TOK_BREAK = 0x190;
780 static const int TOK_RETURN = 0x1c0;
781 static const int TOK_FOR = 0x1f8;
782 static const int TOK_DEFINE = 0x218;
783 static const int TOK_MAIN = 0x250;
784
785 static const int TOK_DUMMY = 1;
786 static const int TOK_NUM = 2;
787
788 static const int LOCAL = 0x200;
789
790 static const int SYM_FORWARD = 0;
791 static const int SYM_DEFINE = 1;
792
793 /* tokens in string heap */
794 static const int TAG_TOK = ' ';
795 static const int TAG_MACRO = 2;
796
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700797 static const int OP_INCREMENT = 0;
798 static const int OP_DECREMENT = 1;
799 static const int OP_MUL = 2;
800 static const int OP_DIV = 3;
801 static const int OP_MOD = 4;
802 static const int OP_PLUS = 5;
803 static const int OP_MINUS = 6;
804 static const int OP_SHIFT_LEFT = 7;
805 static const int OP_SHIFT_RIGHT = 8;
806 static const int OP_LESS_EQUAL = 9;
807 static const int OP_GREATER_EQUAL = 10;
808 static const int OP_LESS = 11;
809 static const int OP_GREATER = 12;
810 static const int OP_EQUALS = 13;
811 static const int OP_NOT_EQUALS = 14;
812 static const int OP_LOGICAL_AND = 15;
813 static const int OP_LOGICAL_OR = 16;
814 static const int OP_BIT_AND = 17;
815 static const int OP_BIT_XOR = 18;
816 static const int OP_BIT_OR = 19;
817 static const int OP_BIT_NOT = 20;
818 static const int OP_LOGICAL_NOT = 21;
819 static const int OP_COUNT = 22;
820
821 /* Operators are searched from front, the two-character operators appear
822 * before the single-character operators with the same first character.
823 * @ is used to pad out single-character operators.
824 */
825 static const char* operatorChars;
826 static const char operatorLevel[];
827
Jack Palevich21a15a22009-05-11 14:49:29 -0700828 void pdef(int t) {
829 *(char *) dstk++ = t;
830 }
831
832 void inp() {
833 if (dptr) {
834 ch = *(char *) dptr++;
835 if (ch == TAG_MACRO) {
836 dptr = 0;
837 ch = dch;
838 }
839 } else
840 ch = fgetc(file);
841 /* printf("ch=%c 0x%x\n", ch, ch); */
842 }
843
844 int isid() {
Jack Palevich546b2242009-05-13 15:10:04 -0700845 return isalnum(ch) | (ch == '_');
Jack Palevich21a15a22009-05-11 14:49:29 -0700846 }
847
848 /* read a character constant */
849 void getq() {
850 if (ch == '\\') {
851 inp();
852 if (ch == 'n')
853 ch = '\n';
854 }
855 }
856
857 void next() {
858 int l, a;
859
Jack Palevich546b2242009-05-13 15:10:04 -0700860 while (isspace(ch) | (ch == '#')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700861 if (ch == '#') {
862 inp();
863 next();
864 if (tok == TOK_DEFINE) {
865 next();
866 pdef(TAG_TOK); /* fill last ident tag */
867 *(int *) tok = SYM_DEFINE;
868 *(int *) (tok + 4) = dstk; /* define stack */
869 }
870 /* well we always save the values ! */
871 while (ch != '\n') {
872 pdef(ch);
873 inp();
874 }
875 pdef(ch);
876 pdef(TAG_MACRO);
877 }
878 inp();
879 }
880 tokl = 0;
881 tok = ch;
882 /* encode identifiers & numbers */
883 if (isid()) {
884 pdef(TAG_TOK);
885 last_id = dstk;
886 while (isid()) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700887 pdef(ch);
888 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700889 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700890 if (isdigit(tok)) {
891 tokc = strtol((char*) last_id, 0, 0);
892 tok = TOK_NUM;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700893 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700894 *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
895 suppose data is initialized to zero by calloc) */
896 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
897 - sym_stk);
898 *(char *) dstk = 0; /* mark real end of ident for dlsym() */
899 tok = tok * 8 + TOK_IDENT;
900 if (tok > TOK_DEFINE) {
901 tok = vars + tok;
902 /* printf("tok=%s %x\n", last_id, tok); */
903 /* define handling */
904 if (*(int *) tok == SYM_DEFINE) {
905 dptr = *(int *) (tok + 4);
906 dch = ch;
907 inp();
908 next();
909 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700910 }
911 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700912 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700913 inp();
914 if (tok == '\'') {
915 tok = TOK_NUM;
916 getq();
917 tokc = ch;
918 inp();
919 inp();
Jack Palevich546b2242009-05-13 15:10:04 -0700920 } else if ((tok == '/') & (ch == '*')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700921 inp();
922 while (ch) {
923 while (ch != '*')
924 inp();
925 inp();
926 if (ch == '/')
927 ch = 0;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700928 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700929 inp();
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700930 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700931 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700932 const char* t = operatorChars;
933 int opIndex = 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700934 while ((l = *t++) != 0) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700935 a = *t++;
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700936 tokl = operatorLevel[opIndex];
937 tokc = opIndex;
Jack Palevich546b2242009-05-13 15:10:04 -0700938 if ((l == tok) & ((a == ch) | (a == '@'))) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700939#if 0
940 printf("%c%c -> tokl=%d tokc=0x%x\n",
941 l, a, tokl, tokc);
942#endif
943 if (a == ch) {
944 inp();
945 tok = TOK_DUMMY; /* dummy token for double tokens */
946 }
947 break;
948 }
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700949 opIndex++;
950 }
951 if (l == 0) {
952 tokl = 0;
953 tokc = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700954 }
955 }
956 }
957#if 0
958 {
959 int p;
960
961 printf("tok=0x%x ", tok);
962 if (tok >= TOK_IDENT) {
963 printf("'");
964 if (tok> TOK_DEFINE)
965 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
966 else
967 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
968 while (*(char *)p != TAG_TOK && *(char *)p)
969 printf("%c", *(char *)p++);
970 printf("'\n");
971 } else if (tok == TOK_NUM) {
972 printf("%d\n", tokc);
973 } else {
974 printf("'%c'\n", tok);
975 }
976 }
977#endif
978 }
979
980 void error(const char *fmt, ...) {
981 va_list ap;
982
983 va_start(ap, fmt);
984 fprintf(stderr, "%ld: ", ftell((FILE *) file));
985 vfprintf(stderr, fmt, ap);
986 fprintf(stderr, "\n");
987 va_end(ap);
988 exit(1);
989 }
990
991 void skip(int c) {
992 if (tok != c) {
993 error("'%c' expected", c);
994 }
995 next();
996 }
997
Jack Palevich21a15a22009-05-11 14:49:29 -0700998 /* l is one if '=' parsing wanted (quick hack) */
999 void unary(int l) {
1000 int n, t, a, c;
Jack Palevich546b2242009-05-13 15:10:04 -07001001 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001002 n = 1; /* type of expression 0 = forward, 1 = value, other =
1003 lvalue */
1004 if (tok == '\"') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001005 pGen->li(glo);
Jack Palevich21a15a22009-05-11 14:49:29 -07001006 while (ch != '\"') {
1007 getq();
1008 *(char *) glo++ = ch;
1009 inp();
1010 }
1011 *(char *) glo = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001012 glo = (glo + 4) & -4; /* align heap */
Jack Palevich21a15a22009-05-11 14:49:29 -07001013 inp();
1014 next();
1015 } else {
1016 c = tokl;
1017 a = tokc;
1018 t = tok;
1019 next();
1020 if (t == TOK_NUM) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001021 pGen->li(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001022 } else if (c == 2) {
1023 /* -, +, !, ~ */
1024 unary(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001025 pGen->clearECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001026 if (t == '!')
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001027 pGen->gcmp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001028 else
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001029 pGen->genOp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001030 } else if (t == '(') {
1031 expr();
1032 skip(')');
1033 } else if (t == '*') {
1034 /* parse cast */
1035 skip('(');
1036 t = tok; /* get type */
1037 next(); /* skip int/char/void */
1038 next(); /* skip '*' or '(' */
1039 if (tok == '*') {
1040 /* function type */
1041 skip('*');
1042 skip(')');
1043 skip('(');
1044 skip(')');
1045 t = 0;
1046 }
1047 skip(')');
1048 unary(0);
1049 if (tok == '=') {
1050 next();
1051 pGen->pushEAX();
1052 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001053 pGen->popECX();
1054 pGen->storeEAXToAddressECX(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001055 } else if (t) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001056 pGen->loadEAXIndirect(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001057 }
1058 } else if (t == '&') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001059 pGen->leaEAX(*(int *) tok);
Jack Palevich21a15a22009-05-11 14:49:29 -07001060 next();
1061 } else {
1062 n = *(int *) t;
1063 /* forward reference: try dlsym */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001064 if (!n) {
1065 n = (int) dlsym(RTLD_DEFAULT, (char*) last_id);
1066 }
Jack Palevich546b2242009-05-13 15:10:04 -07001067 if ((tok == '=') & l) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001068 /* assignment */
1069 next();
1070 expr();
1071 pGen->storeEAX(n);
1072 } else if (tok != '(') {
1073 /* variable */
1074 pGen->loadEAX(n);
1075 if (tokl == 11) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001076 pGen->postIncrementOrDecrement(n, tokc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001077 next();
1078 }
1079 }
1080 }
1081 }
1082
1083 /* function call */
1084 if (tok == '(') {
1085 if (n == 1)
1086 pGen->pushEAX();
1087
1088 /* push args and invert order */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001089 a = pGen->beginFunctionCallArguments();
Jack Palevich21a15a22009-05-11 14:49:29 -07001090 next();
1091 l = 0;
1092 while (tok != ')') {
1093 expr();
1094 pGen->storeEAToArg(l);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001095 if (tok == ',')
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001096 next();
Jack Palevich21a15a22009-05-11 14:49:29 -07001097 l = l + 4;
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001098 }
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001099 pGen->endFunctionCallArguments(a, l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001100 next();
1101 if (!n) {
1102 /* forward reference */
1103 t = t + 4;
1104 *(int *) t = pGen->callForward(*(int *) t);
1105 } else if (n == 1) {
1106 pGen->callIndirect(l);
1107 l = l + 4;
1108 } else {
Jack Palevicha6535612009-05-13 16:24:17 -07001109 pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); /* call xxx */
Jack Palevich21a15a22009-05-11 14:49:29 -07001110 }
1111 if (l)
1112 pGen->adjustStackAfterCall(l);
1113 }
1114 }
1115
1116 void sum(int l) {
1117 int t, n, a;
Jack Palevich546b2242009-05-13 15:10:04 -07001118 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001119 if (l-- == 1)
1120 unary(1);
1121 else {
1122 sum(l);
1123 a = 0;
1124 while (l == tokl) {
1125 n = tok;
1126 t = tokc;
1127 next();
1128
1129 if (l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001130 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
Jack Palevich21a15a22009-05-11 14:49:29 -07001131 sum(l);
1132 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001133 pGen->pushEAX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001134 sum(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001135 pGen->popECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001136
Jack Palevich546b2242009-05-13 15:10:04 -07001137 if ((l == 4) | (l == 5)) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001138 pGen->gcmp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001139 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001140 pGen->genOp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001141 }
1142 }
1143 }
1144 /* && and || output code generation */
1145 if (a && l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001146 a = pGen->gtst(t == OP_LOGICAL_OR, a);
1147 pGen->li(t != OP_LOGICAL_OR);
Jack Palevicha6535612009-05-13 16:24:17 -07001148 pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001149 pGen->gsym(a);
1150 pGen->li(t == OP_LOGICAL_OR);
Jack Palevich21a15a22009-05-11 14:49:29 -07001151 }
1152 }
1153 }
1154
1155 void expr() {
1156 sum(11);
1157 }
1158
1159 int test_expr() {
1160 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001161 return pGen->gtst(0, 0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001162 }
1163
1164 void block(int l) {
1165 int a, n, t;
1166
1167 if (tok == TOK_IF) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001168 next();
1169 skip('(');
Jack Palevich21a15a22009-05-11 14:49:29 -07001170 a = test_expr();
1171 skip(')');
1172 block(l);
1173 if (tok == TOK_ELSE) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001174 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001175 n = pGen->gjmp(0); /* jmp */
1176 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001177 block(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001178 pGen->gsym(n); /* patch else jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001179 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001180 pGen->gsym(a); /* patch if test */
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001181 }
Jack Palevich546b2242009-05-13 15:10:04 -07001182 } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001183 t = tok;
1184 next();
1185 skip('(');
1186 if (t == TOK_WHILE) {
Jack Palevicha6535612009-05-13 16:24:17 -07001187 n = codeBuf.getPC(); // top of loop, target of "next" iteration
Jack Palevich21a15a22009-05-11 14:49:29 -07001188 a = test_expr();
1189 } else {
1190 if (tok != ';')
1191 expr();
1192 skip(';');
1193 n = codeBuf.getPC();
1194 a = 0;
1195 if (tok != ';')
1196 a = test_expr();
1197 skip(';');
1198 if (tok != ')') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001199 t = pGen->gjmp(0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001200 expr();
Jack Palevicha6535612009-05-13 16:24:17 -07001201 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset());
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001202 pGen->gsym(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001203 n = t + 4;
1204 }
1205 }
1206 skip(')');
1207 block((int) &a);
Jack Palevicha6535612009-05-13 16:24:17 -07001208 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001209 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001210 } else if (tok == '{') {
1211 next();
1212 /* declarations */
1213 decl(1);
1214 while (tok != '}')
1215 block(l);
1216 next();
1217 } else {
1218 if (tok == TOK_RETURN) {
1219 next();
1220 if (tok != ';')
1221 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001222 rsym = pGen->gjmp(rsym); /* jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001223 } else if (tok == TOK_BREAK) {
1224 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001225 *(int *) l = pGen->gjmp(*(int *) l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001226 } else if (tok != ';')
1227 expr();
1228 skip(';');
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001229 }
1230 }
Jack Palevich21a15a22009-05-11 14:49:29 -07001231
1232 /* 'l' is true if local declarations */
1233 void decl(int l) {
1234 int a;
1235
Jack Palevich546b2242009-05-13 15:10:04 -07001236 while ((tok == TOK_INT) | ((tok != -1) & (!l))) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001237 if (tok == TOK_INT) {
1238 next();
1239 while (tok != ';') {
1240 if (l) {
1241 loc = loc + 4;
1242 *(int *) tok = -loc;
1243 } else {
1244 *(int *) tok = glo;
1245 glo = glo + 4;
1246 }
1247 next();
1248 if (tok == ',')
1249 next();
1250 }
1251 skip(';');
1252 } else {
1253 /* patch forward references (XXX: do not work for function
1254 pointers) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001255 pGen->gsym(*(int *) (tok + 4));
Jack Palevich21a15a22009-05-11 14:49:29 -07001256 /* put function address */
1257 *(int *) tok = codeBuf.getPC();
1258 next();
1259 skip('(');
1260 a = 8;
Jack Palevich546b2242009-05-13 15:10:04 -07001261 int argCount = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001262 while (tok != ')') {
1263 /* read param name and compute offset */
1264 *(int *) tok = a;
1265 a = a + 4;
1266 next();
1267 if (tok == ',')
1268 next();
Jack Palevich546b2242009-05-13 15:10:04 -07001269 argCount++;
Jack Palevich21a15a22009-05-11 14:49:29 -07001270 }
1271 next(); /* skip ')' */
1272 rsym = loc = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001273 a = pGen->functionEntry(argCount);
Jack Palevich21a15a22009-05-11 14:49:29 -07001274 block(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001275 pGen->gsym(rsym);
Jack Palevich546b2242009-05-13 15:10:04 -07001276 pGen->functionExit(argCount, a, loc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001277 }
1278 }
1279 }
1280
1281 void cleanup() {
1282 if (sym_stk != 0) {
1283 free((void*) sym_stk);
1284 sym_stk = 0;
1285 }
1286 if (pGlobalBase != 0) {
1287 free((void*) pGlobalBase);
1288 pGlobalBase = 0;
1289 }
1290 if (pVarsBase != 0) {
1291 free(pVarsBase);
1292 pVarsBase = 0;
1293 }
1294 if (pGen) {
1295 delete pGen;
1296 pGen = 0;
1297 }
1298 }
1299
1300 void clear() {
1301 tok = 0;
1302 tokc = 0;
1303 tokl = 0;
1304 ch = 0;
1305 vars = 0;
1306 rsym = 0;
1307 loc = 0;
1308 glo = 0;
1309 sym_stk = 0;
1310 dstk = 0;
1311 dptr = 0;
1312 dch = 0;
1313 last_id = 0;
1314 file = 0;
1315 pGlobalBase = 0;
1316 pVarsBase = 0;
1317 pGen = 0;
1318 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001319
Jack Palevich22305132009-05-13 10:58:45 -07001320 void setArchitecture(const char* architecture) {
1321 delete pGen;
1322 pGen = 0;
1323
1324 if (architecture != NULL) {
1325 if (strcmp(architecture, "arm") == 0) {
1326 pGen = new ARMCodeGenerator();
1327 } else if (strcmp(architecture, "x86") == 0) {
1328 pGen = new X86CodeGenerator();
1329 } else {
1330 fprintf(stderr, "Unknown architecture %s", architecture);
1331 }
1332 }
1333
1334 if (pGen == NULL) {
1335 pGen = new ARMCodeGenerator();
1336 }
1337 }
1338
Jack Palevich77ae76e2009-05-10 19:59:24 -07001339public:
/* Options handed to compile(). */
struct args {
    /* Starts with no architecture selected. */
    args() : architecture(0) {
    }

    /* Name of the target architecture, or 0 if none was given. */
    const char* architecture;
};
1346
Jack Palevich21a15a22009-05-11 14:49:29 -07001347 compiler() {
1348 clear();
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001349 }
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001350
Jack Palevich21a15a22009-05-11 14:49:29 -07001351 ~compiler() {
1352 cleanup();
1353 }
1354
Jack Palevich22305132009-05-13 10:58:45 -07001355 int compile(FILE* in, args& args) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001356 cleanup();
1357 clear();
1358 codeBuf.init(ALLOC_SIZE);
Jack Palevich22305132009-05-13 10:58:45 -07001359 setArchitecture(args.architecture);
Jack Palevich21a15a22009-05-11 14:49:29 -07001360 pGen->init(&codeBuf);
1361 file = in;
1362 sym_stk = (int) calloc(1, ALLOC_SIZE);
1363 dstk = (int) strcpy((char*) sym_stk,
1364 " int if else while break return for define main ")
1365 + TOK_STR_SIZE;
1366 pGlobalBase = calloc(1, ALLOC_SIZE);
1367 glo = (int) pGlobalBase;
1368 pVarsBase = calloc(1, ALLOC_SIZE);
1369 vars = (int) pVarsBase;
1370 inp();
1371 next();
1372 decl(0);
Jack Palevich546b2242009-05-13 15:10:04 -07001373 pGen->finishCompile();
Jack Palevich21a15a22009-05-11 14:49:29 -07001374 return 0;
1375 }
1376
1377 int run(int argc, char** argv) {
1378 typedef int (*mainPtr)(int argc, char** argv);
1379 mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
1380 if (!aMain) {
1381 fprintf(stderr, "Could not find function \"main\".\n");
1382 return -1;
1383 }
1384 return aMain(argc, argv);
1385 }
1386
1387 int dump(FILE* out) {
1388 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
1389 return 0;
1390 }
Jack Palevich77ae76e2009-05-10 19:59:24 -07001391
Jack Palevicha6535612009-05-13 16:24:17 -07001392 int disassemble(FILE* out) {
1393 return pGen->disassemble(out);
1394 }
1395
Jack Palevich77ae76e2009-05-10 19:59:24 -07001396};
1397
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001398const char* compiler::operatorChars =
1399 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
1400
1401const char compiler::operatorLevel[] =
1402 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
1403 5, 5, /* ==, != */
1404 9, 10, /* &&, || */
1405 6, 7, 8, /* & ^ | */
1406 2, 2 /* ~ ! */
1407 };
1408
Jack Palevicha6535612009-05-13 16:24:17 -07001409FILE* compiler::ARMCodeGenerator::disasmOut;
1410
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001411const int compiler::X86CodeGenerator::operatorHelper[] = {
1412 0x1, // ++
1413 0xff, // --
1414 0xc1af0f, // *
1415 0xf9f79991, // /
1416 0xf9f79991, // % (With manual assist to swap results)
1417 0xc801, // +
1418 0xd8f7c829, // -
1419 0xe0d391, // <<
1420 0xf8d391, // >>
1421 0xe, // <=
1422 0xd, // >=
1423 0xc, // <
1424 0xf, // >
1425 0x4, // ==
1426 0x5, // !=
1427 0x0, // &&
1428 0x1, // ||
1429 0xc821, // &
1430 0xc831, // ^
1431 0xc809, // |
1432 0xd0f7, // ~
1433 0x4 // !
1434};
1435
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001436} // namespace acc
1437
Jack Palevich546b2242009-05-13 15:10:04 -07001438// This is a separate function so it can easily be set by breakpoint in gdb.
1439int run(acc::compiler& c, int argc, char** argv) {
1440 return c.run(argc, argv);
1441}
1442
Jack Palevich77ae76e2009-05-10 19:59:24 -07001443int main(int argc, char** argv) {
Jack Palevich22305132009-05-13 10:58:45 -07001444 bool doDump = false;
Jack Palevicha6535612009-05-13 16:24:17 -07001445 bool doDisassemble = false;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001446 const char* inFile = NULL;
1447 const char* outFile = NULL;
Jack Palevich22305132009-05-13 10:58:45 -07001448 const char* architecture = "arm";
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001449 int i;
Jack Palevich21a15a22009-05-11 14:49:29 -07001450 for (i = 1; i < argc; i++) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001451 char* arg = argv[i];
1452 if (arg[0] == '-') {
1453 switch (arg[1]) {
Jack Palevich22305132009-05-13 10:58:45 -07001454 case 'a':
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001455 if (i + 1 >= argc) {
Jack Palevich22305132009-05-13 10:58:45 -07001456 fprintf(stderr, "Expected architecture after -a\n");
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001457 return 2;
1458 }
Jack Palevich22305132009-05-13 10:58:45 -07001459 architecture = argv[i+1];
1460 i += 1;
1461 break;
1462 case 'd':
1463 if (i + 1 >= argc) {
1464 fprintf(stderr, "Expected filename after -d\n");
1465 return 2;
1466 }
1467 doDump = true;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001468 outFile = argv[i + 1];
1469 i += 1;
1470 break;
Jack Palevicha6535612009-05-13 16:24:17 -07001471 case 'S':
1472 doDisassemble = true;
1473 break;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001474 default:
1475 fprintf(stderr, "Unrecognized flag %s\n", arg);
1476 return 3;
1477 }
1478 } else if (inFile == NULL) {
1479 inFile = arg;
1480 } else {
1481 break;
1482 }
1483 }
1484
1485 FILE* in = stdin;
1486 if (inFile) {
1487 in = fopen(inFile, "r");
Jack Palevich21a15a22009-05-11 14:49:29 -07001488 if (!in) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001489 fprintf(stderr, "Could not open input file %s\n", inFile);
1490 return 1;
1491 }
1492 }
1493 acc::compiler compiler;
Jack Palevich22305132009-05-13 10:58:45 -07001494 acc::compiler::args args;
1495 args.architecture = architecture;
1496 int compileResult = compiler.compile(in, args);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001497 if (in != stdin) {
1498 fclose(in);
1499 }
1500 if (compileResult) {
1501 fprintf(stderr, "Compile failed: %d\n", compileResult);
1502 return 6;
1503 }
Jack Palevicha6535612009-05-13 16:24:17 -07001504 if (doDisassemble) {
1505 compiler.disassemble(stderr);
1506 }
Jack Palevich22305132009-05-13 10:58:45 -07001507 if (doDump) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001508 FILE* save = fopen(outFile, "w");
Jack Palevich21a15a22009-05-11 14:49:29 -07001509 if (!save) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001510 fprintf(stderr, "Could not open output file %s\n", outFile);
1511 return 5;
1512 }
1513 compiler.dump(save);
1514 fclose(save);
1515 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001516 fprintf(stderr, "Executing compiled code:\n");
Jack Palevich21a15a22009-05-11 14:49:29 -07001517 int codeArgc = argc - i + 1;
1518 char** codeArgv = argv + i - 1;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001519 codeArgv[0] = (char*) (inFile ? inFile : "stdin");
Jack Palevich546b2242009-05-13 15:10:04 -07001520 int result = run(compiler, codeArgc, codeArgv);
Jack Palevich22305132009-05-13 10:58:45 -07001521 fprintf(stderr, "result: %d\n", result);
1522 return result;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001523 }
1524
1525 return 0;
1526}