/*
 Obfuscated Tiny C Compiler

 Copyright (C) 2001-2003 Fabrice Bellard

 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
 arising from the use of this software.

 Permission is granted to anyone to use this software for any purpose,
 including commercial applications, and to alter it and redistribute it
 freely, subject to the following restrictions:

 1. The origin of this software must not be misrepresented; you must not
 claim that you wrote the original software. If you use this software
 in a product, an acknowledgment in the product and its documentation
 *is* required.
 2. Altered source versions must be plainly marked as such, and must not be
 misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
Jack Paleviche27bf3e2009-05-10 14:09:03 -070022
Jack Palevich77ae76e2009-05-10 19:59:24 -070023#include <ctype.h>
24#include <dlfcn.h>
Jack Paleviche27bf3e2009-05-10 14:09:03 -070025#include <stdarg.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070026#include <stdio.h>
Jack Palevichf6b5a532009-05-10 19:16:42 -070027#include <stdlib.h>
28#include <string.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070029
Jack Palevich546b2242009-05-13 15:10:04 -070030#if defined(__arm__)
31#include <unistd.h>
32#endif
33
Jack Palevicha6535612009-05-13 16:24:17 -070034#include "disassem.h"
35
Jack Palevichbbf8ab52009-05-11 11:54:30 -070036namespace acc {
37
Jack Palevich77ae76e2009-05-10 19:59:24 -070038class compiler {
/*
 * Growable-by-append (but fixed capacity) buffer that receives the
 * generated machine code.
 *
 * Positions inside the buffer are handed out as plain ints because the
 * rest of the compiler stores addresses in ints. Those values are only
 * meaningful where a pointer fits in an int (the 32-bit targets this
 * compiler generates code for).
 */
class CodeBuf {
    static const int WORD_SIZE = 4; // size of an emitted address/instruction word

    char* ind;          // next byte to be written
    char* pProgramBase; // start of the allocation, 0 while unallocated
    char* pProgramEnd;  // one past the last usable byte

    // The buffer is owned through a raw pointer, so a copy would free it
    // twice. Declared private and left undefined to forbid copying.
    CodeBuf(const CodeBuf&);
    CodeBuf& operator=(const CodeBuf&);

    void release() {
        if (pProgramBase != 0) {
            free(pProgramBase);
            pProgramBase = 0;
        }
        ind = 0;
        pProgramEnd = 0;
    }

    /* Narrow a buffer pointer to the int form used by the compiler. */
    static int toAddress(const char* p) {
        return (int) (long) p;
    }

    /* Inverse of toAddress(). */
    static char* toPointer(int address) {
        return (char*) (long) address;
    }

    /*
     * Abort instead of writing outside the buffer. Also catches writes
     * issued before init() or after a failed allocation.
     */
    void reserve(int count) {
        if (pProgramBase == 0 || count > pProgramEnd - ind) {
            fprintf(stderr, "CodeBuf: no room for %d more byte(s)\n", count);
            exit(1);
        }
    }

public:
    CodeBuf() {
        pProgramBase = 0;
        pProgramEnd = 0;
        ind = 0;
    }

    ~CodeBuf() {
        release();
    }

    /*
     * (Re)allocate a zero-filled buffer of `size` bytes and rewind the
     * output position. Any previous contents are discarded.
     */
    void init(int size) {
        release();
        pProgramBase = (char*) calloc(1, size);
        ind = pProgramBase;
        pProgramEnd = pProgramBase != 0 ? pProgramBase + size : 0;
    }

    /*
     * Output the bytes of n, least significant first, stopping as soon as
     * the remaining value is 0 or -1. Consequently o(0) and o(-1) emit
     * nothing; use ob() when a zero byte must be written.
     */
    void o(int n) {
        /* cannot use unsigned, so we must do a hack */
        while (n && n != -1) {
            reserve(1);
            *ind++ = n;
            n = n >> 8;
        }
    }

    /* Output a 4-byte word and return the address it was written at. */
    int o4(int n) {
        reserve(WORD_SIZE);
        int result = toAddress(ind);
        memcpy(ind, &n, WORD_SIZE); // the position may be unaligned
        ind += WORD_SIZE;
        return result;
    }

    /*
     * Output a byte. Handles all values, 0..ff.
     */
    void ob(int n) {
        reserve(1);
        *ind++ = n;
    }

    /*
     * Output a symbol and patch all calls to it: t heads a chain of 4-byte
     * fields, each holding the address of the next field (0 ends the
     * chain). Every field is overwritten with the displacement from the
     * end of that field to the current output position.
     */
    void gsym(int t) {
        const int here = toAddress(ind);
        while (t) {
            char* site = toPointer(t);
            int next;
            memcpy(&next, site, WORD_SIZE); /* next value */
            const int displacement = here - t - 4;
            memcpy(site, &displacement, WORD_SIZE);
            t = next;
        }
    }

    /* psym is used to put an instruction with a data field which is a
       reference to a symbol. It is in fact the same as oad ! */
    int psym(int n, int t) {
        return oad(n, t);
    }

    /*
     * Instruction + address: emit opcode bytes n via o(), then the 4-byte
     * value t. Returns the address of the 4-byte field.
     */
    int oad(int n, int t) {
        o(n);
        return o4(t);
    }

    /* Start of the buffer, or 0 when nothing is allocated. */
    inline void* getBase() {
        return (void*) pProgramBase;
    }

    /* Number of bytes emitted so far. */
    int getSize() {
        return ind - pProgramBase;
    }

    /* Address of the next byte to be emitted. */
    int getPC() {
        return toAddress(ind);
    }
};
125
126 class CodeGenerator {
127 public:
128 CodeGenerator() {}
129 virtual ~CodeGenerator() {}
130
Jack Palevich22305132009-05-13 10:58:45 -0700131 virtual void init(CodeBuf* pCodeBuf) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700132 this->pCodeBuf = pCodeBuf;
133 }
134
Jack Palevich22305132009-05-13 10:58:45 -0700135 /* returns address to patch with local variable size
136 */
Jack Palevich546b2242009-05-13 15:10:04 -0700137 virtual int functionEntry(int argCount) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700138
Jack Palevich546b2242009-05-13 15:10:04 -0700139 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700140
141 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700142 virtual void li(int t) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700143
144 virtual int gjmp(int t) = 0;
145
146 /* l = 0: je, l == 1: jne */
147 virtual int gtst(bool l, int t) = 0;
148
149 virtual void gcmp(int op) = 0;
150
Jack Palevich546b2242009-05-13 15:10:04 -0700151 virtual void genOp(int op) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700152
153 virtual void clearECX() = 0;
154
155 virtual void pushEAX() = 0;
156
157 virtual void popECX() = 0;
158
159 virtual void storeEAXToAddressECX(bool isInt) = 0;
160
161 virtual void loadEAXIndirect(bool isInt) = 0;
162
163 virtual void leaEAX(int ea) = 0;
164
165 virtual void storeEAX(int ea) = 0;
166
167 virtual void loadEAX(int ea) = 0;
168
169 virtual void postIncrementOrDecrement(int n, int op) = 0;
170
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700171 virtual int beginFunctionCallArguments() = 0;
172
173 virtual void endFunctionCallArguments(int a, int l) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700174
175 virtual void storeEAToArg(int l) = 0;
176
177 virtual int callForward(int symbol) = 0;
178
179 virtual void callRelative(int t) = 0;
180
181 virtual void callIndirect(int l) = 0;
182
183 virtual void adjustStackAfterCall(int l) = 0;
184
Jack Palevicha6535612009-05-13 16:24:17 -0700185 virtual int disassemble(FILE* out) = 0;
186
Jack Palevich21a15a22009-05-11 14:49:29 -0700187 /* output a symbol and patch all calls to it */
Jack Palevich22305132009-05-13 10:58:45 -0700188 virtual void gsym(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700189 pCodeBuf->gsym(t);
190 }
191
Jack Palevich546b2242009-05-13 15:10:04 -0700192 virtual int finishCompile() {
193#if defined(__arm__)
Jack Palevicha6535612009-05-13 16:24:17 -0700194 const long base = long(pCodeBuf->getBase());
195 const long curr = base + long(pCodeBuf->getSize());
196 int err = cacheflush(base, curr, 0);
197 return err;
Jack Palevich546b2242009-05-13 15:10:04 -0700198#else
Jack Palevicha6535612009-05-13 16:24:17 -0700199 return 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700200#endif
201 }
202
Jack Palevicha6535612009-05-13 16:24:17 -0700203 /**
204 * Adjust relative branches by this amount.
205 */
206 virtual int jumpOffset() = 0;
207
Jack Palevich21a15a22009-05-11 14:49:29 -0700208 protected:
209 void o(int n) {
210 pCodeBuf->o(n);
211 }
212
213 /*
214 * Output a byte. Handles all values, 0..ff.
215 */
216 void ob(int n) {
217 pCodeBuf->ob(n);
218 }
219
220 /* psym is used to put an instruction with a data field which is a
221 reference to a symbol. It is in fact the same as oad ! */
222 int psym(int n, int t) {
223 return oad(n, t);
224 }
225
226 /* instruction + address */
227 int oad(int n, int t) {
228 return pCodeBuf->oad(n,t);
229 }
230
Jack Palevicha6535612009-05-13 16:24:17 -0700231 int getBase() {
232 return (int) pCodeBuf->getBase();
233 }
234
Jack Palevich21a15a22009-05-11 14:49:29 -0700235 int getPC() {
236 return pCodeBuf->getPC();
237 }
238
Jack Palevich546b2242009-05-13 15:10:04 -0700239 int o4(int data) {
240 return pCodeBuf->o4(data);
241 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700242 private:
243 CodeBuf* pCodeBuf;
244 };
245
Jack Palevich22305132009-05-13 10:58:45 -0700246 class ARMCodeGenerator : public CodeGenerator {
247 public:
248 ARMCodeGenerator() {}
249 virtual ~ARMCodeGenerator() {}
250
251 /* returns address to patch with local variable size
252 */
Jack Palevich546b2242009-05-13 15:10:04 -0700253 virtual int functionEntry(int argCount) {
254 fprintf(stderr, "functionEntry(%d);\n", argCount);
Jack Palevich69796b62009-05-14 15:42:26 -0700255 // sp -> arg4 arg5 ...
256 // Push our register-based arguments back on the stack
257 if (argCount > 0) {
258 int regArgCount = argCount <= 4 ? argCount : 4;
259 o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd sp!, {}
260 }
261 // sp -> arg0 arg1 ...
262 o4(0xE92D4800); // stmfd sp!, {fp, lr}
263 // sp, fp -> oldfp, retadr, arg0 arg1 ....
264 o4(0xE1A0B00D); // mov fp, sp
265 return o4(0xE24DD000); // sub sp, sp, # <local variables>
Jack Palevich22305132009-05-13 10:58:45 -0700266 }
267
Jack Palevich546b2242009-05-13 15:10:04 -0700268 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
269 fprintf(stderr, "functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize);
Jack Palevich69796b62009-05-14 15:42:26 -0700270 // Patch local variable allocation code:
271 if (localVariableSize < 0 || localVariableSize > 255) {
Jack Palevich8de461d2009-05-14 17:21:45 -0700272 error("localVariables out of range: %d", localVariableSize);
Jack Palevich546b2242009-05-13 15:10:04 -0700273 }
Jack Palevich69796b62009-05-14 15:42:26 -0700274 *(char*) (localVariableAddress) = localVariableSize;
275
276 // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ...
277 o4(0xE1A0E00B); // mov lr, fp
278 o4(0xE59BB000); // ldr fp, [fp]
279 o4(0xE28ED004); // add sp, lr, #4
280 // sp -> retadr, arg0, ...
281 o4(0xE8BD4000); // ldmfd sp!, {lr}
282 // sp -> arg0 ....
283 if (argCount > 0) {
284 // We store the PC into the lr so we can adjust the sp before
Jack Palevich8de461d2009-05-14 17:21:45 -0700285 // returning. We need to pull off the registers we pushed
Jack Palevich69796b62009-05-14 15:42:26 -0700286 // earlier. We don't need to actually store them anywhere,
287 // just adjust the stack.
288 int regArgCount = argCount <= 4 ? argCount : 4;
289 o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2
290 }
291 o4(0xE12FFF1E); // bx lr
Jack Palevich22305132009-05-13 10:58:45 -0700292 }
293
294 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700295 virtual void li(int t) {
Jack Palevich22305132009-05-13 10:58:45 -0700296 fprintf(stderr, "li(%d);\n", t);
Jack Palevicha6535612009-05-13 16:24:17 -0700297 if (t >= 0 && t < 255) {
Jack Palevich69796b62009-05-14 15:42:26 -0700298 o4(0xE3A00000 + t); // mov r0, #0
Jack Palevicha6535612009-05-13 16:24:17 -0700299 } else if (t >= -256 && t < 0) {
300 // mvn means move constant ^ ~0
Jack Palevich69796b62009-05-14 15:42:26 -0700301 o4(0xE3E00001 - t); // mvn r0, #0
Jack Palevicha6535612009-05-13 16:24:17 -0700302 } else {
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700303 o4(0xE51F0000); // ldr r0, .L3
304 o4(0xEA000000); // b .L99
305 o4(t); // .L3: .word 0
306 // .L99:
Jack Palevicha6535612009-05-13 16:24:17 -0700307 }
Jack Palevich22305132009-05-13 10:58:45 -0700308 }
309
310 virtual int gjmp(int t) {
311 fprintf(stderr, "gjmp(%d);\n", t);
Jack Palevich8de461d2009-05-14 17:21:45 -0700312 return o4(0xEA000000 | encodeAddress(t)); // b .L33
Jack Palevich22305132009-05-13 10:58:45 -0700313 }
314
315 /* l = 0: je, l == 1: jne */
316 virtual int gtst(bool l, int t) {
317 fprintf(stderr, "gtst(%d, %d);\n", l, t);
Jack Palevich8de461d2009-05-14 17:21:45 -0700318 o4(0xE3500000); // cmp r0,#0
319 int branch = l ? 0x1A000000 : 0x0A000000; // bne : beq
320 return o4(branch | encodeAddress(t));
Jack Palevich22305132009-05-13 10:58:45 -0700321 }
322
323 virtual void gcmp(int op) {
324 fprintf(stderr, "gcmp(%d);\n", op);
Jack Palevich8de461d2009-05-14 17:21:45 -0700325 o4(0xE1510000); // cmp r1, r1
326 switch(op) {
327 case OP_EQUALS:
328 o4(0x03A00001); // moveq r0,#1
329 o4(0x13A00000); // movne r0,#0
330 break;
331 case OP_NOT_EQUALS:
332 o4(0x03A00000); // moveq r0,#0
333 o4(0x13A00001); // movne r0,#1
334 break;
335 case OP_LESS_EQUAL:
336 o4(0xD3A00001); // movle r0,#1
337 o4(0xC3A00000); // movgt r0,#0
338 break;
339 case OP_GREATER:
340 o4(0xD3A00000); // movle r0,#0
341 o4(0xC3A00001); // movgt r0,#1
342 break;
343 case OP_GREATER_EQUAL:
344 o4(0xA3A00001); // movge r0,#1
345 o4(0xB3A00000); // movlt r0,#0
346 break;
347 case OP_LESS:
348 o4(0xA3A00000); // movge r0,#0
349 o4(0xB3A00001); // movlt r0,#1
350 break;
351 default:
352 error("Unknown comparison op %d", op);
353 break;
354 }
Jack Palevich22305132009-05-13 10:58:45 -0700355 }
356
Jack Palevich546b2242009-05-13 15:10:04 -0700357 virtual void genOp(int op) {
Jack Palevich22305132009-05-13 10:58:45 -0700358 fprintf(stderr, "genOp(%d);\n", op);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700359 switch(op) {
360 case OP_MUL:
361 o4(0x0E0000091); // mul r0,r1,r0
362 break;
363 case OP_PLUS:
364 o4(0xE0810000); // add r0,r1,r0
365 break;
366 case OP_MINUS:
367 o4(0xE0410000); // sub r0,r1,r0
368 break;
369 case OP_SHIFT_LEFT:
370 o4(0xE1A00011); // lsl r0,r1,r0
371 break;
372 case OP_SHIFT_RIGHT:
373 o4(0xE1A00051); // asr r0,r1,r0
374 break;
375 case OP_BIT_AND:
376 o4(0xE0010000); // and r0,r1,r0
377 break;
378 case OP_BIT_XOR:
379 o4(0xE0210000); // eor r0,r1,r0
380 break;
381 case OP_BIT_OR:
382 o4(0xE1810000); // orr r0,r1,r0
383 break;
384 case OP_BIT_NOT:
385 o4(0xE1E00000); // mvn r0, r0
386 break;
387 default:
Jack Palevich69796b62009-05-14 15:42:26 -0700388 error("Unimplemented op %d\n", op);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700389 break;
390 }
Jack Palevich22305132009-05-13 10:58:45 -0700391#if 0
392 o(decodeOp(op));
393 if (op == OP_MOD)
394 o(0x92); /* xchg %edx, %eax */
395#endif
396 }
397
398 virtual void clearECX() {
399 fprintf(stderr, "clearECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700400 o4(0xE3A01000); // mov r1, #0
Jack Palevich22305132009-05-13 10:58:45 -0700401 }
402
403 virtual void pushEAX() {
404 fprintf(stderr, "pushEAX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700405 o4(0xE92D0001); // stmfd sp!,{r0}
Jack Palevich22305132009-05-13 10:58:45 -0700406 }
407
408 virtual void popECX() {
409 fprintf(stderr, "popECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700410 o4(0xE8BD0002); // ldmfd sp!,{r1}
Jack Palevich22305132009-05-13 10:58:45 -0700411 }
412
413 virtual void storeEAXToAddressECX(bool isInt) {
414 fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700415 o4(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
Jack Palevich22305132009-05-13 10:58:45 -0700416 }
417
418 virtual void loadEAXIndirect(bool isInt) {
419 fprintf(stderr, "loadEAXIndirect(%d);\n", isInt);
420 if (isInt)
Jack Palevich69796b62009-05-14 15:42:26 -0700421 o4(0xE5900000); // ldr r0, [r0]
Jack Palevich22305132009-05-13 10:58:45 -0700422 else
Jack Palevich69796b62009-05-14 15:42:26 -0700423 o4(0xE5D00000); // ldrb r0, [r0]
Jack Palevich22305132009-05-13 10:58:45 -0700424 }
425
426 virtual void leaEAX(int ea) {
Jack Palevich69796b62009-05-14 15:42:26 -0700427 fprintf(stderr, "[!!! fixme !!!] leaEAX(%d);\n", ea);
428 error("Unimplemented");
429 if (ea < -4095 || ea > 4095) {
430 error("Offset out of range: %08x", ea);
431 }
432 o4(0xE59B0000 | (0x1fff & ea)); //ldr r0, [fp,#ea]
Jack Palevich22305132009-05-13 10:58:45 -0700433 }
434
435 virtual void storeEAX(int ea) {
436 fprintf(stderr, "storeEAX(%d);\n", ea);
Jack Palevich69796b62009-05-14 15:42:26 -0700437 int fpOffset = ea;
438 if (fpOffset < -4095 || fpOffset > 4095) {
439 error("Offset out of range: %08x", ea);
440 }
441 if (fpOffset < 0) {
442 o4(0xE50B0000 | (0xfff & (-fpOffset))); // str r0, [fp,#-ea]
443 } else {
444 o4(0xE58B0000 | (0xfff & fpOffset)); // str r0, [fp,#ea]
445 }
Jack Palevich22305132009-05-13 10:58:45 -0700446 }
447
448 virtual void loadEAX(int ea) {
449 fprintf(stderr, "loadEAX(%d);\n", ea);
Jack Palevich69796b62009-05-14 15:42:26 -0700450 int fpOffset = ea;
451 if (fpOffset < -4095 || fpOffset > 4095) {
452 error("Offset out of range: %08x", ea);
453 }
454 if (fpOffset < 0) {
455 o4(0xE51B0000 | (0xfff & (-fpOffset))); // ldr r0, [fp,#-ea]
456 } else {
457 o4(0xE59B0000 | (0xfff & fpOffset)); //ldr r0, [fp,#ea]
458 }
Jack Palevich22305132009-05-13 10:58:45 -0700459 }
460
461 virtual void postIncrementOrDecrement(int n, int op) {
462 fprintf(stderr, "postIncrementOrDecrement(%d, %d);\n", n, op);
463 /* Implement post-increment or post decrement.
464 */
Jack Palevich69796b62009-05-14 15:42:26 -0700465
466 error("Unimplemented");
Jack Palevich22305132009-05-13 10:58:45 -0700467#if 0
468 gmov(0, n); /* 83 ADD */
469 o(decodeOp(op));
470#endif
471 }
472
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700473 virtual int beginFunctionCallArguments() {
474 fprintf(stderr, "beginFunctionCallArguments();\n");
475 return o4(0xE24DDF00); // Placeholder
476 }
477
478 virtual void endFunctionCallArguments(int a, int l) {
479 fprintf(stderr, "endFunctionCallArguments(0x%08x, %d);\n", a, l);
480 if (l < 0 || l > 0x3FC) {
481 error("L out of range for stack adjustment: 0x%08x", l);
482 }
483 * (int*) a = 0xE24DDF00 | (l >> 2); // sub sp, sp, #0 << 2
484 int argCount = l >> 2;
485 if (argCount > 0) {
486 int regArgCount = argCount > 4 ? 4 : argCount;
487 o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd sp!,{}
488 }
Jack Palevich22305132009-05-13 10:58:45 -0700489 }
490
491 virtual void storeEAToArg(int l) {
492 fprintf(stderr, "storeEAToArg(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700493 if (l < 0 || l > 4096-4) {
494 error("l out of range for stack offset: 0x%08x", l);
495 }
496 o4(0xE58D0000 + l); // str r0, [sp, #4]
Jack Palevich22305132009-05-13 10:58:45 -0700497 }
498
499 virtual int callForward(int symbol) {
500 fprintf(stderr, "callForward(%d);\n", symbol);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700501 // Forward calls are always short (local)
502 return o4(0xEB000000 | encodeAddress(symbol));
Jack Palevich22305132009-05-13 10:58:45 -0700503 }
504
505 virtual void callRelative(int t) {
506 fprintf(stderr, "callRelative(%d);\n", t);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700507 int abs = t + getPC() + jumpOffset();
Jack Palevich69796b62009-05-14 15:42:26 -0700508 fprintf(stderr, "abs=%d (0x08%x)\n", abs, abs);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700509 if (t >= - (1 << 25) && t < (1 << 25)) {
510 o4(0xEB000000 | encodeAddress(t));
511 } else {
512 // Long call.
513 o4(0xE59FC000); // ldr r12, .L1
514 o4(0xEA000000); // b .L99
515 o4(t - 16); // .L1: .word 0
516 o4(0xE08CC00F); // .L99: add r12,pc
517 o4(0xE12FFF3C); // blx r12
518 }
Jack Palevich22305132009-05-13 10:58:45 -0700519 }
520
521 virtual void callIndirect(int l) {
522 fprintf(stderr, "callIndirect(%d);\n", l);
523 oad(0x2494ff, l); /* call *xxx(%esp) */
524 }
525
526 virtual void adjustStackAfterCall(int l) {
527 fprintf(stderr, "adjustStackAfterCall(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700528 if (l < 0 || l > 0x3FC) {
529 error("L out of range for stack adjustment: 0x%08x", l);
530 }
531 int argCount = l >> 2;
532 if (argCount > 4) {
533 int remainingArgs = argCount - 4;
534 o4(0xE28DDF00 | remainingArgs); // add sp, sp, #0x3fc
535 }
536
Jack Palevich22305132009-05-13 10:58:45 -0700537 }
538
Jack Palevicha6535612009-05-13 16:24:17 -0700539 virtual int jumpOffset() {
540 return 4;
541 }
542
543 /* output a symbol and patch all calls to it */
544 virtual void gsym(int t) {
545 fprintf(stderr, "gsym(0x%x)\n", t);
546 int n;
547 int base = getBase();
548 int pc = getPC();
549 fprintf(stderr, "pc = 0x%x\n", pc);
550 while (t) {
551 int data = * (int*) t;
552 int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2);
553 if (decodedOffset == 0) {
554 n = 0;
555 } else {
556 n = base + decodedOffset; /* next value */
557 }
558 *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK)
559 | encodeRelAddress(pc - t - 8);
560 t = n;
561 }
562 }
563
564 virtual int disassemble(FILE* out) {
565 disasmOut = out;
566 disasm_interface_t di;
567 di.di_readword = disassemble_readword;
568 di.di_printaddr = disassemble_printaddr;
569 di.di_printf = disassemble_printf;
570
571 int base = getBase();
572 int pc = getPC();
573 for(int i = base; i < pc; i += 4) {
574 fprintf(out, "%08x: %08x ", i, *(int*) i);
575 ::disasm(&di, i, 0);
576 }
577 return 0;
578 }
Jack Palevich22305132009-05-13 10:58:45 -0700579 private:
Jack Palevicha6535612009-05-13 16:24:17 -0700580 static FILE* disasmOut;
581
582 static u_int
583 disassemble_readword(u_int address)
584 {
585 return(*((u_int *)address));
586 }
587
588 static void
589 disassemble_printaddr(u_int address)
590 {
591 fprintf(disasmOut, "0x%08x", address);
592 }
593
594 static void
595 disassemble_printf(const char *fmt, ...) {
596 va_list ap;
597 va_start(ap, fmt);
598 vfprintf(disasmOut, fmt, ap);
599 va_end(ap);
600 }
601
602 static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff;
603
604 /** Encode a relative address that might also be
605 * a label.
606 */
607 int encodeAddress(int value) {
608 int base = getBase();
609 if (value >= base && value <= getPC() ) {
610 // This is a label, encode it relative to the base.
611 value = value - base;
612 }
613 return encodeRelAddress(value);
614 }
615
616 int encodeRelAddress(int value) {
617 return BRANCH_REL_ADDRESS_MASK & (value >> 2);
618 }
Jack Palevich22305132009-05-13 10:58:45 -0700619
Jack Palevich546b2242009-05-13 15:10:04 -0700620 void error(const char* fmt,...) {
621 va_list ap;
622 va_start(ap, fmt);
623 vfprintf(stderr, fmt, ap);
624 va_end(ap);
625 exit(12);
626 }
Jack Palevich22305132009-05-13 10:58:45 -0700627 };
628
Jack Palevich21a15a22009-05-11 14:49:29 -0700629 class X86CodeGenerator : public CodeGenerator {
630 public:
631 X86CodeGenerator() {}
632 virtual ~X86CodeGenerator() {}
633
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700634 /* returns address to patch with local variable size
635 */
Jack Palevich546b2242009-05-13 15:10:04 -0700636 virtual int functionEntry(int argCount) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700637 o(0xe58955); /* push %ebp, mov %esp, %ebp */
638 return oad(0xec81, 0); /* sub $xxx, %esp */
639 }
640
Jack Palevich546b2242009-05-13 15:10:04 -0700641 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700642 o(0xc3c9); /* leave, ret */
Jack Palevich546b2242009-05-13 15:10:04 -0700643 *(int *) localVariableAddress = localVariableSize; /* save local variables */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700644 }
645
Jack Palevich21a15a22009-05-11 14:49:29 -0700646 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700647 virtual void li(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700648 oad(0xb8, t); /* mov $xx, %eax */
649 }
650
Jack Palevich22305132009-05-13 10:58:45 -0700651 virtual int gjmp(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700652 return psym(0xe9, t);
653 }
654
655 /* l = 0: je, l == 1: jne */
Jack Palevich22305132009-05-13 10:58:45 -0700656 virtual int gtst(bool l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700657 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
658 return psym(0x84 + l, t);
659 }
660
Jack Palevich22305132009-05-13 10:58:45 -0700661 virtual void gcmp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700662 int t = decodeOp(op);
Jack Palevich21a15a22009-05-11 14:49:29 -0700663 o(0xc139); /* cmp %eax,%ecx */
664 li(0);
665 o(0x0f); /* setxx %al */
666 o(t + 0x90);
667 o(0xc0);
668 }
669
Jack Palevich546b2242009-05-13 15:10:04 -0700670 virtual void genOp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700671 o(decodeOp(op));
672 if (op == OP_MOD)
673 o(0x92); /* xchg %edx, %eax */
674 }
675
Jack Palevich22305132009-05-13 10:58:45 -0700676 virtual void clearECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700677 oad(0xb9, 0); /* movl $0, %ecx */
678 }
679
Jack Palevich22305132009-05-13 10:58:45 -0700680 virtual void pushEAX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700681 o(0x50); /* push %eax */
682 }
683
Jack Palevich22305132009-05-13 10:58:45 -0700684 virtual void popECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700685 o(0x59); /* pop %ecx */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700686 }
687
Jack Palevich22305132009-05-13 10:58:45 -0700688 virtual void storeEAXToAddressECX(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700689 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
690 }
691
Jack Palevich22305132009-05-13 10:58:45 -0700692 virtual void loadEAXIndirect(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700693 if (isInt)
694 o(0x8b); /* mov (%eax), %eax */
695 else
696 o(0xbe0f); /* movsbl (%eax), %eax */
697 ob(0); /* add zero in code */
698 }
699
Jack Palevich22305132009-05-13 10:58:45 -0700700 virtual void leaEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700701 gmov(10, ea); /* leal EA, %eax */
702 }
703
Jack Palevich22305132009-05-13 10:58:45 -0700704 virtual void storeEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700705 gmov(6, ea); /* mov %eax, EA */
706 }
707
Jack Palevich22305132009-05-13 10:58:45 -0700708 virtual void loadEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700709 gmov(8, ea); /* mov EA, %eax */
710 }
711
Jack Palevich22305132009-05-13 10:58:45 -0700712 virtual void postIncrementOrDecrement(int n, int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700713 /* Implement post-increment or post decrement.
Jack Palevich21a15a22009-05-11 14:49:29 -0700714 */
715 gmov(0, n); /* 83 ADD */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700716 o(decodeOp(op));
Jack Palevich21a15a22009-05-11 14:49:29 -0700717 }
718
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700719 virtual int beginFunctionCallArguments() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700720 return oad(0xec81, 0); /* sub $xxx, %esp */
721 }
722
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700723 virtual void endFunctionCallArguments(int a, int l) {
724 * (int*) a = l;
725 }
726
Jack Palevich22305132009-05-13 10:58:45 -0700727 virtual void storeEAToArg(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700728 oad(0x248489, l); /* movl %eax, xxx(%esp) */
729 }
730
Jack Palevich22305132009-05-13 10:58:45 -0700731 virtual int callForward(int symbol) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700732 return psym(0xe8, symbol); /* call xxx */
733 }
734
Jack Palevich22305132009-05-13 10:58:45 -0700735 virtual void callRelative(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700736 psym(0xe8, t); /* call xxx */
737 }
738
Jack Palevich22305132009-05-13 10:58:45 -0700739 virtual void callIndirect(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700740 oad(0x2494ff, l); /* call *xxx(%esp) */
741 }
742
Jack Palevich22305132009-05-13 10:58:45 -0700743 virtual void adjustStackAfterCall(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700744 oad(0xc481, l); /* add $xxx, %esp */
745 }
746
Jack Palevicha6535612009-05-13 16:24:17 -0700747 virtual int jumpOffset() {
748 return 5;
749 }
750
751 virtual int disassemble(FILE* out) {
752 return 1;
753 }
754
Jack Palevich21a15a22009-05-11 14:49:29 -0700755 private:
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700756 static const int operatorHelper[];
757
758 int decodeOp(int op) {
759 if (op < 0 || op > OP_COUNT) {
760 fprintf(stderr, "Out-of-range operator: %d\n", op);
761 exit(1);
762 }
763 return operatorHelper[op];
764 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700765
Jack Palevich546b2242009-05-13 15:10:04 -0700766 void gmov(int l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700767 o(l + 0x83);
768 oad((t < LOCAL) << 7 | 5, t);
769 }
770 };
771
772 /* vars: value of variables
773 loc : local variable index
774 glo : global variable index
775 ind : output code ptr
776 rsym: return symbol
777 prog: output code
778 dstk: define stack
779 dptr, dch: macro state
780 */
781 int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
782 dptr, dch, last_id;
783 void* pSymbolBase;
784 void* pGlobalBase;
785 void* pVarsBase;
786 FILE* file;
787
788 CodeBuf codeBuf;
Jack Palevich22305132009-05-13 10:58:45 -0700789 CodeGenerator* pGen;
Jack Palevich21a15a22009-05-11 14:49:29 -0700790
791 static const int ALLOC_SIZE = 99999;
792
793 /* depends on the init string */
794 static const int TOK_STR_SIZE = 48;
795 static const int TOK_IDENT = 0x100;
796 static const int TOK_INT = 0x100;
797 static const int TOK_IF = 0x120;
798 static const int TOK_ELSE = 0x138;
799 static const int TOK_WHILE = 0x160;
800 static const int TOK_BREAK = 0x190;
801 static const int TOK_RETURN = 0x1c0;
802 static const int TOK_FOR = 0x1f8;
803 static const int TOK_DEFINE = 0x218;
804 static const int TOK_MAIN = 0x250;
805
806 static const int TOK_DUMMY = 1;
807 static const int TOK_NUM = 2;
808
809 static const int LOCAL = 0x200;
810
811 static const int SYM_FORWARD = 0;
812 static const int SYM_DEFINE = 1;
813
814 /* tokens in string heap */
815 static const int TAG_TOK = ' ';
816 static const int TAG_MACRO = 2;
817
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700818 static const int OP_INCREMENT = 0;
819 static const int OP_DECREMENT = 1;
820 static const int OP_MUL = 2;
821 static const int OP_DIV = 3;
822 static const int OP_MOD = 4;
823 static const int OP_PLUS = 5;
824 static const int OP_MINUS = 6;
825 static const int OP_SHIFT_LEFT = 7;
826 static const int OP_SHIFT_RIGHT = 8;
827 static const int OP_LESS_EQUAL = 9;
828 static const int OP_GREATER_EQUAL = 10;
829 static const int OP_LESS = 11;
830 static const int OP_GREATER = 12;
831 static const int OP_EQUALS = 13;
832 static const int OP_NOT_EQUALS = 14;
833 static const int OP_LOGICAL_AND = 15;
834 static const int OP_LOGICAL_OR = 16;
835 static const int OP_BIT_AND = 17;
836 static const int OP_BIT_XOR = 18;
837 static const int OP_BIT_OR = 19;
838 static const int OP_BIT_NOT = 20;
839 static const int OP_LOGICAL_NOT = 21;
840 static const int OP_COUNT = 22;
841
842 /* Operators are searched from front, the two-character operators appear
843 * before the single-character operators with the same first character.
844 * @ is used to pad out single-character operators.
845 */
846 static const char* operatorChars;
847 static const char operatorLevel[];
848
Jack Palevich21a15a22009-05-11 14:49:29 -0700849 void pdef(int t) {
850 *(char *) dstk++ = t;
851 }
852
853 void inp() {
854 if (dptr) {
855 ch = *(char *) dptr++;
856 if (ch == TAG_MACRO) {
857 dptr = 0;
858 ch = dch;
859 }
860 } else
861 ch = fgetc(file);
862 /* printf("ch=%c 0x%x\n", ch, ch); */
863 }
864
865 int isid() {
Jack Palevich546b2242009-05-13 15:10:04 -0700866 return isalnum(ch) | (ch == '_');
Jack Palevich21a15a22009-05-11 14:49:29 -0700867 }
868
869 /* read a character constant */
870 void getq() {
871 if (ch == '\\') {
872 inp();
873 if (ch == 'n')
874 ch = '\n';
875 }
876 }
877
878 void next() {
879 int l, a;
880
Jack Palevich546b2242009-05-13 15:10:04 -0700881 while (isspace(ch) | (ch == '#')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700882 if (ch == '#') {
883 inp();
884 next();
885 if (tok == TOK_DEFINE) {
886 next();
887 pdef(TAG_TOK); /* fill last ident tag */
888 *(int *) tok = SYM_DEFINE;
889 *(int *) (tok + 4) = dstk; /* define stack */
890 }
891 /* well we always save the values ! */
892 while (ch != '\n') {
893 pdef(ch);
894 inp();
895 }
896 pdef(ch);
897 pdef(TAG_MACRO);
898 }
899 inp();
900 }
901 tokl = 0;
902 tok = ch;
903 /* encode identifiers & numbers */
904 if (isid()) {
905 pdef(TAG_TOK);
906 last_id = dstk;
907 while (isid()) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700908 pdef(ch);
909 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700910 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700911 if (isdigit(tok)) {
912 tokc = strtol((char*) last_id, 0, 0);
913 tok = TOK_NUM;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700914 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700915 *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
916 suppose data is initialized to zero by calloc) */
917 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
918 - sym_stk);
919 *(char *) dstk = 0; /* mark real end of ident for dlsym() */
920 tok = tok * 8 + TOK_IDENT;
921 if (tok > TOK_DEFINE) {
922 tok = vars + tok;
923 /* printf("tok=%s %x\n", last_id, tok); */
924 /* define handling */
925 if (*(int *) tok == SYM_DEFINE) {
926 dptr = *(int *) (tok + 4);
927 dch = ch;
928 inp();
929 next();
930 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700931 }
932 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700933 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700934 inp();
935 if (tok == '\'') {
936 tok = TOK_NUM;
937 getq();
938 tokc = ch;
939 inp();
940 inp();
Jack Palevich546b2242009-05-13 15:10:04 -0700941 } else if ((tok == '/') & (ch == '*')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700942 inp();
943 while (ch) {
944 while (ch != '*')
945 inp();
946 inp();
947 if (ch == '/')
948 ch = 0;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700949 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700950 inp();
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700951 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700952 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700953 const char* t = operatorChars;
954 int opIndex = 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700955 while ((l = *t++) != 0) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700956 a = *t++;
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700957 tokl = operatorLevel[opIndex];
958 tokc = opIndex;
Jack Palevich546b2242009-05-13 15:10:04 -0700959 if ((l == tok) & ((a == ch) | (a == '@'))) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700960#if 0
961 printf("%c%c -> tokl=%d tokc=0x%x\n",
962 l, a, tokl, tokc);
963#endif
964 if (a == ch) {
965 inp();
966 tok = TOK_DUMMY; /* dummy token for double tokens */
967 }
968 break;
969 }
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700970 opIndex++;
971 }
972 if (l == 0) {
973 tokl = 0;
974 tokc = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -0700975 }
976 }
977 }
978#if 0
979 {
980 int p;
981
982 printf("tok=0x%x ", tok);
983 if (tok >= TOK_IDENT) {
984 printf("'");
985 if (tok> TOK_DEFINE)
986 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
987 else
988 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
989 while (*(char *)p != TAG_TOK && *(char *)p)
990 printf("%c", *(char *)p++);
991 printf("'\n");
992 } else if (tok == TOK_NUM) {
993 printf("%d\n", tokc);
994 } else {
995 printf("'%c'\n", tok);
996 }
997 }
998#endif
999 }
1000
1001 void error(const char *fmt, ...) {
1002 va_list ap;
1003
1004 va_start(ap, fmt);
1005 fprintf(stderr, "%ld: ", ftell((FILE *) file));
1006 vfprintf(stderr, fmt, ap);
1007 fprintf(stderr, "\n");
1008 va_end(ap);
1009 exit(1);
1010 }
1011
1012 void skip(int c) {
1013 if (tok != c) {
1014 error("'%c' expected", c);
1015 }
1016 next();
1017 }
1018
Jack Palevich21a15a22009-05-11 14:49:29 -07001019 /* l is one if '=' parsing wanted (quick hack) */
1020 void unary(int l) {
1021 int n, t, a, c;
Jack Palevich546b2242009-05-13 15:10:04 -07001022 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001023 n = 1; /* type of expression 0 = forward, 1 = value, other =
1024 lvalue */
1025 if (tok == '\"') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001026 pGen->li(glo);
Jack Palevich21a15a22009-05-11 14:49:29 -07001027 while (ch != '\"') {
1028 getq();
1029 *(char *) glo++ = ch;
1030 inp();
1031 }
1032 *(char *) glo = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001033 glo = (glo + 4) & -4; /* align heap */
Jack Palevich21a15a22009-05-11 14:49:29 -07001034 inp();
1035 next();
1036 } else {
1037 c = tokl;
1038 a = tokc;
1039 t = tok;
1040 next();
1041 if (t == TOK_NUM) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001042 pGen->li(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001043 } else if (c == 2) {
1044 /* -, +, !, ~ */
1045 unary(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001046 pGen->clearECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001047 if (t == '!')
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001048 pGen->gcmp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001049 else
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001050 pGen->genOp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001051 } else if (t == '(') {
1052 expr();
1053 skip(')');
1054 } else if (t == '*') {
1055 /* parse cast */
1056 skip('(');
1057 t = tok; /* get type */
1058 next(); /* skip int/char/void */
1059 next(); /* skip '*' or '(' */
1060 if (tok == '*') {
1061 /* function type */
1062 skip('*');
1063 skip(')');
1064 skip('(');
1065 skip(')');
1066 t = 0;
1067 }
1068 skip(')');
1069 unary(0);
1070 if (tok == '=') {
1071 next();
1072 pGen->pushEAX();
1073 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001074 pGen->popECX();
1075 pGen->storeEAXToAddressECX(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001076 } else if (t) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001077 pGen->loadEAXIndirect(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001078 }
1079 } else if (t == '&') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001080 pGen->leaEAX(*(int *) tok);
Jack Palevich21a15a22009-05-11 14:49:29 -07001081 next();
1082 } else {
1083 n = *(int *) t;
1084 /* forward reference: try dlsym */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001085 if (!n) {
1086 n = (int) dlsym(RTLD_DEFAULT, (char*) last_id);
1087 }
Jack Palevich546b2242009-05-13 15:10:04 -07001088 if ((tok == '=') & l) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001089 /* assignment */
1090 next();
1091 expr();
1092 pGen->storeEAX(n);
1093 } else if (tok != '(') {
1094 /* variable */
1095 pGen->loadEAX(n);
1096 if (tokl == 11) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001097 pGen->postIncrementOrDecrement(n, tokc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001098 next();
1099 }
1100 }
1101 }
1102 }
1103
1104 /* function call */
1105 if (tok == '(') {
1106 if (n == 1)
1107 pGen->pushEAX();
1108
1109 /* push args and invert order */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001110 a = pGen->beginFunctionCallArguments();
Jack Palevich21a15a22009-05-11 14:49:29 -07001111 next();
1112 l = 0;
1113 while (tok != ')') {
1114 expr();
1115 pGen->storeEAToArg(l);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001116 if (tok == ',')
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001117 next();
Jack Palevich21a15a22009-05-11 14:49:29 -07001118 l = l + 4;
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001119 }
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001120 pGen->endFunctionCallArguments(a, l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001121 next();
1122 if (!n) {
1123 /* forward reference */
1124 t = t + 4;
1125 *(int *) t = pGen->callForward(*(int *) t);
1126 } else if (n == 1) {
1127 pGen->callIndirect(l);
1128 l = l + 4;
1129 } else {
Jack Palevicha6535612009-05-13 16:24:17 -07001130 pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); /* call xxx */
Jack Palevich21a15a22009-05-11 14:49:29 -07001131 }
1132 if (l)
1133 pGen->adjustStackAfterCall(l);
1134 }
1135 }
1136
1137 void sum(int l) {
1138 int t, n, a;
Jack Palevich546b2242009-05-13 15:10:04 -07001139 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001140 if (l-- == 1)
1141 unary(1);
1142 else {
1143 sum(l);
1144 a = 0;
1145 while (l == tokl) {
1146 n = tok;
1147 t = tokc;
1148 next();
1149
1150 if (l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001151 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
Jack Palevich21a15a22009-05-11 14:49:29 -07001152 sum(l);
1153 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001154 pGen->pushEAX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001155 sum(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001156 pGen->popECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001157
Jack Palevich546b2242009-05-13 15:10:04 -07001158 if ((l == 4) | (l == 5)) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001159 pGen->gcmp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001160 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001161 pGen->genOp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001162 }
1163 }
1164 }
1165 /* && and || output code generation */
1166 if (a && l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001167 a = pGen->gtst(t == OP_LOGICAL_OR, a);
1168 pGen->li(t != OP_LOGICAL_OR);
Jack Palevicha6535612009-05-13 16:24:17 -07001169 pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001170 pGen->gsym(a);
1171 pGen->li(t == OP_LOGICAL_OR);
Jack Palevich21a15a22009-05-11 14:49:29 -07001172 }
1173 }
1174 }
1175
1176 void expr() {
1177 sum(11);
1178 }
1179
1180 int test_expr() {
1181 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001182 return pGen->gtst(0, 0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001183 }
1184
1185 void block(int l) {
1186 int a, n, t;
1187
1188 if (tok == TOK_IF) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001189 next();
1190 skip('(');
Jack Palevich21a15a22009-05-11 14:49:29 -07001191 a = test_expr();
1192 skip(')');
1193 block(l);
1194 if (tok == TOK_ELSE) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001195 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001196 n = pGen->gjmp(0); /* jmp */
1197 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001198 block(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001199 pGen->gsym(n); /* patch else jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001200 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001201 pGen->gsym(a); /* patch if test */
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001202 }
Jack Palevich546b2242009-05-13 15:10:04 -07001203 } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001204 t = tok;
1205 next();
1206 skip('(');
1207 if (t == TOK_WHILE) {
Jack Palevicha6535612009-05-13 16:24:17 -07001208 n = codeBuf.getPC(); // top of loop, target of "next" iteration
Jack Palevich21a15a22009-05-11 14:49:29 -07001209 a = test_expr();
1210 } else {
1211 if (tok != ';')
1212 expr();
1213 skip(';');
1214 n = codeBuf.getPC();
1215 a = 0;
1216 if (tok != ';')
1217 a = test_expr();
1218 skip(';');
1219 if (tok != ')') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001220 t = pGen->gjmp(0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001221 expr();
Jack Palevicha6535612009-05-13 16:24:17 -07001222 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset());
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001223 pGen->gsym(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001224 n = t + 4;
1225 }
1226 }
1227 skip(')');
1228 block((int) &a);
Jack Palevicha6535612009-05-13 16:24:17 -07001229 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001230 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001231 } else if (tok == '{') {
1232 next();
1233 /* declarations */
1234 decl(1);
1235 while (tok != '}')
1236 block(l);
1237 next();
1238 } else {
1239 if (tok == TOK_RETURN) {
1240 next();
1241 if (tok != ';')
1242 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001243 rsym = pGen->gjmp(rsym); /* jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001244 } else if (tok == TOK_BREAK) {
1245 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001246 *(int *) l = pGen->gjmp(*(int *) l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001247 } else if (tok != ';')
1248 expr();
1249 skip(';');
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001250 }
1251 }
Jack Palevich21a15a22009-05-11 14:49:29 -07001252
1253 /* 'l' is true if local declarations */
1254 void decl(int l) {
1255 int a;
1256
Jack Palevich546b2242009-05-13 15:10:04 -07001257 while ((tok == TOK_INT) | ((tok != -1) & (!l))) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001258 if (tok == TOK_INT) {
1259 next();
1260 while (tok != ';') {
1261 if (l) {
1262 loc = loc + 4;
1263 *(int *) tok = -loc;
1264 } else {
1265 *(int *) tok = glo;
1266 glo = glo + 4;
1267 }
1268 next();
1269 if (tok == ',')
1270 next();
1271 }
1272 skip(';');
1273 } else {
1274 /* patch forward references (XXX: do not work for function
1275 pointers) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001276 pGen->gsym(*(int *) (tok + 4));
Jack Palevich21a15a22009-05-11 14:49:29 -07001277 /* put function address */
1278 *(int *) tok = codeBuf.getPC();
1279 next();
1280 skip('(');
1281 a = 8;
Jack Palevich546b2242009-05-13 15:10:04 -07001282 int argCount = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001283 while (tok != ')') {
1284 /* read param name and compute offset */
1285 *(int *) tok = a;
1286 a = a + 4;
1287 next();
1288 if (tok == ',')
1289 next();
Jack Palevich546b2242009-05-13 15:10:04 -07001290 argCount++;
Jack Palevich21a15a22009-05-11 14:49:29 -07001291 }
1292 next(); /* skip ')' */
1293 rsym = loc = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001294 a = pGen->functionEntry(argCount);
Jack Palevich21a15a22009-05-11 14:49:29 -07001295 block(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001296 pGen->gsym(rsym);
Jack Palevich546b2242009-05-13 15:10:04 -07001297 pGen->functionExit(argCount, a, loc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001298 }
1299 }
1300 }
1301
1302 void cleanup() {
1303 if (sym_stk != 0) {
1304 free((void*) sym_stk);
1305 sym_stk = 0;
1306 }
1307 if (pGlobalBase != 0) {
1308 free((void*) pGlobalBase);
1309 pGlobalBase = 0;
1310 }
1311 if (pVarsBase != 0) {
1312 free(pVarsBase);
1313 pVarsBase = 0;
1314 }
1315 if (pGen) {
1316 delete pGen;
1317 pGen = 0;
1318 }
1319 }
1320
1321 void clear() {
1322 tok = 0;
1323 tokc = 0;
1324 tokl = 0;
1325 ch = 0;
1326 vars = 0;
1327 rsym = 0;
1328 loc = 0;
1329 glo = 0;
1330 sym_stk = 0;
1331 dstk = 0;
1332 dptr = 0;
1333 dch = 0;
1334 last_id = 0;
1335 file = 0;
1336 pGlobalBase = 0;
1337 pVarsBase = 0;
1338 pGen = 0;
1339 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001340
Jack Palevich22305132009-05-13 10:58:45 -07001341 void setArchitecture(const char* architecture) {
1342 delete pGen;
1343 pGen = 0;
1344
1345 if (architecture != NULL) {
1346 if (strcmp(architecture, "arm") == 0) {
1347 pGen = new ARMCodeGenerator();
1348 } else if (strcmp(architecture, "x86") == 0) {
1349 pGen = new X86CodeGenerator();
1350 } else {
1351 fprintf(stderr, "Unknown architecture %s", architecture);
1352 }
1353 }
1354
1355 if (pGen == NULL) {
1356 pGen = new ARMCodeGenerator();
1357 }
1358 }
1359
Jack Palevich77ae76e2009-05-10 19:59:24 -07001360public:
/* Options passed to compile(). */
struct args {
    /* Starts with no architecture selected. */
    args() : architecture(0) {
    }

    /* Target name handed to setArchitecture(); 0 means "not specified". */
    const char* architecture;
};
1367
Jack Palevich21a15a22009-05-11 14:49:29 -07001368 compiler() {
1369 clear();
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001370 }
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001371
Jack Palevich21a15a22009-05-11 14:49:29 -07001372 ~compiler() {
1373 cleanup();
1374 }
1375
Jack Palevich22305132009-05-13 10:58:45 -07001376 int compile(FILE* in, args& args) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001377 cleanup();
1378 clear();
1379 codeBuf.init(ALLOC_SIZE);
Jack Palevich22305132009-05-13 10:58:45 -07001380 setArchitecture(args.architecture);
Jack Palevich21a15a22009-05-11 14:49:29 -07001381 pGen->init(&codeBuf);
1382 file = in;
1383 sym_stk = (int) calloc(1, ALLOC_SIZE);
1384 dstk = (int) strcpy((char*) sym_stk,
1385 " int if else while break return for define main ")
1386 + TOK_STR_SIZE;
1387 pGlobalBase = calloc(1, ALLOC_SIZE);
1388 glo = (int) pGlobalBase;
1389 pVarsBase = calloc(1, ALLOC_SIZE);
1390 vars = (int) pVarsBase;
1391 inp();
1392 next();
1393 decl(0);
Jack Palevich546b2242009-05-13 15:10:04 -07001394 pGen->finishCompile();
Jack Palevich21a15a22009-05-11 14:49:29 -07001395 return 0;
1396 }
1397
1398 int run(int argc, char** argv) {
1399 typedef int (*mainPtr)(int argc, char** argv);
1400 mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
1401 if (!aMain) {
1402 fprintf(stderr, "Could not find function \"main\".\n");
1403 return -1;
1404 }
1405 return aMain(argc, argv);
1406 }
1407
1408 int dump(FILE* out) {
1409 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
1410 return 0;
1411 }
Jack Palevich77ae76e2009-05-10 19:59:24 -07001412
Jack Palevicha6535612009-05-13 16:24:17 -07001413 int disassemble(FILE* out) {
1414 return pGen->disassemble(out);
1415 }
1416
Jack Palevich77ae76e2009-05-10 19:59:24 -07001417};
1418
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001419const char* compiler::operatorChars =
1420 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
1421
1422const char compiler::operatorLevel[] =
1423 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
1424 5, 5, /* ==, != */
1425 9, 10, /* &&, || */
1426 6, 7, 8, /* & ^ | */
1427 2, 2 /* ~ ! */
1428 };
1429
Jack Palevicha6535612009-05-13 16:24:17 -07001430FILE* compiler::ARMCodeGenerator::disasmOut;
1431
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001432const int compiler::X86CodeGenerator::operatorHelper[] = {
1433 0x1, // ++
1434 0xff, // --
1435 0xc1af0f, // *
1436 0xf9f79991, // /
1437 0xf9f79991, // % (With manual assist to swap results)
1438 0xc801, // +
1439 0xd8f7c829, // -
1440 0xe0d391, // <<
1441 0xf8d391, // >>
1442 0xe, // <=
1443 0xd, // >=
1444 0xc, // <
1445 0xf, // >
1446 0x4, // ==
1447 0x5, // !=
1448 0x0, // &&
1449 0x1, // ||
1450 0xc821, // &
1451 0xc831, // ^
1452 0xc809, // |
1453 0xd0f7, // ~
1454 0x4 // !
1455};
1456
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001457} // namespace acc
1458
Jack Palevich546b2242009-05-13 15:10:04 -07001459// This is a separate function so it can easily be set by breakpoint in gdb.
1460int run(acc::compiler& c, int argc, char** argv) {
1461 return c.run(argc, argv);
1462}
1463
Jack Palevich77ae76e2009-05-10 19:59:24 -07001464int main(int argc, char** argv) {
Jack Palevich22305132009-05-13 10:58:45 -07001465 bool doDump = false;
Jack Palevicha6535612009-05-13 16:24:17 -07001466 bool doDisassemble = false;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001467 const char* inFile = NULL;
1468 const char* outFile = NULL;
Jack Palevich22305132009-05-13 10:58:45 -07001469 const char* architecture = "arm";
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001470 int i;
Jack Palevich21a15a22009-05-11 14:49:29 -07001471 for (i = 1; i < argc; i++) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001472 char* arg = argv[i];
1473 if (arg[0] == '-') {
1474 switch (arg[1]) {
Jack Palevich22305132009-05-13 10:58:45 -07001475 case 'a':
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001476 if (i + 1 >= argc) {
Jack Palevich22305132009-05-13 10:58:45 -07001477 fprintf(stderr, "Expected architecture after -a\n");
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001478 return 2;
1479 }
Jack Palevich22305132009-05-13 10:58:45 -07001480 architecture = argv[i+1];
1481 i += 1;
1482 break;
1483 case 'd':
1484 if (i + 1 >= argc) {
1485 fprintf(stderr, "Expected filename after -d\n");
1486 return 2;
1487 }
1488 doDump = true;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001489 outFile = argv[i + 1];
1490 i += 1;
1491 break;
Jack Palevicha6535612009-05-13 16:24:17 -07001492 case 'S':
1493 doDisassemble = true;
1494 break;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001495 default:
1496 fprintf(stderr, "Unrecognized flag %s\n", arg);
1497 return 3;
1498 }
1499 } else if (inFile == NULL) {
1500 inFile = arg;
1501 } else {
1502 break;
1503 }
1504 }
1505
1506 FILE* in = stdin;
1507 if (inFile) {
1508 in = fopen(inFile, "r");
Jack Palevich21a15a22009-05-11 14:49:29 -07001509 if (!in) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001510 fprintf(stderr, "Could not open input file %s\n", inFile);
1511 return 1;
1512 }
1513 }
1514 acc::compiler compiler;
Jack Palevich22305132009-05-13 10:58:45 -07001515 acc::compiler::args args;
1516 args.architecture = architecture;
1517 int compileResult = compiler.compile(in, args);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001518 if (in != stdin) {
1519 fclose(in);
1520 }
1521 if (compileResult) {
1522 fprintf(stderr, "Compile failed: %d\n", compileResult);
1523 return 6;
1524 }
Jack Palevicha6535612009-05-13 16:24:17 -07001525 if (doDisassemble) {
1526 compiler.disassemble(stderr);
1527 }
Jack Palevich22305132009-05-13 10:58:45 -07001528 if (doDump) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001529 FILE* save = fopen(outFile, "w");
Jack Palevich21a15a22009-05-11 14:49:29 -07001530 if (!save) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001531 fprintf(stderr, "Could not open output file %s\n", outFile);
1532 return 5;
1533 }
1534 compiler.dump(save);
1535 fclose(save);
1536 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001537 fprintf(stderr, "Executing compiled code:\n");
Jack Palevich21a15a22009-05-11 14:49:29 -07001538 int codeArgc = argc - i + 1;
1539 char** codeArgv = argv + i - 1;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001540 codeArgv[0] = (char*) (inFile ? inFile : "stdin");
Jack Palevich546b2242009-05-13 15:10:04 -07001541 int result = run(compiler, codeArgc, codeArgv);
Jack Palevich22305132009-05-13 10:58:45 -07001542 fprintf(stderr, "result: %d\n", result);
1543 return result;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001544 }
1545
1546 return 0;
1547}