blob: 559b14674e15de88e0d2e6b810a322f0e3c6b4af [file] [log] [blame]
/*
  Obfuscated Tiny C Compiler

  Copyright (C) 2001-2003 Fabrice Bellard

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product and its documentation
     *is* required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
 */
Jack Paleviche27bf3e2009-05-10 14:09:03 -070022
Jack Palevich77ae76e2009-05-10 19:59:24 -070023#include <ctype.h>
24#include <dlfcn.h>
Jack Paleviche27bf3e2009-05-10 14:09:03 -070025#include <stdarg.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070026#include <stdio.h>
Jack Palevichf6b5a532009-05-10 19:16:42 -070027#include <stdlib.h>
28#include <string.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070029
Jack Palevich546b2242009-05-13 15:10:04 -070030#if defined(__arm__)
31#include <unistd.h>
32#endif
33
Jack Palevicha6535612009-05-13 16:24:17 -070034#include "disassem.h"
35
Jack Palevichbbf8ab52009-05-11 11:54:30 -070036namespace acc {
37
Jack Palevich77ae76e2009-05-10 19:59:24 -070038class compiler {
Jack Palevich21a15a22009-05-11 14:49:29 -070039 class CodeBuf {
40 char* ind;
41 char* pProgramBase;
Jack Palevichf0cbc922009-05-08 16:35:13 -070042
Jack Palevich21a15a22009-05-11 14:49:29 -070043 void release() {
44 if (pProgramBase != 0) {
45 free(pProgramBase);
46 pProgramBase = 0;
Jack Palevichae54f1f2009-05-08 14:54:15 -070047 }
Jack Palevich21a15a22009-05-11 14:49:29 -070048 }
49
50 public:
51 CodeBuf() {
52 pProgramBase = 0;
53 ind = 0;
54 }
55
56 ~CodeBuf() {
57 release();
58 }
59
60 void init(int size) {
61 release();
62 pProgramBase = (char*) calloc(1, size);
63 ind = pProgramBase;
64 }
65
66 void o(int n) {
67 /* cannot use unsigned, so we must do a hack */
68 while (n && n != -1) {
69 *ind++ = n;
70 n = n >> 8;
71 }
72 }
73
Jack Palevich546b2242009-05-13 15:10:04 -070074 int o4(int n) {
75 int result = (int) ind;
76 * (int*) ind = n;
77 ind += 4;
78 return result;
79 }
80
Jack Palevich21a15a22009-05-11 14:49:29 -070081 /*
82 * Output a byte. Handles all values, 0..ff.
83 */
84 void ob(int n) {
85 *ind++ = n;
86 }
87
88 /* output a symbol and patch all calls to it */
89 void gsym(int t) {
90 int n;
91 while (t) {
92 n = *(int *) t; /* next value */
93 *(int *) t = ((int) ind) - t - 4;
94 t = n;
95 }
96 }
97
98 /* psym is used to put an instruction with a data field which is a
99 reference to a symbol. It is in fact the same as oad ! */
100 int psym(int n, int t) {
101 return oad(n, t);
102 }
103
104 /* instruction + address */
105 int oad(int n, int t) {
106 o(n);
107 *(int *) ind = t;
108 t = (int) ind;
109 ind = ind + 4;
110 return t;
111 }
112
113 inline void* getBase() {
114 return (void*) pProgramBase;
115 }
116
117 int getSize() {
118 return ind - pProgramBase;
119 }
120
121 int getPC() {
122 return (int) ind;
123 }
124 };
125
126 class CodeGenerator {
127 public:
128 CodeGenerator() {}
129 virtual ~CodeGenerator() {}
130
Jack Palevich22305132009-05-13 10:58:45 -0700131 virtual void init(CodeBuf* pCodeBuf) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700132 this->pCodeBuf = pCodeBuf;
133 }
134
Jack Palevich22305132009-05-13 10:58:45 -0700135 /* returns address to patch with local variable size
136 */
Jack Palevich546b2242009-05-13 15:10:04 -0700137 virtual int functionEntry(int argCount) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700138
Jack Palevich546b2242009-05-13 15:10:04 -0700139 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700140
141 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700142 virtual void li(int t) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700143
144 virtual int gjmp(int t) = 0;
145
146 /* l = 0: je, l == 1: jne */
147 virtual int gtst(bool l, int t) = 0;
148
149 virtual void gcmp(int op) = 0;
150
Jack Palevich546b2242009-05-13 15:10:04 -0700151 virtual void genOp(int op) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700152
153 virtual void clearECX() = 0;
154
155 virtual void pushEAX() = 0;
156
157 virtual void popECX() = 0;
158
159 virtual void storeEAXToAddressECX(bool isInt) = 0;
160
161 virtual void loadEAXIndirect(bool isInt) = 0;
162
163 virtual void leaEAX(int ea) = 0;
164
165 virtual void storeEAX(int ea) = 0;
166
Jack Palevich4d93f302009-05-15 13:30:00 -0700167 virtual void loadEAX(int ea, bool isIncDec, int op) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700168
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700169 virtual int beginFunctionCallArguments() = 0;
170
171 virtual void endFunctionCallArguments(int a, int l) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700172
173 virtual void storeEAToArg(int l) = 0;
174
175 virtual int callForward(int symbol) = 0;
176
177 virtual void callRelative(int t) = 0;
178
179 virtual void callIndirect(int l) = 0;
180
181 virtual void adjustStackAfterCall(int l) = 0;
182
Jack Palevicha6535612009-05-13 16:24:17 -0700183 virtual int disassemble(FILE* out) = 0;
184
Jack Palevich21a15a22009-05-11 14:49:29 -0700185 /* output a symbol and patch all calls to it */
Jack Palevich22305132009-05-13 10:58:45 -0700186 virtual void gsym(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700187 pCodeBuf->gsym(t);
188 }
189
Jack Palevich546b2242009-05-13 15:10:04 -0700190 virtual int finishCompile() {
191#if defined(__arm__)
Jack Palevicha6535612009-05-13 16:24:17 -0700192 const long base = long(pCodeBuf->getBase());
193 const long curr = base + long(pCodeBuf->getSize());
194 int err = cacheflush(base, curr, 0);
195 return err;
Jack Palevich546b2242009-05-13 15:10:04 -0700196#else
Jack Palevicha6535612009-05-13 16:24:17 -0700197 return 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700198#endif
199 }
200
Jack Palevicha6535612009-05-13 16:24:17 -0700201 /**
202 * Adjust relative branches by this amount.
203 */
204 virtual int jumpOffset() = 0;
205
Jack Palevich21a15a22009-05-11 14:49:29 -0700206 protected:
207 void o(int n) {
208 pCodeBuf->o(n);
209 }
210
211 /*
212 * Output a byte. Handles all values, 0..ff.
213 */
214 void ob(int n) {
215 pCodeBuf->ob(n);
216 }
217
218 /* psym is used to put an instruction with a data field which is a
219 reference to a symbol. It is in fact the same as oad ! */
220 int psym(int n, int t) {
221 return oad(n, t);
222 }
223
224 /* instruction + address */
225 int oad(int n, int t) {
226 return pCodeBuf->oad(n,t);
227 }
228
Jack Palevicha6535612009-05-13 16:24:17 -0700229 int getBase() {
230 return (int) pCodeBuf->getBase();
231 }
232
Jack Palevich21a15a22009-05-11 14:49:29 -0700233 int getPC() {
234 return pCodeBuf->getPC();
235 }
236
Jack Palevich546b2242009-05-13 15:10:04 -0700237 int o4(int data) {
238 return pCodeBuf->o4(data);
239 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700240 private:
241 CodeBuf* pCodeBuf;
242 };
243
Jack Palevich22305132009-05-13 10:58:45 -0700244 class ARMCodeGenerator : public CodeGenerator {
245 public:
246 ARMCodeGenerator() {}
247 virtual ~ARMCodeGenerator() {}
248
249 /* returns address to patch with local variable size
250 */
Jack Palevich546b2242009-05-13 15:10:04 -0700251 virtual int functionEntry(int argCount) {
252 fprintf(stderr, "functionEntry(%d);\n", argCount);
Jack Palevich69796b62009-05-14 15:42:26 -0700253 // sp -> arg4 arg5 ...
254 // Push our register-based arguments back on the stack
255 if (argCount > 0) {
256 int regArgCount = argCount <= 4 ? argCount : 4;
257 o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd sp!, {}
258 }
259 // sp -> arg0 arg1 ...
260 o4(0xE92D4800); // stmfd sp!, {fp, lr}
261 // sp, fp -> oldfp, retadr, arg0 arg1 ....
262 o4(0xE1A0B00D); // mov fp, sp
263 return o4(0xE24DD000); // sub sp, sp, # <local variables>
Jack Palevich22305132009-05-13 10:58:45 -0700264 }
265
Jack Palevich546b2242009-05-13 15:10:04 -0700266 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
267 fprintf(stderr, "functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize);
Jack Palevich69796b62009-05-14 15:42:26 -0700268 // Patch local variable allocation code:
269 if (localVariableSize < 0 || localVariableSize > 255) {
Jack Palevich8de461d2009-05-14 17:21:45 -0700270 error("localVariables out of range: %d", localVariableSize);
Jack Palevich546b2242009-05-13 15:10:04 -0700271 }
Jack Palevich69796b62009-05-14 15:42:26 -0700272 *(char*) (localVariableAddress) = localVariableSize;
273
274 // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ...
275 o4(0xE1A0E00B); // mov lr, fp
276 o4(0xE59BB000); // ldr fp, [fp]
277 o4(0xE28ED004); // add sp, lr, #4
278 // sp -> retadr, arg0, ...
279 o4(0xE8BD4000); // ldmfd sp!, {lr}
280 // sp -> arg0 ....
281 if (argCount > 0) {
282 // We store the PC into the lr so we can adjust the sp before
Jack Palevich8de461d2009-05-14 17:21:45 -0700283 // returning. We need to pull off the registers we pushed
Jack Palevich69796b62009-05-14 15:42:26 -0700284 // earlier. We don't need to actually store them anywhere,
285 // just adjust the stack.
286 int regArgCount = argCount <= 4 ? argCount : 4;
287 o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2
288 }
289 o4(0xE12FFF1E); // bx lr
Jack Palevich22305132009-05-13 10:58:45 -0700290 }
291
292 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700293 virtual void li(int t) {
Jack Palevich22305132009-05-13 10:58:45 -0700294 fprintf(stderr, "li(%d);\n", t);
Jack Palevicha6535612009-05-13 16:24:17 -0700295 if (t >= 0 && t < 255) {
Jack Palevich69796b62009-05-14 15:42:26 -0700296 o4(0xE3A00000 + t); // mov r0, #0
Jack Palevicha6535612009-05-13 16:24:17 -0700297 } else if (t >= -256 && t < 0) {
298 // mvn means move constant ^ ~0
Jack Palevich69796b62009-05-14 15:42:26 -0700299 o4(0xE3E00001 - t); // mvn r0, #0
Jack Palevicha6535612009-05-13 16:24:17 -0700300 } else {
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700301 o4(0xE51F0000); // ldr r0, .L3
302 o4(0xEA000000); // b .L99
303 o4(t); // .L3: .word 0
304 // .L99:
Jack Palevicha6535612009-05-13 16:24:17 -0700305 }
Jack Palevich22305132009-05-13 10:58:45 -0700306 }
307
308 virtual int gjmp(int t) {
309 fprintf(stderr, "gjmp(%d);\n", t);
Jack Palevich8de461d2009-05-14 17:21:45 -0700310 return o4(0xEA000000 | encodeAddress(t)); // b .L33
Jack Palevich22305132009-05-13 10:58:45 -0700311 }
312
313 /* l = 0: je, l == 1: jne */
314 virtual int gtst(bool l, int t) {
315 fprintf(stderr, "gtst(%d, %d);\n", l, t);
Jack Palevich8de461d2009-05-14 17:21:45 -0700316 o4(0xE3500000); // cmp r0,#0
317 int branch = l ? 0x1A000000 : 0x0A000000; // bne : beq
318 return o4(branch | encodeAddress(t));
Jack Palevich22305132009-05-13 10:58:45 -0700319 }
320
321 virtual void gcmp(int op) {
322 fprintf(stderr, "gcmp(%d);\n", op);
Jack Palevich8de461d2009-05-14 17:21:45 -0700323 o4(0xE1510000); // cmp r1, r1
324 switch(op) {
325 case OP_EQUALS:
326 o4(0x03A00001); // moveq r0,#1
327 o4(0x13A00000); // movne r0,#0
328 break;
329 case OP_NOT_EQUALS:
330 o4(0x03A00000); // moveq r0,#0
331 o4(0x13A00001); // movne r0,#1
332 break;
333 case OP_LESS_EQUAL:
334 o4(0xD3A00001); // movle r0,#1
335 o4(0xC3A00000); // movgt r0,#0
336 break;
337 case OP_GREATER:
338 o4(0xD3A00000); // movle r0,#0
339 o4(0xC3A00001); // movgt r0,#1
340 break;
341 case OP_GREATER_EQUAL:
342 o4(0xA3A00001); // movge r0,#1
343 o4(0xB3A00000); // movlt r0,#0
344 break;
345 case OP_LESS:
346 o4(0xA3A00000); // movge r0,#0
347 o4(0xB3A00001); // movlt r0,#1
348 break;
349 default:
350 error("Unknown comparison op %d", op);
351 break;
352 }
Jack Palevich22305132009-05-13 10:58:45 -0700353 }
354
Jack Palevich546b2242009-05-13 15:10:04 -0700355 virtual void genOp(int op) {
Jack Palevich22305132009-05-13 10:58:45 -0700356 fprintf(stderr, "genOp(%d);\n", op);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700357 switch(op) {
358 case OP_MUL:
359 o4(0x0E0000091); // mul r0,r1,r0
360 break;
361 case OP_PLUS:
362 o4(0xE0810000); // add r0,r1,r0
363 break;
364 case OP_MINUS:
365 o4(0xE0410000); // sub r0,r1,r0
366 break;
367 case OP_SHIFT_LEFT:
368 o4(0xE1A00011); // lsl r0,r1,r0
369 break;
370 case OP_SHIFT_RIGHT:
371 o4(0xE1A00051); // asr r0,r1,r0
372 break;
373 case OP_BIT_AND:
374 o4(0xE0010000); // and r0,r1,r0
375 break;
376 case OP_BIT_XOR:
377 o4(0xE0210000); // eor r0,r1,r0
378 break;
379 case OP_BIT_OR:
380 o4(0xE1810000); // orr r0,r1,r0
381 break;
382 case OP_BIT_NOT:
383 o4(0xE1E00000); // mvn r0, r0
384 break;
385 default:
Jack Palevich69796b62009-05-14 15:42:26 -0700386 error("Unimplemented op %d\n", op);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700387 break;
388 }
Jack Palevich22305132009-05-13 10:58:45 -0700389#if 0
390 o(decodeOp(op));
391 if (op == OP_MOD)
392 o(0x92); /* xchg %edx, %eax */
393#endif
394 }
395
396 virtual void clearECX() {
397 fprintf(stderr, "clearECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700398 o4(0xE3A01000); // mov r1, #0
Jack Palevich22305132009-05-13 10:58:45 -0700399 }
400
401 virtual void pushEAX() {
402 fprintf(stderr, "pushEAX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700403 o4(0xE92D0001); // stmfd sp!,{r0}
Jack Palevich22305132009-05-13 10:58:45 -0700404 }
405
406 virtual void popECX() {
407 fprintf(stderr, "popECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700408 o4(0xE8BD0002); // ldmfd sp!,{r1}
Jack Palevich22305132009-05-13 10:58:45 -0700409 }
410
411 virtual void storeEAXToAddressECX(bool isInt) {
412 fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt);
Jack Palevichbd894902009-05-14 19:35:31 -0700413 if (isInt) {
414 o4(0xE5810000); // str r0, [r1]
415 } else {
416 o4(0xE5C10000); // strb r0, [r1]
417 }
Jack Palevich22305132009-05-13 10:58:45 -0700418 }
419
420 virtual void loadEAXIndirect(bool isInt) {
421 fprintf(stderr, "loadEAXIndirect(%d);\n", isInt);
422 if (isInt)
Jack Palevich69796b62009-05-14 15:42:26 -0700423 o4(0xE5900000); // ldr r0, [r0]
Jack Palevich22305132009-05-13 10:58:45 -0700424 else
Jack Palevich69796b62009-05-14 15:42:26 -0700425 o4(0xE5D00000); // ldrb r0, [r0]
Jack Palevich22305132009-05-13 10:58:45 -0700426 }
427
428 virtual void leaEAX(int ea) {
Jack Palevichbd894902009-05-14 19:35:31 -0700429 fprintf(stderr, "leaEAX(%d);\n", ea);
Jack Palevich4d93f302009-05-15 13:30:00 -0700430 if (ea < LOCAL) {
431 // Local, fp relative
432 if (ea < -1023 || ea > 1023 || ((ea & 3) != 0)) {
433 error("Offset out of range: %08x", ea);
434 }
435 if (ea < 0) {
436 o4(0xE24B0F00 | (0xff & ((-ea) >> 2))); // sub r0, fp, #ea
437 } else {
438 o4(0xE28B0F00 | (0xff & (ea >> 2))); // add r0, fp, #ea
439 }
Jack Palevichbd894902009-05-14 19:35:31 -0700440 } else {
Jack Palevich4d93f302009-05-15 13:30:00 -0700441 // Global, absolute.
442 o4(0xE59F0000); // ldr r0, .L1
443 o4(0xEA000000); // b .L99
444 o4(ea); // .L1: .word 0
445 // .L99:
Jack Palevichbd894902009-05-14 19:35:31 -0700446 }
Jack Palevich22305132009-05-13 10:58:45 -0700447 }
448
449 virtual void storeEAX(int ea) {
450 fprintf(stderr, "storeEAX(%d);\n", ea);
Jack Palevich4d93f302009-05-15 13:30:00 -0700451 if (ea < LOCAL) {
452 // Local, fp relative
453 if (ea < -4095 || ea > 4095) {
454 error("Offset out of range: %08x", ea);
455 }
456 if (ea < 0) {
457 o4(0xE50B0000 | (0xfff & (-ea))); // str r0, [fp,#-ea]
458 } else {
459 o4(0xE58B0000 | (0xfff & ea)); // str r0, [fp,#ea]
460 }
461 } else{
462 // Global, absolute
463 o4(0xE59F1000); // ldr r1, .L1
464 o4(0xEA000000); // b .L99
465 o4(ea); // .L1: .word 0
466 o4(0xE5810000); // .L99: str r0, [r1]
Jack Palevich69796b62009-05-14 15:42:26 -0700467 }
Jack Palevich22305132009-05-13 10:58:45 -0700468 }
469
Jack Palevich4d93f302009-05-15 13:30:00 -0700470 virtual void loadEAX(int ea, bool isIncDec, int op) {
471 fprintf(stderr, "loadEAX(%d, %d, %d);\n", ea, isIncDec, op);
472 if (ea < LOCAL) {
473 // Local, fp relative
474 if (ea < -4095 || ea > 4095) {
475 error("Offset out of range: %08x", ea);
476 }
477 if (ea < 0) {
478 o4(0xE51B0000 | (0xfff & (-ea))); // ldr r0, [fp,#-ea]
479 } else {
480 o4(0xE59B0000 | (0xfff & ea)); // ldr r0, [fp,#ea]
481 }
Jack Palevich69796b62009-05-14 15:42:26 -0700482 } else {
Jack Palevich4d93f302009-05-15 13:30:00 -0700483 // Global, absolute
484 o4(0xE59F2000); // ldr r2, .L1
485 o4(0xEA000000); // b .L99
486 o4(ea); // .L1: .word ea
487 o4(0xE5920000); // .L99: ldr r0, [r2]
Jack Palevich69796b62009-05-14 15:42:26 -0700488 }
Jack Palevich22305132009-05-13 10:58:45 -0700489
Jack Palevich4d93f302009-05-15 13:30:00 -0700490 if (isIncDec) {
491 switch (op) {
492 case OP_INCREMENT:
493 o4(0xE2801001); // add r1, r0, #1
494 break;
495 case OP_DECREMENT:
496 o4(0xE2401001); // sub r1, r0, #1
497 break;
498 default:
499 error("unknown opcode: %d", op);
500 }
501 if (ea < LOCAL) {
502 // Local, fp relative
503 // Don't need range check, was already checked above
504 if (ea < 0) {
505 o4(0xE50B1000 | (0xfff & (-ea))); // str r1, [fp,#-ea]
506 } else {
507 o4(0xE58B1000 | (0xfff & ea)); // str r1, [fp,#ea]
508 }
509 } else{
510 // Global, absolute
511 // r2 is already set up from before.
512 o4(0xE5821000); // str r1, [r2]
513 }
Jack Palevichbd894902009-05-14 19:35:31 -0700514 }
Jack Palevich22305132009-05-13 10:58:45 -0700515 }
516
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700517 virtual int beginFunctionCallArguments() {
518 fprintf(stderr, "beginFunctionCallArguments();\n");
519 return o4(0xE24DDF00); // Placeholder
520 }
521
522 virtual void endFunctionCallArguments(int a, int l) {
523 fprintf(stderr, "endFunctionCallArguments(0x%08x, %d);\n", a, l);
524 if (l < 0 || l > 0x3FC) {
525 error("L out of range for stack adjustment: 0x%08x", l);
526 }
527 * (int*) a = 0xE24DDF00 | (l >> 2); // sub sp, sp, #0 << 2
528 int argCount = l >> 2;
529 if (argCount > 0) {
530 int regArgCount = argCount > 4 ? 4 : argCount;
531 o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd sp!,{}
532 }
Jack Palevich22305132009-05-13 10:58:45 -0700533 }
534
535 virtual void storeEAToArg(int l) {
536 fprintf(stderr, "storeEAToArg(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700537 if (l < 0 || l > 4096-4) {
538 error("l out of range for stack offset: 0x%08x", l);
539 }
540 o4(0xE58D0000 + l); // str r0, [sp, #4]
Jack Palevich22305132009-05-13 10:58:45 -0700541 }
542
543 virtual int callForward(int symbol) {
544 fprintf(stderr, "callForward(%d);\n", symbol);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700545 // Forward calls are always short (local)
546 return o4(0xEB000000 | encodeAddress(symbol));
Jack Palevich22305132009-05-13 10:58:45 -0700547 }
548
549 virtual void callRelative(int t) {
550 fprintf(stderr, "callRelative(%d);\n", t);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700551 int abs = t + getPC() + jumpOffset();
Jack Palevichbd894902009-05-14 19:35:31 -0700552 fprintf(stderr, "abs=%d (0x%08x)\n", abs, abs);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700553 if (t >= - (1 << 25) && t < (1 << 25)) {
554 o4(0xEB000000 | encodeAddress(t));
555 } else {
556 // Long call.
557 o4(0xE59FC000); // ldr r12, .L1
558 o4(0xEA000000); // b .L99
Jack Palevichbd894902009-05-14 19:35:31 -0700559 o4(t - 12); // .L1: .word 0
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700560 o4(0xE08CC00F); // .L99: add r12,pc
561 o4(0xE12FFF3C); // blx r12
562 }
Jack Palevich22305132009-05-13 10:58:45 -0700563 }
564
565 virtual void callIndirect(int l) {
566 fprintf(stderr, "callIndirect(%d);\n", l);
567 oad(0x2494ff, l); /* call *xxx(%esp) */
568 }
569
570 virtual void adjustStackAfterCall(int l) {
571 fprintf(stderr, "adjustStackAfterCall(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700572 if (l < 0 || l > 0x3FC) {
573 error("L out of range for stack adjustment: 0x%08x", l);
574 }
575 int argCount = l >> 2;
576 if (argCount > 4) {
577 int remainingArgs = argCount - 4;
578 o4(0xE28DDF00 | remainingArgs); // add sp, sp, #0x3fc
579 }
580
Jack Palevich22305132009-05-13 10:58:45 -0700581 }
582
Jack Palevicha6535612009-05-13 16:24:17 -0700583 virtual int jumpOffset() {
Jack Palevichbd894902009-05-14 19:35:31 -0700584 return 8;
Jack Palevicha6535612009-05-13 16:24:17 -0700585 }
586
587 /* output a symbol and patch all calls to it */
588 virtual void gsym(int t) {
589 fprintf(stderr, "gsym(0x%x)\n", t);
590 int n;
591 int base = getBase();
592 int pc = getPC();
593 fprintf(stderr, "pc = 0x%x\n", pc);
594 while (t) {
595 int data = * (int*) t;
596 int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2);
597 if (decodedOffset == 0) {
598 n = 0;
599 } else {
600 n = base + decodedOffset; /* next value */
601 }
602 *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK)
603 | encodeRelAddress(pc - t - 8);
604 t = n;
605 }
606 }
607
608 virtual int disassemble(FILE* out) {
609 disasmOut = out;
610 disasm_interface_t di;
611 di.di_readword = disassemble_readword;
612 di.di_printaddr = disassemble_printaddr;
613 di.di_printf = disassemble_printf;
614
615 int base = getBase();
616 int pc = getPC();
617 for(int i = base; i < pc; i += 4) {
618 fprintf(out, "%08x: %08x ", i, *(int*) i);
619 ::disasm(&di, i, 0);
620 }
621 return 0;
622 }
Jack Palevich22305132009-05-13 10:58:45 -0700623 private:
Jack Palevicha6535612009-05-13 16:24:17 -0700624 static FILE* disasmOut;
625
626 static u_int
627 disassemble_readword(u_int address)
628 {
629 return(*((u_int *)address));
630 }
631
632 static void
633 disassemble_printaddr(u_int address)
634 {
635 fprintf(disasmOut, "0x%08x", address);
636 }
637
638 static void
639 disassemble_printf(const char *fmt, ...) {
640 va_list ap;
641 va_start(ap, fmt);
642 vfprintf(disasmOut, fmt, ap);
643 va_end(ap);
644 }
645
646 static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff;
647
648 /** Encode a relative address that might also be
649 * a label.
650 */
651 int encodeAddress(int value) {
652 int base = getBase();
653 if (value >= base && value <= getPC() ) {
654 // This is a label, encode it relative to the base.
655 value = value - base;
656 }
657 return encodeRelAddress(value);
658 }
659
660 int encodeRelAddress(int value) {
661 return BRANCH_REL_ADDRESS_MASK & (value >> 2);
662 }
Jack Palevich22305132009-05-13 10:58:45 -0700663
Jack Palevich546b2242009-05-13 15:10:04 -0700664 void error(const char* fmt,...) {
665 va_list ap;
666 va_start(ap, fmt);
667 vfprintf(stderr, fmt, ap);
668 va_end(ap);
669 exit(12);
670 }
Jack Palevich22305132009-05-13 10:58:45 -0700671 };
672
Jack Palevich21a15a22009-05-11 14:49:29 -0700673 class X86CodeGenerator : public CodeGenerator {
674 public:
675 X86CodeGenerator() {}
676 virtual ~X86CodeGenerator() {}
677
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700678 /* returns address to patch with local variable size
679 */
Jack Palevich546b2242009-05-13 15:10:04 -0700680 virtual int functionEntry(int argCount) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700681 o(0xe58955); /* push %ebp, mov %esp, %ebp */
682 return oad(0xec81, 0); /* sub $xxx, %esp */
683 }
684
Jack Palevich546b2242009-05-13 15:10:04 -0700685 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700686 o(0xc3c9); /* leave, ret */
Jack Palevich546b2242009-05-13 15:10:04 -0700687 *(int *) localVariableAddress = localVariableSize; /* save local variables */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700688 }
689
Jack Palevich21a15a22009-05-11 14:49:29 -0700690 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700691 virtual void li(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700692 oad(0xb8, t); /* mov $xx, %eax */
693 }
694
Jack Palevich22305132009-05-13 10:58:45 -0700695 virtual int gjmp(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700696 return psym(0xe9, t);
697 }
698
699 /* l = 0: je, l == 1: jne */
Jack Palevich22305132009-05-13 10:58:45 -0700700 virtual int gtst(bool l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700701 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
702 return psym(0x84 + l, t);
703 }
704
Jack Palevich22305132009-05-13 10:58:45 -0700705 virtual void gcmp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700706 int t = decodeOp(op);
Jack Palevich21a15a22009-05-11 14:49:29 -0700707 o(0xc139); /* cmp %eax,%ecx */
708 li(0);
709 o(0x0f); /* setxx %al */
710 o(t + 0x90);
711 o(0xc0);
712 }
713
Jack Palevich546b2242009-05-13 15:10:04 -0700714 virtual void genOp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700715 o(decodeOp(op));
716 if (op == OP_MOD)
717 o(0x92); /* xchg %edx, %eax */
718 }
719
Jack Palevich22305132009-05-13 10:58:45 -0700720 virtual void clearECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700721 oad(0xb9, 0); /* movl $0, %ecx */
722 }
723
Jack Palevich22305132009-05-13 10:58:45 -0700724 virtual void pushEAX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700725 o(0x50); /* push %eax */
726 }
727
Jack Palevich22305132009-05-13 10:58:45 -0700728 virtual void popECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700729 o(0x59); /* pop %ecx */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700730 }
731
Jack Palevich22305132009-05-13 10:58:45 -0700732 virtual void storeEAXToAddressECX(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700733 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
734 }
735
Jack Palevich22305132009-05-13 10:58:45 -0700736 virtual void loadEAXIndirect(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700737 if (isInt)
738 o(0x8b); /* mov (%eax), %eax */
739 else
740 o(0xbe0f); /* movsbl (%eax), %eax */
741 ob(0); /* add zero in code */
742 }
743
Jack Palevich22305132009-05-13 10:58:45 -0700744 virtual void leaEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700745 gmov(10, ea); /* leal EA, %eax */
746 }
747
Jack Palevich22305132009-05-13 10:58:45 -0700748 virtual void storeEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700749 gmov(6, ea); /* mov %eax, EA */
750 }
751
Jack Palevich4d93f302009-05-15 13:30:00 -0700752 virtual void loadEAX(int ea, bool isIncDec, int op) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700753 gmov(8, ea); /* mov EA, %eax */
Jack Palevich4d93f302009-05-15 13:30:00 -0700754 if (isIncDec) {
755 /* Implement post-increment or post decrement.
756 */
757 gmov(0, ea); /* 83 ADD */
758 o(decodeOp(op));
759 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700760 }
761
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700762 virtual int beginFunctionCallArguments() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700763 return oad(0xec81, 0); /* sub $xxx, %esp */
764 }
765
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700766 virtual void endFunctionCallArguments(int a, int l) {
767 * (int*) a = l;
768 }
769
Jack Palevich22305132009-05-13 10:58:45 -0700770 virtual void storeEAToArg(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700771 oad(0x248489, l); /* movl %eax, xxx(%esp) */
772 }
773
Jack Palevich22305132009-05-13 10:58:45 -0700774 virtual int callForward(int symbol) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700775 return psym(0xe8, symbol); /* call xxx */
776 }
777
Jack Palevich22305132009-05-13 10:58:45 -0700778 virtual void callRelative(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700779 psym(0xe8, t); /* call xxx */
780 }
781
Jack Palevich22305132009-05-13 10:58:45 -0700782 virtual void callIndirect(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700783 oad(0x2494ff, l); /* call *xxx(%esp) */
784 }
785
Jack Palevich22305132009-05-13 10:58:45 -0700786 virtual void adjustStackAfterCall(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700787 oad(0xc481, l); /* add $xxx, %esp */
788 }
789
Jack Palevicha6535612009-05-13 16:24:17 -0700790 virtual int jumpOffset() {
791 return 5;
792 }
793
794 virtual int disassemble(FILE* out) {
795 return 1;
796 }
797
Jack Palevich21a15a22009-05-11 14:49:29 -0700798 private:
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700799 static const int operatorHelper[];
800
801 int decodeOp(int op) {
802 if (op < 0 || op > OP_COUNT) {
803 fprintf(stderr, "Out-of-range operator: %d\n", op);
804 exit(1);
805 }
806 return operatorHelper[op];
807 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700808
Jack Palevich546b2242009-05-13 15:10:04 -0700809 void gmov(int l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700810 o(l + 0x83);
811 oad((t < LOCAL) << 7 | 5, t);
812 }
813 };
814
815 /* vars: value of variables
816 loc : local variable index
817 glo : global variable index
818 ind : output code ptr
819 rsym: return symbol
820 prog: output code
821 dstk: define stack
822 dptr, dch: macro state
823 */
824 int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
825 dptr, dch, last_id;
826 void* pSymbolBase;
827 void* pGlobalBase;
828 void* pVarsBase;
829 FILE* file;
830
831 CodeBuf codeBuf;
Jack Palevich22305132009-05-13 10:58:45 -0700832 CodeGenerator* pGen;
Jack Palevich21a15a22009-05-11 14:49:29 -0700833
834 static const int ALLOC_SIZE = 99999;
835
836 /* depends on the init string */
837 static const int TOK_STR_SIZE = 48;
838 static const int TOK_IDENT = 0x100;
839 static const int TOK_INT = 0x100;
840 static const int TOK_IF = 0x120;
841 static const int TOK_ELSE = 0x138;
842 static const int TOK_WHILE = 0x160;
843 static const int TOK_BREAK = 0x190;
844 static const int TOK_RETURN = 0x1c0;
845 static const int TOK_FOR = 0x1f8;
846 static const int TOK_DEFINE = 0x218;
847 static const int TOK_MAIN = 0x250;
848
849 static const int TOK_DUMMY = 1;
850 static const int TOK_NUM = 2;
851
852 static const int LOCAL = 0x200;
853
854 static const int SYM_FORWARD = 0;
855 static const int SYM_DEFINE = 1;
856
857 /* tokens in string heap */
858 static const int TAG_TOK = ' ';
859 static const int TAG_MACRO = 2;
860
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700861 static const int OP_INCREMENT = 0;
862 static const int OP_DECREMENT = 1;
863 static const int OP_MUL = 2;
864 static const int OP_DIV = 3;
865 static const int OP_MOD = 4;
866 static const int OP_PLUS = 5;
867 static const int OP_MINUS = 6;
868 static const int OP_SHIFT_LEFT = 7;
869 static const int OP_SHIFT_RIGHT = 8;
870 static const int OP_LESS_EQUAL = 9;
871 static const int OP_GREATER_EQUAL = 10;
872 static const int OP_LESS = 11;
873 static const int OP_GREATER = 12;
874 static const int OP_EQUALS = 13;
875 static const int OP_NOT_EQUALS = 14;
876 static const int OP_LOGICAL_AND = 15;
877 static const int OP_LOGICAL_OR = 16;
878 static const int OP_BIT_AND = 17;
879 static const int OP_BIT_XOR = 18;
880 static const int OP_BIT_OR = 19;
881 static const int OP_BIT_NOT = 20;
882 static const int OP_LOGICAL_NOT = 21;
883 static const int OP_COUNT = 22;
884
885 /* Operators are searched from front, the two-character operators appear
886 * before the single-character operators with the same first character.
887 * @ is used to pad out single-character operators.
888 */
889 static const char* operatorChars;
890 static const char operatorLevel[];
891
Jack Palevich21a15a22009-05-11 14:49:29 -0700892 void pdef(int t) {
893 *(char *) dstk++ = t;
894 }
895
896 void inp() {
897 if (dptr) {
898 ch = *(char *) dptr++;
899 if (ch == TAG_MACRO) {
900 dptr = 0;
901 ch = dch;
902 }
903 } else
904 ch = fgetc(file);
905 /* printf("ch=%c 0x%x\n", ch, ch); */
906 }
907
908 int isid() {
Jack Palevich546b2242009-05-13 15:10:04 -0700909 return isalnum(ch) | (ch == '_');
Jack Palevich21a15a22009-05-11 14:49:29 -0700910 }
911
912 /* read a character constant */
913 void getq() {
914 if (ch == '\\') {
915 inp();
916 if (ch == 'n')
917 ch = '\n';
918 }
919 }
920
921 void next() {
922 int l, a;
923
Jack Palevich546b2242009-05-13 15:10:04 -0700924 while (isspace(ch) | (ch == '#')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700925 if (ch == '#') {
926 inp();
927 next();
928 if (tok == TOK_DEFINE) {
929 next();
930 pdef(TAG_TOK); /* fill last ident tag */
931 *(int *) tok = SYM_DEFINE;
932 *(int *) (tok + 4) = dstk; /* define stack */
933 }
934 /* well we always save the values ! */
935 while (ch != '\n') {
936 pdef(ch);
937 inp();
938 }
939 pdef(ch);
940 pdef(TAG_MACRO);
941 }
942 inp();
943 }
944 tokl = 0;
945 tok = ch;
946 /* encode identifiers & numbers */
947 if (isid()) {
948 pdef(TAG_TOK);
949 last_id = dstk;
950 while (isid()) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700951 pdef(ch);
952 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700953 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700954 if (isdigit(tok)) {
955 tokc = strtol((char*) last_id, 0, 0);
956 tok = TOK_NUM;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700957 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700958 *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
959 suppose data is initialized to zero by calloc) */
960 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
961 - sym_stk);
962 *(char *) dstk = 0; /* mark real end of ident for dlsym() */
963 tok = tok * 8 + TOK_IDENT;
964 if (tok > TOK_DEFINE) {
965 tok = vars + tok;
966 /* printf("tok=%s %x\n", last_id, tok); */
967 /* define handling */
968 if (*(int *) tok == SYM_DEFINE) {
969 dptr = *(int *) (tok + 4);
970 dch = ch;
971 inp();
972 next();
973 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700974 }
975 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700976 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700977 inp();
978 if (tok == '\'') {
979 tok = TOK_NUM;
980 getq();
981 tokc = ch;
982 inp();
983 inp();
Jack Palevich546b2242009-05-13 15:10:04 -0700984 } else if ((tok == '/') & (ch == '*')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700985 inp();
986 while (ch) {
987 while (ch != '*')
988 inp();
989 inp();
990 if (ch == '/')
991 ch = 0;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700992 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700993 inp();
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700994 next();
Jack Palevichbd894902009-05-14 19:35:31 -0700995 } else if ((tok == '/') & (ch == '/')) {
996 inp();
997 while (ch && (ch != '\n')) {
998 inp();
999 }
1000 inp();
1001 next();
Jack Palevich21a15a22009-05-11 14:49:29 -07001002 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001003 const char* t = operatorChars;
1004 int opIndex = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001005 while ((l = *t++) != 0) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001006 a = *t++;
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001007 tokl = operatorLevel[opIndex];
1008 tokc = opIndex;
Jack Palevich546b2242009-05-13 15:10:04 -07001009 if ((l == tok) & ((a == ch) | (a == '@'))) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001010#if 0
1011 printf("%c%c -> tokl=%d tokc=0x%x\n",
1012 l, a, tokl, tokc);
1013#endif
1014 if (a == ch) {
1015 inp();
1016 tok = TOK_DUMMY; /* dummy token for double tokens */
1017 }
1018 break;
1019 }
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001020 opIndex++;
1021 }
1022 if (l == 0) {
1023 tokl = 0;
1024 tokc = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001025 }
1026 }
1027 }
1028#if 0
1029 {
1030 int p;
1031
1032 printf("tok=0x%x ", tok);
1033 if (tok >= TOK_IDENT) {
1034 printf("'");
1035 if (tok> TOK_DEFINE)
1036 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
1037 else
1038 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
1039 while (*(char *)p != TAG_TOK && *(char *)p)
1040 printf("%c", *(char *)p++);
1041 printf("'\n");
1042 } else if (tok == TOK_NUM) {
1043 printf("%d\n", tokc);
1044 } else {
1045 printf("'%c'\n", tok);
1046 }
1047 }
1048#endif
1049 }
1050
1051 void error(const char *fmt, ...) {
1052 va_list ap;
1053
1054 va_start(ap, fmt);
1055 fprintf(stderr, "%ld: ", ftell((FILE *) file));
1056 vfprintf(stderr, fmt, ap);
1057 fprintf(stderr, "\n");
1058 va_end(ap);
1059 exit(1);
1060 }
1061
1062 void skip(int c) {
1063 if (tok != c) {
1064 error("'%c' expected", c);
1065 }
1066 next();
1067 }
1068
Jack Palevich21a15a22009-05-11 14:49:29 -07001069 /* l is one if '=' parsing wanted (quick hack) */
1070 void unary(int l) {
1071 int n, t, a, c;
Jack Palevich546b2242009-05-13 15:10:04 -07001072 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001073 n = 1; /* type of expression 0 = forward, 1 = value, other =
1074 lvalue */
1075 if (tok == '\"') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001076 pGen->li(glo);
Jack Palevich21a15a22009-05-11 14:49:29 -07001077 while (ch != '\"') {
1078 getq();
1079 *(char *) glo++ = ch;
1080 inp();
1081 }
1082 *(char *) glo = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001083 glo = (glo + 4) & -4; /* align heap */
Jack Palevich21a15a22009-05-11 14:49:29 -07001084 inp();
1085 next();
1086 } else {
1087 c = tokl;
1088 a = tokc;
1089 t = tok;
1090 next();
1091 if (t == TOK_NUM) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001092 pGen->li(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001093 } else if (c == 2) {
1094 /* -, +, !, ~ */
1095 unary(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001096 pGen->clearECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001097 if (t == '!')
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001098 pGen->gcmp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001099 else
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001100 pGen->genOp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001101 } else if (t == '(') {
1102 expr();
1103 skip(')');
1104 } else if (t == '*') {
1105 /* parse cast */
1106 skip('(');
1107 t = tok; /* get type */
1108 next(); /* skip int/char/void */
1109 next(); /* skip '*' or '(' */
1110 if (tok == '*') {
1111 /* function type */
1112 skip('*');
1113 skip(')');
1114 skip('(');
1115 skip(')');
1116 t = 0;
1117 }
1118 skip(')');
1119 unary(0);
1120 if (tok == '=') {
1121 next();
1122 pGen->pushEAX();
1123 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001124 pGen->popECX();
1125 pGen->storeEAXToAddressECX(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001126 } else if (t) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001127 pGen->loadEAXIndirect(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001128 }
1129 } else if (t == '&') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001130 pGen->leaEAX(*(int *) tok);
Jack Palevich21a15a22009-05-11 14:49:29 -07001131 next();
1132 } else {
1133 n = *(int *) t;
1134 /* forward reference: try dlsym */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001135 if (!n) {
1136 n = (int) dlsym(RTLD_DEFAULT, (char*) last_id);
1137 }
Jack Palevich546b2242009-05-13 15:10:04 -07001138 if ((tok == '=') & l) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001139 /* assignment */
1140 next();
1141 expr();
1142 pGen->storeEAX(n);
1143 } else if (tok != '(') {
1144 /* variable */
Jack Palevich4d93f302009-05-15 13:30:00 -07001145 pGen->loadEAX(n, tokl == 11, tokc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001146 if (tokl == 11) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001147 next();
1148 }
1149 }
1150 }
1151 }
1152
1153 /* function call */
1154 if (tok == '(') {
1155 if (n == 1)
1156 pGen->pushEAX();
1157
1158 /* push args and invert order */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001159 a = pGen->beginFunctionCallArguments();
Jack Palevich21a15a22009-05-11 14:49:29 -07001160 next();
1161 l = 0;
1162 while (tok != ')') {
1163 expr();
1164 pGen->storeEAToArg(l);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001165 if (tok == ',')
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001166 next();
Jack Palevich21a15a22009-05-11 14:49:29 -07001167 l = l + 4;
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001168 }
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001169 pGen->endFunctionCallArguments(a, l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001170 next();
1171 if (!n) {
1172 /* forward reference */
1173 t = t + 4;
1174 *(int *) t = pGen->callForward(*(int *) t);
1175 } else if (n == 1) {
1176 pGen->callIndirect(l);
1177 l = l + 4;
1178 } else {
Jack Palevicha6535612009-05-13 16:24:17 -07001179 pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); /* call xxx */
Jack Palevich21a15a22009-05-11 14:49:29 -07001180 }
1181 if (l)
1182 pGen->adjustStackAfterCall(l);
1183 }
1184 }
1185
1186 void sum(int l) {
1187 int t, n, a;
Jack Palevich546b2242009-05-13 15:10:04 -07001188 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001189 if (l-- == 1)
1190 unary(1);
1191 else {
1192 sum(l);
1193 a = 0;
1194 while (l == tokl) {
1195 n = tok;
1196 t = tokc;
1197 next();
1198
1199 if (l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001200 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
Jack Palevich21a15a22009-05-11 14:49:29 -07001201 sum(l);
1202 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001203 pGen->pushEAX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001204 sum(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001205 pGen->popECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001206
Jack Palevich546b2242009-05-13 15:10:04 -07001207 if ((l == 4) | (l == 5)) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001208 pGen->gcmp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001209 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001210 pGen->genOp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001211 }
1212 }
1213 }
1214 /* && and || output code generation */
1215 if (a && l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001216 a = pGen->gtst(t == OP_LOGICAL_OR, a);
1217 pGen->li(t != OP_LOGICAL_OR);
Jack Palevicha6535612009-05-13 16:24:17 -07001218 pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001219 pGen->gsym(a);
1220 pGen->li(t == OP_LOGICAL_OR);
Jack Palevich21a15a22009-05-11 14:49:29 -07001221 }
1222 }
1223 }
1224
1225 void expr() {
1226 sum(11);
1227 }
1228
1229 int test_expr() {
1230 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001231 return pGen->gtst(0, 0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001232 }
1233
1234 void block(int l) {
1235 int a, n, t;
1236
1237 if (tok == TOK_IF) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001238 next();
1239 skip('(');
Jack Palevich21a15a22009-05-11 14:49:29 -07001240 a = test_expr();
1241 skip(')');
1242 block(l);
1243 if (tok == TOK_ELSE) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001244 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001245 n = pGen->gjmp(0); /* jmp */
1246 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001247 block(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001248 pGen->gsym(n); /* patch else jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001249 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001250 pGen->gsym(a); /* patch if test */
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001251 }
Jack Palevich546b2242009-05-13 15:10:04 -07001252 } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001253 t = tok;
1254 next();
1255 skip('(');
1256 if (t == TOK_WHILE) {
Jack Palevicha6535612009-05-13 16:24:17 -07001257 n = codeBuf.getPC(); // top of loop, target of "next" iteration
Jack Palevich21a15a22009-05-11 14:49:29 -07001258 a = test_expr();
1259 } else {
1260 if (tok != ';')
1261 expr();
1262 skip(';');
1263 n = codeBuf.getPC();
1264 a = 0;
1265 if (tok != ';')
1266 a = test_expr();
1267 skip(';');
1268 if (tok != ')') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001269 t = pGen->gjmp(0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001270 expr();
Jack Palevicha6535612009-05-13 16:24:17 -07001271 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset());
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001272 pGen->gsym(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001273 n = t + 4;
1274 }
1275 }
1276 skip(')');
1277 block((int) &a);
Jack Palevicha6535612009-05-13 16:24:17 -07001278 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001279 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001280 } else if (tok == '{') {
1281 next();
1282 /* declarations */
1283 decl(1);
1284 while (tok != '}')
1285 block(l);
1286 next();
1287 } else {
1288 if (tok == TOK_RETURN) {
1289 next();
1290 if (tok != ';')
1291 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001292 rsym = pGen->gjmp(rsym); /* jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001293 } else if (tok == TOK_BREAK) {
1294 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001295 *(int *) l = pGen->gjmp(*(int *) l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001296 } else if (tok != ';')
1297 expr();
1298 skip(';');
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001299 }
1300 }
Jack Palevich21a15a22009-05-11 14:49:29 -07001301
1302 /* 'l' is true if local declarations */
1303 void decl(int l) {
1304 int a;
1305
Jack Palevich546b2242009-05-13 15:10:04 -07001306 while ((tok == TOK_INT) | ((tok != -1) & (!l))) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001307 if (tok == TOK_INT) {
1308 next();
1309 while (tok != ';') {
1310 if (l) {
1311 loc = loc + 4;
1312 *(int *) tok = -loc;
1313 } else {
1314 *(int *) tok = glo;
1315 glo = glo + 4;
1316 }
1317 next();
1318 if (tok == ',')
1319 next();
1320 }
1321 skip(';');
1322 } else {
1323 /* patch forward references (XXX: do not work for function
1324 pointers) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001325 pGen->gsym(*(int *) (tok + 4));
Jack Palevich21a15a22009-05-11 14:49:29 -07001326 /* put function address */
1327 *(int *) tok = codeBuf.getPC();
1328 next();
1329 skip('(');
1330 a = 8;
Jack Palevich546b2242009-05-13 15:10:04 -07001331 int argCount = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001332 while (tok != ')') {
1333 /* read param name and compute offset */
1334 *(int *) tok = a;
1335 a = a + 4;
1336 next();
1337 if (tok == ',')
1338 next();
Jack Palevich546b2242009-05-13 15:10:04 -07001339 argCount++;
Jack Palevich21a15a22009-05-11 14:49:29 -07001340 }
1341 next(); /* skip ')' */
1342 rsym = loc = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001343 a = pGen->functionEntry(argCount);
Jack Palevich21a15a22009-05-11 14:49:29 -07001344 block(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001345 pGen->gsym(rsym);
Jack Palevich546b2242009-05-13 15:10:04 -07001346 pGen->functionExit(argCount, a, loc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001347 }
1348 }
1349 }
1350
1351 void cleanup() {
1352 if (sym_stk != 0) {
1353 free((void*) sym_stk);
1354 sym_stk = 0;
1355 }
1356 if (pGlobalBase != 0) {
1357 free((void*) pGlobalBase);
1358 pGlobalBase = 0;
1359 }
1360 if (pVarsBase != 0) {
1361 free(pVarsBase);
1362 pVarsBase = 0;
1363 }
1364 if (pGen) {
1365 delete pGen;
1366 pGen = 0;
1367 }
1368 }
1369
1370 void clear() {
1371 tok = 0;
1372 tokc = 0;
1373 tokl = 0;
1374 ch = 0;
1375 vars = 0;
1376 rsym = 0;
1377 loc = 0;
1378 glo = 0;
1379 sym_stk = 0;
1380 dstk = 0;
1381 dptr = 0;
1382 dch = 0;
1383 last_id = 0;
1384 file = 0;
1385 pGlobalBase = 0;
1386 pVarsBase = 0;
1387 pGen = 0;
1388 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001389
Jack Palevich22305132009-05-13 10:58:45 -07001390 void setArchitecture(const char* architecture) {
1391 delete pGen;
1392 pGen = 0;
1393
1394 if (architecture != NULL) {
1395 if (strcmp(architecture, "arm") == 0) {
1396 pGen = new ARMCodeGenerator();
1397 } else if (strcmp(architecture, "x86") == 0) {
1398 pGen = new X86CodeGenerator();
1399 } else {
1400 fprintf(stderr, "Unknown architecture %s", architecture);
1401 }
1402 }
1403
1404 if (pGen == NULL) {
1405 pGen = new ARMCodeGenerator();
1406 }
1407 }
1408
Jack Palevich77ae76e2009-05-10 19:59:24 -07001409public:
/* Options passed to compile(). */
struct args {
    /* Target architecture name; null until the caller sets it. */
    const char* architecture;

    args() : architecture(0) {
    }
};
1416
Jack Palevich21a15a22009-05-11 14:49:29 -07001417 compiler() {
1418 clear();
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001419 }
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001420
Jack Palevich21a15a22009-05-11 14:49:29 -07001421 ~compiler() {
1422 cleanup();
1423 }
1424
Jack Palevich22305132009-05-13 10:58:45 -07001425 int compile(FILE* in, args& args) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001426 cleanup();
1427 clear();
1428 codeBuf.init(ALLOC_SIZE);
Jack Palevich22305132009-05-13 10:58:45 -07001429 setArchitecture(args.architecture);
Jack Palevich21a15a22009-05-11 14:49:29 -07001430 pGen->init(&codeBuf);
1431 file = in;
1432 sym_stk = (int) calloc(1, ALLOC_SIZE);
1433 dstk = (int) strcpy((char*) sym_stk,
1434 " int if else while break return for define main ")
1435 + TOK_STR_SIZE;
1436 pGlobalBase = calloc(1, ALLOC_SIZE);
1437 glo = (int) pGlobalBase;
1438 pVarsBase = calloc(1, ALLOC_SIZE);
1439 vars = (int) pVarsBase;
1440 inp();
1441 next();
1442 decl(0);
Jack Palevich546b2242009-05-13 15:10:04 -07001443 pGen->finishCompile();
Jack Palevich21a15a22009-05-11 14:49:29 -07001444 return 0;
1445 }
1446
1447 int run(int argc, char** argv) {
1448 typedef int (*mainPtr)(int argc, char** argv);
1449 mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
1450 if (!aMain) {
1451 fprintf(stderr, "Could not find function \"main\".\n");
1452 return -1;
1453 }
1454 return aMain(argc, argv);
1455 }
1456
1457 int dump(FILE* out) {
1458 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
1459 return 0;
1460 }
Jack Palevich77ae76e2009-05-10 19:59:24 -07001461
Jack Palevicha6535612009-05-13 16:24:17 -07001462 int disassemble(FILE* out) {
1463 return pGen->disassemble(out);
1464 }
1465
Jack Palevich77ae76e2009-05-10 19:59:24 -07001466};
1467
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001468const char* compiler::operatorChars =
1469 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
1470
1471const char compiler::operatorLevel[] =
1472 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
1473 5, 5, /* ==, != */
1474 9, 10, /* &&, || */
1475 6, 7, 8, /* & ^ | */
1476 2, 2 /* ~ ! */
1477 };
1478
Jack Palevicha6535612009-05-13 16:24:17 -07001479FILE* compiler::ARMCodeGenerator::disasmOut;
1480
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001481const int compiler::X86CodeGenerator::operatorHelper[] = {
1482 0x1, // ++
1483 0xff, // --
1484 0xc1af0f, // *
1485 0xf9f79991, // /
1486 0xf9f79991, // % (With manual assist to swap results)
1487 0xc801, // +
1488 0xd8f7c829, // -
1489 0xe0d391, // <<
1490 0xf8d391, // >>
1491 0xe, // <=
1492 0xd, // >=
1493 0xc, // <
1494 0xf, // >
1495 0x4, // ==
1496 0x5, // !=
1497 0x0, // &&
1498 0x1, // ||
1499 0xc821, // &
1500 0xc831, // ^
1501 0xc809, // |
1502 0xd0f7, // ~
1503 0x4 // !
1504};
1505
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001506} // namespace acc
1507
Jack Palevich546b2242009-05-13 15:10:04 -07001508// This is a separate function so it can easily be set by breakpoint in gdb.
1509int run(acc::compiler& c, int argc, char** argv) {
1510 return c.run(argc, argv);
1511}
1512
Jack Palevich77ae76e2009-05-10 19:59:24 -07001513int main(int argc, char** argv) {
Jack Palevich22305132009-05-13 10:58:45 -07001514 bool doDump = false;
Jack Palevicha6535612009-05-13 16:24:17 -07001515 bool doDisassemble = false;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001516 const char* inFile = NULL;
1517 const char* outFile = NULL;
Jack Palevich22305132009-05-13 10:58:45 -07001518 const char* architecture = "arm";
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001519 int i;
Jack Palevich21a15a22009-05-11 14:49:29 -07001520 for (i = 1; i < argc; i++) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001521 char* arg = argv[i];
1522 if (arg[0] == '-') {
1523 switch (arg[1]) {
Jack Palevich22305132009-05-13 10:58:45 -07001524 case 'a':
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001525 if (i + 1 >= argc) {
Jack Palevich22305132009-05-13 10:58:45 -07001526 fprintf(stderr, "Expected architecture after -a\n");
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001527 return 2;
1528 }
Jack Palevich22305132009-05-13 10:58:45 -07001529 architecture = argv[i+1];
1530 i += 1;
1531 break;
1532 case 'd':
1533 if (i + 1 >= argc) {
1534 fprintf(stderr, "Expected filename after -d\n");
1535 return 2;
1536 }
1537 doDump = true;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001538 outFile = argv[i + 1];
1539 i += 1;
1540 break;
Jack Palevicha6535612009-05-13 16:24:17 -07001541 case 'S':
1542 doDisassemble = true;
1543 break;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001544 default:
1545 fprintf(stderr, "Unrecognized flag %s\n", arg);
1546 return 3;
1547 }
1548 } else if (inFile == NULL) {
1549 inFile = arg;
1550 } else {
1551 break;
1552 }
1553 }
1554
1555 FILE* in = stdin;
1556 if (inFile) {
1557 in = fopen(inFile, "r");
Jack Palevich21a15a22009-05-11 14:49:29 -07001558 if (!in) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001559 fprintf(stderr, "Could not open input file %s\n", inFile);
1560 return 1;
1561 }
1562 }
1563 acc::compiler compiler;
Jack Palevich22305132009-05-13 10:58:45 -07001564 acc::compiler::args args;
1565 args.architecture = architecture;
1566 int compileResult = compiler.compile(in, args);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001567 if (in != stdin) {
1568 fclose(in);
1569 }
1570 if (compileResult) {
1571 fprintf(stderr, "Compile failed: %d\n", compileResult);
1572 return 6;
1573 }
Jack Palevicha6535612009-05-13 16:24:17 -07001574 if (doDisassemble) {
1575 compiler.disassemble(stderr);
1576 }
Jack Palevich22305132009-05-13 10:58:45 -07001577 if (doDump) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001578 FILE* save = fopen(outFile, "w");
Jack Palevich21a15a22009-05-11 14:49:29 -07001579 if (!save) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001580 fprintf(stderr, "Could not open output file %s\n", outFile);
1581 return 5;
1582 }
1583 compiler.dump(save);
1584 fclose(save);
1585 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001586 fprintf(stderr, "Executing compiled code:\n");
Jack Palevich21a15a22009-05-11 14:49:29 -07001587 int codeArgc = argc - i + 1;
1588 char** codeArgv = argv + i - 1;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001589 codeArgv[0] = (char*) (inFile ? inFile : "stdin");
Jack Palevich546b2242009-05-13 15:10:04 -07001590 int result = run(compiler, codeArgc, codeArgv);
Jack Palevich22305132009-05-13 10:58:45 -07001591 fprintf(stderr, "result: %d\n", result);
1592 return result;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001593 }
1594
1595 return 0;
1596}