blob: 7fa376f70306e712041ca4b7d786c2e6d2690609 [file] [log] [blame]
/*
  Obfuscated Tiny C Compiler

  Copyright (C) 2001-2003 Fabrice Bellard

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product and its documentation
     *is* required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
22
#include <stdarg.h>
#include <stdio.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070025
/*
 * Compiler state.  Everything is kept in plain ints, including values
 * that are really addresses: the code casts them back with
 * *(int *)x / *(char *)x, so it assumes a pointer fits in an int.
 */
int tok;      /* current token (see next()) */
int tokc;     /* value attached to the token: number/char constant, or the
                 code decoded from the operator table */
int tokl;     /* operator level of the current token, compared in sum() */
int ch;       /* current input character */
int vars;     /* value of variables */
int rsym;     /* return symbol */
int prog;     /* output code */
int ind;      /* output code ptr */
int loc;      /* local variable index */
int glo;      /* global variable index */
int file;     /* input stream (a FILE * stored in an int) */
int sym_stk;  /* start of the string heap holding identifier names */
int dstk;     /* define stack (next free byte of the string heap) */
int dptr;     /* macro state: read position in macro text, 0 when idle */
int dch;      /* macro state: input character saved during expansion */
int last_id;  /* start of the text of the last identifier/number read */
Jack Palevich7448a2e2009-05-08 18:33:45 -070036
/* size in bytes of each work area allocated in main() */
#define ALLOC_SIZE 99999

/* Keyword tokens.  They depend on the init string
   " int if else while break return for define main " copied into the
   string heap by main(): TOK_STR_SIZE is the length of that string and
   each keyword token is TOK_IDENT + 8 * (offset of the space that
   precedes the keyword in the string), which is what next() computes. */
#define TOK_STR_SIZE 48
#define TOK_IDENT    0x100
#define TOK_INT      (TOK_IDENT + 8 * 0)    /* 0x100 */
#define TOK_IF       (TOK_IDENT + 8 * 4)    /* 0x120 */
#define TOK_ELSE     (TOK_IDENT + 8 * 7)    /* 0x138 */
#define TOK_WHILE    (TOK_IDENT + 8 * 12)   /* 0x160 */
#define TOK_BREAK    (TOK_IDENT + 8 * 18)   /* 0x190 */
#define TOK_RETURN   (TOK_IDENT + 8 * 24)   /* 0x1c0 */
#define TOK_FOR      (TOK_IDENT + 8 * 31)   /* 0x1f8 */
#define TOK_DEFINE   (TOK_IDENT + 8 * 35)   /* 0x218 */
#define TOK_MAIN     (TOK_IDENT + 8 * 42)   /* 0x250 */

/* tokens that are neither characters nor identifiers */
#define TOK_DUMMY 1   /* dummy token for double tokens such as "<=" */
#define TOK_NUM   2   /* number or character constant, value in tokc */

/* gmov() treats addresses below LOCAL differently from the others */
#define LOCAL 0x200

/* first word of a symbol slot in the 'vars' table */
#define SYM_FORWARD 0
#define SYM_DEFINE  1

/* tokens in string heap */
#define TAG_TOK   ' '   /* separator placed around every identifier */
#define TAG_MACRO 2     /* end marker of a saved macro body */
63
64pdef(t)
65{
66 *(char *)dstk++ = t;
Jack Palevich88311482009-05-08 13:57:37 -070067}
Jack Palevichae54f1f2009-05-08 14:54:15 -070068
Jack Paleviche27bf3e2009-05-10 14:09:03 -070069inp()
70{
71 if (dptr) {
72 ch = *(char *)dptr++;
73 if (ch == TAG_MACRO) {
74 dptr = 0;
75 ch = dch;
Jack Palevichae54f1f2009-05-08 14:54:15 -070076 }
77 } else
Jack Paleviche27bf3e2009-05-10 14:09:03 -070078 ch = fgetc(file);
79 /* printf("ch=%c 0x%x\n", ch, ch); */
Jack Palevich88311482009-05-08 13:57:37 -070080}
Jack Palevichae54f1f2009-05-08 14:54:15 -070081
Jack Paleviche27bf3e2009-05-10 14:09:03 -070082isid()
83{
84 return isalnum(ch) | ch == '_';
Jack Palevich88311482009-05-08 13:57:37 -070085}
Jack Palevichae54f1f2009-05-08 14:54:15 -070086
Jack Paleviche27bf3e2009-05-10 14:09:03 -070087/* read a character constant */
88getq()
89{
90 if (ch == '\\') {
91 inp();
92 if (ch == 'n')
93 ch = '\n';
Jack Palevichae54f1f2009-05-08 14:54:15 -070094 }
Jack Palevich88311482009-05-08 13:57:37 -070095}
Jack Palevichae54f1f2009-05-08 14:54:15 -070096
Jack Paleviche27bf3e2009-05-10 14:09:03 -070097next()
98{
99 int t, l, a;
100
101 while (isspace(ch) | ch == '#') {
102 if (ch == '#') {
103 inp();
104 next();
105 if (tok == TOK_DEFINE) {
106 next();
107 pdef(TAG_TOK); /* fill last ident tag */
108 *(int *)tok = SYM_DEFINE;
109 *(int *)(tok + 4) = dstk; /* define stack */
Jack Palevichae54f1f2009-05-08 14:54:15 -0700110 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700111 /* well we always save the values ! */
112 while (ch != '\n') {
113 pdef(ch);
114 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700115 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700116 pdef(ch);
117 pdef(TAG_MACRO);
Jack Palevichae54f1f2009-05-08 14:54:15 -0700118 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700119 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700120 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700121 tokl = 0;
122 tok = ch;
123 /* encode identifiers & numbers */
124 if (isid()) {
125 pdef(TAG_TOK);
126 last_id = dstk;
127 while (isid()) {
128 pdef(ch);
129 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700130 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700131 if (isdigit(tok)) {
132 tokc = strtol(last_id, 0, 0);
133 tok = TOK_NUM;
Jack Palevichae54f1f2009-05-08 14:54:15 -0700134 } else {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700135 *(char *)dstk = TAG_TOK; /* no need to mark end of string (we
136 suppose data is initied to zero */
137 tok = strstr(sym_stk, last_id - 1) - sym_stk;
138 *(char *)dstk = 0; /* mark real end of ident for dlsym() */
139 tok = tok * 8 + TOK_IDENT;
140 if (tok > TOK_DEFINE) {
141 tok = vars + tok;
142 /* printf("tok=%s %x\n", last_id, tok); */
143 /* define handling */
144 if (*(int *)tok == SYM_DEFINE) {
145 dptr = *(int *)(tok + 4);
146 dch = ch;
147 inp();
148 next();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700149 }
150 }
151 }
152 } else {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700153 inp();
154 if (tok == '\'') {
155 tok = TOK_NUM;
156 getq();
157 tokc = ch;
158 inp();
159 inp();
160 } else if (tok == '/' & ch == '*') {
161 inp();
162 while (ch) {
163 while (ch != '*')
164 inp();
165 inp();
166 if (ch == '/')
167 ch = 0;
Jack Palevichae54f1f2009-05-08 14:54:15 -0700168 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700169 inp();
170 next();
171 } else
172 {
173 t = "++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!=\'g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b";
174 while (l = *(char *)t++) {
175 a = *(char *)t++;
176 tokc = 0;
177 while ((tokl = *(char *)t++ - 'b') < 0)
178 tokc = tokc * 64 + tokl + 64;
179 if (l == tok & (a == ch | a == '@')) {
180#if 0
181 printf("%c%c -> tokl=%d tokc=0x%x\n",
182 l, a, tokl, tokc);
183#endif
184 if (a == ch) {
185 inp();
186 tok = TOK_DUMMY; /* dummy token for double tokens */
Jack Palevichae54f1f2009-05-08 14:54:15 -0700187 }
188 break;
189 }
190 }
191 }
192 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700193#if 0
194 {
195 int p;
Jack Palevichae54f1f2009-05-08 14:54:15 -0700196
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700197 printf("tok=0x%x ", tok);
198 if (tok >= TOK_IDENT) {
199 printf("'");
200 if (tok > TOK_DEFINE)
201 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
202 else
203 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
204 while (*(char *)p != TAG_TOK && *(char *)p)
205 printf("%c", *(char *)p++);
206 printf("'\n");
207 } else if (tok == TOK_NUM) {
208 printf("%d\n", tokc);
209 } else {
210 printf("'%c'\n", tok);
Jack Palevichae54f1f2009-05-08 14:54:15 -0700211 }
212 }
Jack Palevichae54f1f2009-05-08 14:54:15 -0700213#endif
Jack Palevich88311482009-05-08 13:57:37 -0700214}
215
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700216void error(char *fmt,...)
217{
218 va_list ap;
219
220 va_start(ap, fmt);
221 fprintf(stderr, "%d: ", ftell((FILE *)file));
222 vfprintf(stderr, fmt, ap);
223 fprintf(stderr, "\n");
224 exit(1);
225 va_end(ap);
226}
227
228void skip(c)
229{
230 if (tok != c) {
231 error("'%c' expected", c);
232 }
233 next();
234}
235
236o(n)
237{
238 /* cannot use unsigned, so we must do a hack */
239 while (n && n != -1) {
240 *(char *)ind++ = n;
241 n = n >> 8;
242 }
243}
244
245/* output a symbol and patch all calls to it */
246gsym(t)
247{
248 int n;
249 while (t) {
250 n = *(int *)t; /* next value */
251 *(int *)t = ind - t - 4;
252 t = n;
253 }
254}
255
256/* psym is used to put an instruction with a data field which is a
257 reference to a symbol. It is in fact the same as oad ! */
258#define psym oad
259
260/* instruction + address */
261oad(n, t)
262{
263 o(n);
264 *(int *)ind = t;
265 t = ind;
266 ind = ind + 4;
267 return t;
268}
269
/* load immediate value: output "mov $t, %eax" */
li(t)
{
    oad(0xb8, t); /* opcode 0xb8 followed by the 32-bit constant */
}
275
/* Output a jmp (opcode 0xe9) whose 4-byte operand is t and return the
   address of that operand. */
gjmp(t)
{
    int field;

    field = psym(0xe9, t);
    return field;
}
280
/* Output a test of %eax followed by a conditional jump whose 4-byte
   operand is t; returns the address of that operand.
   l = 0: je, l == 1: jne */
gtst(l, t)
{
    int opcode;

    opcode = 0x84 + l;
    o(0x0fc085); /* test %eax, %eax, je/jne xxx */
    return psym(opcode, t);
}
287
/* Output a comparison of %ecx with %eax that leaves its result in %eax;
   t selects the setxx condition (third byte is t + 0x90). */
gcmp(t)
{
    int setcc;

    setcc = t + 0x90;
    o(0xc139); /* cmp %eax,%ecx */
    li(0);
    o(0x0f);   /* setxx %al */
    o(setcc);
    o(0xc0);
}
296
297gmov(l, t)
298{
299 o(l + 0x83);
300 oad((t < LOCAL) << 7 | 5, t);
301}
302
303/* l is one if '=' parsing wanted (quick hack) */
304unary(l)
305{
306 int n, t, a, c;
307
308 n = 1; /* type of expression 0 = forward, 1 = value, other =
309 lvalue */
310 if (tok == '\"') {
311 li(glo);
312 while (ch != '\"') {
313 getq();
314 *(char *)glo++ = ch;
315 inp();
316 }
317 *(char *)glo = 0;
318 glo = glo + 4 & -4; /* align heap */
319 inp();
320 next();
321 } else {
322 c = tokl;
323 a = tokc;
324 t = tok;
325 next();
326 if (t == TOK_NUM) {
327 li(a);
328 } else if (c == 2) {
329 /* -, +, !, ~ */
330 unary(0);
331 oad(0xb9, 0); /* movl $0, %ecx */
332 if (t == '!')
333 gcmp(a);
334 else
335 o(a);
336 } else if (t == '(') {
337 expr();
338 skip(')');
339 } else if (t == '*') {
340 /* parse cast */
341 skip('(');
342 t = tok; /* get type */
343 next(); /* skip int/char/void */
344 next(); /* skip '*' or '(' */
345 if (tok == '*') {
346 /* function type */
347 skip('*');
348 skip(')');
349 skip('(');
350 skip(')');
351 t = 0;
352 }
353 skip(')');
354 unary(0);
355 if (tok == '=') {
356 next();
357 o(0x50); /* push %eax */
358 expr();
359 o(0x59); /* pop %ecx */
360 o(0x0188 + (t == TOK_INT)); /* movl %eax/%al, (%ecx) */
361 } else if (t) {
362 if (t == TOK_INT)
363 o(0x8b); /* mov (%eax), %eax */
364 else
365 o(0xbe0f); /* movsbl (%eax), %eax */
366 ind++; /* add zero in code */
367 }
368 } else if (t == '&') {
369 gmov(10, *(int *)tok); /* leal EA, %eax */
370 next();
371 } else {
372 n = *(int *)t;
373 /* forward reference: try dlsym */
374 if (!n)
375 n = dlsym(0, last_id);
376 if (tok == '=' & l) {
377 /* assignment */
378 next();
379 expr();
380 gmov(6, n); /* mov %eax, EA */
381 } else if (tok != '(') {
382 /* variable */
383 gmov(8, n); /* mov EA, %eax */
384 if (tokl == 11) {
385 gmov(0, n);
386 o(tokc);
387 next();
388 }
389 }
390 }
391 }
392
393 /* function call */
394 if (tok == '(') {
395 if (n == 1)
396 o(0x50); /* push %eax */
397
398 /* push args and invert order */
399 a = oad(0xec81, 0); /* sub $xxx, %esp */
400 next();
401 l = 0;
402 while(tok != ')') {
403 expr();
404 oad(0x248489, l); /* movl %eax, xxx(%esp) */
405 if (tok == ',')
406 next();
407 l = l + 4;
408 }
409 *(int *)a = l;
410 next();
411 if (!n) {
412 /* forward reference */
413 t = t + 4;
414 *(int *)t = psym(0xe8, *(int *)t);
415 } else if (n == 1) {
416 oad(0x2494ff, l); /* call *xxx(%esp) */
417 l = l + 4;
418 } else {
419 oad(0xe8, n - ind - 5); /* call xxx */
420 }
421 if (l)
422 oad(0xc481, l); /* add $xxx, %esp */
423 }
424}
425
426sum(l)
427{
428 int t, n, a;
429
430 if (l-- == 1)
431 unary(1);
432 else {
433 sum(l);
434 a = 0;
435 while (l == tokl) {
436 n = tok;
437 t = tokc;
438 next();
439
440 if (l > 8) {
441 a = gtst(t, a); /* && and || output code generation */
442 sum(l);
443 } else {
444 o(0x50); /* push %eax */
445 sum(l);
446 o(0x59); /* pop %ecx */
447
448 if (l == 4 | l == 5) {
449 gcmp(t);
450 } else {
451 o(t);
452 if (n == '%')
453 o(0x92); /* xchg %edx, %eax */
454 }
455 }
456 }
457 /* && and || output code generation */
458 if (a && l > 8) {
459 a = gtst(t, a);
460 li(t ^ 1);
461 gjmp(5); /* jmp $ + 5 */
462 gsym(a);
463 li(t);
464 }
465 }
466}
467
/* Parse a complete expression and output its code: this is sum() called
   with the highest level, 11. */
expr()
{
    sum(11);
}
472
473
/* Parse an expression used as a condition and output a test with a "je"
   jump (gtst(0, 0)) after it.  Returns the address of the jump operand,
   to be patched later with gsym(). */
test_expr()
{
    int jump;

    expr();
    jump = gtst(0, 0);
    return jump;
}
479
480block(l)
481{
482 int a, n, t;
483
484 if (tok == TOK_IF) {
485 next();
486 skip('(');
487 a = test_expr();
488 skip(')');
489 block(l);
490 if (tok == TOK_ELSE) {
491 next();
492 n = gjmp(0); /* jmp */
493 gsym(a);
494 block(l);
495 gsym(n); /* patch else jmp */
496 } else {
497 gsym(a); /* patch if test */
498 }
499 } else if (tok == TOK_WHILE | tok == TOK_FOR) {
500 t = tok;
501 next();
502 skip('(');
503 if (t == TOK_WHILE) {
504 n = ind;
505 a = test_expr();
506 } else {
507 if (tok != ';')
508 expr();
509 skip(';');
510 n = ind;
511 a = 0;
512 if (tok != ';')
513 a = test_expr();
514 skip(';');
515 if (tok != ')') {
516 t = gjmp(0);
517 expr();
518 gjmp(n - ind - 5);
519 gsym(t);
520 n = t + 4;
521 }
522 }
523 skip(')');
524 block(&a);
525 gjmp(n - ind - 5); /* jmp */
526 gsym(a);
527 } else if (tok == '{') {
528 next();
529 /* declarations */
530 decl(1);
531 while(tok != '}')
532 block(l);
533 next();
534 } else {
535 if (tok == TOK_RETURN) {
536 next();
537 if (tok != ';')
538 expr();
539 rsym = gjmp(rsym); /* jmp */
540 } else if (tok == TOK_BREAK) {
541 next();
542 *(int *)l = gjmp(*(int *)l);
543 } else if (tok != ';')
544 expr();
545 skip(';');
546 }
547}
548
549/* 'l' is true if local declarations */
550decl(l)
551{
552 int a;
553
554 while (tok == TOK_INT | tok != -1 & !l) {
555 if (tok == TOK_INT) {
556 next();
557 while (tok != ';') {
558 if (l) {
559 loc = loc + 4;
560 *(int *)tok = -loc;
561 } else {
562 *(int *)tok = glo;
563 glo = glo + 4;
564 }
565 next();
566 if (tok == ',')
567 next();
568 }
569 skip(';');
570 } else {
571 /* patch forward references (XXX: do not work for function
572 pointers) */
573 gsym(*(int *)(tok + 4));
574 /* put function address */
575 *(int *)tok = ind;
576 next();
577 skip('(');
578 a = 8;
579 while (tok != ')') {
580 /* read param name and compute offset */
581 *(int *)tok = a;
582 a = a + 4;
583 next();
584 if (tok == ',')
585 next();
586 }
587 next(); /* skip ')' */
588 rsym = loc = 0;
589 o(0xe58955); /* push %ebp, mov %esp, %ebp */
590 a = oad(0xec81, 0); /* sub $xxx, %esp */
591 block(0);
592 gsym(rsym);
593 o(0xc3c9); /* leave, ret */
594 *(int *)a = loc; /* save local variables */
595 }
596 }
597}
598
599main(n, t)
600{
601 file = stdin;
602 if (n-- > 1) {
603 t = t + 4;
604 file = fopen(*(int *)t, "r");
605 }
606 dstk = strcpy(sym_stk = calloc(1, ALLOC_SIZE),
607 " int if else while break return for define main ") + TOK_STR_SIZE;
608 glo = calloc(1, ALLOC_SIZE);
609 ind = prog = calloc(1, ALLOC_SIZE);
610 vars = calloc(1, ALLOC_SIZE);
611 inp();
612 next();
613 decl(0);
614#ifdef TEST
615 {
616 FILE *f;
617 f = fopen(*(char **)(t + 4), "w");
618 fwrite((void *)prog, 1, ind - prog, f);
619 fclose(f);
620 return 0;
621 }
622#else
623 return (*(int (*)())*(int *)(vars + TOK_MAIN)) (n, t);
624#endif
625}