/*
  Obfuscated Tiny C Compiler

  Copyright (C) 2001-2003 Fabrice Bellard

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product and its documentation
     *is* required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
22
#include <ctype.h>
#include <dlfcn.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070027
/* vars: value of variables
   loc : local variable index
   glo : global variable index
   ind : output code ptr
   rsym: return symbol
   prog: output code
   dstk: define stack
   dptr, dch: macro state
*/
/* Lexer state: current token, its value, its precedence level, and the
   look-ahead character. */
int tok, tokc, tokl, ch;

/* Code generator state.  These ints hold addresses (see the casts in
   main()), so the program assumes sizeof(int) == sizeof(void *). */
int vars, rsym, prog, ind, loc, glo;

/* Identifier heap, macro ("define") stack and macro replay state. */
int sym_stk, dstk, dptr, dch, last_id;

/* Source stream read by inp(). */
FILE *file;
Jack Palevich7448a2e2009-05-08 18:33:45 -070039
/* Size in bytes of each arena allocated in main(). */
#define ALLOC_SIZE      99999

/* depends on the init string */
/* Keyword tokens: TOK_IDENT + 8 * (offset of the keyword's leading space in
   " int if else while break return for define main ").  "int" sits at
   offset 0, which is why TOK_INT equals TOK_IDENT. */
#define TOK_STR_SIZE    48
#define TOK_IDENT       0x100
#define TOK_INT         0x100
#define TOK_IF          0x120
#define TOK_ELSE        0x138
#define TOK_WHILE       0x160
#define TOK_BREAK       0x190
#define TOK_RETURN      0x1c0
#define TOK_FOR         0x1f8
#define TOK_DEFINE      0x218
#define TOK_MAIN        0x250

/* Non-identifier tokens produced by next(). */
#define TOK_DUMMY       1   /* two-character operator */
#define TOK_NUM         2   /* numeric or character constant, value in tokc */

/* gmov() picks a different addressing form for operands below this value. */
#define LOCAL           0x200

/* Values stored in the first word of a symbol's slot. */
#define SYM_FORWARD     0
#define SYM_DEFINE      1

/* tokens in string heap */
#define TAG_TOK         ' '
#define TAG_MACRO       2
66
67pdef(t)
68{
69 *(char *)dstk++ = t;
Jack Palevich88311482009-05-08 13:57:37 -070070}
Jack Palevichae54f1f2009-05-08 14:54:15 -070071
Jack Paleviche27bf3e2009-05-10 14:09:03 -070072inp()
73{
74 if (dptr) {
75 ch = *(char *)dptr++;
76 if (ch == TAG_MACRO) {
77 dptr = 0;
78 ch = dch;
Jack Palevichae54f1f2009-05-08 14:54:15 -070079 }
80 } else
Jack Paleviche27bf3e2009-05-10 14:09:03 -070081 ch = fgetc(file);
82 /* printf("ch=%c 0x%x\n", ch, ch); */
Jack Palevich88311482009-05-08 13:57:37 -070083}
Jack Palevichae54f1f2009-05-08 14:54:15 -070084
Jack Paleviche27bf3e2009-05-10 14:09:03 -070085isid()
86{
87 return isalnum(ch) | ch == '_';
Jack Palevich88311482009-05-08 13:57:37 -070088}
Jack Palevichae54f1f2009-05-08 14:54:15 -070089
Jack Paleviche27bf3e2009-05-10 14:09:03 -070090/* read a character constant */
91getq()
92{
93 if (ch == '\\') {
94 inp();
95 if (ch == 'n')
96 ch = '\n';
Jack Palevichae54f1f2009-05-08 14:54:15 -070097 }
Jack Palevich88311482009-05-08 13:57:37 -070098}
Jack Palevichae54f1f2009-05-08 14:54:15 -070099
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700100next()
101{
Jack Palevichf6b5a532009-05-10 19:16:42 -0700102 int l, a;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700103
104 while (isspace(ch) | ch == '#') {
105 if (ch == '#') {
106 inp();
107 next();
108 if (tok == TOK_DEFINE) {
109 next();
110 pdef(TAG_TOK); /* fill last ident tag */
111 *(int *)tok = SYM_DEFINE;
112 *(int *)(tok + 4) = dstk; /* define stack */
Jack Palevichae54f1f2009-05-08 14:54:15 -0700113 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700114 /* well we always save the values ! */
115 while (ch != '\n') {
116 pdef(ch);
117 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700118 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700119 pdef(ch);
120 pdef(TAG_MACRO);
Jack Palevichae54f1f2009-05-08 14:54:15 -0700121 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700122 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700123 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700124 tokl = 0;
125 tok = ch;
126 /* encode identifiers & numbers */
127 if (isid()) {
128 pdef(TAG_TOK);
129 last_id = dstk;
130 while (isid()) {
131 pdef(ch);
132 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700133 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700134 if (isdigit(tok)) {
Jack Palevichf6b5a532009-05-10 19:16:42 -0700135 tokc = strtol((char*) last_id, 0, 0);
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700136 tok = TOK_NUM;
Jack Palevichae54f1f2009-05-08 14:54:15 -0700137 } else {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700138 *(char *)dstk = TAG_TOK; /* no need to mark end of string (we
139 suppose data is initied to zero */
Jack Palevichf6b5a532009-05-10 19:16:42 -0700140 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1)) - sym_stk);
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700141 *(char *)dstk = 0; /* mark real end of ident for dlsym() */
142 tok = tok * 8 + TOK_IDENT;
143 if (tok > TOK_DEFINE) {
144 tok = vars + tok;
145 /* printf("tok=%s %x\n", last_id, tok); */
146 /* define handling */
147 if (*(int *)tok == SYM_DEFINE) {
148 dptr = *(int *)(tok + 4);
149 dch = ch;
150 inp();
151 next();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700152 }
153 }
154 }
155 } else {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700156 inp();
157 if (tok == '\'') {
158 tok = TOK_NUM;
159 getq();
160 tokc = ch;
161 inp();
162 inp();
163 } else if (tok == '/' & ch == '*') {
164 inp();
165 while (ch) {
166 while (ch != '*')
167 inp();
168 inp();
169 if (ch == '/')
170 ch = 0;
Jack Palevichae54f1f2009-05-08 14:54:15 -0700171 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700172 inp();
173 next();
174 } else
175 {
Jack Palevichf6b5a532009-05-10 19:16:42 -0700176 char* t = "++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!=\'g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b";
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700177 while (l = *(char *)t++) {
178 a = *(char *)t++;
179 tokc = 0;
180 while ((tokl = *(char *)t++ - 'b') < 0)
181 tokc = tokc * 64 + tokl + 64;
182 if (l == tok & (a == ch | a == '@')) {
183#if 0
184 printf("%c%c -> tokl=%d tokc=0x%x\n",
185 l, a, tokl, tokc);
186#endif
187 if (a == ch) {
188 inp();
189 tok = TOK_DUMMY; /* dummy token for double tokens */
Jack Palevichae54f1f2009-05-08 14:54:15 -0700190 }
191 break;
192 }
193 }
194 }
195 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700196#if 0
197 {
198 int p;
Jack Palevichae54f1f2009-05-08 14:54:15 -0700199
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700200 printf("tok=0x%x ", tok);
201 if (tok >= TOK_IDENT) {
202 printf("'");
203 if (tok > TOK_DEFINE)
204 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
205 else
206 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
207 while (*(char *)p != TAG_TOK && *(char *)p)
208 printf("%c", *(char *)p++);
209 printf("'\n");
210 } else if (tok == TOK_NUM) {
211 printf("%d\n", tokc);
212 } else {
213 printf("'%c'\n", tok);
Jack Palevichae54f1f2009-05-08 14:54:15 -0700214 }
215 }
Jack Palevichae54f1f2009-05-08 14:54:15 -0700216#endif
Jack Palevich88311482009-05-08 13:57:37 -0700217}
218
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700219void error(char *fmt,...)
220{
221 va_list ap;
222
223 va_start(ap, fmt);
224 fprintf(stderr, "%d: ", ftell((FILE *)file));
225 vfprintf(stderr, fmt, ap);
226 fprintf(stderr, "\n");
227 exit(1);
228 va_end(ap);
229}
230
231void skip(c)
232{
233 if (tok != c) {
234 error("'%c' expected", c);
235 }
236 next();
237}
238
239o(n)
240{
241 /* cannot use unsigned, so we must do a hack */
242 while (n && n != -1) {
243 *(char *)ind++ = n;
244 n = n >> 8;
245 }
246}
247
248/* output a symbol and patch all calls to it */
249gsym(t)
250{
251 int n;
252 while (t) {
253 n = *(int *)t; /* next value */
254 *(int *)t = ind - t - 4;
255 t = n;
256 }
257}
258
259/* psym is used to put an instruction with a data field which is a
260 reference to a symbol. It is in fact the same as oad ! */
261#define psym oad
262
263/* instruction + address */
264oad(n, t)
265{
266 o(n);
267 *(int *)ind = t;
268 t = ind;
269 ind = ind + 4;
270 return t;
271}
272
/* load immediate value: emit "mov $t, %eax". */
li(t)
{
    int mov_eax_imm = 0xb8; /* mov $xx, %eax */

    oad(mov_eax_imm, t);
}
278
/* Emit a jmp (opcode 0xe9) whose 32-bit operand is t and return the address
   of that operand field. */
gjmp(t)
{
    int field = psym(0xe9, t);

    return field;
}
283
/* Emit "test %eax, %eax" followed by a conditional jump whose operand is t.
   l = 0: je, l == 1: jne.  Returns the address of the jump's operand. */
gtst(l, t)
{
    int field;

    o(0x0fc085); /* test %eax, %eax, je/jne xxx */
    field = psym(0x84 + l, t);
    return field;
}
290
/* Emit a comparison of %eax with %ecx that leaves its result in %eax:
   cmp, clear %eax, then the setxx instruction selected by t. */
gcmp(t)
{
    int setcc = t + 0x90;

    o(0xc139); /* cmp %eax,%ecx */
    li(0);
    o(0x0f); /* setxx %al */
    o(setcc);
    o(0xc0);
}
299
300gmov(l, t)
301{
302 o(l + 0x83);
303 oad((t < LOCAL) << 7 | 5, t);
304}
305
306/* l is one if '=' parsing wanted (quick hack) */
307unary(l)
308{
309 int n, t, a, c;
310
311 n = 1; /* type of expression 0 = forward, 1 = value, other =
312 lvalue */
313 if (tok == '\"') {
314 li(glo);
315 while (ch != '\"') {
316 getq();
317 *(char *)glo++ = ch;
318 inp();
319 }
320 *(char *)glo = 0;
321 glo = glo + 4 & -4; /* align heap */
322 inp();
323 next();
324 } else {
325 c = tokl;
326 a = tokc;
327 t = tok;
328 next();
329 if (t == TOK_NUM) {
330 li(a);
331 } else if (c == 2) {
332 /* -, +, !, ~ */
333 unary(0);
334 oad(0xb9, 0); /* movl $0, %ecx */
335 if (t == '!')
336 gcmp(a);
337 else
338 o(a);
339 } else if (t == '(') {
340 expr();
341 skip(')');
342 } else if (t == '*') {
343 /* parse cast */
344 skip('(');
345 t = tok; /* get type */
346 next(); /* skip int/char/void */
347 next(); /* skip '*' or '(' */
348 if (tok == '*') {
349 /* function type */
350 skip('*');
351 skip(')');
352 skip('(');
353 skip(')');
354 t = 0;
355 }
356 skip(')');
357 unary(0);
358 if (tok == '=') {
359 next();
360 o(0x50); /* push %eax */
361 expr();
362 o(0x59); /* pop %ecx */
363 o(0x0188 + (t == TOK_INT)); /* movl %eax/%al, (%ecx) */
364 } else if (t) {
365 if (t == TOK_INT)
366 o(0x8b); /* mov (%eax), %eax */
367 else
368 o(0xbe0f); /* movsbl (%eax), %eax */
369 ind++; /* add zero in code */
370 }
371 } else if (t == '&') {
372 gmov(10, *(int *)tok); /* leal EA, %eax */
373 next();
374 } else {
375 n = *(int *)t;
376 /* forward reference: try dlsym */
377 if (!n)
378 n = dlsym(0, last_id);
379 if (tok == '=' & l) {
380 /* assignment */
381 next();
382 expr();
383 gmov(6, n); /* mov %eax, EA */
384 } else if (tok != '(') {
385 /* variable */
386 gmov(8, n); /* mov EA, %eax */
387 if (tokl == 11) {
388 gmov(0, n);
389 o(tokc);
390 next();
391 }
392 }
393 }
394 }
395
396 /* function call */
397 if (tok == '(') {
398 if (n == 1)
399 o(0x50); /* push %eax */
400
401 /* push args and invert order */
402 a = oad(0xec81, 0); /* sub $xxx, %esp */
403 next();
404 l = 0;
405 while(tok != ')') {
406 expr();
407 oad(0x248489, l); /* movl %eax, xxx(%esp) */
408 if (tok == ',')
409 next();
410 l = l + 4;
411 }
412 *(int *)a = l;
413 next();
414 if (!n) {
415 /* forward reference */
416 t = t + 4;
417 *(int *)t = psym(0xe8, *(int *)t);
418 } else if (n == 1) {
419 oad(0x2494ff, l); /* call *xxx(%esp) */
420 l = l + 4;
421 } else {
422 oad(0xe8, n - ind - 5); /* call xxx */
423 }
424 if (l)
425 oad(0xc481, l); /* add $xxx, %esp */
426 }
427}
428
429sum(l)
430{
431 int t, n, a;
432
433 if (l-- == 1)
434 unary(1);
435 else {
436 sum(l);
437 a = 0;
438 while (l == tokl) {
439 n = tok;
440 t = tokc;
441 next();
442
443 if (l > 8) {
444 a = gtst(t, a); /* && and || output code generation */
445 sum(l);
446 } else {
447 o(0x50); /* push %eax */
448 sum(l);
449 o(0x59); /* pop %ecx */
450
451 if (l == 4 | l == 5) {
452 gcmp(t);
453 } else {
454 o(t);
455 if (n == '%')
456 o(0x92); /* xchg %edx, %eax */
457 }
458 }
459 }
460 /* && and || output code generation */
461 if (a && l > 8) {
462 a = gtst(t, a);
463 li(t ^ 1);
464 gjmp(5); /* jmp $ + 5 */
465 gsym(a);
466 li(t);
467 }
468 }
469}
470
/* Parse a full expression: entry point of the precedence ladder in sum(),
   starting at level 11. */
expr()
{
    int top_level = 11;

    sum(top_level);
}
475
476
/* Parse an expression and emit a "jump if zero" after it.  Returns the
   address of the jump's operand so the caller can patch it with gsym(). */
test_expr()
{
    int pending;

    expr();
    pending = gtst(0, 0);
    return pending;
}
482
483block(l)
484{
485 int a, n, t;
486
487 if (tok == TOK_IF) {
488 next();
489 skip('(');
490 a = test_expr();
491 skip(')');
492 block(l);
493 if (tok == TOK_ELSE) {
494 next();
495 n = gjmp(0); /* jmp */
496 gsym(a);
497 block(l);
498 gsym(n); /* patch else jmp */
499 } else {
500 gsym(a); /* patch if test */
501 }
502 } else if (tok == TOK_WHILE | tok == TOK_FOR) {
503 t = tok;
504 next();
505 skip('(');
506 if (t == TOK_WHILE) {
507 n = ind;
508 a = test_expr();
509 } else {
510 if (tok != ';')
511 expr();
512 skip(';');
513 n = ind;
514 a = 0;
515 if (tok != ';')
516 a = test_expr();
517 skip(';');
518 if (tok != ')') {
519 t = gjmp(0);
520 expr();
521 gjmp(n - ind - 5);
522 gsym(t);
523 n = t + 4;
524 }
525 }
526 skip(')');
527 block(&a);
528 gjmp(n - ind - 5); /* jmp */
529 gsym(a);
530 } else if (tok == '{') {
531 next();
532 /* declarations */
533 decl(1);
534 while(tok != '}')
535 block(l);
536 next();
537 } else {
538 if (tok == TOK_RETURN) {
539 next();
540 if (tok != ';')
541 expr();
542 rsym = gjmp(rsym); /* jmp */
543 } else if (tok == TOK_BREAK) {
544 next();
545 *(int *)l = gjmp(*(int *)l);
546 } else if (tok != ';')
547 expr();
548 skip(';');
549 }
550}
551
552/* 'l' is true if local declarations */
553decl(l)
554{
555 int a;
556
557 while (tok == TOK_INT | tok != -1 & !l) {
558 if (tok == TOK_INT) {
559 next();
560 while (tok != ';') {
561 if (l) {
562 loc = loc + 4;
563 *(int *)tok = -loc;
564 } else {
565 *(int *)tok = glo;
566 glo = glo + 4;
567 }
568 next();
569 if (tok == ',')
570 next();
571 }
572 skip(';');
573 } else {
574 /* patch forward references (XXX: do not work for function
575 pointers) */
576 gsym(*(int *)(tok + 4));
577 /* put function address */
578 *(int *)tok = ind;
579 next();
580 skip('(');
581 a = 8;
582 while (tok != ')') {
583 /* read param name and compute offset */
584 *(int *)tok = a;
585 a = a + 4;
586 next();
587 if (tok == ',')
588 next();
589 }
590 next(); /* skip ')' */
591 rsym = loc = 0;
592 o(0xe58955); /* push %ebp, mov %esp, %ebp */
593 a = oad(0xec81, 0); /* sub $xxx, %esp */
594 block(0);
595 gsym(rsym);
596 o(0xc3c9); /* leave, ret */
597 *(int *)a = loc; /* save local variables */
598 }
599 }
600}
601
Jack Palevichf6b5a532009-05-10 19:16:42 -0700602main(int n, char** t)
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700603{
604 file = stdin;
605 if (n-- > 1) {
Jack Palevichf6b5a532009-05-10 19:16:42 -0700606 t = t + 1;
607 file = fopen(*t, "r");
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700608 }
Jack Palevichf6b5a532009-05-10 19:16:42 -0700609 sym_stk = (int) calloc(1, ALLOC_SIZE);
610 dstk = (int) strcpy((char*) sym_stk,
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700611 " int if else while break return for define main ") + TOK_STR_SIZE;
Jack Palevichf6b5a532009-05-10 19:16:42 -0700612 glo = (int) calloc(1, ALLOC_SIZE);
613 ind = prog = (int) calloc(1, ALLOC_SIZE);
614 vars = (int) calloc(1, ALLOC_SIZE);
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700615 inp();
616 next();
617 decl(0);
618#ifdef TEST
619 {
620 FILE *f;
Jack Palevichf6b5a532009-05-10 19:16:42 -0700621 f = fopen(t[1], "w");
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700622 fwrite((void *)prog, 1, ind - prog, f);
623 fclose(f);
624 return 0;
625 }
626#else
627 return (*(int (*)())*(int *)(vars + TOK_MAIN)) (n, t);
628#endif
629}