/*
  Obfuscated Tiny C Compiler

  Copyright (C) 2001-2003 Fabrice Bellard

  This software is provided 'as-is', without any express or implied
  warranty. In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product and its documentation
     *is* required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
#ifndef TINY
#include <stdarg.h>
#endif
#include <stdio.h>

/* Compiler state.  Everything is a plain int; the variables described as
   pointers hold addresses that the code casts before dereferencing. */

/* tok: current token, tokc: its value (number, character constant or
   operator code), tokl: operator level of the current token */
int tok, tokc, tokl;
/* ch: current input character, file: input stream */
int ch, file;
/* vars: value of variables (base of the per-symbol slots)
   sym_stk: start of the identifier string heap
   last_id: start of the text of the identifier read last */
int vars, sym_stk, last_id;
/* dstk: define stack (write position in the string heap)
   dptr, dch: macro state (read position in a macro body and the input
   character to resume with once the body is exhausted) */
int dstk, dptr, dch;
/* prog: output code, ind: output code ptr */
int prog, ind;
/* glo: global variable index, loc: local variable index,
   rsym: return symbol */
int glo, loc, rsym;

/* size in bytes of each buffer allocated by main() */
#define ALLOC_SIZE 99999

/* The keyword codes below depend on the init string that main() copies to
   sym_stk: a keyword found at offset k of that string has the code
   TOK_IDENT + 8 * k, and TOK_STR_SIZE is the length of the string. */
#define TOK_STR_SIZE 48
#define TOK_IDENT 0x100
#define TOK_INT 0x100
#define TOK_IF 0x120
#define TOK_ELSE 0x138
#define TOK_WHILE 0x160
#define TOK_BREAK 0x190
#define TOK_RETURN 0x1c0
#define TOK_FOR 0x1f8
#define TOK_DEFINE 0x218
#define TOK_MAIN 0x250

/* token codes that are not identifiers: the first half of a two character
   operator, and a numeric constant */
#define TOK_DUMMY 1
#define TOK_NUM 2

/* gmov() compares its address operand against this limit */
#define LOCAL 0x200

/* values stored in the first word of a symbol slot */
#define SYM_FORWARD 0
#define SYM_DEFINE 1

/* tokens in string heap */
#define TAG_TOK ' '
#define TAG_MACRO 2
64
65pdef(t)
66{
67 *(char *)dstk++ = t;
68}
69
70inp()
71{
72 if (dptr) {
73 ch = *(char *)dptr++;
74 if (ch == TAG_MACRO) {
75 dptr = 0;
76 ch = dch;
77 }
78 } else
79 ch = fgetc(file);
80 /* printf("ch=%c 0x%x\n", ch, ch); */
81}
82
83isid()
84{
85 return isalnum(ch) | ch == '_';
86}
87
88/* read a character constant */
89getq()
90{
91 if (ch == '\\') {
92 inp();
93 if (ch == 'n')
94 ch = '\n';
95 }
96}
97
98next()
99{
100 int t, l, a;
101
102 while (isspace(ch) | ch == '#') {
103 if (ch == '#') {
104 inp();
105 next();
106 if (tok == TOK_DEFINE) {
107 next();
108 pdef(TAG_TOK); /* fill last ident tag */
109 *(int *)tok = SYM_DEFINE;
110 *(int *)(tok + 4) = dstk; /* define stack */
111 }
112 /* well we always save the values ! */
113 while (ch != '\n') {
114 pdef(ch);
115 inp();
116 }
117 pdef(ch);
118 pdef(TAG_MACRO);
119 }
120 inp();
121 }
122 tokl = 0;
123 tok = ch;
124 /* encode identifiers & numbers */
125 if (isid()) {
126 pdef(TAG_TOK);
127 last_id = dstk;
128 while (isid()) {
129 pdef(ch);
130 inp();
131 }
132 if (isdigit(tok)) {
133 tokc = strtol(last_id, 0, 0);
134 tok = TOK_NUM;
135 } else {
136 *(char *)dstk = TAG_TOK; /* no need to mark end of string (we
137 suppose data is initied to zero */
138 tok = strstr(sym_stk, last_id - 1) - sym_stk;
139 *(char *)dstk = 0; /* mark real end of ident for dlsym() */
140 tok = tok * 8 + TOK_IDENT;
141 if (tok > TOK_DEFINE) {
142 tok = vars + tok;
143 /* printf("tok=%s %x\n", last_id, tok); */
144 /* define handling */
145 if (*(int *)tok == SYM_DEFINE) {
146 dptr = *(int *)(tok + 4);
147 dch = ch;
148 inp();
149 next();
150 }
151 }
152 }
153 } else {
154 inp();
155 if (tok == '\'') {
156 tok = TOK_NUM;
157 getq();
158 tokc = ch;
159 inp();
160 inp();
161 } else if (tok == '/' & ch == '*') {
162 inp();
163 while (ch) {
164 while (ch != '*')
165 inp();
166 inp();
167 if (ch == '/')
168 ch = 0;
169 }
170 inp();
171 next();
172 } else
173 {
174 t = "++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!=\'g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b";
175 while (l = *(char *)t++) {
176 a = *(char *)t++;
177 tokc = 0;
178 while ((tokl = *(char *)t++ - 'b') < 0)
179 tokc = tokc * 64 + tokl + 64;
180 if (l == tok & (a == ch | a == '@')) {
181#if 0
182 printf("%c%c -> tokl=%d tokc=0x%x\n",
183 l, a, tokl, tokc);
184#endif
185 if (a == ch) {
186 inp();
187 tok = TOK_DUMMY; /* dummy token for double tokens */
188 }
189 break;
190 }
191 }
192 }
193 }
194#if 0
195 {
196 int p;
197
198 printf("tok=0x%x ", tok);
199 if (tok >= TOK_IDENT) {
200 printf("'");
201 if (tok > TOK_DEFINE)
202 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
203 else
204 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
205 while (*(char *)p != TAG_TOK && *(char *)p)
206 printf("%c", *(char *)p++);
207 printf("'\n");
208 } else if (tok == TOK_NUM) {
209 printf("%d\n", tokc);
210 } else {
211 printf("'%c'\n", tok);
212 }
213 }
214#endif
215}
216
217#ifdef TINY
218#define skip(c) next()
219#else
220
221void error(char *fmt,...)
222{
223 va_list ap;
224
225 va_start(ap, fmt);
226 fprintf(stderr, "%d: ", ftell((FILE *)file));
227 vfprintf(stderr, fmt, ap);
228 fprintf(stderr, "\n");
229 exit(1);
230 va_end(ap);
231}
232
233void skip(c)
234{
235 if (tok != c) {
236 error("'%c' expected", c);
237 }
238 next();
239}
240
241#endif
242
243o(n)
244{
245 /* cannot use unsigned, so we must do a hack */
246 while (n && n != -1) {
247 *(char *)ind++ = n;
248 n = n >> 8;
249 }
250}
251
252/* output a symbol and patch all calls to it */
253gsym(t)
254{
255 int n;
256 while (t) {
257 n = *(int *)t; /* next value */
258 *(int *)t = ind - t - 4;
259 t = n;
260 }
261}
262
263/* psym is used to put an instruction with a data field which is a
264 reference to a symbol. It is in fact the same as oad ! */
265#define psym oad
266
267/* instruction + address */
268oad(n, t)
269{
270 o(n);
271 *(int *)ind = t;
272 t = ind;
273 ind = ind + 4;
274 return t;
275}
276
/* Load immediate: emit the instruction that sets %eax to the constant t. */
li(t)
{
    oad(0xb8, t); /* mov $xx, %eax */
}
282
/* Emit a jmp whose 32 bit operand is t and return the address of that
   operand (t is either a displacement or the previous link of a chain of
   references to be resolved by gsym()). */
gjmp(t)
{
    return oad(0xe9, t); /* jmp xxx */
}
287
/* Emit a test of %eax followed by a conditional jump: je when l is 0, jne
   when l is 1.  t is stored as the jump operand; the address of the
   operand is returned. */
gtst(l, t)
{
    o(0x0fc085); /* test %eax, %eax, then the 0x0f prefix of je/jne */
    return oad(0x84 + l, t);
}
294
/* Emit a comparison of %ecx with %eax that leaves the outcome in %eax.
   t selects the condition: it is added to 0x90 to build the setxx
   opcode. */
gcmp(t)
{
    o(0xc139);    /* cmp %eax,%ecx */
    oad(0xb8, 0); /* mov $0, %eax */
    o(0x0f);      /* setxx %al */
    o(t + 0x90);
    o(0xc0);
}
303
304gmov(l, t)
305{
306 o(l + 0x83);
307 oad((t < LOCAL) << 7 | 5, t);
308}
309
310/* l is one if '=' parsing wanted (quick hack) */
311unary(l)
312{
313 int n, t, a, c;
314
315 n = 1; /* type of expression 0 = forward, 1 = value, other =
316 lvalue */
317 if (tok == '\"') {
318 li(glo);
319 while (ch != '\"') {
320 getq();
321 *(char *)glo++ = ch;
322 inp();
323 }
324 *(char *)glo = 0;
325 glo = glo + 4 & -4; /* align heap */
326 inp();
327 next();
328 } else {
329 c = tokl;
330 a = tokc;
331 t = tok;
332 next();
333 if (t == TOK_NUM) {
334 li(a);
335 } else if (c == 2) {
336 /* -, +, !, ~ */
337 unary(0);
338 oad(0xb9, 0); /* movl $0, %ecx */
339 if (t == '!')
340 gcmp(a);
341 else
342 o(a);
343 } else if (t == '(') {
344 expr();
345 skip(')');
346 } else if (t == '*') {
347 /* parse cast */
348 skip('(');
349 t = tok; /* get type */
350 next(); /* skip int/char/void */
351 next(); /* skip '*' or '(' */
352 if (tok == '*') {
353 /* function type */
354 skip('*');
355 skip(')');
356 skip('(');
357 skip(')');
358 t = 0;
359 }
360 skip(')');
361 unary(0);
362 if (tok == '=') {
363 next();
364 o(0x50); /* push %eax */
365 expr();
366 o(0x59); /* pop %ecx */
367 o(0x0188 + (t == TOK_INT)); /* movl %eax/%al, (%ecx) */
368 } else if (t) {
369 if (t == TOK_INT)
370 o(0x8b); /* mov (%eax), %eax */
371 else
372 o(0xbe0f); /* movsbl (%eax), %eax */
373 ind++; /* add zero in code */
374 }
375 } else if (t == '&') {
376 gmov(10, *(int *)tok); /* leal EA, %eax */
377 next();
378 } else {
379 n = *(int *)t;
380 /* forward reference: try dlsym */
381 if (!n)
382 n = dlsym(0, last_id);
383 if (tok == '=' & l) {
384 /* assignment */
385 next();
386 expr();
387 gmov(6, n); /* mov %eax, EA */
388 } else if (tok != '(') {
389 /* variable */
390 gmov(8, n); /* mov EA, %eax */
391 if (tokl == 11) {
392 gmov(0, n);
393 o(tokc);
394 next();
395 }
396 }
397 }
398 }
399
400 /* function call */
401 if (tok == '(') {
402 if (n == 1)
403 o(0x50); /* push %eax */
404
405 /* push args and invert order */
406 a = oad(0xec81, 0); /* sub $xxx, %esp */
407 next();
408 l = 0;
409 while(tok != ')') {
410 expr();
411 oad(0x248489, l); /* movl %eax, xxx(%esp) */
412 if (tok == ',')
413 next();
414 l = l + 4;
415 }
416 *(int *)a = l;
417 next();
418 if (!n) {
419 /* forward reference */
420 t = t + 4;
421 *(int *)t = psym(0xe8, *(int *)t);
422 } else if (n == 1) {
423 oad(0x2494ff, l); /* call *xxx(%esp) */
424 l = l + 4;
425 } else {
426 oad(0xe8, n - ind - 5); /* call xxx */
427 }
428 if (l)
429 oad(0xc481, l); /* add $xxx, %esp */
430 }
431}
432
433sum(l)
434{
435 int t, n, a;
436
437 if (l-- == 1)
438 unary(1);
439 else {
440 sum(l);
441 a = 0;
442 while (l == tokl) {
443 n = tok;
444 t = tokc;
445 next();
446
447 if (l > 8) {
448 a = gtst(t, a); /* && and || output code generation */
449 sum(l);
450 } else {
451 o(0x50); /* push %eax */
452 sum(l);
453 o(0x59); /* pop %ecx */
454
455 if (l == 4 | l == 5) {
456 gcmp(t);
457 } else {
458 o(t);
459 if (n == '%')
460 o(0x92); /* xchg %edx, %eax */
461 }
462 }
463 }
464 /* && and || output code generation */
465 if (a && l > 8) {
466 a = gtst(t, a);
467 li(t ^ 1);
468 gjmp(5); /* jmp $ + 5 */
469 gsym(a);
470 li(t);
471 }
472 }
473}
474
/* Parse a full expression: every operator level, starting from 11. */
expr()
{
    sum(11);
}


/* Parse a full expression used as a condition.  A test followed by a je
   with operand 0 is emitted; the address of that operand is returned so
   that the caller can resolve the jump with gsym(). */
test_expr()
{
    sum(11);
    return gtst(0, 0);
}
486
487block(l)
488{
489 int a, n, t;
490
491 if (tok == TOK_IF) {
492 next();
493 skip('(');
494 a = test_expr();
495 skip(')');
496 block(l);
497 if (tok == TOK_ELSE) {
498 next();
499 n = gjmp(0); /* jmp */
500 gsym(a);
501 block(l);
502 gsym(n); /* patch else jmp */
503 } else {
504 gsym(a); /* patch if test */
505 }
506 } else if (tok == TOK_WHILE | tok == TOK_FOR) {
507 t = tok;
508 next();
509 skip('(');
510 if (t == TOK_WHILE) {
511 n = ind;
512 a = test_expr();
513 } else {
514 if (tok != ';')
515 expr();
516 skip(';');
517 n = ind;
518 a = 0;
519 if (tok != ';')
520 a = test_expr();
521 skip(';');
522 if (tok != ')') {
523 t = gjmp(0);
524 expr();
525 gjmp(n - ind - 5);
526 gsym(t);
527 n = t + 4;
528 }
529 }
530 skip(')');
531 block(&a);
532 gjmp(n - ind - 5); /* jmp */
533 gsym(a);
534 } else if (tok == '{') {
535 next();
536 /* declarations */
537 decl(1);
538 while(tok != '}')
539 block(l);
540 next();
541 } else {
542 if (tok == TOK_RETURN) {
543 next();
544 if (tok != ';')
545 expr();
546 rsym = gjmp(rsym); /* jmp */
547 } else if (tok == TOK_BREAK) {
548 next();
549 *(int *)l = gjmp(*(int *)l);
550 } else if (tok != ';')
551 expr();
552 skip(';');
553 }
554}
555
556/* 'l' is true if local declarations */
557decl(l)
558{
559 int a;
560
561 while (tok == TOK_INT | tok != -1 & !l) {
562 if (tok == TOK_INT) {
563 next();
564 while (tok != ';') {
565 if (l) {
566 loc = loc + 4;
567 *(int *)tok = -loc;
568 } else {
569 *(int *)tok = glo;
570 glo = glo + 4;
571 }
572 next();
573 if (tok == ',')
574 next();
575 }
576 skip(';');
577 } else {
578 /* patch forward references (XXX: do not work for function
579 pointers) */
580 gsym(*(int *)(tok + 4));
581 /* put function address */
582 *(int *)tok = ind;
583 next();
584 skip('(');
585 a = 8;
586 while (tok != ')') {
587 /* read param name and compute offset */
588 *(int *)tok = a;
589 a = a + 4;
590 next();
591 if (tok == ',')
592 next();
593 }
594 next(); /* skip ')' */
595 rsym = loc = 0;
596 o(0xe58955); /* push %ebp, mov %esp, %ebp */
597 a = oad(0xec81, 0); /* sub $xxx, %esp */
598 block(0);
599 gsym(rsym);
600 o(0xc3c9); /* leave, ret */
601 *(int *)a = loc; /* save local variables */
602 }
603 }
604}
605
606main(n, t)
607{
608 file = stdin;
609 if (n-- > 1) {
610 t = t + 4;
611 file = fopen(*(int *)t, "r");
612 }
613 dstk = strcpy(sym_stk = calloc(1, ALLOC_SIZE),
614 " int if else while break return for define main ") + TOK_STR_SIZE;
615 glo = calloc(1, ALLOC_SIZE);
616 ind = prog = calloc(1, ALLOC_SIZE);
617 vars = calloc(1, ALLOC_SIZE);
618 inp();
619 next();
620 decl(0);
621#ifdef TEST
622 {
623 FILE *f;
624 f = fopen(*(char **)(t + 4), "w");
625 fwrite((void *)prog, 1, ind - prog, f);
626 fclose(f);
627 return 0;
628 }
629#else
630 return (*(int (*)())*(int *)(vars + TOK_MAIN)) (n, t);
631#endif
632}