blob: 0f8e606f37850e35fb9efbc00040ccdde81aaf5e [file] [log] [blame]
/*
Obfuscated Tiny C Compiler
Copyright (C) 2001-2003 Fabrice Bellard
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product and its documentation
*is* required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <ctype.h>
#include <dlfcn.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
namespace acc {
class compiler {
class CodeBuf {
char* ind;
char* pProgramBase;
void release() {
if (pProgramBase != 0) {
free(pProgramBase);
pProgramBase = 0;
}
}
public:
CodeBuf() {
pProgramBase = 0;
ind = 0;
}
~CodeBuf() {
release();
}
void init(int size) {
release();
pProgramBase = (char*) calloc(1, size);
ind = pProgramBase;
}
void o(int n) {
/* cannot use unsigned, so we must do a hack */
while (n && n != -1) {
*ind++ = n;
n = n >> 8;
}
}
/*
* Output a byte. Handles all values, 0..ff.
*/
void ob(int n) {
*ind++ = n;
}
/* output a symbol and patch all calls to it */
void gsym(int t) {
int n;
while (t) {
n = *(int *) t; /* next value */
*(int *) t = ((int) ind) - t - 4;
t = n;
}
}
/* psym is used to put an instruction with a data field which is a
reference to a symbol. It is in fact the same as oad ! */
int psym(int n, int t) {
return oad(n, t);
}
/* instruction + address */
int oad(int n, int t) {
o(n);
*(int *) ind = t;
t = (int) ind;
ind = ind + 4;
return t;
}
inline void* getBase() {
return (void*) pProgramBase;
}
int getSize() {
return ind - pProgramBase;
}
int getPC() {
return (int) ind;
}
};
class CodeGenerator {
public:
CodeGenerator() {}
virtual ~CodeGenerator() {}
void init(CodeBuf* pCodeBuf) {
this->pCodeBuf = pCodeBuf;
}
/* output a symbol and patch all calls to it */
void gsym(int t) {
pCodeBuf->gsym(t);
}
protected:
void o(int n) {
pCodeBuf->o(n);
}
/*
* Output a byte. Handles all values, 0..ff.
*/
void ob(int n) {
pCodeBuf->ob(n);
}
/* psym is used to put an instruction with a data field which is a
reference to a symbol. It is in fact the same as oad ! */
int psym(int n, int t) {
return oad(n, t);
}
/* instruction + address */
int oad(int n, int t) {
return pCodeBuf->oad(n,t);
}
int getPC() {
return pCodeBuf->getPC();
}
private:
CodeBuf* pCodeBuf;
};
class X86CodeGenerator : public CodeGenerator {
public:
X86CodeGenerator() {}
virtual ~X86CodeGenerator() {}
/* returns address to patch with local variable size
*/
int functionEntry() {
o(0xe58955); /* push %ebp, mov %esp, %ebp */
return oad(0xec81, 0); /* sub $xxx, %esp */
}
void functionExit() {
o(0xc3c9); /* leave, ret */
}
/* load immediate value */
int li(int t) {
oad(0xb8, t); /* mov $xx, %eax */
}
int gjmp(int t) {
return psym(0xe9, t);
}
/* l = 0: je, l == 1: jne */
int gtst(int l, int t) {
o(0x0fc085); /* test %eax, %eax, je/jne xxx */
return psym(0x84 + l, t);
}
int gcmp(int op) {
int t = decodeOp(op);
o(0xc139); /* cmp %eax,%ecx */
li(0);
o(0x0f); /* setxx %al */
o(t + 0x90);
o(0xc0);
}
int genOp(int op) {
o(decodeOp(op));
if (op == OP_MOD)
o(0x92); /* xchg %edx, %eax */
}
void clearECX() {
oad(0xb9, 0); /* movl $0, %ecx */
}
void pushEAX() {
o(0x50); /* push %eax */
}
void popECX() {
o(0x59); /* pop %ecx */
}
void storeEAXToAddressECX(bool isInt) {
o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
}
void loadEAXIndirect(bool isInt) {
if (isInt)
o(0x8b); /* mov (%eax), %eax */
else
o(0xbe0f); /* movsbl (%eax), %eax */
ob(0); /* add zero in code */
}
void leaEAX(int ea) {
gmov(10, ea); /* leal EA, %eax */
}
void storeEAX(int ea) {
gmov(6, ea); /* mov %eax, EA */
}
void loadEAX(int ea) {
gmov(8, ea); /* mov EA, %eax */
}
void postIncrementOrDecrement(int n, int op) {
/* Implement post-increment or post decrement.
*/
gmov(0, n); /* 83 ADD */
o(decodeOp(op));
}
int allocStackSpaceForArgs() {
return oad(0xec81, 0); /* sub $xxx, %esp */
}
void storeEAToArg(int l) {
oad(0x248489, l); /* movl %eax, xxx(%esp) */
}
int callForward(int symbol) {
return psym(0xe8, symbol); /* call xxx */
}
void callRelative(int t) {
psym(0xe8, t); /* call xxx */
}
void callIndirect(int l) {
oad(0x2494ff, l); /* call *xxx(%esp) */
}
void adjustStackAfterCall(int l) {
oad(0xc481, l); /* add $xxx, %esp */
}
private:
static const int operatorHelper[];
int decodeOp(int op) {
if (op < 0 || op > OP_COUNT) {
fprintf(stderr, "Out-of-range operator: %d\n", op);
exit(1);
}
return operatorHelper[op];
}
int gmov(int l, int t) {
o(l + 0x83);
oad((t < LOCAL) << 7 | 5, t);
}
};
/* vars: value of variables
loc : local variable index
glo : global variable index
ind : output code ptr
rsym: return symbol
prog: output code
dstk: define stack
dptr, dch: macro state
*/
int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
dptr, dch, last_id;
void* pSymbolBase;
void* pGlobalBase;
void* pVarsBase;
FILE* file;
CodeBuf codeBuf;
X86CodeGenerator* pGen;
static const int ALLOC_SIZE = 99999;
/* depends on the init string */
static const int TOK_STR_SIZE = 48;
static const int TOK_IDENT = 0x100;
static const int TOK_INT = 0x100;
static const int TOK_IF = 0x120;
static const int TOK_ELSE = 0x138;
static const int TOK_WHILE = 0x160;
static const int TOK_BREAK = 0x190;
static const int TOK_RETURN = 0x1c0;
static const int TOK_FOR = 0x1f8;
static const int TOK_DEFINE = 0x218;
static const int TOK_MAIN = 0x250;
static const int TOK_DUMMY = 1;
static const int TOK_NUM = 2;
static const int LOCAL = 0x200;
static const int SYM_FORWARD = 0;
static const int SYM_DEFINE = 1;
/* tokens in string heap */
static const int TAG_TOK = ' ';
static const int TAG_MACRO = 2;
static const int OP_INCREMENT = 0;
static const int OP_DECREMENT = 1;
static const int OP_MUL = 2;
static const int OP_DIV = 3;
static const int OP_MOD = 4;
static const int OP_PLUS = 5;
static const int OP_MINUS = 6;
static const int OP_SHIFT_LEFT = 7;
static const int OP_SHIFT_RIGHT = 8;
static const int OP_LESS_EQUAL = 9;
static const int OP_GREATER_EQUAL = 10;
static const int OP_LESS = 11;
static const int OP_GREATER = 12;
static const int OP_EQUALS = 13;
static const int OP_NOT_EQUALS = 14;
static const int OP_LOGICAL_AND = 15;
static const int OP_LOGICAL_OR = 16;
static const int OP_BIT_AND = 17;
static const int OP_BIT_XOR = 18;
static const int OP_BIT_OR = 19;
static const int OP_BIT_NOT = 20;
static const int OP_LOGICAL_NOT = 21;
static const int OP_COUNT = 22;
/* Operators are searched from front, the two-character operators appear
* before the single-character operators with the same first character.
* @ is used to pad out single-character operators.
*/
static const char* operatorChars;
static const char operatorLevel[];
void pdef(int t) {
*(char *) dstk++ = t;
}
void inp() {
if (dptr) {
ch = *(char *) dptr++;
if (ch == TAG_MACRO) {
dptr = 0;
ch = dch;
}
} else
ch = fgetc(file);
/* printf("ch=%c 0x%x\n", ch, ch); */
}
int isid() {
return isalnum(ch) | ch == '_';
}
/* read a character constant */
void getq() {
if (ch == '\\') {
inp();
if (ch == 'n')
ch = '\n';
}
}
void next() {
int l, a;
while (isspace(ch) | ch == '#') {
if (ch == '#') {
inp();
next();
if (tok == TOK_DEFINE) {
next();
pdef(TAG_TOK); /* fill last ident tag */
*(int *) tok = SYM_DEFINE;
*(int *) (tok + 4) = dstk; /* define stack */
}
/* well we always save the values ! */
while (ch != '\n') {
pdef(ch);
inp();
}
pdef(ch);
pdef(TAG_MACRO);
}
inp();
}
tokl = 0;
tok = ch;
/* encode identifiers & numbers */
if (isid()) {
pdef(TAG_TOK);
last_id = dstk;
while (isid()) {
pdef(ch);
inp();
}
if (isdigit(tok)) {
tokc = strtol((char*) last_id, 0, 0);
tok = TOK_NUM;
} else {
*(char *) dstk = TAG_TOK; /* no need to mark end of string (we
suppose data is initialized to zero by calloc) */
tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
- sym_stk);
*(char *) dstk = 0; /* mark real end of ident for dlsym() */
tok = tok * 8 + TOK_IDENT;
if (tok > TOK_DEFINE) {
tok = vars + tok;
/* printf("tok=%s %x\n", last_id, tok); */
/* define handling */
if (*(int *) tok == SYM_DEFINE) {
dptr = *(int *) (tok + 4);
dch = ch;
inp();
next();
}
}
}
} else {
inp();
if (tok == '\'') {
tok = TOK_NUM;
getq();
tokc = ch;
inp();
inp();
} else if (tok == '/' & ch == '*') {
inp();
while (ch) {
while (ch != '*')
inp();
inp();
if (ch == '/')
ch = 0;
}
inp();
next();
} else {
const char* t = operatorChars;
int opIndex = 0;
while (l = *t++) {
a = *t++;
tokl = operatorLevel[opIndex];
tokc = opIndex;
if (l == tok & (a == ch | a == '@')) {
#if 0
printf("%c%c -> tokl=%d tokc=0x%x\n",
l, a, tokl, tokc);
#endif
if (a == ch) {
inp();
tok = TOK_DUMMY; /* dummy token for double tokens */
}
break;
}
opIndex++;
}
if (l == 0) {
tokl = 0;
tokc = 0;
}
}
}
#if 0
{
int p;
printf("tok=0x%x ", tok);
if (tok >= TOK_IDENT) {
printf("'");
if (tok> TOK_DEFINE)
p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
else
p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
while (*(char *)p != TAG_TOK && *(char *)p)
printf("%c", *(char *)p++);
printf("'\n");
} else if (tok == TOK_NUM) {
printf("%d\n", tokc);
} else {
printf("'%c'\n", tok);
}
}
#endif
}
void error(const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
fprintf(stderr, "%ld: ", ftell((FILE *) file));
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
exit(1);
}
void skip(int c) {
if (tok != c) {
error("'%c' expected", c);
}
next();
}
/* l is one if '=' parsing wanted (quick hack) */
void unary(int l) {
int n, t, a, c;
n = 1; /* type of expression 0 = forward, 1 = value, other =
lvalue */
if (tok == '\"') {
pGen->li(glo);
while (ch != '\"') {
getq();
*(char *) glo++ = ch;
inp();
}
*(char *) glo = 0;
glo = glo + 4 & -4; /* align heap */
inp();
next();
} else {
c = tokl;
a = tokc;
t = tok;
next();
if (t == TOK_NUM) {
pGen->li(a);
} else if (c == 2) {
/* -, +, !, ~ */
unary(0);
pGen->clearECX();
if (t == '!')
pGen->gcmp(a);
else
pGen->genOp(a);
} else if (t == '(') {
expr();
skip(')');
} else if (t == '*') {
/* parse cast */
skip('(');
t = tok; /* get type */
next(); /* skip int/char/void */
next(); /* skip '*' or '(' */
if (tok == '*') {
/* function type */
skip('*');
skip(')');
skip('(');
skip(')');
t = 0;
}
skip(')');
unary(0);
if (tok == '=') {
next();
pGen->pushEAX();
expr();
pGen->popECX();
pGen->storeEAXToAddressECX(t == TOK_INT);
} else if (t) {
pGen->loadEAXIndirect(t == TOK_INT);
}
} else if (t == '&') {
pGen->leaEAX(*(int *) tok);
next();
} else {
n = *(int *) t;
/* forward reference: try dlsym */
if (!n)
n = (int) dlsym(0, (char*) last_id);
if (tok == '=' & l) {
/* assignment */
next();
expr();
pGen->storeEAX(n);
} else if (tok != '(') {
/* variable */
pGen->loadEAX(n);
if (tokl == 11) {
pGen->postIncrementOrDecrement(n, tokc);
next();
}
}
}
}
/* function call */
if (tok == '(') {
if (n == 1)
pGen->pushEAX();
/* push args and invert order */
a = pGen->allocStackSpaceForArgs();
next();
l = 0;
while (tok != ')') {
expr();
pGen->storeEAToArg(l);
if (tok == ',')
next();
l = l + 4;
}
*(int *) a = l;
next();
if (!n) {
/* forward reference */
t = t + 4;
*(int *) t = pGen->callForward(*(int *) t);
} else if (n == 1) {
pGen->callIndirect(l);
l = l + 4;
} else {
pGen->callRelative(n - codeBuf.getPC() - 5); /* call xxx */
}
if (l)
pGen->adjustStackAfterCall(l);
}
}
void sum(int l) {
int t, n, a;
if (l-- == 1)
unary(1);
else {
sum(l);
a = 0;
while (l == tokl) {
n = tok;
t = tokc;
next();
if (l > 8) {
a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
sum(l);
} else {
pGen->pushEAX();
sum(l);
pGen->popECX();
if (l == 4 | l == 5) {
pGen->gcmp(t);
} else {
pGen->genOp(t);
}
}
}
/* && and || output code generation */
if (a && l > 8) {
a = pGen->gtst(t == OP_LOGICAL_OR, a);
pGen->li(t != OP_LOGICAL_OR);
pGen->gjmp(5); /* jmp $ + 5 */
pGen->gsym(a);
pGen->li(t == OP_LOGICAL_OR);
}
}
}
void expr() {
sum(11);
}
int test_expr() {
expr();
return pGen->gtst(0, 0);
}
void block(int l) {
int a, n, t;
if (tok == TOK_IF) {
next();
skip('(');
a = test_expr();
skip(')');
block(l);
if (tok == TOK_ELSE) {
next();
n = pGen->gjmp(0); /* jmp */
pGen->gsym(a);
block(l);
pGen->gsym(n); /* patch else jmp */
} else {
pGen->gsym(a); /* patch if test */
}
} else if (tok == TOK_WHILE | tok == TOK_FOR) {
t = tok;
next();
skip('(');
if (t == TOK_WHILE) {
n = codeBuf.getPC();
a = test_expr();
} else {
if (tok != ';')
expr();
skip(';');
n = codeBuf.getPC();
a = 0;
if (tok != ';')
a = test_expr();
skip(';');
if (tok != ')') {
t = pGen->gjmp(0);
expr();
pGen->gjmp(n - codeBuf.getPC() - 5);
pGen->gsym(t);
n = t + 4;
}
}
skip(')');
block((int) &a);
pGen->gjmp(n - codeBuf.getPC() - 5); /* jmp */
pGen->gsym(a);
} else if (tok == '{') {
next();
/* declarations */
decl(1);
while (tok != '}')
block(l);
next();
} else {
if (tok == TOK_RETURN) {
next();
if (tok != ';')
expr();
rsym = pGen->gjmp(rsym); /* jmp */
} else if (tok == TOK_BREAK) {
next();
*(int *) l = pGen->gjmp(*(int *) l);
} else if (tok != ';')
expr();
skip(';');
}
}
/* 'l' is true if local declarations */
void decl(int l) {
int a;
while (tok == TOK_INT | tok != -1 & !l) {
if (tok == TOK_INT) {
next();
while (tok != ';') {
if (l) {
loc = loc + 4;
*(int *) tok = -loc;
} else {
*(int *) tok = glo;
glo = glo + 4;
}
next();
if (tok == ',')
next();
}
skip(';');
} else {
/* patch forward references (XXX: do not work for function
pointers) */
pGen->gsym(*(int *) (tok + 4));
/* put function address */
*(int *) tok = codeBuf.getPC();
next();
skip('(');
a = 8;
while (tok != ')') {
/* read param name and compute offset */
*(int *) tok = a;
a = a + 4;
next();
if (tok == ',')
next();
}
next(); /* skip ')' */
rsym = loc = 0;
a = pGen->functionEntry();
block(0);
pGen->gsym(rsym);
pGen->functionExit();
*(int *) a = loc; /* save local variables */
}
}
}
void cleanup() {
if (sym_stk != 0) {
free((void*) sym_stk);
sym_stk = 0;
}
if (pGlobalBase != 0) {
free((void*) pGlobalBase);
pGlobalBase = 0;
}
if (pVarsBase != 0) {
free(pVarsBase);
pVarsBase = 0;
}
if (pGen) {
delete pGen;
pGen = 0;
}
}
void clear() {
tok = 0;
tokc = 0;
tokl = 0;
ch = 0;
vars = 0;
rsym = 0;
loc = 0;
glo = 0;
sym_stk = 0;
dstk = 0;
dptr = 0;
dch = 0;
last_id = 0;
file = 0;
pGlobalBase = 0;
pVarsBase = 0;
pGen = 0;
}
public:
compiler() {
clear();
}
~compiler() {
cleanup();
}
int compile(FILE* in) {
cleanup();
clear();
codeBuf.init(ALLOC_SIZE);
pGen = new X86CodeGenerator();
pGen->init(&codeBuf);
file = in;
sym_stk = (int) calloc(1, ALLOC_SIZE);
dstk = (int) strcpy((char*) sym_stk,
" int if else while break return for define main ")
+ TOK_STR_SIZE;
pGlobalBase = calloc(1, ALLOC_SIZE);
glo = (int) pGlobalBase;
pVarsBase = calloc(1, ALLOC_SIZE);
vars = (int) pVarsBase;
inp();
next();
decl(0);
return 0;
}
int run(int argc, char** argv) {
typedef int (*mainPtr)(int argc, char** argv);
mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
if (!aMain) {
fprintf(stderr, "Could not find function \"main\".\n");
return -1;
}
return aMain(argc, argv);
}
int dump(FILE* out) {
fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
return 0;
}
};
const char* compiler::operatorChars =
"++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
const char compiler::operatorLevel[] =
{11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
5, 5, /* ==, != */
9, 10, /* &&, || */
6, 7, 8, /* & ^ | */
2, 2 /* ~ ! */
};
const int compiler::X86CodeGenerator::operatorHelper[] = {
0x1, // ++
0xff, // --
0xc1af0f, // *
0xf9f79991, // /
0xf9f79991, // % (With manual assist to swap results)
0xc801, // +
0xd8f7c829, // -
0xe0d391, // <<
0xf8d391, // >>
0xe, // <=
0xd, // >=
0xc, // <
0xf, // >
0x4, // ==
0x5, // !=
0x0, // &&
0x1, // ||
0xc821, // &
0xc831, // ^
0xc809, // |
0xd0f7, // ~
0x4 // !
};
} // namespace acc
int main(int argc, char** argv) {
bool doTest = false;
const char* inFile = NULL;
const char* outFile = NULL;
int i;
for (i = 1; i < argc; i++) {
char* arg = argv[i];
if (arg[0] == '-') {
switch (arg[1]) {
case 'T':
if (i + 1 >= argc) {
fprintf(stderr, "Expected filename after -T\n");
return 2;
}
doTest = true;
outFile = argv[i + 1];
i += 1;
break;
default:
fprintf(stderr, "Unrecognized flag %s\n", arg);
return 3;
}
} else if (inFile == NULL) {
inFile = arg;
} else {
break;
}
}
FILE* in = stdin;
if (inFile) {
in = fopen(inFile, "r");
if (!in) {
fprintf(stderr, "Could not open input file %s\n", inFile);
return 1;
}
}
acc::compiler compiler;
int compileResult = compiler.compile(in);
if (in != stdin) {
fclose(in);
}
if (compileResult) {
fprintf(stderr, "Compile failed: %d\n", compileResult);
return 6;
}
if (doTest) {
FILE* save = fopen(outFile, "w");
if (!save) {
fprintf(stderr, "Could not open output file %s\n", outFile);
return 5;
}
compiler.dump(save);
fclose(save);
} else {
fprintf(stderr, "Executing compiled code:\n");
int codeArgc = argc - i + 1;
char** codeArgv = argv + i - 1;
codeArgv[0] = (char*) (inFile ? inFile : "stdin");
return compiler.run(codeArgc, codeArgv);
}
return 0;
}