/* blob: a8505cd254294eba1a5b40017bb2fbd6c1966c4f [file] [log] [blame] */
/*
**
** Copyright 2009, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
/* Based upon the freeware version of the Obfuscated Tiny C Compiler
* by Francis Bellard. <francis@bellard.org>.
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Token codes stored in currentToken by nextToken().
 *
 * Single-character tokens keep their character code. Two-character operators
 * are reported as TOKEN_OPERATOR and numeric or character literals as
 * TOKEN_NUMBER.
 *
 * Keyword codes are TOKEN_SYMBOL_BASE + 8 * (offset of " keyword" inside the
 * initial symbol-table string installed by main()), so these values must stay
 * in sync with that string.
 */
#define TOKEN_OPERATOR 1
#define TOKEN_NUMBER 2
#define TOKEN_SYMBOL_BASE 256
#define TOKEN_INT 256
#define TOKEN_IF 288
#define TOKEN_ELSE 312
#define TOKEN_WHILE 352
#define TOKEN_BREAK 400
#define TOKEN_RETURN 448
#define TOKEN_FOR 504
#define TOKEN_DEFINE 536
/*
 * Compiler state. Many of these ints hold raw addresses that are cast to and
 * from pointers, so the code assumes sizeof(int) == sizeof(void*).
 */
/* Code of the current token; for non-keyword symbols, the address of the symbol's slot inside P. */
static int currentToken;
/* Value of a numeric token, or the encoded opcode bytes of an operator token. */
static int currentTokenData;
/* Level decoded from the operator table for the current token; 0 when it is not an operator. */
static int gCurrentTokenOperatorLevel;
/* Look-ahead character: the next character not yet consumed by the tokenizer. */
static int currentChar;
/* Head of the chain of 'return' branches to patch at the end of the current function. */
static int gEndOfFunctionTarget;
/* Address at which the next byte of generated code is written. */
static int gProgramCounter;
/* Bytes of local-variable storage allocated so far in the current function. */
static int gFunctionStackSize;
/* Look-ahead character saved while a macro body is being replayed. */
static int savedChar;
/* Read cursor inside the macro body being expanded, or NULL when reading from pInput. */
static char* pInProgressMacro;
/* Per-symbol slot table, addressed as P + token code; each slot is used as two ints. */
static char* P;
/* Base of the generated-code buffer. */
static char* ac;
/* Allocation cursor for string literals and global variables. */
static char* gStringTable;
/* Write cursor into the symbol-name table R. */
static char* pSymbolTable;
/* Start of the text of the most recently scanned symbol or number inside R. */
static char* M;
/* Base of the symbol-name table (space-separated names, plus macro bodies). */
static char* R;
/* Source stream being compiled. */
static FILE* pInput;
static void parseDeclarations (int isLocal);
static void parseExpression();
/* Append one character at the symbol-table write cursor and advance the cursor. */
static void addToSymbolTable(char e) {
    pSymbolTable[0] = e;
    pSymbolTable += 1;
}
/*
 * Load the next look-ahead character into currentChar.
 *
 * Characters come from the input stream unless a macro body is being
 * replayed. A byte value of 2 marks the end of a macro body: expansion stops
 * and the look-ahead character saved when the expansion started is restored.
 */
static void nextChar() {
    if (pInProgressMacro == NULL) {
        currentChar = fgetc(pInput);
        return;
    }

    currentChar = *pInProgressMacro;
    pInProgressMacro++;
    if (currentChar == 2) {
        pInProgressMacro = NULL;
        currentChar = savedChar;
    }
}
/* Return 1 when currentChar can be part of an identifier or number (alphanumeric or '_'), else 0. */
static int isSymbolChar() {
    if (currentChar == '_') {
        return 1;
    }
    return isalnum(currentChar) != 0;
}
/*
 * If currentChar is a backslash, consume it and translate the escape.
 * Only "\n" is translated (to a newline); for any other escape the character
 * following the backslash is left in currentChar unchanged.
 */
static void unescapeCurrentChar() {
    if (currentChar != '\\') {
        return;
    }
    nextChar();
    if (currentChar == 'n') {
        currentChar = '\n';
    }
}
/*
 * Scan the next token and publish it through currentToken, currentTokenData
 * and gCurrentTokenOperatorLevel.
 *
 *  - Whitespace is skipped. A '#' line is copied into the symbol table; for
 *    "#define NAME body" the symbol slot of NAME is marked as a macro
 *    (first int = 1) and its second int points at the stored body.
 *  - Identifiers become TOKEN_SYMBOL_BASE + 8 * (offset of " name" in R).
 *    Values above TOKEN_DEFINE are turned into the address of the symbol's
 *    slot in P; if that slot is a macro, its body is replayed instead.
 *  - Numbers and character literals become TOKEN_NUMBER with the value in
 *    currentTokenData.
 *  - Block comments are skipped.
 *  - Anything else is looked up in the packed operator table below.
 *
 * Unlike the loops this replaces, the '#' line copy and the comment skipper
 * stop at end of input instead of spinning forever (and, for '#', writing
 * past the symbol buffer).
 */
static void nextToken() {
    int j, m;

    while (isspace(currentChar) || currentChar == '#') {
        if (currentChar == '#') {
            nextChar();
            nextToken();
            if (currentToken == TOKEN_DEFINE) {
                nextToken();
                addToSymbolTable(' ');
                /* Slot layout for a macro: [0] = 1 (macro flag), [1] = body text. */
                *(int*) currentToken = 1;
                *(int*) (currentToken + 4) = (int) pSymbolTable;
            }
            /* Copy the rest of the line; stop at EOF so a final line without
             * a newline cannot loop forever. */
            while (currentChar != '\n' && currentChar != EOF) {
                addToSymbolTable(currentChar);
                nextChar();
            }
            addToSymbolTable('\n');
            addToSymbolTable(2); /* end-of-macro marker recognised by nextChar() */
        }
        nextChar();
    }

    gCurrentTokenOperatorLevel = 0;
    currentToken = currentChar;
    if (isSymbolChar()) {
        addToSymbolTable(' ');
        M = pSymbolTable;
        while (isSymbolChar()) {
            addToSymbolTable(currentChar);
            nextChar();
        }
        if (isdigit(currentToken)) {
            currentTokenData = strtol(M, 0, 0);
            currentToken = TOKEN_NUMBER;
        } else {
            /* Temporarily terminate with a space so the search matches the
             * whole name (" name ") and not just a prefix. */
            *(char*) pSymbolTable = ' ';
            currentToken = strstr(R, M - 1) - R;
            *(char*) pSymbolTable = 0;
            currentToken = currentToken * 8 + TOKEN_SYMBOL_BASE;
            if (currentToken > TOKEN_DEFINE) {
                /* Not a keyword: use the address of the symbol's slot. */
                currentToken = ((int) P) + currentToken;
                if (*(int*) currentToken == 1) {
                    /* Macro: replay its body, remembering the look-ahead. */
                    pInProgressMacro = (char*) (*(int*) (currentToken + 4));
                    savedChar = currentChar;
                    nextChar();
                    nextToken();
                }
            }
        }
    } else {
        nextChar();
        if (currentToken == '\'') {
            /* Character literal: value, then skip the closing quote. */
            currentToken = TOKEN_NUMBER;
            unescapeCurrentChar();
            currentTokenData = currentChar;
            nextChar();
            nextChar();
        } else if (currentToken == '/' && currentChar == '*') {
            /* Block comment: skip to the closing star-slash, or to EOF if the
             * comment is unterminated. */
            nextChar();
            while (currentChar && currentChar != EOF) {
                while (currentChar != '*' && currentChar != EOF)
                    nextChar();
                nextChar();
                if (currentChar == '/')
                    currentChar = 0;
            }
            nextChar();
            nextToken();
        } else {
            /*
             * Packed operator table. Each entry is:
             *   first char, second char ('@' == 64 means "single-char operator"),
             *   zero or more base-64 digits (characters below 'b') giving the
             *   value for currentTokenData, and a final character >= 'b'
             *   whose distance from 'b' is the operator level.
             * The trailing "*@b" entry leaves level 0 and data 0 behind when
             * nothing matches.
             */
            const char* e = "++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!='g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b";
            while ((j = *e++) != 0) {
                m = *e++;
                currentTokenData = 0;
                while ((gCurrentTokenOperatorLevel = *e++ - 98) < 0)
                    currentTokenData = currentTokenData * 64 + gCurrentTokenOperatorLevel + 64;
                if (j == currentToken && (m == currentChar || m == 64)) {
                    if (m == currentChar) {
                        /* Two-character operator: consume the second char. */
                        nextChar();
                        currentToken = TOKEN_OPERATOR;
                    }
                    break;
                }
            }
        }
    }
}
/*
 * Append the bytes of g to the code buffer, least-significant byte first.
 *
 * Emission stops as soon as the remaining value is 0 or -1, so between zero
 * and four bytes are written and high bytes that are only 0x00 or 0xff
 * padding are dropped. In particular emitCode(0) writes nothing.
 */
static void emitCode(int g) {
    for (; g != 0 && g != -1; g = g >> 8) {
        char* out = (char*) gProgramCounter;
        *out = g;
        gProgramCounter++;
    }
}
/*
 * Resolve a chain of forward references to the current program counter.
 *
 * e: address of the first 4-byte slot in the chain, or 0 for an empty chain.
 *    Each unresolved slot holds the address of the next slot (0 ends the
 *    chain). Every slot is overwritten with the displacement from the end of
 *    that slot to gProgramCounter.
 *
 * The parameter now has an explicit type; the original relied on implicit
 * int, which is not valid since C99.
 */
static void fixupAddress(int e) {
    while (e) {
        int next = *(int*) e;
        *(int*) e = gProgramCounter - e - 4;
        e = next;
    }
}
/*
 * Emit the opcode bytes in g (see emitCode) followed by a 4-byte immediate.
 *
 * g: opcode bytes, least-significant byte emitted first.
 * e: value stored in the 4 bytes following the opcode.
 * Returns the address of the stored immediate so callers can patch it later
 * (directly or through fixupAddress).
 *
 * Parameters now have explicit types; the original relied on implicit int,
 * which is not valid since C99.
 */
static int emitCodeWithImmediate(int g, int e) {
    int immediateAddress;

    emitCode(g);
    /* Read the program counter only after the opcode has been emitted. */
    immediateAddress = gProgramCounter;
    *(int*) immediateAddress = e;
    gProgramCounter = immediateAddress + 4;
    return immediateAddress;
}
/*
 * Emit opcode B8 followed by a 32-bit immediate (x86 "MOV eax, imm32").
 *
 * e: value to load into the accumulator.
 * Returns the address of the emitted immediate. The original fell off the
 * end of this non-void function without returning anything.
 *
 * Deliberately kept as an old-style (non-prototype) definition: the string
 * literal path in parseTerminal passes a char* here without a cast, which a
 * prototype would turn into a pointer-to-int conversion diagnostic.
 */
static int emitLoadAccumulatorImmediate(e)
    int e;
{
    return emitCodeWithImmediate(0xb8, e); /* B8 imm32 */
}
/*
 * Emit opcode E9 followed by a 32-bit value (x86 "JMP rel32").
 *
 * e: either the final relative displacement, or a forward-reference chain
 *    link (0 or the address of a previous unresolved slot) that is resolved
 *    later by fixupAddress.
 * Returns the address of the 4-byte displacement slot.
 *
 * The parameter now has an explicit type; the original relied on implicit
 * int, which is not valid since C99.
 */
static int emitBranch(int e) {
    return emitCodeWithImmediate(0xe9, e); /* E9 rel32: jump relative */
}
/*
 * Emit a test of the accumulator followed by a conditional near jump.
 *
 * j: 0 selects opcode 0F 84 (jump if zero), 1 selects 0F 85 (jump if not
 *    zero).
 * e: forward-reference chain link stored in the displacement slot
 *    (0 starts a new chain).
 * Returns the address of the 4-byte displacement slot, to be resolved later
 * by fixupAddress.
 *
 * Parameters now have explicit types; the original relied on implicit int,
 * which is not valid since C99.
 */
static int emitTest(int j, int e) {
    /* Bytes emitted: 85 C0 (TEST eax, eax) then 0F (two-byte opcode prefix). */
    emitCode(0x0FC085);
    /* 84+j completes the 0F 8x conditional jump; rel32 follows. */
    return emitCodeWithImmediate(0x84 + j, e);
}
/*
 * Emit code that compares the two operand registers and leaves a 0/1 result
 * in the accumulator.
 *
 * condition: added to 0x90 to form the second byte of the two-byte
 *            "set byte on condition" opcode (0F 9x).
 *
 * Sequence emitted: 39 C1 (CMP), B8 00000000 (clear accumulator),
 * then 0F, 0x90 + condition, C0 (SETcc with ModRM byte C0).
 */
static void emitSetCC(int condition) {
    const int setOnConditionOpcode = condition + 0x90;

    emitCode(0xC139);                  /* 39 C1: CMP */
    emitLoadAccumulatorImmediate(0);   /* accumulator = 0 before the byte-sized set */
    emitCode(0x0F);                    /* two-byte opcode prefix */
    emitCode(setOnConditionOpcode);    /* SETcc opcode byte */
    emitCode(0xC0);                    /* ModRM operand byte of the SETcc instruction */
}
/*
 * Emit a one-byte opcode (0x83 + op) with a memory operand addressed by e.
 *
 * op: offset added to 0x83 to select the instruction; callers in this file
 *     use 0, 6, 8 and 10.
 * e:  operand location. Values below 512 get ModRM byte 0x85, larger values
 *     get ModRM byte 0x05; in both cases e follows as a 32-bit displacement.
 *     NOTE(review): presumably small values are frame-relative offsets of
 *     locals/arguments and large values are absolute addresses of globals.
 */
static void emitNumericOp( int op, int e) {
    int modRm;

    if (e < 512) {
        modRm = 0x85;
    } else {
        modRm = 0x05;
    }
    emitCode(op + 0x83);
    emitCodeWithImmediate(modRm, e);
}
/*
 * Parse one primary/unary expression and emit code that leaves its value in
 * the accumulator, then handle a trailing function-call argument list.
 *
 * level: non-zero when an assignment to a plain variable may be parsed here
 *        (see the "'=' & level" test below). It is reused as the argument
 *        byte offset while parsing a call.
 *
 * Local g tracks what a following '(' should call:
 *   1  - the callee address is the value just computed (indirect call),
 *   0  - a symbol with no known address yet (forward reference),
 *   else the address of the callee.
 */
static void parseTerminal (int level) {
int g,e,m,aa;
g=1;
if( currentToken == '"') {
/* String literal: load its address, then copy the characters into the string area. */
emitLoadAccumulatorImmediate(gStringTable);
while( currentChar != '"') {
unescapeCurrentChar ();
*(char*) gStringTable++=currentChar;
nextChar ();
}
*(char*) gStringTable=0;
/* (addr + 4) & -4: step past the terminator to the next 4-byte boundary. */
gStringTable= (char*) (((int)gStringTable) +4&-4);
nextChar();
nextToken();
}
else {
/* Capture the current token, then advance so currentToken is the look-ahead. */
aa=gCurrentTokenOperatorLevel;
m= currentTokenData;
e=currentToken;
nextToken();
if( e == TOKEN_NUMBER) {
emitLoadAccumulatorImmediate(m);
}
else if( aa == 2) {
/* Level-2 operator used as a prefix: evaluate the operand, then apply the operator with the other register set to 0. */
parseTerminal(0);
emitCodeWithImmediate(0xB9,0); /* B9 imm32: MOV ecx, 0 */
if( e == '!')emitSetCC(m);
else emitCode( m);
}
else if( e == '(') {
/* Parenthesised expression; the nextToken() skips the closing ')'. */
parseExpression ();
nextToken();
}
else if( e == '*') {
/* Dereference through a cast: the tokens after '*' are skipped positionally and only the type name is remembered in e. */
nextToken();
e=currentToken;
nextToken();
nextToken();
if( currentToken == '*') {
/* Longer cast form (presumably a function-pointer cast such as "(int (*)())" - verify); e=0 suppresses the load below. */
nextToken();
nextToken();
nextToken();
nextToken();
e=0;
}
nextToken();
parseTerminal(0);
if( currentToken == '=') {
/* Store through the pointer: save the address, evaluate the value, then store. */
nextToken();
emitCode( 0x50); /* 50: PUSH eax (the address) */
parseExpression ();
emitCode( 0x59); /* 59: POP ecx (the address) */
emitCode( 0x188 + (e == TOKEN_INT)); /* 88 01 (byte store) or 89 01 (32-bit store when the cast type is int) */
}
else if( e) {
/* Load through the pointer; the skipped byte is the ModRM byte, left at the buffer's initial zero. */
if( e == TOKEN_INT)emitCode( 0x8B); /* 8B: 32-bit MOV load */
else emitCode( 0xBE0F); /* 0F BE MOVSX move with sign extension */
gProgramCounter++;
}
}
else if( e == '&') {
/* Address-of: currentToken is the symbol's slot; its first int is the variable's location. */
emitNumericOp(10,*(int*) currentToken); /* 8D LEA */
nextToken();
}
else {
/* Plain symbol: first int of its slot is its address/offset; fall back to dlsym for names not defined in the source. */
g=*(int*) e;
if(!g)g=dlsym(0,M);
/* Bitwise '&' on purpose: assignment is only accepted when level is non-zero. */
if( currentToken == '=' & level) {
nextToken();
parseExpression ();
emitNumericOp(6,g); /* 89: MOV store to the variable */
}
else if( currentToken!= '(') {
emitNumericOp(8,g); /* 8B: MOV load from the variable */
if( gCurrentTokenOperatorLevel == 11) {
/* Level-11 operator (++ / --) after the load: add the operator's data byte to the variable in memory. */
emitNumericOp(0,g); /* 83 ADD */
emitCode( currentTokenData);
nextToken();
}
}
}
}
if( currentToken == '(') {
/* Function call. For an indirect call the callee address is saved on the stack first. */
if( g == 1)emitCode( 0x50); /* 50: PUSH eax */
m= emitCodeWithImmediate(0xEC81,0); /* 81 EC imm32: SUB esp, <argument bytes>; patched below */
nextToken();
level=0;
while( currentToken!= ')') {
parseExpression ();
emitCodeWithImmediate(0x248489,level); /* 89 84 24 disp32: MOV [esp + level], eax */
if( currentToken == ',')nextToken();
level=level +4;
}
/* Now that the argument count is known, patch the stack reservation. */
*(int*) m= level;
nextToken();
if(!g) {
/* Unknown callee: link this call site into the chain kept in the second int of the symbol's slot. */
e=e +4;
*(int*) e=emitCodeWithImmediate(0xE8,*(int*) e); /* E8 rel32: CALL, displacement resolved later */
}
else if( g == 1) {
emitCodeWithImmediate(0x2494FF,level); /* FF 94 24 disp32: CALL [esp + level] (the saved callee address) */
/* Also pop the saved callee address. */
level=level +4;
}
else {
emitCodeWithImmediate(0xE8,g-gProgramCounter-5); /* E8 rel32: CALL to a known address */
}
if( level)emitCodeWithImmediate(0xC481,level); /* 81 C4 imm32: ADD esp, level (release arguments) */
}
}
/*
 * Precedence-climbing parser for binary operators.
 *
 * level: one more than the operator level handled by this call; a call with
 *        level 1 parses a terminal instead. Lower operator levels are
 *        parsed first (deeper in the recursion), so they bind tighter.
 *
 * Operators whose level is above 8 are compiled as short-circuit tests that
 * chain conditional jumps; all others evaluate both sides and combine them.
 */
static void parseBinaryOp (int level) {
int e,g,m;
/* Note the post-decrement: below this line 'level' is the operator level being matched. */
if( level--== 1)parseTerminal(1);
else {
/* Left operand: everything that binds tighter than this level. */
parseBinaryOp (level);
m= 0;
while( level == gCurrentTokenOperatorLevel) {
g=currentToken;
e=currentTokenData;
nextToken();
if( level>8) {
/* Short-circuit operator: test the left value and add a conditional jump to chain m. */
m= emitTest(e,m);
parseBinaryOp (level);
}
else {
/* Save the left value, evaluate the right side, then restore the left value into the other register. */
emitCode( 0x50); /* 50: PUSH eax */
parseBinaryOp (level);
emitCode( 0x59); /* 59: POP ecx */
/* Levels 4 and 5 are the comparison operators; they produce a 0/1 result. */
if( level == 4 | level == 5) {
emitSetCC(e);
}
else {
/* The operator's data value is its encoded instruction bytes. */
emitCode( e);
if( g == '%')emitCode( 0x92); /* 92: XCHG eax, edx - presumably moves the remainder into the accumulator */
}
}
}
/* Finish a short-circuit chain: test the last operand, then materialise the result as e^1 or e. */
if( m&&level>8) {
m= emitTest(e,m);
emitLoadAccumulatorImmediate(e^1);
emitBranch(5); /* Jump relative +5: skip the 5-byte load below */
fixupAddress(m);
emitLoadAccumulatorImmediate(e);
}
}
}
/* Parse a full expression by entering the binary-operator parser at its outermost level. */
static void parseExpression() {
    enum { OUTERMOST_LEVEL = 11 };

    parseBinaryOp(OUTERMOST_LEVEL);
}
/*
 * Parse an expression, then emit a test of its value with a conditional jump
 * whose target is still unresolved.
 *
 * Returns the address of the jump's displacement slot (a new one-element
 * chain) for the caller to resolve with fixupAddress.
 */
static int parseExpressionEmitTest() {
    int pendingJump;

    parseExpression();
    pendingJump = emitTest(0, 0);
    return pendingJump;
}
/*
 * Parse one statement and emit its code.
 *
 * pBreakTarget: head of the chain of pending 'break' jumps of the innermost
 *               enclosing loop, or NULL when the statement is not inside a
 *               loop (as for a function body). A 'break' outside any loop is
 *               now reported as an error; previously it dereferenced NULL.
 *
 * Handles if/else, while, for, compound statements, return, break and
 * expression statements. Keywords and punctuation are skipped positionally
 * with nextToken() rather than being checked.
 */
static void parseStatement (int* pBreakTarget) {
    int m, g, e;

    if (currentToken == TOKEN_IF) {
        nextToken();                        /* 'if' */
        nextToken();                        /* '(' */
        m = parseExpressionEmitTest();      /* jump taken when the condition is false */
        nextToken();                        /* ')' */
        parseStatement(pBreakTarget);
        if (currentToken == TOKEN_ELSE) {
            nextToken();
            g = emitBranch(0);              /* end of the 'then' part: skip the 'else' part */
            fixupAddress(m);                /* false condition lands on the 'else' part */
            parseStatement(pBreakTarget);
            fixupAddress(g);
        } else {
            fixupAddress(m);
        }
    } else if (currentToken == TOKEN_WHILE || currentToken == TOKEN_FOR) {
        e = currentToken;
        nextToken();                        /* keyword */
        nextToken();                        /* '(' */
        if (e == TOKEN_WHILE) {
            g = gProgramCounter;            /* loop back to the condition */
            m = parseExpressionEmitTest();
        } else {
            if (currentToken != ';') parseExpression();     /* initialiser */
            nextToken();
            g = gProgramCounter;            /* loop back to the condition */
            m = 0;
            if (currentToken != ';') m = parseExpressionEmitTest();
            nextToken();
            if (currentToken != ')') {
                /* The increment is emitted before the body: jump over it on
                 * the way in, and make the end of the body jump back to it. */
                e = emitBranch(0);
                parseExpression();
                emitBranch(g - gProgramCounter - 5);
                fixupAddress(e);
                g = e + 4;                  /* first byte of the increment code */
            }
        }
        nextToken();                        /* ')' */
        /* m doubles as the break chain: exits and breaks resolve to the same place. */
        parseStatement(&m);
        emitBranch(g - gProgramCounter - 5);
        fixupAddress(m);
    } else if (currentToken == '{') {
        nextToken();
        parseDeclarations(1);
        while (currentToken != '}') parseStatement(pBreakTarget);
        nextToken();
    } else {
        if (currentToken == TOKEN_RETURN) {
            nextToken();
            if (currentToken != ';') parseExpression();
            /* Chain a jump to the function epilogue. */
            gEndOfFunctionTarget = emitBranch(gEndOfFunctionTarget);
        } else if (currentToken == TOKEN_BREAK) {
            nextToken();
            if (pBreakTarget == NULL) {
                fprintf(stderr, "break statement not within a loop\n");
                exit(1);
            }
            /* Chain a jump to the end of the enclosing loop. */
            *pBreakTarget = emitBranch(*pBreakTarget);
        } else if (currentToken != ';') {
            parseExpression();
        }
        nextToken();                        /* ';' */
    }
}
/*
 * Parse a run of declarations.
 *
 * isLocal: non-zero inside a function body, where only "int" variable
 *          declarations are consumed and each variable gets a stack offset.
 *          Zero at file scope, where "int" declarations allocate globals and
 *          anything else is treated as a function definition, until the
 *          token -1 (end of input) is reached.
 */
static void parseDeclarations (int isLocal) {
int m;
/* Precedence: (token == int) | ((token != -1) & !isLocal). */
while( currentToken == TOKEN_INT | currentToken != -1 & !isLocal ) {
if( currentToken == TOKEN_INT) {
nextToken();
while( currentToken != ';') {
/* currentToken is the address of the symbol's slot; its first int receives the variable's location. */
if( isLocal ) {
/* Local: next 4 bytes of the frame, stored as a negative offset. */
gFunctionStackSize=gFunctionStackSize +4;
*(int*) currentToken=-gFunctionStackSize;
}
else {
/* Global: 4 bytes taken from the string/global data area. */
*(char**) currentToken = gStringTable;
gStringTable=gStringTable +4;
}
nextToken();
if( currentToken == ',')nextToken();
}
nextToken();
}
else {
/* Function definition: resolve calls emitted before the definition, then record the entry address. */
fixupAddress(*(int*)(currentToken + 4));
*(int*) currentToken=gProgramCounter;
/* Skip the function name and the '('. */
nextToken();
nextToken();
/* Parameters get positive offsets 8, 12, ... */
m= 8;
while( currentToken != ')') {
*(int*) currentToken=m;
m= m +4;
nextToken();
if( currentToken == ',')nextToken();
}
nextToken();
gEndOfFunctionTarget=gFunctionStackSize=0;
emitCode( 0xE58955); /* 55 89 E5: PUSH ebp; MOV ebp, esp */
m= emitCodeWithImmediate(0xEC81,0); /* 81 EC imm32: SUB esp, <frame size>; patched below */
/* The body is not inside a loop, so there is no break target. */
parseStatement(0);
/* All 'return' jumps land on the epilogue. */
fixupAddress(gEndOfFunctionTarget);
emitCode( 0xC3C9); /* C9 C3: LEAVE; RET */
/* Patch the frame size now that all locals have been seen. */
*(int*) m= gFunctionStackSize;
}
}
}
/*
 * Compile the C source named by argv[1] (or read from stdin when no file is
 * given).
 *
 * With the "#if 1" branch active, the symbol table, the generated code and
 * the symbol slot table are dumped to stdout. The disabled branch instead
 * calls the compiled main() with the remaining arguments.
 *
 * Returns 0 on success, or -1 if the input file cannot be opened or a work
 * buffer cannot be allocated (allocation failures were previously unchecked
 * and led to a NULL dereference).
 */
int main( int argc, char** argv) {
    pInput = stdin;
    if (argc-- > 1) {
        char* file = argv[1];
        argv += 1;
        pInput = fopen(file, "r");
        if (pInput == NULL) {
            fprintf(stderr, "Could not open file \"%s\"\n", file);
            return -1;
        }
    }

    R = calloc(1, 99999);
    gStringTable = calloc(1, 99999);
    ac = calloc(1, 99999);
    P = calloc(1, 99999);
    if (R == NULL || gStringTable == NULL || ac == NULL || P == NULL) {
        fprintf(stderr, "Out of memory\n");
        return -1;
    }

    /* Pre-load the keywords; the TOKEN_* constants encode their offsets in
     * this string, and 48 is its length (the write cursor starts after it). */
    pSymbolTable = strcpy(R,
            " int if else while break return for define main ") + 48;
    gProgramCounter = (int) ac;

    nextChar();
    nextToken();
    parseDeclarations(0);
#if 1
    fwrite(R, 1, 99999, stdout);
    fwrite(ac, 1, 99999, stdout);
    fwrite(P, 1, 99999, stdout);
    return 0;
#else
    /* Look up the address of "main" in the symbol table and call it.
     * We put main in at a known offset, so we know the address.
     */
    return (*(int(*)()) *(int*) (P + 592))(argc, argv);
#endif
}