/*
    Fall 2009 starter code for CS241 assignments 9 and 10 that uses
    the GString, List and Dictionary libraries.

    Based on Scheme code by Gord Cormack. Java translation by Ondrej Lhotak.
    C translation by Brad Lushman with further modifications by John Beatty.
    Last modified: 22 Feb 2010 by JCBeatty.

    ---------------------------------------------------------------

    To compile on a CSCF linux machine, use:

       gcc -g -I ~cs241/cLibs/headers -L ~cs241/cLibs/libraries wlgen.c -l241-amd-linux -o wlgen

       gcc -g -DDMALLOC -DDMALLOC_FUNC_CHECK -I ~cs241/cLibs/headers -L ~cs241/cLibs/libraries wlgen.c -l241-amd-linux-dmalloc -o wlgendmalloc

    Substitute "sun-sparc" for "amd-linux" if you're compiling on a Solaris machine.

    If C_INCLUDE_PATH have been properly initialized by /u/cs241/setup
    (use "printenv C_INCLUDE_PATH" and "printenv LIBRARY_PATH" to check):

       gcc -g wlgen.c -l241-amd-linux -o wlgen

       gcc -g -DDMALLOC -DDMALLOC_FUNC_CHECK wlgen.c -l241-amd-linux-dmalloc -o wlgenDmalloc

    As above, substitute "sun-sparc" for "amd-linux" if you're compiling on a Solaris machine.

    To run:
            ./wlgen         < source.wli > source.asm
            ./wlgenDmalloc  < source.wli > source.asm
            valgrind wlgen  < source.wli > source.asm

 */

// -----------------------------------------------------------------------------------------------------
//
// Headers.
//
// -----------------------------------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>

#include "GString.h"
#include "List.h"
#include "Dictionary.h"

// Dmalloc (http://dmalloc.com) provides cover routines for memory management and string routines that
// catch many ptr errors, array over/underwrites, and memory leaks. For information regarding the use of
// dmalloc in CS 241 see http://www.student.cs.uwaterloo.ca/~jcbeatty/cs241/documentation/index.html.
// Quickstart instructions: (1) make dmallocFor241.h accessible to the compiler; (2) make the library
// libdmalloc.a accessible to the compiler; (3) compile with -DDMALLOC and -DDMALLOC_FUNC_CHECK.
// Note 1: this particular program will write dmalloc output to stderr if you also compile with -DIDE.
// [See the call to dmalloc_setup(...) in main(...).]
// Note 2: marmoset will NOT define these macros when it compiles a submission and will NOT load libdmalloc.a.
#ifdef DMALLOC
   #include "dmallocFor241.h"
#endif

// Print debugging info if true.
#define DEBUG false

// -----------------------------------------------------------------------------------------------------
//
// Prototypes.
//
// -----------------------------------------------------------------------------------------------------

typedef struct StringPtrArray_ * StringPtrArrayPtr;
typedef struct TreeNode_       * TreePtr;
typedef struct SymbolTable_    * SymbolTablePtr;

void      bail(                ListPtr           rule );        // Reports a failure in pass 1 or 2 to match a rule. Should never happen.
void      panicExit(           char            * rule );        // Report a more general disaster and exit the program, reporting a non-zero status code.
void      printStringPtrArray( StringPtrArrayPtr spa  );        // For debugging only.
void      printTreeNode(       TreePtr           tree );        // For debugging only.
void      printTree(           TreePtr           tree );        // For debugging only.
void      printSymbolTable(    SymbolTablePtr symbolTable );    // For debugging only.

int       main(                int argc, char * argv[]                                          );
TreePtr   readParse(           char * grammarSymbol                                             );    // Pass one.
void      symbolsDeclaredIn(   TreePtr tree, SymbolTablePtr symbolTable                         );    // Pass two.
void      generateCodeFor(     TreePtr tree, SymbolTablePtr symbolTable, StringPointer assembly );    // Pass three.

// -----------------------------------------------------------------------------------------------------
//
// Terminal symbols.
//
// -----------------------------------------------------------------------------------------------------

/* The set of terminal symbols in the WL grammar. */
char * terminals[] = { "BOF", "BECOMES", "COMMA", "ELSE", "EOF", "EQ", "GE",
    "GT", "ID", "IF", "INT", "LBRACE", "LE", "LPAREN", "LT", "MINUS", "NE",
    "NUM", "PCT", "PLUS", "PRINTLN", "RBRACE", "RETURN", "RPAREN", "SEMI",
    "SLASH", "STAR", "WAIN", "WHILE" };

bool isTerminal( char *sym ) {
    int idx;
    for( idx=0; idx < sizeof(terminals)/sizeof(char*); idx++ )
        if( ! strcmp(terminals[idx],sym) ) return 1;
    return 0;
}

// -----------------------------------------------------------------------------------------------------
//
// Support for representing and working with a parse tree.
//
// -----------------------------------------------------------------------------------------------------

// Data structure for representing the parse tree. Abstractly, leaves are terminals & internal nodes are
// non-terminals. However, in the data structure used here epsilon rules break this abstraction.
// In particular, the node for the lhs of an epsilon rule is, of course, a non-terminal but the node
// has no children (ie .nChildren == 0 and .children == NULL). In other words, we do not explicitly
// represent epsilon by a leaf in the tree.
//
// Details: (1) leaves of the tree have nChildren == 0, but rule has TWO elements, the second of which is the
// string scanned that was recognized as an instance of this terminal symbol.
//
// (2) A leaf corresponds EITHER to a terminal node OR to a non-terminal which is the LHS of an epsilon rule
// (which has no children).
//
// (3) One weirdness: for terminals the rule field consists not just of the terminal symbol, but has the form
// { <terminal>, <lexemeForTerminal> } - eg { INT, int } or { ID, foo } or { NUM, 42 } or { WAIN, wain } -
// because for some terminals (ie NUM and ID) you must retain the originating lexical string somewhere,
// and this is as good a place as any.
//
// (4) The primary use for .rule is to compare it against the literal representation of a rule as a string
// -- eg "S BOF procedure EOF" -- in passTwo(...) or passThree(...). At first blush it therefore seems perverse
// and inefficient to represent rule as a list of token strings -- eg { "S", "BOF", "procedure", "EOF" } --
// as we then have to repeatedly tokenize the rule literals to compare them with values of .rule. Arguably,
// however, doing so makes the compiler a bit more robust in that it doesn't care how much whitespace exists
// between the tokens in such literal rule strings. And the programs we compile are generally small enough
// that the additional overhead can be forgiven.

typedef struct TreeNode_ {
    ListPtr rule;        // A list of C-strings: entry [0] is the LHS; entries [1] ... [nChildren] are the RHS.
    int     nChildren;
    ListPtr children;    // children is a pointer to a List of pointers to (other) TreeNode structs.
} TreeNode;

// Free the heap memory occupied by the contents of the tree supplied.
void freeTree( TreePtr tree ) {
    int idx;
    destroyList( tree->rule, freeCString_callback );
    if( tree->nChildren > 0 ) {
        for( idx=0; idx < tree->nChildren; idx++ ) {
            freeTree( getPtrAtIndexInList( idx, tree->children ) );
        }
        // The pointers in the list have already been freed by freeTree(...) in the loop just above.
        destroyList( tree->children, NULL );
    }
    free( tree );
}

// Divide a string into a sequence of tokens, which are represented as a list of C-strings.
// Warning: strtok(...) modifies the string it's scanning; see man 3 strtok for details.
// Applied to lines read from the *.wli file supplied as input to the compiler.
ListPtr tokenize( char * line ) {
    ListPtr   tokens    = newEmptyList( UseListArray );               // A List of C-strings.
    char    * nextToken = strtok( line, " \n" );                      // Assume there's always at least one token (RELAX ASSUMPTION?).
    do {
        char * tokenCopy = (char*) malloc( strlen(nextToken) + 1 );   // +1 for the terminating '\0'
        strcpy( tokenCopy, nextToken );                               // Because addPtrToEndOfList(...) copies the pointer, NOT the C-string it points to.
        addPtrToEndOfList( tokenCopy, tokens );
    } while( (nextToken = strtok(NULL," \n")) );
    return tokens;
}

/* Does this node's rule match otherRule? */
bool doesTreeNodeMatchRule( TreePtr tree, char * otherRule ) {

    int    result     = true;
    char * copyOfRule = (char*) malloc( strlen(otherRule) + 1  );

    // Tokenize a COPY of otherRule because strtok(...) is destructive. If we did not make a copy first,
    // we would be changing the actual value of a literal string in symbolsDeclaredIn(...) or genCode(...)!
    ListPtr tokens;
    strcpy( copyOfRule, otherRule );
    tokens = tokenize( copyOfRule );

    if( lengthOfList(tree->rule) != lengthOfList(tokens) ) {
        result = false;
    } else {
        int idx;
        for( idx=0; idx < lengthOfList(tree->rule); idx++ ) {
            if( strcmp( getPtrAtIndexInList(idx,tree->rule), getPtrAtIndexInList(idx,tokens) ) != 0 ) {
                result = false;
                break;
            }
        }
    }

    destroyList( tokens, freeCString_callback );
    free( copyOfRule );
    return result;
}

// -----------------------------------------------------------------------------------------------------
//
// Support for representing and working with a symbol table.
//
// -----------------------------------------------------------------------------------------------------

typedef struct SymbolTable_ {
    char        * argOne;     // The 1st argument to wain.
    char        * argTwo;     // The 2nd argument to wain.
    DictionaryPtr symbols;    // All the identifiers appearing in the program, including argOne and argTwo (once declared).
} SymbolTable;

// The value associated with a key added to a Dictionary when we don't actually have a value.
// We use this in the skeleton; you'll need to declare a struct with fields for whatever information
// you want a symbol table entry to contain, create one for each symbol, and pass its address instead
// of NullObjPtr when you insert an entry into the symbol table. If you're free'ing memory when you're
// done with it, you'll also need to define a callback routine to free that struct when a directory entry
// is deleted and pass that callback to destroyDictionary(...) (as explained in the documentation).
char   NullObj;
void * NullObjPtr = &NullObj;

// -----------------------------------------------------------------------------------------------------
//
// Main.
//
// -----------------------------------------------------------------------------------------------------

int main( int argc, char * argv[] ) {

    TreeNode      * parseTree;                                                      // Reconstructed from a *.wli file.
    SymbolTable     symbolTable = { NULL, NULL, makeNewDictionary(UseHashMap) };
    StringPointer   assembly    = newEmptyStringWithCapacity( 1000 );               // Where the assembly language this program generates is placed.

    #if defined(IDE) && defined(DMALLOC)
        // 1st param - one of: DMALLOC_runtimeFor241, DMALLOC_lowFor241, DMALLOC_mediumFor241, DMALLOC_highFor241.
        // 2nd param - most useful possibilities are: print-messages, log=logfile, inter=100, and log-trans
        //             (logs all calls to dmalloc cover routines). Separate these tokens by commas when you
        //             want to supply more than one - eg "print-messages,inter=10".
        dmalloc_setup( DMALLOC_highFor241, "print-messages" );
    #endif

    // argv[0] is always the name of the program being run.
    // argv[1], when present, is assumed to be a path to an input wli file;
    //          if it's not present then we just read from whateve the shell connected to stdin.
    if( argc == 2 ) {
        if( DEBUG ) fprintf( stderr, "argv[0] = %s\nargv[1] = %s\n", argv[0], argv[1] );
        // Unfortunately we can't just assign a newly opened file pointer to stdin on Solaris, so we do it this way...
        FILE * fp = fopen( argv[1], "r" );
        if( fp == NULL ) panicExit( "can't open the specified input file" );
        int fd = fileno(fp);
        if( dup2(fd,STDIN_FILENO) ) panicExit( "failed to replace stdin by the specified input file" );
        close(fd);
    }

    parseTree = readParse( "S" );                            // Read a *.wli input file, (re)building the program's parse tree.
    symbolsDeclaredIn( parseTree, &symbolTable );            // Walk the tree, building a list of the variables declared in it.
    generateCodeFor( parseTree, &symbolTable, assembly );    // Walk the parse tree, generating code.
    printf( assembly->string );

    if( DEBUG ) {
        fputc( '\n', stderr );
        printTree( parseTree );
        printSymbolTable( &symbolTable );
        fputc( '\n', stderr );
    }

    freeString( assembly );
    destroyDictionary( symbolTable.symbols, NULL );          // All the values are NullObjPtr, which is on the stack, not in the heap.
    freeTree( parseTree );

    #if defined(DMALLOC) && false
        // Actually dmalloc_shutdown(...) is called automatically at exit. But sometimes when debugging it's
        // useful to dump the list of unfreed memory *before* exiting so we can poke around with a debugger.
        dmalloc_shutdown();
    #endif

    fclose( stdin );
    return 0;
}

// -----------------------------------------------------------------------------------------------------
//
// Pass one - building the parse tree.
//
// -----------------------------------------------------------------------------------------------------

// Read a *.wli file, reconstructing and returning the program's parse tree.
TreePtr readParse( char * grammarSymbol ) {

    int              idx;
    char             line[256];
    ListPtr          tokens;
    TreePtr          tree;

    fgets( line, 256, stdin );
    tokens = tokenize( line );

    tree = malloc( sizeof(TreeNode) );
    tree->rule      = tokens;
    tree->nChildren = 0;
    tree->children  = NULL;

    // tokens.size is always > 0.
    // If tokens.size == 1 we have an eps-rule (==> token 1 is a variable and there are no children)
    // If tokens.size == 2 then either we have a terminal (==> no children) or we have a chain rule A --> B (==> one child).
    // For all other cases there are children.
    if( lengthOfList(tokens) > 1 && (! isTerminal(grammarSymbol)) ) {
        tree->children = newEmptyList( UseListLinked );
        for( idx = 1; idx < lengthOfList(tokens); idx++ ) {
            char * next = (char*) getPtrAtIndexInList( idx, tokens );
            addPtrToEndOfList( readParse( next ), tree->children );
            tree->nChildren++;
        }
    }
    return tree;
}

// -----------------------------------------------------------------------------------------------------
//
// Pass two - building a symbol table.
//
// -----------------------------------------------------------------------------------------------------

// Identify the symbols defined in tree by walking it.
// Pre:  tree is a structurally correct parse tree built by readPars(...);
//       symbolTable is a Dictionary containing symbols appearing previously in the program.
// Post: symbolTable is updated to contain the symbols appearing the subtree supplied;
//       by the time we're done (if the program is semantically correct), symbolTable->argOne
//       and symbolTable->argTwo are the identifiers supplied as arguments to wain.
void symbolsDeclaredIn( TreePtr tree, SymbolTablePtr symbolTable ) {

    if( doesTreeNodeMatchRule(tree,"S BOF procedure EOF") ) {
        // recurse on procedure
        symbolsDeclaredIn( getPtrAtIndexInList(1,tree->children), symbolTable );
    }
    else if( doesTreeNodeMatchRule(tree,"procedure INT WAIN LPAREN dcl COMMA dcl RPAREN LBRACE dcls statements RETURN expr SEMI RBRACE") ) {
        // Recurse on dcl and dcl
        symbolsDeclaredIn( getPtrAtIndexInList(3,tree->children), symbolTable );
        symbolsDeclaredIn( getPtrAtIndexInList(5,tree->children), symbolTable );
    }
    else if( doesTreeNodeMatchRule(tree,"dcl INT ID") ) {
        // recurse on ID
        symbolsDeclaredIn( getPtrAtIndexInList(1,tree->children), symbolTable );
    }
    else if( ! strcmp( getPtrAtIndexInList(0,tree->rule), "ID") ) {
        char * identifier = (char*) getPtrAtIndexInList( 1, tree->rule );
        putItemIntoDictionaryForKey( NullObjPtr, symbolTable->symbols, identifier );
        if(      symbolTable->argOne == NULL ) symbolTable->argOne = identifier;    // Must be ID in dcl --> INT ID for 1st arg to wain(...).
        else if( symbolTable->argTwo == NULL ) symbolTable->argTwo = identifier;    // Must be ID in dcl --> INT ID for 2nd arg to wain(...).
    }
    else bail( tree->rule );    // Should never happen...

    return;
}

// -----------------------------------------------------------------------------------------------------
//
// Pass three - code generation.
//
// -----------------------------------------------------------------------------------------------------

// Generate MIPS assembly code for the parse tree.
// Pre:  the identifiers appearing in the program have all been added to symbolTable by symbolsDeclaredIn(...).
// Post: returns a string containing the MIPS assembly code generated for the program fragment represented by tree.
void generateCodeFor( TreePtr tree, SymbolTablePtr symbolTable, StringPointer assembly ) {

    if( doesTreeNodeMatchRule(tree,"S BOF procedure EOF") ) {
        generateCodeFor( getPtrAtIndexInList(1,tree->children), symbolTable, assembly );
        appendCharsToString( "jr $31\n", assembly );
    }
    else if( doesTreeNodeMatchRule(tree,"procedure INT WAIN LPAREN dcl COMMA dcl RPAREN LBRACE dcls statements RETURN expr SEMI RBRACE") ) {
        generateCodeFor( getPtrAtIndexInList(11,tree->children), symbolTable, assembly );
    }
    else if( doesTreeNodeMatchRule(tree,"expr term") ) {
        generateCodeFor( getPtrAtIndexInList(0,tree->children), symbolTable, assembly );
    }
    else if( doesTreeNodeMatchRule(tree,"term factor") ) {
        generateCodeFor( getPtrAtIndexInList(0,tree->children), symbolTable, assembly );
    }
    else if( doesTreeNodeMatchRule(tree,"factor ID") ) {
        generateCodeFor( getPtrAtIndexInList(0,tree->children), symbolTable, assembly );
    }
    else if( ! strcmp( getPtrAtIndexInList(0,tree->rule), "ID") ) {    // strcmp(...) returns 0 == false iff its args are the *same* C-string.
        char * name = (char*) getPtrAtIndexInList( 1, tree->rule );
        if( ! strcmp( name, symbolTable->argOne ) ) appendCharsToString( "add $3,$0,$1\n", assembly );
        if( ! strcmp( name, symbolTable->argTwo ) ) appendCharsToString( "add $3,$0,$2\n", assembly );
    }
    else bail( tree->rule );
}

// -----------------------------------------------------------------------------------------------------
//
// Misc helpers.
//
// -----------------------------------------------------------------------------------------------------

// Print an error message regarding an un-recogized rule and exit the program, reporting a non-zero status code.
// Pre: the parameter is a List of char*'s.
void bail( ListPtr rule ) {
    int i;
    fprintf(stderr, "ERROR: unrecognized rule");
//    for (i=0; i < msg.size; i++) fprintf(stderr, " %s", msg.ptrArray[i]);
    for( i = 0; i < lengthOfList(rule); i++ ) {
        fprintf( stderr, " %s", (char*) getPtrAtIndexInList(i,rule) );
    }
    fprintf(stderr, "\n");
    exit(1);
}

// Report a more general disaster and exit the program, reporting a non-zero status code.
void panicExit( char * rule ) {
    fprintf( stderr, "ERROR: %s\n.", rule );
    exit(1);
}

// -----------------------------------------------------------------------------------------------------
//
// Useful when debugging.
//
// -----------------------------------------------------------------------------------------------------

// Pre: the parameters is a list of char *'s.
void printListOfCStrings( ListPtr list ) {
    if( list == NULL ) {
        fprintf( stderr, "list is <null>\n" );
    } else {
        fprintf( stderr, "%d: ", lengthOfList(list) );
        int idx;
        for( idx = 0; idx < lengthOfList(list); idx ++ ) {
            fprintf( stderr, "%s ", (char*) getPtrAtIndexInList(idx,list) );
        }
        fprintf( stderr, "\n" );
    }
}

void printTreeNode( TreePtr tree ) {
    fprintf( stderr, "\n" );
    if( tree == NULL ) {
        fprintf( stderr, "ptr is <null>\n" );
    } else {
        fprintf( stderr, ".rule      = " );
        printListOfCStrings( tree->rule );
        fprintf( stderr, ".nChildren = %d\n", tree->nChildren );
        int idx;
        for( idx = 0; idx < tree->nChildren; idx++ ) {
            fprintf( stderr, "    %2d: ", idx );
            TreePtr child = getPtrAtIndexInList( idx, tree->children );
            printListOfCStrings( child->rule );
        }
    }
}

void printTree( TreePtr tree ) {
    if( tree == NULL ) {
        return;
    } else {
        StringPointer gstr = listToStringUsingFunction( tree->rule, makeGStringFromCString_callback, "", " ", "" );
        fprintf( stderr, "%s\n", gstr->string );
        freeString( gstr );
        int idx;
        for( idx = 0; idx < tree->nChildren; idx++ ) {
            printTree( getPtrAtIndexInList( idx, tree->children ) );
        }
    }
}

void printSymbolTable( SymbolTablePtr symbolTable ) {
    int idx;
    fprintf( stderr, "\nSymbol Table:\n\n" );
    fprintf( stderr, "     wain( %s, %s )\n\n", symbolTable->argOne, symbolTable->argTwo );
    DictionaryPointerArray * keys = dictionaryKeys( symbolTable->symbols );
    for( idx = 0; idx < keys->length; idx++ ) {
        fprintf( stderr, "     %s\n", (char*) keys->pointers[idx] );
    }
    fputc( '\n', stderr );
    free( keys );
}

// Will be compiled only if DMALLOC is defined (eg "gcc -DDMALLOC...")
#ifdef DMALLOC
int bytesForHeapPtr( void * heapPtr ) {
    DMALLOC_SIZE    userSize;        // Bytes allocated to hold data.
    DMALLOC_SIZE    totalSize;       // Total bytes allocated, including administrative overhead.
    // The other parameters, if not null, return other info we're not interested in.
    if( dmalloc_examine( heapPtr, &userSize, &totalSize, NULL, NULL, NULL, NULL, NULL ) == DMALLOC_ERROR ) {
        fprintf( stderr, "dmalloc_examine error." );
        return -1;
    } else {
        return userSize;
    }
}
#endif

