//Created by Niemand on 8/1/07.
//No warranty that this code does anything useful, etc. 

#include <iostream>
#include <stdio.h>
#include <fstream>
#include <vector>

using namespace std;

extern char *malloc(), *strcpy();
extern long random();

//The wordNode structure is the building block of the word storage tree. 
//The storage tree is a three dimensional construction of binary trees. 
//The tree has a series of planes, and the number of planes is the depth 
//of the chains that are stored. The first word of a chain will be placed 
//in the binary tree which makes up the first plane. Each node in the first 
//plane has a tree of 'followers', words which can follow after it, which 
//are stored in a tree in the second plane. Thusevery node in the first 
//plane is the base of a tree in the second plane. For chains to depth greater 
//than 2, the nodes in the second plane will be the bases of trees in the next 
//plane for word that can follow them, and so on. 
struct wordNode{
	char* word;
	int count;
	wordNode* follow;
	wordNode* right;
	wordNode* left;
};

//creates a new wordNode and all needed subNodes for the sequence of 
//words in chain. Each word in chain will be on the plane after the 
//word preceding it. 
wordNode* allocNode(char** chain, int depth)
{
	//cout << "Allocating a node for word: " << chain[0] << " with depth " << depth << endl; //for debugging
	wordNode* node = (wordNode*)malloc(sizeof(wordNode));
	node->count=1;
	node->word=(char*)malloc(sizeof(char)*(strlen(*chain)+1)); //+1 to length for null termination!
	strcpy(node->word,*chain);
	node->left=NULL;
	node->right=NULL;
	if(depth>1) //add necessary subnodes
		node->follow=allocNode(++chain, depth-1);
	else
		node->follow=NULL;
	return(node);
}

//Adds a chain of words to the tree specified by base. That is;
//chain[0] is added to the first plane, then chain[1] is added 
//to the follower tree of chain[0], and so on for each word in 
//chain up to depth. When a word is not present in a tree it is 
//added, when already present its count is incremented. 
//base must not be NULL
void addChain(wordNode* base, char** chain, int depth)
{
	wordNode* temp = base;
	while(true){
		int res = strcmp(*chain,temp->word);
		if(res==0){ //words are the same
			if(depth == 1){ //on the final plane, so just increment count
				temp->count+=1;
				break;
			}
			else{ //descend to the next plane
				temp=temp->follow;
				++chain; //advance to the next word in the chain
				--depth;
				continue;
			}
		}
		else if(res>0){ //*chain goes after temp->word
			if(temp->right==NULL){ //that subtree doesn't exist; create it
				temp->right=allocNode(chain,depth);
				break;
			}
			else
				temp=temp->right;
		}
		else{ //*chain goes before temp->word
			if(temp->left==NULL){ //that subtree doesn't exist; create it
				temp->left=allocNode(chain,depth);
				break;
			}
			else
				temp=temp->left;
		}
	}
}

//reads all of the words in the specified stream and adds all chains
//that are found to the tree structure rooted at base. If base is null
//a new tree will be begun, and the resulting tree is returned. 
wordNode* parseFile(wordNode* base, istream &infile, int depth,int verbose)
{
	char** words = (char**)malloc(depth*sizeof(char*));
	words[0] = (char*)malloc(depth*256*sizeof(char));
	for(int i = 1; i<depth; i++)
		words[i] = words[0]+(256*i);
	char* last = words[depth-1];
	long numWords=0;
	while(infile.good()){
		infile.width(256);
		infile >> last;
		++numWords;
		if(numWords>depth){
			addChain(base,words,depth);
		}
		else if(numWords==depth){
			if(base==NULL)
				base = allocNode(words,depth);
			else
				addChain(base,words,depth);
		}
		memmove(words[0],words[1],256*(depth-1));
	}
	if(verbose)
		cout << numWords << " words read\n";
	if(base!=NULL){ //link end of text to beginning
		wordNode* temp = base;
		for(int i=0; i<(depth-1); i++){
			strcpy(words[depth-1],temp->word);
			addChain(base,words,depth);
			memmove(words[0],words[1],256*(depth-1));
			temp=temp->follow;
		}
	}
	return(base);
}

//changes the structure of the portion of the tree below base to be a linked 
//list rather than a tree. In the linked list, nodes are ordered increasingly
//with the next node being to the right of the node which precedes it; the left
//sides are ignored in this format. 
wordNode* convertToLinkedList(wordNode* base)
{
	wordNode* before=((base->left != NULL)?convertToLinkedList(base->left):NULL);
	wordNode* after=((base->right != NULL)?convertToLinkedList(base->right):NULL);
	base->right=after;
	base->left=NULL;
	if(before!=NULL){
		wordNode* temp = before;
		while(temp->right!=NULL)
			temp=temp->right;
		temp->right=base;
		return(before);
	}
	return(base);
}

//converts the final plane of the tree into linked lists rather than binary 
//trees. The count of the node which each linked list follows is set to be 
//the sum of all counts in the linked list. Thus the relative frequency of
//each follower is its count divided by the count of the word that the list
//follows. 
void vectorizeLeaves(wordNode* base)
{
	if(base->follow->follow == NULL){
		base->follow = convertToLinkedList(base->follow);
		wordNode* temp = base->follow;
		base->count=0;
		while(temp!=NULL){
			base->count+=temp->count;
			temp=temp->right;
		}
	}
	else if(base->follow != NULL)
		vectorizeLeaves(base->follow);
	if(base->left!=NULL)
		vectorizeLeaves(base->left);
	if(base->right!=NULL)
		vectorizeLeaves(base->right);
}

//finds a random follower for chain, the depth given must 
//be one less than the depth stored in base
char* findNext(wordNode* base, char** chain, int depth)
{
	wordNode* temp = base;
	//locate the followers of chain in the word tree
	while(depth>0){
		if(*chain==NULL)
			return(temp->word);
		int res = strcmp(*chain,temp->word);
		if(res==0){
			if(depth>1){
				temp=temp->follow;
				++chain;
				--depth;
				continue;
			}
			else
				break;
		}
		else if(res>0){
			temp=temp->right;
		}
		else{
			temp=temp->left;
		}
	}
	//choose a random follower according to relative frequencies
	int res = (random() % (temp->count));
	temp = temp->follow;
	while(temp != NULL){
		res-=temp->count;
		if(res<0)
			return(temp->word);
		else
			temp=temp->right;
	}
	return(NULL);
}

//write out the specified number of words. The starting point
//is taken to be the base of the word tree. 
void printText(wordNode* base, int numWords, int depth)
{
	//allocate storage for the word history
	char** words = (char**)malloc(depth*sizeof(char*));
	words[0] = (char*)malloc(depth*256*sizeof(char));
	for(int i = 1; i<depth; i++)
		words[i] = words[0]+(256*i);
	char** last = words+(depth-1);
	wordNode* seed = base;
	//read base of tree into history
	for(int i=0; i<depth-1; i++){
		words[i] = seed->word;
		seed=seed->follow;
		cout << words[i] << " ";
	}
	//follow the chains outputting words until the desired quantity has been reached
	for(int i=0; i<numWords; i++){
		*last = findNext(base, words, depth-1);
		cout << *last << " ";
		memmove(words,words+1,(depth-1)*sizeof(char*));
	}
}

//print info on using
void printHelp()
{
	cout << "\nMarkov_Madness Parameters\n";
	cout << "\t-h: print this help message\n";
	cout << "\t-v: verbose mode, prints extra information while running\n";
	cout << "\t-q: quiet mode; no verbose output. This is the default.\n";
	cout << "\t-d=[depth]: sets the depth of the word chain analysis; must be at\n\t\tleast 2 and defaults to 3\n";
	cout << "\t-l=[length]: sets the number of words output to be length, defaults to 100\n";
	cout << "\t-s=[seed]: sets the random number generator seed, defaults to time\n\t\tsince Jan 1, 1970 in seconds\n";
	cout << " All other arguments are assumed to be files from which words are to be read.\n";
	cout << " If no files are specified, reading is from standard input.\n";
	cout << " Output is written to standard output.\n";
}

/*parses a portion of a string as an integer:
Characters parsed are from start to end-1 inclusive.
The resulting value is stored to dest, and if a bad 
character is encountered, it is returned. A return of
zero means no errors occurred.*/
int parseInt(string arg, int start, int end, long &dest)
{
	long temp=0;
	short sign=1;
	for(int i=start; i<end; i++){
		char c = arg[i];
		if((c<48 || c>57) && c!=45)
			return(c);
		if(c==45)
			sign*=-1;
		else
			temp = (10*temp)+(c-48);
	}
	dest = sign*temp;
	return(0);
}

//parse a flag of the form f=### where f is the flag character
//and ### are the digits of an integer
int parseFlagValue(string arg, int j, long &val, int &len)
{
	if(j>arg.size()-3 || arg[j+1]!='='){
		cerr << "Bad parameter: " << arg << "\n\tIncorrect usage of the \'" << arg[j] << "\' flag. See the help listing.\n";
		return(1);
	}
	int k=arg.length()-1;
	for(int n=j+2; n<=arg.length(); n++){
		if(n==arg.length() || (!(arg[n]>=48 && arg[n]<=57) && arg[n]!='-')){
			k=n;
			break;
		}
	}
	if(k==j+2){
		cerr << "Bad parameter: " << arg << "\nMissing number after the \'" << arg[j] << "\' flag.\n";
		return(1);
	}
	len = k-j;
	return(parseInt(arg,j+2,k,val));
}

int main(int argc, char** argv)
{
	//set default values
	long depth = 3;
	int verbose = 1;
	long seed = time(0);
	vector<string> filenames;
	long outLen = 100;
	bool showHelp=false;
	
	//read command line parameters
	char err;
	int skip;
	for(int i=1; i<argc; i++){
		string arg = argv[i];
		if(arg.find("-")==0){ //flags
			for(int j=1; j<arg.length(); j++){
				char c = arg[j];
				//single character flags
				if(c=='v')
					verbose=1;
				else if(c=='q')
					verbose=0;
				else if(c=='h')
					showHelp=true;
				//valued flags
				else if(c=='d'){
					if(err=parseFlagValue(arg,j,depth,skip))
						exit(1);
					if(depth<2){
						cerr << "Chain depth must be >= 2\n";
						exit(1);
					}
					j+=skip;
				}
				else if(c=='l'){
					if(err=parseFlagValue(arg,j,outLen,skip))
						exit(1);
					if(outLen<0){
						cerr << "Output length must be positive\n";
						exit(1);
					}
					j+=skip;
				}
				else if(c=='s'){
					if(err=parseFlagValue(arg,j,seed,skip)){
						cout << "Error " << err << endl;
						exit(1);
					}
					j+=skip;
				}
			}
		}
		else{
			filenames.push_back(arg);
		}
	}
	if(showHelp)
		printHelp();
	srandom(seed);
	if(verbose){
		cout << "chain depth: " << depth << "\nseed: " << seed << "\noutput size: " << outLen << endl;
	}
	
	//read in data
	wordNode* base = NULL;
	if(filenames.size()>0){
		for(int i=0; i<filenames.size(); i++){
			ifstream input(filenames[i].c_str());
			if(input.fail()){
				cerr << "Failed to open: " << filenames[i] << endl;
				exit(1);
			}
			else if(verbose)
				cout << "Reading from " << filenames[i] << endl;
			base = parseFile(base,input,depth,verbose);
			input.close();
		}
	}
	else{
		if(verbose)
			cout << "Reading from standard input\n";
		base = parseFile(base,cin,depth,verbose);
	}
	
	//reformat storage for look up
	if(verbose)
		cout << "Rearranging leaves\n";
	vectorizeLeaves(base);
	
	//generate output
	if(verbose)
		cout << "Printing text\n------------\n";
	printText(base,outLen,depth);
	if(verbose)
		cout << "\n------------\nDone.\n";
	return(0);
}

