/* Code to parse and read in an OCG output file.
 * Will be loaded as a shared object into R (dyn.load('name.so')) and called from within R.
 *
 * Author: Alex T. Kalinka (alex.t.kalinka@gmail.com)
 *
 * Reads in data generated by the OCG algorithm of Becker et al. (2012).
 *
*/

#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>

extern "C" {
#include <R.h>

using namespace std;


void readOCG(char **file, int *Modularity, double *Q)
	{

	int i, clusid = 0, noUM = 0;
	string line, datum, sub, sub2;
	bool unclust = FALSE, multi = FALSE, cluster = FALSE, first = TRUE, mod = FALSE, last = FALSE, go = FALSE;

	ifstream infile;
	ofstream outfile;
	stringstream ss (stringstream::in | stringstream::out);


	infile.open(*file, ios::in | ios::binary);
	outfile.open("OCG_numclusters.txt", ios::out | ios::binary);

	if(! infile.is_open()){
		Rprintf("\nERROR: %s not found!\n",*file); return;
		}

	
	while(getline(infile, line)){
		if(line.length() == 0 && !go){
			continue;
			}
		go = FALSE;
		sub = line.substr(0,17);
		if(sub.compare("Unclustered nodes") == 0){
			unclust = TRUE;
			sub2 = line.substr((line.length()-4),4);
			if(sub2.compare("None") == 0){
				noUM++;
				}
			continue;
		}else if(sub.compare("Multiclustered no") == 0){
			multi = TRUE;
			sub2 = line.substr((line.length()-4),4);
			if(sub2.compare("(0):") == 0){
				noUM++;
				go = TRUE;
				}
			continue;
			}
		if(!unclust && !multi){
			continue;
			}
		sub = line.substr(0,7);
		if(sub.compare(">Class ") == 0){
			if(first){
				outfile.close();
				outfile.open("OCG_clusters.txt",ios::out | ios::binary);
				first = FALSE;
				}
			cluster = TRUE;
			clusid++;
			continue;
		}else if(sub.compare("Final c") == 0){
			ss << line;
			while(ss >> datum){
				if(mod){
					*Modularity = atoi(datum.c_str());
					mod = FALSE;
					last = TRUE;
					continue;
					}
				if(datum.compare("=") == 0){
					mod = TRUE;
					continue;
					}
				if(last){
					datum.erase(datum.begin());
					datum.erase(datum.end()-2, datum.end());
					*Q = atof(datum.c_str());
					}
				}
			ss.clear();
			continue;
			}

		if(multi && !cluster && noUM == 2){ // No unclustered or multiclustered nodes.
			outfile << "NoUM" << endl;
			continue;
			}

		ss << line;
		i = 1;
		// Process this line.
		while(ss >> datum){
			if(multi && !cluster){
				if(i % 2 != 0){ // Node.
					outfile << datum << " ";
				}else{ // Number of clusters.
					datum.erase(datum.begin());
					datum.erase(datum.end()-2, datum.end());
					outfile << datum << endl;
					}
			}else if(!multi && !cluster){ // Unclustered (no separating commas).
				outfile << datum << " 0" << endl;
			}else if(multi && cluster){
				outfile << datum << " " << clusid << endl;
				}
			
			
			i++;
			}
		cluster = FALSE;
		ss.clear();
		}

	infile.close();
	outfile.close();

	}

	}








