//
//  KissReadsGraph.cpp
//  kissreads_g
//
//  Created by Pierre Peterlongo on 02/01/13.
//  Copyright (c) 2013 Pierre Peterlongo. All rights reserved.
//

#include"commons.h"
#include"Loader.h"
#include"ReadMapper.h"
#include <zlib.h> // Added by Pierre Peterlongo on 02/08/2012.


extern int size_seeds;
extern unsigned int nbits_nbseeds;
extern uint64_t  mask_nbseed ;
extern uint64_t  mask_offset_seed;

char * getVersion(){
	return (char *)"1.0.0 Beta - Copyright INRIA - CeCILL License";
}
//#define VERBOSE


void print_usage_and_exit(char * name){
	fprintf (stderr, "NAME\n%s, version %s\n", name, getVersion());
	fprintf (stderr, "\t -h prints this message and exit\n");
	fprintf (stderr, "\nUSAGE\n%s input_graph <readsC1.fasta/fastq[.gz]> [<readsC2.fasta/fastq[.gz]> [<readsC3.fasta/fastq[.gz] ...] -M -t type [-k value] [-c min_coverage] [-d max_substitutions] [-o name] [-h] \n", name);
	fprintf (stderr, "\nDESCRIPTION\n");
	fprintf (stderr, "%s maps the provided reads on the graph\n",name);
	fprintf (stderr, "With option \"-t coverage\": outputs an equivalent graph removing uncovered edges and adding:\n");
	fprintf (stderr, "\t for each node: the coverage per sample and per position\n");
    fprintf (stderr, "\t for each edge: the number of mapped reads per sample using this edge\n");
	fprintf (stderr, "With option \"-t modify\": outputs the same simplified graph:\n");
	fprintf (stderr, "\t removing low covered edges and nodes (less that min_coverage)\n");
    fprintf (stderr, "\t then phasing simple non branching paths\n");
    fprintf (stderr, "If -M option is specified, the input is considered as a Mapsembler output - thus composed of multiple independant graphes\n");
    
    
	fprintf (stderr, "\nMANDATORY\n");
	fprintf (stderr, "\t -t <type> \n");
	fprintf (stderr, "\t\t \"c\" \"coverage\" \n");
    fprintf (stderr, "\t\t \"m\" \"modify\" \n");
    
	fprintf (stderr, "\nOPTIONS\n");
    fprintf (stderr, "-M: the input is considered as a Mapsembler output\n");
	fprintf (stderr, "\t -o file_name: write obtained graph. Default: standard output \n");
	fprintf (stderr, "\t -k size_seed: will use seeds of length size_seed. Default: 25.\n");
	fprintf (stderr, "\t -c min_coverage: Will consider an edge as coherent if coverage (number of reads from all sets using this edge) is at least min_coverage. Default: 2 \n");
	fprintf (stderr, "\t -d max_substitutions: Will map a read on the graph with at most max_substitutions substitutions. Default: 1 \n");
    
    
	exit(0);
}




int main(int argc, char **argv) {
	setvbuf(stdout,(char*)NULL,_IONBF,0); // disable buffering for printf()
	size_seeds=25;
	int min_coverage=2;
    int max_substitutions = 1;
    bool modify=false;
    bool option_t_seen=false;
    bool input_mapsembler=false;
    
    if(argc<2) print_usage_and_exit(argv[0]);
    char * toCheck_file =strdup(argv[1]);
    char * output_file=NULL;
	// GET ALL THE READ FILES
	// find the number of read sets
    int number_of_read_sets=0;
	while(number_of_read_sets+2<argc && argv[number_of_read_sets+2][0]!='-') number_of_read_sets++;
	char ** reads_file_names = (char **) malloc(sizeof(char *)*number_of_read_sets);
    // copy the read set file names
	number_of_read_sets=0;
	while(number_of_read_sets+2<argc && argv[number_of_read_sets+2][0]!='-'){
		reads_file_names[number_of_read_sets]=strdup(argv[number_of_read_sets+2]);
		number_of_read_sets++;
	}
    
    if(number_of_read_sets==0)
        print_usage_and_exit(argv[0]);
    Bank *reads = new Bank(argv+2,number_of_read_sets);
    
	while (1)
	{
        int temoin = getopt (argc-number_of_read_sets-1, &argv[number_of_read_sets+1], "hd:c:k:o:t:M");
		if (temoin == -1){
			break;
		}
		switch (temoin)
		{
            case 'M':
                input_mapsembler=true;
                printf("-M option caught: The input graph is considered as generated by Mapsembler\n");
                break;
            case 'c':
                min_coverage=atoi(optarg);
                if(min_coverage<1) min_coverage=1;
                printf(" Will consider an edge as coherent if coverage (number of reads from all sets using this edge) is at least %d.\n", min_coverage);
                break;
            case 'd':
                max_substitutions=atoi(optarg);
                if(max_substitutions<0) max_substitutions=0;
                printf(" Will map a read on the graph with at most %d substitutions.\n", max_substitutions);
                break;
            case 'k':
                size_seeds=atoi(optarg);
                
                if(size_seeds<5){
                    fprintf(stderr,"%d too small to be used as seed length. kisSnpCheckReads continues with seeds of length 5\n",size_seeds);
                    size_seeds=10;
                }
                if(size_seeds%2==0){
                    size_seeds-=1;
                    printf("Need odd kmer size to avoid palindromes. I've set kmer size to %d.\n",size_seeds);
                }
                else
                    printf(" Will use seeds of length %d\n", size_seeds);
                break;
            case 'o':
                output_file=strdup(optarg);
                break;
            case 'h':
                print_usage_and_exit(argv[0]);
                break;
            case 't':
                if(strcmp(optarg,"m")==0 || strcmp(optarg,"modify")==0) {
                    printf("%s will modify the graph\n", argv[0]);
                    modify=true;
                }
                if(strcmp(optarg,"c")==0 || strcmp(optarg,"coverage")==0) {
                    printf("%s will compute the coverage of the graph\n", argv[0]);
                    modify=false;
                }
                option_t_seen=true;
                break;
            case '-':
                if(strcmp("version", optarg)==0){
                    printf("kissReads graph version %s\n", getVersion());
                    exit(0);
                }
                printf(" what next ? %s\n",optarg);
                break;
            default:
                //			printf ("Unknown option %c\n", temoin);
                print_usage_and_exit(argv[0]);
		}
	}
    
	if ( argc  - optind <2)
	{
		print_usage_and_exit(argv[0]);
	}
    
    if(!option_t_seen){
        fprintf(stderr,"Error, -t is mandatory, please chose between \"modify\" or \"coverage\"\n");
        print_usage_and_exit(argv[0]);
    }
    
    init_static_variables();
    nbits_nbseeds = 8*sizeof(uint64_t)- NBITS_OFFSET_SEED ;
    mask_nbseed  = ( 1ULL <<  (uint64_t) nbits_nbseeds  ) -1 ;
    mask_offset_seed = (1ULL << (NBITS_OFFSET_SEED)) -1 ;
    
    
    // LOAD and CHECK THE GRAPH
    printf("load graph(s)...");
    vector<DBG*> dbgs;
    if(input_mapsembler){
        dbgs = load_mapsembler_json(argv[1], number_of_read_sets);
    }
    else
        dbgs.push_back(load_json(argv[1], number_of_read_sets));
    
    
    for(int i=0;i<dbgs.size();i++){
        printf("graph %d...",i);
//        dbgs[i]->depth_first_print();
        dbgs[i]->alloc_coverage_quality(number_of_read_sets);
        dbgs[i]->threshold_substitutions=max_substitutions;
//        dbgs[i]->depth_first_print();
        
        printf(" checked...");
        dbgs[i]->depth_first_check();
        
//        dbgs[i]->init_boolean_vector();
        
        printf(" indexed\n");
        // INDEX kmers in the graph
        dbgs[i]->index_nodes();
        
        //    // Map the reads
        //    printf("\"Unpalindromize\" palindromic nodes... ");
        //    int nb_unp = dbgs[i]->unpalindromize();
        //    printf("done, %d nodes were \"Unpalindromized\"\n", nb_unp);
        
        //    exit(0);
        
        // DEBUG GET NEXT READ FROM FILE:
//        char * read;        int size_read;
//        for(int file_id=0;file_id<reads->nb_files;file_id++){
//            printf("file_id %d\n", file_id);
//            int nb=0;
//            reads->open_stream(file_id);
//            while(reads->get_next_seq_from_file(&read,&size_read, file_id)){ // each read
//                printf("%s\n", read);
//                if(nb%10==0)
//                    printf("\r %d reads mapped (file_id %d)", nb, file_id);
//                nb++;
//            }
//            reads->close_stream(file_id);
//            
//        }
        // END DEBUG GET NEXT READ FROM FILE:
            
        // Map the reads
        
        reads->rewind_all();
        printf("Mapping the reads... \n");
        uint64_t nb_mapped_reads = map_reads(reads, dbgs[i]);
        printf("done, %ld reads were mapped with a unique best match\n",(long int) nb_mapped_reads);
        

        
        
        if(modify){
            
            printf("Remove uncovered edges... ");
            int number_edges_removed = dbgs[i]->remove_uncovered_edges(min_coverage, number_of_read_sets);
            printf("done, %d edge(s) covered with less than %d reads were removed\n", number_edges_removed, min_coverage);
            
            
            printf("Remove uncovered nodes... ");
            int number_nodes_removed = dbgs[i]->remove_uncovered_nodes(min_coverage, number_of_read_sets);
            printf("done, %d nodes(s) on which all positions are covered with less than %d reads %s removed\n", number_nodes_removed, min_coverage, number_nodes_removed>1?"were":"was");
            
            
            printf("Simplify simple paths... ");
            int number_simplified = dbgs[i]->simplify_simple_paths_couples ();
            printf("done, %d nodes couple%s merged: into [==]  \n", number_simplified, number_simplified>1?"s [=]->[=] were":" [=]->[=] was");
            
            
            
            printf("Remove unreachable nodes... ");
            number_nodes_removed = dbgs[i]->depth_first_clean();
            printf("done, %d unreachable nodes(s)  %s removed\n", number_nodes_removed, number_nodes_removed>1?"were":"was");
            
//            printf("Duplicate deaded paths... ");
//            int number_de_duplicated = dbgs[i]->duplicate_deadend_nodes();
//            printf("done, %d deadend path%s duplicated\n", number_de_duplicated, number_de_duplicated>1?"s were":" was");
//            
//            printf("Phase paths... ");
//            int number_duplicated = dbgs[i]->duplicate_simple_nodes();
//            printf("done, %d path%s detected and phased\n", number_duplicated, number_duplicated>1?"s were":" was");
            
            ////#define TRIO
            //#ifdef TRIO
            //        int number_simplified = dbgs[i]->simplify_simple_paths();
            //        printf("done, %d nodes trio ->[=]->[=]->[=]-> were merged: into ->[===]->  \n", number_simplified);
            //#else
            
            //#endif
        }
    } // ALL GRAPHS
    
    
    printf("Write the obtained graph%s... \n",(input_mapsembler&&dbgs.size()>1)?"s":"");
    
    FILE * res = stdout;
    if(output_file)
        res = fopen(output_file,"w");
    if(input_mapsembler){
        DBG::output_graphs(argv[1], res, number_of_read_sets, !modify, dbgs);
    }
    
    else{
        int edge_id=0;
        dbgs[0]->output_graph(res, number_of_read_sets, edge_id, !modify);
//        if(modify)
//            dbgs[0]->output_graph(res, number_of_read_sets, false);
//        else
//            dbgs[0]->output_graph(res, number_of_read_sets, true);
    }
    if(output_file) fclose(res);
    
    if(output_file)
        printf("All done, result graph is in file %s - good bye\n", output_file);
    else
        printf("All done, good bye\n");
    return 0;
}
