// Main.cpp
// 4/16/2007 Wei-Min Chen

#define VERSION "0.0.9"

#include "Pedigree.h"
#include "Parameters.h"
#include "Kinship.h"
#include "IBD.h"
#include <time.h>
#include <math.h>
#include "scoretest.h"
#include "elimination.h"
#include "unrelated.h"

// Prints the one-line program banner to standard output. The text carries
// the version string when VERSION is defined and a dated notice otherwise.
void ShowBanner()
{
#ifdef VERSION
   const char * banner = "Ghost " VERSION " - (c) 2007 Wei-Min Chen & Goncalo Abecasis";
#else
   const char * banner = "Ghost - (c) 7/12/2007 Wei-Min Chen & Goncalo Abecasis";
#endif
   // Banner text followed by a single newline.
   printf("%s\n", banner);
}

int main(int argc, char **argv)
{
   time_t sTime, cTime;
   time(&sTime);

   ShowBanner();

   String pedfile, datfile, mapfile;
   ParameterList pl;

   bool printTime = 0;

   bool Approach_SCORE1 = 0;
   bool Approach_SCORE1R = 0;
   bool Approach_SCORE2R = 0;
   bool Approach_OLS = 0;
   bool Approach_MLE = 0;
   bool Approach_SCOREA = 0;
   int IntervalMapping = 0;
   bool Approach_QTDT = 0;
   bool Approach_VC = 0;
   bool Approach_POLY = 0;
   bool Approach_HAP = 0;

   int analyticalPower = 0;
   int debugPrint = 0;
   int debugFlag = 0;
   int normalization = 0;
   int writeTransformation = 0;
   int IBDcalculation = 0;
   int writeIBD = 0;
   int permutationCount = 0;
   bool multivariateFlag = 0;
   bool balanceFlag = 0;
   bool bivariateFlag = 0;
   bool scanFlag = 0;
//   bool lessFlag = 0;
   bool moreFlag = 0;
   int randomSeed = 97;
   int sequentialFlag = 0;
   bool trimFlag = 0;

   double pvalueLessThan = _NAN_;
   int likelihood = 0;
   int printFreq = 0;
   int WriteFile = 0;
   double tolerance = _NAN_;
   double positionStart = _NAN_;
   double positionStop = _NAN_;
   double DifferenceInMissing = _NAN_;
   int performance_twoStage = 0;
   int performance_slow = 0;
   double missingThreshold = _NAN_;
   int noInfer = 0;
   String prefix("ghost");
   String hapfile("");
   String markerList("");
   String traitList("");
   String covariateList("");
   double rareCutoff = _NAN_;
   int windowSize = 0;

   BEGIN_LONG_PARAMETERS(longParameters)
      LONG_PARAMETER_GROUP("Association Test")
         LONG_PARAMETER("SCORE", &Approach_SCORE1)
         LONG_PARAMETER("ASSOC", &Approach_QTDT)
         LONG_PARAMETER("OLS", &Approach_OLS)
      LONG_PARAMETER_GROUP("Test Parameters")
         LONG_INTPARAMETER("flankingCount", &IntervalMapping)
         LONG_STRINGPARAMETER("SNP", &markerList)
         LONG_PARAMETER("multivar", &multivariateFlag)
         LONG_PARAMETER("scan", &scanFlag)
         LONG_PARAMETER("seq", &sequentialFlag)
      LONG_PARAMETER_GROUP("Simulation")
         LONG_INTPARAMETER("permutationCount", &permutationCount)
         LONG_INTPARAMETER("random", &randomSeed)
      LONG_PARAMETER_GROUP("Genotype Infer")
         LONG_PARAMETER("infer", &WriteFile)
         LONG_DOUBLEPARAMETER("minInferProb", &tolerance)
         LONG_DOUBLEPARAMETER("diffInMissing", &DifferenceInMissing)
         LONG_INTPARAMETER("noInfer", &noInfer)
      LONG_PARAMETER_GROUP("Performance")
         LONG_PARAMETER("trim", &trimFlag)
         LONG_INTPARAMETER("twoStage", &performance_twoStage)
         LONG_INTPARAMETER("slowIBD", &performance_slow)
         LONG_DOUBLEPARAMETER("missingRate", &missingThreshold)
      LONG_PARAMETER_GROUP("Polygenic Analysis")
         LONG_PARAMETER("polygenic", &Approach_POLY)
#ifndef VERSION
         LONG_PARAMETER("bivariate", &bivariateFlag)
         LONG_PARAMETER("balance", &balanceFlag)
      LONG_PARAMETER_GROUP("Haplotype Analysis")
//         LONG_PARAMETER("haplotype", &Approach_HAP)
         LONG_STRINGPARAMETER("hapfile", &hapfile)
         LONG_DOUBLEPARAMETER("rareCutoff", &rareCutoff)
         LONG_INTPARAMETER("windowSize", &windowSize)
#endif
      LONG_PARAMETER_GROUP("Positions")
         LONG_DOUBLEPARAMETER("start", &positionStart)
         LONG_DOUBLEPARAMETER("stop", &positionStop)
      LONG_PARAMETER_GROUP("Output")
         LONG_PARAMETER("more", &moreFlag)
         LONG_DOUBLEPARAMETER("pvalue", &pvalueLessThan)
         LONG_INTPARAMETER("lik", &likelihood)
         LONG_INTPARAMETER("freq", &printFreq)
         LONG_STRINGPARAMETER("prefix", &prefix)
      LONG_PARAMETER_GROUP("Model")
         LONG_STRINGPARAMETER("trait", &traitList)
         LONG_STRINGPARAMETER("covariate", &covariateList)
         LONG_PARAMETER("normal", &normalization)
      LONG_PARAMETER_GROUP("Tool")
         LONG_PARAMETER("time", &printTime)
         LONG_PARAMETER("write", &writeTransformation)
/*
#ifndef VERSION
      LONG_PARAMETER_GROUP("Debug")
         LONG_PARAMETER("ASCORE", &Approach_SCOREA)
         LONG_PARAMETER("vc", &Approach_VC)
         LONG_PARAMETER("RSCORE", &Approach_SCORE1R)
         LONG_PARAMETER("TEST", &Approach_SCORE2R)
         LONG_PARAMETER("print", &debugPrint)
         LONG_PARAMETER("debug", &debugFlag)
         LONG_PARAMETER("AnalyticalPower", &analyticalPower)
         LONG_INTPARAMETER("ibd", &writeIBD)
#endif
*/
   END_LONG_PARAMETERS()

   pl.Add(new StringParameter('d', "Data File", datfile));
   pl.Add(new StringParameter('p', "Pedigree File", pedfile));
   pl.Add(new StringParameter('m', "Map File", mapfile));
   pl.Add(new LongParameters("Additional Options", longParameters));
   pl.Read(argc, argv);
   pl.Status();

   if(!hapfile.IsEmpty()) Approach_HAP = true;

   Pedigree ped;
   if((datfile.FindChar('#')!= -1) || (pedfile.FindChar('#')!=-1) || (mapfile.FindChar('#')!=-1)){
      int count=22;
      StringArray datfiles(count), pedfiles(count), mapfiles(count);
      String temp;
      int d = datfile.FindChar('#');
      if(d==-1) for(int i = 0; i < count; i++) datfiles[i] = datfile;
      else
         for(int i = 1; i <= count; i++) {
            String name = datfile.SubStr(0, d);
            temp = i;
            name.Add(temp);
            name.Add(datfile.SubStr(d+1));
            datfiles[i-1] = name;
         }
      int p = pedfile.FindChar('#');
      if(p==-1) for(int i = 0; i < count; i++) pedfiles[i] = pedfile;
      else
         for(int i = 1; i <= count; i++) {
            String name = pedfile.SubStr(0, p);
            temp = i;
            name.Add(temp);
            name.Add(pedfile.SubStr(p+1));
            pedfiles[i-1] = name;
         }
      int m = mapfile.FindChar('#');
      if(m==-1) for(int i = 0; i < count; i++) mapfiles[i] = mapfile;
      else
         for(int i = 1; i <= count; i++) {
            String name = mapfile.SubStr(0, m);
            temp = i;
            name.Add(temp);
            name.Add(mapfile.SubStr(m+1));
            mapfiles[i-1] = name;
         }
      for(int i = 0; i < count; i++)
         ped.Prepare(datfiles[i]);  
      for(int i = 0; i < count; i++){
         ped.Prepare(datfiles[i]);
         ped.Load(pedfiles[i]);
         ped.LoadMarkerMap(mapfiles[i]);
      }
   }else{
      ped.Prepare(datfile);
      if(!pedfile.IsEmpty()) ped.Load(pedfile);
      if(!mapfile.IsEmpty()) ped.LoadMarkerMap(mapfile);
   }

   if(trimFlag) ped.Trim(0);

   if (ped.traitCount < 1)
      error("The data set includes no quantitative trait data\n\n");
   AssociationAnalysis *engine = NULL;

   if(Approach_QTDT) IBDcalculation = 1;
   if(Approach_VC) IBDcalculation = 1;
   if(writeIBD) IBDcalculation = 1;

   if(scanFlag) Approach_OLS = 1;
   if(Approach_MLE) {
      engine = new AssociationAnalysis(ped);
      engine->Approach = MLE1;
   }else if(Approach_OLS){
//      engine = new AssociationAnalysis(ped);
      engine = new OLS_Association(ped);
      engine->Approach = OLS;
   }else if(WriteFile || (writeTransformation && (!Approach_HAP))){
      engine = new AssociationAnalysis(ped);
      engine->Approach = SCORE1;
   }else if(Approach_POLY){
      engine = new AssociationAnalysis(ped);
      engine->Approach = POLYGENIC;
   }else if(Approach_HAP){
      engine = new AssociationAnalysis(ped);
      engine->Approach = HAPLOTYPE;
   }else{
      engine = new ScoreTest(ped);
      if(Approach_SCORE1) engine->Approach = SCORE1;
      if(Approach_SCORE1R) engine->Approach = SCORE1_R;
      if(Approach_SCORE2R) engine->Approach = SCORE2_R;
      if(Approach_QTDT) engine->Approach = QTDT_AT;
      if(Approach_SCOREA) engine->Approach = SCORE_A;
      if(analyticalPower) engine->analyticalPower = analyticalPower;
      if(Approach_VC) engine->Approach = LINKAGE_LRT;
   }
   if(scanFlag) engine->scanFlag = 1;
   if(sequentialFlag) engine->sequentialFlag = true;

   if(printFreq) engine->printFreq = printFreq;
   if(noInfer) engine->noInfer = noInfer;
   engine->prefix = prefix;
   if(IntervalMapping > 0) {
      if(IntervalMapping < 11)
         engine->IntervalMapping = IntervalMapping;
      else
         error("Ghost can only handle up to 10 flanking markers.");
   }
   if(pvalueLessThan != _NAN_)
      engine->pvalueLessThan = pvalueLessThan;
   if(DifferenceInMissing != _NAN_)
      engine->DifferenceInMissing = DifferenceInMissing;
   if(likelihood) engine->GenoLik = likelihood;
   if(WriteFile)
      engine->WriteFile = WriteFile;
   if(tolerance != _NAN_)
      engine->toleranceVariance = tolerance;

   if(IBDcalculation) engine->IBDcalculation = IBDcalculation;
   if(writeIBD) engine->writeIBD = writeIBD;
   if(!traitList.IsEmpty()){
//      engine->scanFlag = 1;
      engine->traitList.AddTokens(traitList, ',');
      engine->traits.Dimension(0);
      for(int i = 0; i < engine->traitList.Length(); i++){
         int k = ped.traitNames.Find(engine->traitList[i]);
         if(k==-1)
            printf("Trait %s cannot be found.\n", (const char*)engine->traitList[i]);
         else
            engine->traits.Push(k);
      }
      if(engine->traits.Length()==0)
         error("No traits are specified.");
   }else{
      engine->traits.Dimension(ped.traitCount);
      for(int i = 0; i < ped.traitCount; i++)
         engine->traits[i] = i;
   }
   if(permutationCount){
      if(!Approach_OLS)
         printf("Permutation is only implemented for OLS. Parameter is ignored.\n");
      engine->permutationCount =
      (permutationCount == 1) ? 100: permutationCount;
   }
   if(multivariateFlag)
      engine->multivariateFlag = 1;
   if(bivariateFlag){
      engine->bivariateFlag = 1;
      engine->multivariateFlag = 1;
   }
   if(balanceFlag) engine->balanceFlag = 1;

   if(!covariateList.IsEmpty()){
      engine->covariateList.AddTokens(covariateList, ',');
      engine->covariates.Dimension(0);
      for(int i = 0; i < engine->covariateList.Length(); i++){
         int k = ped.covariateNames.Find(engine->covariateList[i]);
         if(k==-1){
            if(engine->covariateList[i].Compare("sex")==0){
               for(int p = 0; p < ped.count; p++)
                  if(ped[p].sex == 0)
                     ped[p].covariates.Push(_NAN_);
                  else
                     ped[p].covariates.Push(ped[p].sex-1);
               ped.covariateNames.Push("sex");
               ped.covariateCount++;
               engine->covariates.Push(ped.covariateCount-1);
            }else
               printf("Covariate %s cannot be found.\n", (const char*)engine->covariateList[i]);
         }else{
            if(engine->covariates.Find(k)==-1)
               engine->covariates.Push(k);
            else
               printf("Covariate %s are duplicated.\n",
                  (const char*)engine->covariateList[i]);
         }
      }
      if(engine->covariates.Length()==0)
         printf("No covariates are included in the analysis.\n");
   }else{
      engine->covariates.Dimension(ped.covariateCount);
      for(int i = 0; i < ped.covariateCount; i++)
         engine->covariates[i] = i;
   }
   if(!markerList.IsEmpty()){
      StringArray tempList;
      tempList.AddTokens(markerList, ',');
      if(tempList.Length()==0) tempList.Push(" ");
      if(tempList.Length()){
         engine->markerList.Dimension(0);
         for(int i = 0; i < tempList.Length(); i++){
            int k = ped.markerNames.Find(tempList[i]);
            if(k==-1)
               printf("Marker %s cannot be found.\n", (const char*)tempList[i]);
            else
               engine->markerList.Push(k);
         }
         if(engine->markerList.Length()==0)
            printf("No markers are specified.");
      }
   }
   if(engine->Approach == POLYGENIC){
      if(normalization)
         for(int tr = 0; tr < engine->traits.Length(); tr++)
            engine->InvNorm(tr);
      if(engine->multivariateFlag)
         engine->MultiPoly();
      else engine->polygenic();
      if(printTime) printf("\nSeconds elapsed: %d\n", time(&cTime)-sTime);
      exit(0);
   }

   if(performance_twoStage) engine->performance_twoStage = performance_twoStage;
   if(performance_slow) engine->performance_fast = 0; else engine->performance_fast = 1;
   if(missingThreshold!=_NAN_) engine->missingThreshold = missingThreshold;
   if(debugPrint) engine->debugPrint = debugPrint;
   if(debugFlag) engine->debugFlag = debugFlag;
   if(normalization) engine->normalization = normalization;
   if(writeTransformation) engine->writeTransformation = writeTransformation;
   if(positionStart != _NAN_) engine->start = positionStart;
   if(positionStop != _NAN_) engine->stop = positionStop;
   if(moreFlag) engine->moreFlag = moreFlag;

   if(engine->Approach == HAPLOTYPE){
      if(rareCutoff!=_NAN_) engine->rareCutoff = rareCutoff;
      engine->haplotypeFile = hapfile;
      engine->windowSize = windowSize; 
      engine->HaploAnalysis();
      if(printTime) printf("\nSeconds elapsed: %d\n", time(&cTime)-sTime);
      exit(0);
   }

   if (ped.markerCount < 1)
      warning("The data set includes no genetic markers\n\n");

   do{
   engine->pre_genome();
   int next_chromosome = -1;
   bool many_chromosomes = false;
//   engine->first_chromosome = engine->last_chromosome = true;
   if(engine->no_chromosome==0) do {
      engine->markers.Dimension(0);
      next_chromosome = ped.SortMarkersInMapOrder(engine->markers, next_chromosome);
      many_chromosomes = many_chromosomes || (next_chromosome != 0);
//      if(next_chromosome) engine->last_chromosome = false;
//      else engine->last_chromosome = true;
      if (many_chromosomes){
         printf("\nAnalyzing Chromosome %d\n\n",
                ped.GetMarkerInfo(engine->markers[0])->chromosome);
         engine->many_chromosomes = many_chromosomes;
      }
      if(ped.GetMarkerInfo(engine->markers[0])->chromosome > 98){
         printf("Sex chromosome is not analyzed.\n\n");
         continue;
      }
      engine->SetupGlobals();
      engine->Analyze();
      engine->PrintScores();
//      engine->first_chromosome = false;
   } while (next_chromosome);
   engine->post_genome();
   if(engine->no_chromosome) break;
   }while(engine->sequentialFlag);

   if(printTime)
      printf("\nSeconds elapsed: %d\n", time(&cTime)-sTime);
   if(engine) delete engine;
}











/*
//   if(covariates) ConvertNumber(covariates, engine->covariates);
void ConvertNumber(long int number, IntArray & out, int bit)
{
   out.Dimension(bit);
   out.Zero();
   if( (number < 0) || (number > BinaryBase[30]) ) {
      printf("Failed to convert number %ld\n", number);
      return;
   }
   for(int i = 0; i < bit; i++)
      if(number & BinaryBase[i]) out[i] = 1;
}
*/
/*
   out.Dimension(0);
   if(number < 0) return;
   else if(number == 0){
      out.Dimension(1);
      out[0] = 0;
      for(int i = 1; i < bit; i++) out.Push(0);
      return;
   }
   int k = int(log(number)/log(2)+.0000000001)+1;
   for(int i = 0; i < k; i++)
      if(number & ( 1 << i)) out.Push(1);
      else out.Push(0);
   for(int i = k; i < bit; i++) out.Push(0);
   */




