#include "SecondPass.h"
#include "MathStats.h"

#include <math.h>

/*
 * Puts every counter and per-marker statistic into a defined state and sizes
 * the running minimum / maximum vectors.
 *
 * Members that are only assigned later by Prepare(), ProcessMarker() or
 * AssociationTest() are also zeroed here, because the reporting routines
 * (outputMarkerStatistics, writeOutputSummary) read them and could otherwise
 * print indeterminate values when those routines were skipped or bailed out
 * early.
 */
SecondPass::SecondPass()
{
   /* sample bookkeeping -- recomputed by Prepare() */
   individuals     = 0;
   usableSamples   = 0;
   minGenos        = 0;
   currentMarker   = 0;

   /* per-filter failure tallies */
   minGenosFail    = 0;
   HWEfail         = 0;
   concordanceFail = 0;
   mendelFail      = 0;
   mendelRateFail  = 0;
   snpsNotAssessed = 0;
   minorAlleleFail = 0;
   mafFail         = 0;
   failures        = 0;

   /* per-marker statistics -- same defaults ProcessMarker() applies */
   genotypeCounts   = 0;
   maf              = 0;
   minorAlleleCount = 0;
   minorAllele      = 0;
   logXOdds         = 0.0;
   HWEPvalues       = 1.0;
   mendelErrorRates = 0.0;
   mendelErrors     = 0;
   mismatchErrors   = 0;

   /* TDT default initializations */
   T  = 0;
   NT = 0;
   completeTrios = 0;
   TDTChiSq      = 0;
   TDTPvalue     = 0;

   /* Association test related vars */
   assocChiSq      = 0;
   assocPvalue     = 0;
   numStrataTested = 0;   // printed whenever assocChiSq >= 0, so it must be defined

   minimumStats.Dimension(NUMSTATS + 5); // +5 is due to TDT statistics
   minimumStats.Set(1e30);
   maximumStats.Dimension(NUMSTATS + 5);
   maximumStats.Set(-1);
}

// Destructor: the body is empty, nothing is released explicitly here.
SecondPass::~SecondPass()
{
}

/*
 * Counts the samples that are still usable (mask value 0) and derives the
 * minimum number of called genotypes a marker needs to pass.
 *
 *   markers  not used by this routine
 *   mask     per-sample mask; entries 1 .. Length()-1 are examined, entry 0
 *            is skipped
 *
 * Both usableSamples and minGenos are recomputed from scratch, so calling
 * Prepare() again does not add to an earlier count. When no sample is usable,
 * `failure` is set, minGenos stays 0 and the routine returns.
 */
void SecondPass::Prepare(int markers, IntArray &mask)
{
   individuals   = mask.Length();
   usableSamples = 0;
   minGenos      = 0;

   for (int cnt = 1; cnt < individuals; cnt++)
   {
      if (mask[cnt] == 0)
         usableSamples++;
   }

   if (usableSamples == 0)
   {
      failure = "No samples left to perform second pass";
      return;
   }

   minGenos = int(usableSamples * QC_Settings::MARKER_CALLS_MIN);
}


/*
 * Resets the per-marker statistics and tallies genotype and allele counts for
 * one marker.
 *
 * Every unmasked sample with a non-zero genotype adds to genotypeCounts.
 * Allele counts use only samples that are not flagged as duplicates and have
 * both parent columns equal to -1; for X-linked markers samples with
 * SEX_MISSING are dropped too and a MALE contributes one allele instead of two.
 * Genotype code 1 counts as homozygous for the first allele, code 3 as one
 * copy of it. If the first allele's frequency exceeds 0.5 the second allele is
 * reported as the minor one.
 *
 * Returns false (and sets `failure`) when no genotype was counted at all.
 */
bool SecondPass::ProcessMarker(IntArray &genotypes, IntArray & sex, IntArray &mask, char* snpAlleles, IntArray & isDuplicateSample, IntArray & fatherColumn, IntArray & motherColumn, bool isXLinked)
{
   alleles[0] = snpAlleles[0];
   alleles[1] = snpAlleles[1];

   /* start every marker from the same defaults */
   genotypeCounts   = 0;
   maf              = 0;
   minorAlleleCount = 0;
   minorAllele      = 0;
   logXOdds         = 0.0;
   HWEPvalues       = 1.0;
   mendelErrorRates = 0.0;
   mendelErrors     = 0;
   mismatchErrors   = 0;

   int alleleTotal = 0;

   for (int idx = 1; idx < genotypes.Length(); idx++)
   {
      if (mask[idx] != 0)
         continue;
      if (genotypes[idx] == 0)      // missing call: nothing to count
         continue;

      genotypeCounts++;

      if (isXLinked)
      {
         if (sex[idx] == SEX_MISSING || isDuplicateSample[idx] == 1 || fatherColumn[idx] != -1 || motherColumn[idx] != -1)
            continue;

         const bool male = (sex[idx] == MALE);
         alleleTotal += male ? 1 : 2;

         if (genotypes[idx] == 1)
         {
            if (male)
               minorAlleleCount += 1;
            else if (sex[idx] == FEMALE)
               minorAlleleCount += 2;
         }
         else if (genotypes[idx] == 3)
         {
            minorAlleleCount += 1;
         }
      }
      else
      {
         if (isDuplicateSample[idx] == 1 || fatherColumn[idx] != -1 || motherColumn[idx] != -1)
            continue;

         alleleTotal += 2;

         if (genotypes[idx] == 1)
            minorAlleleCount += 2;
         else if (genotypes[idx] == 3)
            minorAlleleCount += 1;
      }
   }

   if (alleleTotal == 0)
      maf = 0;
   else
      maf = (minorAlleleCount*1.0)/alleleTotal;

   minorAllele = 1;
   if (maf > 0.5)
   {
      /* the first allele is actually the common one -- flip to the second */
      minorAllele      = 2;
      minorAlleleCount = alleleTotal - minorAlleleCount;
      maf              = 1 - maf;
   }

   if (genotypeCounts != 0)
      return true;

   failure = "No samples left to perform second pass after quality score filter";
   return false;
}

/*
 * Applies the marker-level QC thresholds in a fixed order and records the
 * first one that is violated.
 *
 * On a violation the matching failure counter is incremented and `tag` names
 * the reason; later thresholds are not evaluated. Returns true (with an empty
 * `tag`) only when every threshold is met.
 */
bool SecondPass::PostProcess()
{
   tag = "";

   if (genotypeCounts < minGenos)
   {
      minGenosFail++;
      tag = "TOO_FEW_GENOTYPES";
   }
   else if (minorAlleleCount < QC_Settings::MIN_ALLELE_COUNT)
   {
      minorAlleleFail++;
      tag = "TOO_FEW_MINOR_ALLELES";
   }
   else if (maf < QC_Settings::MAF_MIN)
   {
      mafFail++;
      tag = "TOO_LOW_MAF";
   }
   else if (HWEPvalues < QC_Settings::MARKER_HWE_PVALUE)
   {
      HWEfail++;
      tag = "HWE_FAILURE";
   }
   else if (mismatchErrors > QC_Settings::MARKER_MISMATCHES_MAX)
   {
      concordanceFail++;
      tag = "TOO_MANY_MISMATCHES";
   }
   else if (mendelErrors > QC_Settings::MARKER_MAX_MENDEL)
   {
      mendelFail++;
      tag = "TOO_MANY_MENDEL_ERRORS";
   }
   else if (mendelErrorRates > QC_Settings::MARKER_MENDEL_RATE)
   {
      mendelRateFail++;
      tag = "HIGH_MENDEL_ERROR_RATE";
   }

   /* an empty tag means no threshold was tripped */
   return tag.Length() == 0;
}


/*
 * Stores in logXOdds the difference between the log-likelihood of the observed
 * genotypes under an autosomal model and under an X-linked model.
 *
 * Samples are used when their mask is OKAY, sex and genotype are not MISSING
 * and they are not flagged as duplicates. First-allele frequencies are
 * estimated separately for each model (a MALE counts as one chromosome under
 * the X-linked model). Under the X-linked model male genotype probabilities
 * mix the hemizygous probabilities with the female ones, weighted by
 * `errorRate`.
 */
void SecondPass::XLinkedOdds(IntArray & genotypes, IntArray & sex, IntArray & mask, IntArray & isDuplicateSample, double errorRate)
{
   logXOdds = 0.0;

   const int sampleCount = genotypes.Length();

   /* pass 1: allele frequency of the first allele under each model */
   double pAuto = 0;
   double pX    = 0;
   int chromosomesAuto = 0;
   int chromosomesX    = 0;

   for (int s = 1; s < sampleCount; s++)
   {
      const bool usable = (mask[s] == OKAY) && (sex[s] != MISSING) && (genotypes[s] != MISSING) && (isDuplicateSample[s] == 0);
      if (!usable)
         continue;

      const bool male = (sex[s] == MALE);

      if (genotypes[s] == FIRST_HOM)
      {
         pAuto += 2;
         pX    += (male ? 1 : 2);
      }
      else if (genotypes[s] == HET)
      {
         pAuto += 1;
         pX    += (male ? 0.5 : 1);
      }

      chromosomesAuto += 2;
      chromosomesX    += (male ? 1 : 2);
   }

   /* the tiny constant keeps the division defined when nothing was counted */
   pAuto /= (chromosomesAuto + 1e-30);
   pX    /= (chromosomesX + 1e-30);

   /* genotype probabilities, indexed by genotype code - 1 */
   double probAuto[3];
   probAuto[0] = (pAuto*pAuto);
   probAuto[1] = ((1 - pAuto)*(1 - pAuto));
   probAuto[2] = (2*pAuto*(1 - pAuto));

   double probXFemale[3];
   probXFemale[0] = (pX*pX);
   probXFemale[1] = ((1 - pX)*(1 - pX));
   probXFemale[2] = (2*pX*(1 - pX));

   double probXMale[3];
   probXMale[0] = ((1-errorRate)*pX + errorRate*probXFemale[0]);
   probXMale[1] = ((1-errorRate)*(1 - pX) + errorRate*probXFemale[1]);
   probXMale[2] = (errorRate*probXFemale[2]);

   /* pass 2: accumulate both log-likelihoods; both sums start from the same
      offset of 1, which drops out of the difference */
   double logLikAuto = 1;
   double logLikX    = 1;

   for (int s = 1; s < sampleCount; s++)
   {
      const bool usable = (mask[s] == OKAY) && (sex[s] != MISSING) && (genotypes[s] != MISSING) && (isDuplicateSample[s] == 0);
      if (!usable)
         continue;

      const int code = genotypes[s] - 1;

      logLikAuto += log(probAuto[code]);
      if (sex[s] == MALE)
         logLikX += log(probXMale[code]);
      else
         logLikX += log(probXFemale[code]);
   }

   logXOdds = logLikAuto - logLikX;
}

/*
 * Exact test of Hardy-Weinberg equilibrium for one marker; the p-value is
 * stored in HWEPvalues.
 *
 * Only samples that are not flagged as duplicates, are not masked and have
 * both parent columns equal to -1 are counted; for sex-linked markers MALE
 * samples are skipped as well. Genotype code 0 is ignored, codes 1 and 2 are
 * the two homozygotes and any other code is counted as a heterozygote.
 *
 * The probability of every possible heterozygote count (given the observed
 * rare-allele count) is built by recurrence outwards from the most likely
 * count; the p-value is the sum of all probabilities that do not exceed the
 * probability of the observed configuration.
 *
 * When no genotype qualifies, a warning is logged and HWEPvalues is left at 0.
 * NOTE(review): a value of 0 is below any positive HWE threshold, so such a
 * marker would be flagged by PostProcess() -- confirm that this is intended.
 */
void SecondPass::SNPHWE(IntArray & genos, bool isSexLinked, IntArray & sexCodes,
                  IntArray & mask, IntArray & fatherColumn, IntArray & motherColumn, IntArray & isDuplicateSample)
{
   HWEPvalues = 0;
   int obs_hets = 0, obs_hom1 = 0, obs_hom2 = 0;
   for (int cnt = 1; cnt < genos.Length(); cnt++)
   {
      // use only founders -- unique first sample
      if (isDuplicateSample[cnt] != 0)   continue;
      if (mask[cnt] > 0 || fatherColumn[cnt] != -1 || motherColumn[cnt] != -1)
         continue;
      if (sexCodes[cnt] == MALE && isSexLinked)
         continue;

      if (genos[cnt] == 0)
         continue;
      else if (genos[cnt] == 1)
         obs_hom1++;
      else if (genos[cnt] == 2)
         obs_hom2++;
      else
         obs_hets++;
   }
   /* the three counters start at 0 and are only ever incremented, so no
      negative-count check is needed here */

   int obs_homc = obs_hom1 < obs_hom2 ? obs_hom2 : obs_hom1;
   int obs_homr = obs_hom1 < obs_hom2 ? obs_hom1 : obs_hom2;

   int rare_copies = 2 * obs_homr + obs_hets;
   int genotypes   = obs_hets + obs_homc + obs_homr;
   if (genotypes == 0)
   {
      failure.catprintf("WARNING: Line %d. No Genotypes to compute HWE with\n", currentMarker);
      logMgr.WriteToLog(failure, 20);
      failure.Clear();
      return;
   }

   double * het_probs = (double *) malloc((size_t) (rare_copies + 1) * sizeof(double));
   if (het_probs == NULL)
      {
      printf("FATAL ERROR - SNP-HWE: Unable to allocate array for heterozygote probabilities");
      exit(EXIT_FAILURE);
      }

   int i;
   for (i = 0; i <= rare_copies; i++)
      het_probs[i] = 0.0;

   /* start at midpoint; the product grows with the square of the sample size,
      so it is formed in 64-bit arithmetic to avoid int overflow for large
      cohorts. The quotient never exceeds rare_copies, so it fits an int. */
   int mid = (int) (((long long) rare_copies * (2LL * genotypes - rare_copies))
                    / (2LL * genotypes));

   /* check to ensure that midpoint and rare alleles have same parity */
   if ((rare_copies & 1) ^ (mid & 1))
      mid++;

   int curr_hets = mid;
   int curr_homr = (rare_copies - mid) / 2;
   int curr_homc = genotypes - curr_hets - curr_homr;

   het_probs[mid] = 1.0;
   double sum = het_probs[mid];

   /* walk down from the midpoint towards fewer heterozygotes */
   for (curr_hets = mid; curr_hets > 1; curr_hets -= 2)
      {
      het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets * (curr_hets - 1.0)
                               / (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
      sum += het_probs[curr_hets - 2];

      /* 2 fewer heterozygotes for next iteration -> add one rare, one common homozygote */
      curr_homr++;
      curr_homc++;
      }

   /* walk up from the midpoint towards more heterozygotes */
   curr_homr = (rare_copies - mid) / 2;
   curr_homc = genotypes - mid - curr_homr;
   for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2)
      {
      het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr * curr_homc
                            /((curr_hets + 2.0) * (curr_hets + 1.0));
      sum += het_probs[curr_hets + 2];

      /* add 2 heterozygotes for next iteration -> subtract one rare, one common homozygote */
      curr_homr--;
      curr_homc--;
      }

   /* turn the relative weights into probabilities */
   for (i = 0; i <= rare_copies; i++)
      het_probs[i] /= sum;

   /* p-value: total probability of all configurations that are no more likely
      than the observed one */
   double p_hwe = 0.0;
   for (i = 0; i <= rare_copies; i++)
      {
      if (het_probs[i] > het_probs[obs_hets])
         continue;
      p_hwe += het_probs[i];
      }

   /* rounding can push the sum marginally above 1 */
   p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;

   free(het_probs);

   HWEPvalues = p_hwe;
}


/*
 * Counts discordant genotype calls among duplicated samples for one marker
 * and stores the total in mismatchErrors.
 *
 *   genotypes            genotype codes for this marker (0 = missing)
 *   duplicateSampleSets  array of `numSets` index lists, one per duplicated
 *                        sample; NULL means there is nothing to check
 *
 * A set of exactly two samples contributes one mismatch when both calls are
 * present and differ. For any other set size a consensus genotype is chosen
 * by majority and every non-missing call that differs from it is a mismatch.
 */
void SecondPass::ConcordanceChecks(IntArray & genotypes, IntArray * duplicateSampleSets, int numSets)
{
   mismatchErrors = 0;
   if (duplicateSampleSets == NULL)
      return;

   for (int i = 0; i < numSets; i++)
   {
      IntArray & members = duplicateSampleSets[i];
      const int setSize = members.Length();

      if (setSize == 2)
      {
         const int first  = genotypes[members[0]];
         const int second = genotypes[members[1]];
         if (first != 0 && second != 0 && first != second)
            mismatchErrors++;
         continue;
      }

      /* tally the calls per genotype code; the counter has to be incremented
         here, otherwise every larger set looks entirely missing */
      int genoCnts[4] = {0, 0, 0, 0};
      for (int j = 0; j < setSize; j++)
      {
         const int geno = genotypes[members[j]];
         // codes outside 0..3 are not tallied and therefore end up counted as
         // discordant below, rather than writing past the end of genoCnts
         if (geno >= 0 && geno <= 3)
            genoCnts[geno]++;
      }

      if (genoCnts[1] + genoCnts[2] + genoCnts[3] == 0) //all the duplicate genotypes are missing
         continue;

      // On equal counts 1/1 is preferred over 2/2, which is preferred over 1/2
      int consensus = (genoCnts[1] >= genoCnts[2]) ? 1 : 2;
      if (genoCnts[consensus] < genoCnts[3])
         consensus = 3;

      mismatchErrors += (setSize - genoCnts[consensus] - genoCnts[0]);
   }
}

/*
 * Writes the tab-separated column header of the marker statistics file.
 * The TDT and association column groups are emitted only when `outTDT` /
 * `outAssoc` are set; the line always ends with the Flagged and Comments
 * columns.
 */
void SecondPass::writeOutputHeader(FILE * markerFile, bool outTDT, bool outAssoc)
{
   /* columns that are always present */
   fputs("Marker\tMinorAllele\tMAF\tCompleteness\tHWEPvalue\tMendelErrors\tImpliedMendelErrorRate\tMismatches\tXlinkedOdds\tAvgQualityScore\t", markerFile);

   /* optional column groups */
   if (outTDT)
   {
      fputs("Allele1\tAllele2\tTransmit1\tTransmit2\tTDTTrios\tTDTChisq\tTDTPvalue\t", markerFile);
   }
   if (outAssoc)
   {
      fputs("AssocChiSq\tAssocPvalue\t#Strata\t", markerFile);
   }

   fputs("Flagged\tComments\n", markerFile);
}

/*
 * Appends one "label <TAB> reason" line to the not-assessed report and bumps
 * the count of markers that were not assessed.
 */
void SecondPass::outputNotAssessed(FILE * notAssessed, String & markerLabel, String failure)
{
   const char * label  = (const char *)markerLabel;
   const char * reason = (const char *)failure;

   snpsNotAssessed++;
   fprintf(notAssessed, "%s\t%s\n", label, reason);
}

void SecondPass::outputMarkerStatistics(FILE * markerFile, String & markerLabel, double avgQualityScore, bool outTDT, bool outAssoc)
{
   char minAllele = (minorAllele == 1) ? alleles[0] : alleles[1];
   if (minAllele == 0)
      minAllele = '?';
   fprintf(markerFile, "%s\t%c\t%f\t%f\t%.6g\t%d\t%f\t%d\t%f\t", (const char *)markerLabel, minAllele,
      maf, (genotypeCounts*1.0)/usableSamples, HWEPvalues, mendelErrors,
      mendelErrorRates, mismatchErrors, logXOdds);
   if (avgQualityScore == 0)
      fprintf(markerFile, "N/A\t");
   else
      fprintf(markerFile, "%.4f\t", avgQualityScore);

   if (outTDT)
      if (completeTrios == 0)
         fprintf(markerFile, "-\t-\t-\t-\t0\t-\t-\t");
      else
         fprintf(markerFile, "%c\t%c\t%.0f\t%.0f\t%d\t%.3f\t%.6g\t", alleles[0], alleles[1], T, NT, completeTrios, TDTChiSq, TDTPvalue);

   if (outAssoc)
      if (assocChiSq >= 0.0)
         fprintf(markerFile, "%.3f\t%.6g\t%d\t", assocChiSq, assocPvalue, numStrataTested);
      else
         fprintf(markerFile, "-\t-\t-\t");

   if (tag.Length() == 0)
      fprintf(markerFile, "PASSED\t-\n");
   else
      fprintf(markerFile, "FAILED\t%s\n", (const char *)tag);
   UpdateLimits(avgQualityScore);
}


/*
 * Prints the marker QC summary to standard output: one section per filter with
 * the number of flagged markers and the threshold that was applied, followed
 * by the number of markers that passed.
 *
 * The per-filter tallies are added to `failures` before the pass count is
 * derived from currentMarker, snpsNotAssessed and failures.
 */
void SecondPass::writeOutputSummary()
{
   fputs("Masking poor quality markers...\n", stdout);
   fputs("===============================\n\n", stdout);

   /* call rate */
   fputs("Checking total number of genotype calls\n", stdout);
   printf("   Flagged %d markers with < %.3f genotyping proportion (<%d/%d samples)\n\n",
           minGenosFail, QC_Settings::MARKER_CALLS_MIN, minGenos, usableSamples);

   /* minor allele count and frequency */
   fputs("Checking minor alleles counts and frequencies\n", stdout);
   printf("   Flagged %d markers with < %d minor alleles\n", minorAlleleFail, QC_Settings::MIN_ALLELE_COUNT);
   printf("   Flagged %d markers with minor allele frequency < %.3f\n\n", mafFail, QC_Settings::MAF_MIN);

   /* Hardy-Weinberg */
   fputs("Checking Hardy-Weinberg Equilibrium p-values\n", stdout);
   printf("   Flagged %d markers with HWE pvalue < %f\n\n", HWEfail, QC_Settings::MARKER_HWE_PVALUE);

   /* duplicate concordance */
   fputs("Checking concordance rates\n", stdout);
   printf("   Flagged %d markers with > %d mismatches in duplicates\n\n", concordanceFail,
          QC_Settings::MARKER_MISMATCHES_MAX);

   /* Mendelian checks */
   fputs("Checking mendelian inconsitencies\n", stdout);
   printf("   Flagged %d markers with > %d mendelian inconsistencies\n", mendelFail,
          QC_Settings::MARKER_MAX_MENDEL);
   printf("   Flagged %d markers with > %f implied mendelian error rate\n\n", mendelRateFail,
          QC_Settings::MARKER_MENDEL_RATE);

   /* fold every filter's tally into the overall failure count */
   failures += (minGenosFail + HWEfail + concordanceFail + mafFail + minorAlleleFail + mendelFail + mendelRateFail);

   printf("   %d markers passed quality control\n\n", (currentMarker - snpsNotAssessed - failures));
}

void SecondPass::UpdateLimits(double avgQualityScore)
{
   if (maf < minimumStats[MAF])                                         minimumStats[MAF] = maf;
   if ((genotypeCounts*1.0)/usableSamples < minimumStats[COMPLETENESS]) minimumStats[COMPLETENESS] = (genotypeCounts*1.0)/usableSamples;
   if (HWEPvalues < minimumStats[HWE])                                  minimumStats[HWE] = HWEPvalues;
   if (mendelErrors < minimumStats[MENDEL])                             minimumStats[MENDEL] = mendelErrors;
   if (mendelErrorRates < minimumStats[MENDELRATE])                     minimumStats[MENDELRATE] = mendelErrorRates;
   if (mismatchErrors < minimumStats[MISMATCHES])                       minimumStats[MISMATCHES] = mismatchErrors;
   if (logXOdds < minimumStats[XODDS])                                  minimumStats[XODDS] = logXOdds;
   if (avgQualityScore < minimumStats[QUALITYSCORE])                    minimumStats[QUALITYSCORE] = avgQualityScore;
   
   if (T < minimumStats[NUMSTATS+0])                                    minimumStats[NUMSTATS+0] = T;
   if (NT < minimumStats[NUMSTATS+1])                                   minimumStats[NUMSTATS+1] = NT;
   if (completeTrios < minimumStats[NUMSTATS+2])                        minimumStats[NUMSTATS+2] = completeTrios;
   if (TDTChiSq < minimumStats[NUMSTATS+3])                             minimumStats[NUMSTATS+3] = TDTChiSq;
   if (TDTPvalue < minimumStats[NUMSTATS+4])                            minimumStats[NUMSTATS+4] = TDTPvalue;
   
   
   if (maf > maximumStats[MAF])                                         maximumStats[MAF] = maf;
   if ((genotypeCounts*1.0)/usableSamples > maximumStats[COMPLETENESS]) maximumStats[COMPLETENESS] = (genotypeCounts*1.0)/usableSamples;
   if (HWEPvalues > maximumStats[HWE])                                  maximumStats[HWE] = HWEPvalues;
   if (mendelErrors > maximumStats[MENDEL])                             maximumStats[MENDEL] = mendelErrors;
   if (mendelErrorRates > maximumStats[MENDELRATE])                     maximumStats[MENDELRATE] = mendelErrorRates;
   if (mismatchErrors > maximumStats[MISMATCHES])                       maximumStats[MISMATCHES] = mismatchErrors;
   if (logXOdds > maximumStats[XODDS])                                  maximumStats[XODDS] = logXOdds;
   if (avgQualityScore > maximumStats[QUALITYSCORE])                    maximumStats[QUALITYSCORE] = avgQualityScore;
   
   if (T > maximumStats[NUMSTATS+0])                                    maximumStats[NUMSTATS+0] = T;
   if (NT > maximumStats[NUMSTATS+1])                                   maximumStats[NUMSTATS+1] = NT;
   if (completeTrios > maximumStats[NUMSTATS+2])                        maximumStats[NUMSTATS+2] = completeTrios;
   if (TDTChiSq > maximumStats[NUMSTATS+3])                             maximumStats[NUMSTATS+3] = TDTChiSq;
   if (TDTPvalue > maximumStats[NUMSTATS+4])                            maximumStats[NUMSTATS+4] = TDTPvalue;
}

/*
 * Transmission disequilibrium test for one marker.
 *
 * For every usable child with at least one heterozygous (genotype code 3),
 * unmasked parent, the number of transmitted copies of allele 1 is added to T
 * and the non-transmitted ones to NT; completeTrios counts the contributing
 * children. When at least one child contributed, TDTChiSq = (T-NT)^2/(T+NT)
 * and TDTPvalue is taken from chidist() with 1 degree of freedom.
 *
 * A child is skipped when it is flagged as a duplicate, has no parent column,
 * is masked or untyped, has unknown sex (code 0) on an X-linked marker, has a
 * Mendel error recorded for itself or a parent, or when neither parent is
 * unmasked / typed.
 *
 * A parent column of -1 means "no such parent". Such a parent is treated as
 * masked and untyped and is never used as an array index, so children with
 * only one known parent are handled without reading outside the arrays.
 *
 * Note: the `mendelErrors` parameter (per-sample flags) hides the member of
 * the same name inside this routine.
 */
void SecondPass::TDTStatistic(IntArray & genotypes, IntArray & fatherColumn, IntArray & motherColumn, IntArray & mask, IntArray & sexCodes, IntArray & mendelErrors, IntArray & isDuplicateSample, bool isXLinked)
{
   const int length = genotypes.Length();

   completeTrios = 0;
   T  = 0;
   NT = 0;
   TDTChiSq  = 0;
   TDTPvalue = 0;

   for (int i = 1; i < length; i++)
   {
      if (isDuplicateSample[i] == 1)   continue; // is not the first sample of this sampleId

      const int  father    = fatherColumn[i];
      const int  mother    = motherColumn[i];
      const bool hasFather = (father != -1);
      const bool hasMother = (mother != -1);
      if (!hasFather && !hasMother)   continue;  // no parents

      // failed sample or both parents failed first pass
      const bool fatherMasked = !hasFather || mask[father] > 0;
      const bool motherMasked = !hasMother || mask[mother] > 0;
      if (mask[i] > 0 || (fatherMasked && motherMasked))   continue;

      // missing sample or missing both parents genotypes
      const bool fatherUntyped = !hasFather || genotypes[father] == 0;
      const bool motherUntyped = !hasMother || genotypes[mother] == 0;
      if (genotypes[i] == 0 || (fatherUntyped && motherUntyped))   continue;

      if (isXLinked && sexCodes[i] == 0)   continue;  // X-linked marker and sample of unknown sex

      // mendel error in family for this marker
      if (mendelErrors[i] > 0
          || (hasFather && mendelErrors[father] > 0)
          || (hasMother && mendelErrors[mother] > 0))
         continue;

      // a parent is informative when it is heterozygous and passed the mask
      const bool motherHet = hasMother && genotypes[mother] == 3 && mask[mother] == 0;
      const bool fatherHet = hasFather && genotypes[father] == 3 && mask[father] == 0;

      // the other parent's genotype cannot be used to resolve the transmission
      const bool noFather = fatherUntyped || fatherMasked;
      const bool noMother = motherUntyped || motherMasked;

      double trans = 0;

      if (motherHet && fatherHet) // both parents valid and heterozygous -- cant be sex linked
      {
         // copies of allele 1 in the child: code 1 -> 2, code 3 -> 1, code 2 -> 0
         trans = genotypes[i] % 2;
         if (genotypes[i] == 1)
            ++trans;

         T  += trans;
         NT += 2 - trans;
      }
      else if (motherHet)
      {
         if (isXLinked && sexCodes[i] == 1)
         {
            // sex code 1 on an X-linked marker: the child's genotype alone
            // shows what the mother transmitted
            trans = (genotypes[i] % 2);
         }
         else
         {
            if (noFather)
               continue;

            if (genotypes[father] == 1)
               trans = (genotypes[i] == 1) ? 1 : 0;
            else if (genotypes[father] == 2)
               trans = (genotypes[i] == 3) ? 1 : 0;
         }

         T  += trans;
         NT += 1 - trans;
      }
      else if (fatherHet)
      {
         if (noMother)
            continue;

         if (genotypes[mother] == 1)
            trans = (genotypes[i] == 1) ? 1 : 0;
         else if (genotypes[mother] == 2)
            trans = (genotypes[i] == 3) ? 1 : 0;

         T  += trans;
         NT += 1 - trans;
      }
      else
      {
         continue;   // neither parent is informative
      }

      completeTrios++;
   }

   if (completeTrios != 0)
   {
      TDTChiSq  = (T-NT)*(T-NT)/(T+NT);
      TDTPvalue = chidist(TDTChiSq, 1.0);
   }
}

/*
 * Allelic chi-square test of association between the marker and the sample
 * labels (a 2 x numLabels table of allele counts).
 *
 *   sampleLabels  label of each sample, read at index i-1 for sample i;
 *                 -1 excludes the sample
 *   numLabels     number of distinct labels (columns of the table)
 *
 * Samples that are masked, flagged as duplicates, unlabeled or untyped are
 * skipped; on sex-linked markers samples with SEX_MISSING are skipped too and
 * a MALE contributes one allele instead of two.
 *
 * Results: assocChiSq, assocPvalue and numStrataTested (labels that received
 * at least one allele). Both statistics are set to -1 when the table is
 * empty, fewer than two labels are populated, or only one allele was seen.
 *
 * totalCount is increased by exactly what is added to the table, so the
 * marginal probabilities always sum to one. A heterozygous call (code 3) for
 * a single-allele sample, or a genotype code other than 1..3, adds nothing.
 * The p-value uses numStrataTested - 1 degrees of freedom, because empty
 * labels are dropped from the statistic.
 */
void SecondPass::AssociationTest(IntArray &genotypes, IntArray &sampleLabels, IntArray &isDuplicateSample, IntArray & sexCodes, IntArray &mask, bool isSexLinked, int numLabels)
{
   /* observedCounts[a][label]: copies of allele a+1 seen in samples with that label */
   IntArray observedCounts[2];
   for (int a = 0; a < 2; a++)
   {
      observedCounts[a].Dimension(numLabels);
      observedCounts[a].Zero();
   }

   int totalCount = 0;

   for (int i = 1; i < genotypes.Length(); i++)
   {
      if (mask[i] != 0)   continue;
      if (isSexLinked && sexCodes[i] == SEX_MISSING)   continue;
      if (isDuplicateSample[i] == 1)   continue;

      const int label = sampleLabels[i-1];
      if (label == -1)   continue;
      if (genotypes[i] == 0)   continue;

      const int numAlleles = (isSexLinked && sexCodes[i] == MALE) ? 1 : 2;

      switch (genotypes[i])
      {
         case 1: observedCounts[0][label] += numAlleles;
                 totalCount += numAlleles;
                 break;
         case 2: observedCounts[1][label] += numAlleles;
                 totalCount += numAlleles;
                 break;
         case 3: observedCounts[0][label] += numAlleles/2;
                 observedCounts[1][label] += numAlleles/2;
                 totalCount += 2 * (numAlleles/2);
                 break;
      }
   }

   if (totalCount == 0)
   {
      assocChiSq = -1.0;
      assocPvalue = -1.0;
      return;
   }

   double rowProbs[2];
   rowProbs[0] = double(observedCounts[0].Sum())/totalCount;
   rowProbs[1] = double(observedCounts[1].Sum())/totalCount;

   numStrataTested = numLabels;
   assocChiSq = 0.0;
   for (int i = 0; i < numLabels; i++)
   {
      const int columnTotal = observedCounts[0][i] + observedCounts[1][i];
      if (columnTotal == 0)
      {
         numStrataTested--;   // empty label: not part of the test
         continue;
      }

      const double columnProb = double(columnTotal)/totalCount;
      for (int a = 0; a < 2; a++)
      {
         const double expected  = totalCount*rowProbs[a]*columnProb;
         const double deviation = observedCounts[a][i] - expected;
         // the tiny constant keeps the ratio defined when an allele was never seen
         assocChiSq += deviation*deviation/(expected + 1e-30);
      }
   }

   if (numStrataTested < 2 || rowProbs[0] == 0.0 || rowProbs[1] == 0.0)
   {
      assocChiSq = -1.0;
      assocPvalue = -1.0;
      return;
   }

   assocPvalue = chidist(assocChiSq, numStrataTested - 1);
}

// Currently a no-op: the body is empty.
void SecondPass::ReleaseMemory()
{
}

