////////////////////////////////////////////////////////////////////// 
// ldmax/LdMax.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "LdMax.h"
#include "MathConstant.h"
#include "MathStats.h"
#include "Error.h"

#include <limits.h>
#include <math.h>

// Builds an analyser with no markers selected: the dynamically allocated
// tables are null, every dimension is zero, and only founders are counted
// by default.
LDMax::LDMax()
   {
   // Nothing is allocated until SelectMarkers() builds the tables
   haplotypeList2 = NULL;
   haplotypeList1 = NULL;
   alleleFrequencies = NULL;

   nloci = 0;
   nhaplo = 0;
   ngeno = 0;

   foundersOnly = true;
   }

// Releases the per-locus allele frequency vectors and the per-genotype
// haplotype lists allocated by SelectMarkers().
LDMax::~LDMax()
   {
   // delete [] on a null pointer does nothing, so no guards are required
   delete [] haplotypeList2;
   delete [] haplotypeList1;
   delete [] alleleFrequencies;
   }

// Selects the markers to analyse and tabulates the data observed in ped.
//
// new_markers holds the marker indices used to look up alleles in ped.
// Tables left over from an earlier call are released first. Every individual
// that passes the foundersOnly filter and is genotyped at all selected
// markers contributes one multilocus genotype count and two allele counts
// per locus; the allele counts are then converted to frequencies. Finally,
// for every multilocus genotype, the pairs of haplotype indices that produce
// it are listed in haplotypeList1 / haplotypeList2.
//
// Returns false, leaving the tables incomplete, when
//   - some marker has no alleles (so no haplotype is possible),
//   - the number of haplotypes or genotypes would not fit in an int, or
//   - fewer individuals were counted than there are possible haplotypes.
// Returns true otherwise, with haplotype frequencies initialised to a
// uniform distribution and genotype frequencies derived from them.
bool LDMax::SelectMarkers(const IntArray & new_markers, Pedigree & ped)
   {
   // Discard whatever a previous selection allocated
   if (alleleFrequencies != NULL) delete [] alleleFrequencies;
   if (haplotypeList1 != NULL) delete [] haplotypeList1;
   if (haplotypeList2 != NULL) delete [] haplotypeList2;
   alleleFrequencies = NULL;
   haplotypeList1 = haplotypeList2 = NULL;

   markers = new_markers;

   nloci = markers.Length();

   // Get no. of alleles (and genotypes) at each locus
   // as well as observed allele frequencies and genotype counts
   nAlleles.Dimension(nloci);
   nGenotypes.Dimension(nloci);
   haplotype1.Dimension(nloci);
   haplotype2.Dimension(nloci);
   genotype.Dimension(nloci);

   alleleFrequencies = new Vector[nloci];

   for (int i = 0; i < nloci; i++)
      {
      int m = markers[i];

      nAlleles[i] = ped.CountAlleles(m);
      // Unordered pairs of alleles, homozygotes included
      nGenotypes[i] = nAlleles[i] * (nAlleles[i] + 1) / 2;

      alleleFrequencies[i].Dimension(nAlleles[i]);
      alleleFrequencies[i].Set(0);
      }

   // calculate the total number of haplotypes and genotypes
   nhaplo = ngeno = 1;

   for (int i = 0; i < nloci; i++)
      {
      // Both products grow exponentially with the number of loci and signed
      // overflow is undefined, so give up before either can exceed INT_MAX.
      // A haplotype count that large could never be matched by the number
      // of individuals anyway. Loci without alleles are not tested here;
      // they zero the product and are rejected just below.
      if (nAlleles[i] > 0 &&
          (nhaplo > INT_MAX / nAlleles[i] || ngeno > INT_MAX / nGenotypes[i]))
         return false;

      nhaplo *= nAlleles[i];
      ngeno *= nGenotypes[i];
      }

   if (nhaplo == 0)
      return false;

   // Start the haplotype frequencies from a uniform distribution
   haplotypeFrequencies.Dimension(nhaplo);
   haplotypeFrequencies.Set(1.0 / nhaplo);
   genotypeFrequencies.Dimension(ngeno);

   // count observed alleles and genotypes
   genotypeCounts.Dimension(ngeno);
   genotypeCounts.Set(0);
   count = 0;

   for (int i = 0; i < ped.count; i++)
      {
      if (foundersOnly && !ped[i].isFounder()) continue;
      if (!isGenotyped(ped[i])) continue;

      GetGenotype(ped[i]);
      genotypeCounts[GenotypeIndex()]++;
      count++;

      for (int j = 0; j < nloci; j++)
         {
         int m = markers[j];

         // Allele labels are used as 1-based positions in the count vector
         alleleFrequencies[j][ped[i].markers[m].one-1]++;
         alleleFrequencies[j][ped[i].markers[m].two-1]++;
         }
      }

   if (count < nhaplo)
      { return false; }

   // Each counted individual contributed two alleles per locus
   for (int j = 0; j < nloci; j++)
      alleleFrequencies[j] /= (count * 2.0);

   // build genotype list
   haplotypeList1 = new IntArray[ngeno];
   haplotypeList2 = new IntArray[ngeno];

   // Visit every unordered pair of haplotypes (haplotype2 starts at
   // haplotype1 and runs to the last one) and file the pair under the
   // multilocus genotype it produces
   ResetHaplotype(haplotype1);
   do {
      haplotype2 = haplotype1;
      do {
         GetGenotype();
         int idx = GenotypeIndex();
         haplotypeList1[idx].Append(HaplotypeIndex(haplotype1));
         haplotypeList2[idx].Append(HaplotypeIndex(haplotype2));
      } while (IncrementHaplotype(haplotype2));
   } while (IncrementHaplotype(haplotype1));

   UpdateGenotypeFrequencies();

   return true;
   }

// Returns true when person p has a known genotype at every selected marker
// (trivially true when no markers are selected).
bool LDMax::isGenotyped(Person & p)
   {
   int locus = 0;

   // Stop at the first selected marker whose genotype is not known
   while (locus < nloci && p.markers[markers[locus]].isKnown())
      locus++;

   return locus >= nloci;
   }

// Encodes the genotype of person p at each selected marker into genotype[].
// With hi >= lo the two allele labels, the code is hi * (hi - 1) / 2 + lo,
// so it does not depend on the order in which the alleles are stored.
void LDMax::GetGenotype(Person & p)
   {
   for (int locus = 0; locus < nloci; locus++)
      {
      int marker = markers[locus];
      int hi = p.markers[marker].one;
      int lo = p.markers[marker].two;

      // Put the larger allele label in hi
      if (hi < lo)
         {
         int swap = hi;
         hi = lo;
         lo = swap;
         }

      genotype[locus] = hi * (hi - 1) / 2 + lo;
      }
   }

// Encodes the genotype formed by pairing haplotype1 with haplotype2 into
// genotype[]. With hi >= lo the two allele labels at a locus, the code is
// hi * (hi - 1) / 2 + lo.
void LDMax::GetGenotype()
   {
   for (int locus = 0; locus < nloci; locus++)
      {
      int hi = haplotype1[locus];
      int lo = haplotype2[locus];

      // Put the larger allele label in hi
      if (hi < lo)
         {
         int swap = hi;
         hi = lo;
         lo = swap;
         }

      genotype[locus] = hi * (hi - 1) / 2 + lo;
      }
   }

// Collapses the per-locus codes in genotype[] into one zero-based index.
// The codes are treated as the digits (less one) of a mixed-radix number
// whose radix at each locus is nGenotypes[locus]; locus 0 is the least
// significant digit.
int LDMax::GenotypeIndex()
   {
   int index = 0;

   // Horner evaluation, starting from the most significant locus
   for (int locus = nloci - 1; locus >= 0; locus--)
      index = index * nGenotypes[locus] + (genotype[locus] - 1);

   return index;
   }

// Rewinds haplo to the starting point used by the haplotype enumeration
// loops, which then step through the remaining haplotypes with
// IncrementHaplotype().
void LDMax::ResetHaplotype(IntArray & haplo)
   {
   // presumably assigns allele 1 to every locus - confirm IntArray::Set
   haplo.Set(1);
   }

// Returns true when haplo carries, at every locus, the allele whose label
// equals the number of alleles at that locus.
bool LDMax::isLastHaplotype(IntArray & haplo)
   {
   bool atMaximum = true;

   // Stop scanning as soon as one locus is found below its maximum
   for (int locus = 0; atMaximum && locus < nloci; locus++)
      atMaximum = (haplo[locus] == nAlleles[locus]);

   return atMaximum;
   }

// Steps haplo to the next allele combination, with locus 0 changing fastest.
// Returns false, leaving haplo untouched, when every locus already holds the
// allele whose label equals nAlleles at that locus.
bool LDMax::IncrementHaplotype(IntArray & haplo)
   {
   for (int locus = 0; locus < nloci; locus++)
      {
      // Loci already at their maximum roll over once a later one advances
      if (haplo[locus] == nAlleles[locus])
         continue;

      haplo[locus]++;

      // Every locus before this one was at its maximum: wrap them to 1
      for (int lower = locus - 1; lower >= 0; lower--)
         haplo[lower] = 1;

      return true;
      }

   return false;
   }

// Converts the allele labels in haplo into a zero-based haplotype index.
// The labels (less one) are the digits of a mixed-radix number whose radix
// at each locus is nAlleles[locus]; locus 0 is the least significant digit.
int LDMax::HaplotypeIndex(IntArray & haplo)
   {
   int index = 0;

   // Horner evaluation, starting from the most significant locus
   for (int locus = nloci - 1; locus >= 0; locus--)
      index = index * nAlleles[locus] + (haplo[locus] - 1);

   return index;
   }

// Runs the complete analysis for the currently selected markers:
//   - maximises the likelihood and stores the resulting haplotype
//     frequencies in estimates and the log-likelihood in logLKfull,
//   - replaces the haplotype frequencies with products of allele
//     frequencies, stored in defaults, with log-likelihood logLKnull,
//   - for two loci with two alleles each, computes deltasq,
//   - for any two loci, accumulates Dprime over all haplotypes,
//   - sets ChiSq to twice the log-likelihood difference, together with the
//     degrees of freedom and pvalue.
// On return haplotypeFrequencies holds the null-hypothesis values.
//
// NOTE(review): the member D is only ever reset to zero here; the
// per-haplotype disequilibrium lives in a local variable. Confirm whether D
// was meant to be reported.
void LDMax::TheWorks()
   {
   D = 0.0;
   Dprime = 0.0;
   deltasq = 0.0;

   // Full model: haplotype frequencies estimated by Maximize()
   Maximize();
   estimates = haplotypeFrequencies;
   logLKfull = LogLikelihood();

   // Null model: haplotype frequencies are products of allele frequencies
   NullFrequencies();
   defaults = haplotypeFrequencies;
   logLKnull = LogLikelihood();

   if (nloci == 2)
      {
      if (nAlleles[0] == 2 && nAlleles[1] == 2)
         {
         // Product of the four marginal sums of the estimated 2 x 2 table
         double denominator = estimates[0] + estimates[2];
         denominator *= estimates[1] + estimates[3];
         denominator *= estimates[0] + estimates[1];
         denominator *= estimates[2] + estimates[3];

         // Leave deltasq at zero when the denominator is too close to zero
         if (denominator > 1e-7)
            {
            double cross = estimates[0] * estimates[3] -
                           estimates[1] * estimates[2];

            deltasq = cross * cross / denominator;
            }
         }

      // Sum |D / Dmax| over all haplotypes, each weighted by the frequency
      // expected from the allele frequencies alone
      ResetHaplotype(haplotype1);
      do {
         double observed = estimates[HaplotypeIndex(haplotype1)];

         double p = alleleFrequencies[0][haplotype1[0]-1];
         double q = 1 - p;
         double r = alleleFrequencies[1][haplotype1[1]-1];
         double s = 1 - r;

         double expected = p * r;
         double disequilibrium = observed - expected;

         // Haplotypes with negligible disequilibrium contribute nothing
         if (fabs(disequilibrium) > 1e-7)
            {
            double limit;

            // The bound on the disequilibrium depends on its sign
            if (disequilibrium >= 0)
               limit = min(p*s, q*r);
            else
               limit = min(p*r, q*s);

            // TINY keeps the division defined when the bound is zero
            Dprime += fabs(disequilibrium/(limit + TINY)) * expected;
            }
      } while (IncrementHaplotype(haplotype1));
      }

   ChiSq = 2 * (logLKfull - logLKnull);

   dfFull = count - nhaplo + 1;

   dfNull = count;
   for (int locus = 0; locus < nloci; locus++)
      dfNull -= nAlleles[locus] - 1;

   // chidist() is presumably the chi-squared tail probability - confirm in
   // MathStats.h. Negative statistics are clamped to zero before the lookup.
   if (dfFull < dfNull)
      pvalue = chidist(max(ChiSq,0.0), dfNull - dfFull);
   else
      pvalue = 1.0;
   }

// Returns the product, over all selected loci, of the frequency of the
// allele that haplo carries at that locus.
double LDMax::NullFreq(IntArray & haplo)
   {
   double product = 1.0;
   int locus = 0;

   while (locus < nloci)
      {
      // Allele labels are 1-based positions in the frequency vector
      product *= alleleFrequencies[locus][haplo[locus]-1];
      locus++;
      }

   return product;
   }

// Decodes a zero-based haplotype index into allele labels, storing them in
// haplo. This reverses the encoding used by HaplotypeIndex(): locus 0 is
// the least significant digit and labels start at 1.
void LDMax::GetHaplotype(IntArray & haplo, int number)
   {
   // Place value just above the most significant locus
   int stride = 1;
   for (int k = 0; k < nloci; k++)
      stride *= nAlleles[k];

   int remainder = number;
   int locus = nloci;

   // Peel off digits from the most significant locus downwards
   while (locus-- > 0)
      {
      stride /= nAlleles[locus];
      haplo[locus] = remainder / stride + 1;
      remainder %= stride;
      }
   }

void LDMax::UpdateGenotypeFrequencies()
   {
   genotypeFrequencies.Set(TINY);

   for (int i = 0; i < ngeno; i++)
      for (int j = 0; j < haplotypeList1[i].Length(); j++)
         {
         int h1 = haplotypeList1[i][j];
         int h2 = haplotypeList2[i][j];

         double f = haplotypeFrequencies[h1] * haplotypeFrequencies[h2];

         if (h1 == h2)
            genotypeFrequencies[i] += f;
         else
            genotypeFrequencies[i] += 2*f;
         }
   }

void LDMax::UpdateHaplotypeFrequencies()
   {
   Vector next(haplotypeFrequencies.dim);
   next.Zero();

   for (int i = 0; i < ngeno; i++)
      {
      if (!genotypeCounts[i]) continue;
      double k = (double) genotypeCounts[i] / (double) count;

      for (int j = 0; j < haplotypeList1[i].Length(); j++)
         {
         int h1 = haplotypeList1[i][j];
         int h2 = haplotypeList2[i][j];

         double f = haplotypeFrequencies[h1] * haplotypeFrequencies[h2];

         if (h1 != h2) f *= 2;

         next[h1] += k * f / genotypeFrequencies[i];
         next[h2] += k * f / genotypeFrequencies[i];
         }
      }
   next *= 0.5;
   haplotypeFrequencies = next;
   }

// Sets every haplotype frequency to the value returned by NullFreq() for
// that haplotype (the product of its allele frequencies) and then refreshes
// the genotype frequencies. haplotype1 is used as scratch space.
void LDMax::NullFrequencies()
   {
   int index = 0;

   while (index < nhaplo)
      {
      // Decode the index into allele labels before looking up frequencies
      GetHaplotype(haplotype1, index);
      haplotypeFrequencies[index] = NullFreq(haplotype1);
      index++;
      }

   UpdateGenotypeFrequencies();
   }

// Returns the log-likelihood of the observed genotype counts under the
// current genotypeFrequencies: the sum over observed genotypes of
// log(frequency) times count.
double LDMax::LogLikelihood()
   {
   double total = 0;

   for (int g = 0; g < ngeno; g++)
      {
      // Unobserved genotypes add nothing to the sum
      if (genotypeCounts[g] == 0)
         continue;

      total += log(genotypeFrequencies[g]) * genotypeCounts[g];
      }

   return total;
   }

// Repeats the haplotype / genotype frequency updates until the change in
// log-likelihood between two consecutive rounds is no larger, in absolute
// value, than TOL times the latest log-likelihood. At least one round is
// always performed.
void LDMax::Maximize()
   {
   double previous = LogLikelihood();

   for (;;)
      {
      UpdateHaplotypeFrequencies();
      UpdateGenotypeFrequencies();

      double current = LogLikelihood();
      double change = previous - current;
      previous = current;

      // Written as a negated "greater than" so that a NaN also ends the loop
      if (!(fabs(change) > fabs(previous * TOL)))
         break;
      }
   }

 
