////////////////////////////////////////////////////////////////////// 
// libsrc/Pedigree.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "Pedigree.h"
#include "Constant.h"
#include "Error.h"
#include "Sort.h"

#include <stdlib.h>

// Definitions of the Pedigree class-wide (static) data members.
// writeHalves starts out as 0; it is not read by any code visible in this file.
int    Pedigree::writeHalves = 0;
// Initialised with the text "-99.999"; presumably the token that stands for a
// missing value -- TODO confirm against the code that reads Pedigree::missing.
String Pedigree::missing("-99.999");

// Creates an empty pedigree: no people, no families, room for 10000 person
// pointers and a single-slot family table that MakeFamilies() later replaces.
Pedigree::Pedigree() : pd ()
   {
   count = 0;
   haveTwins = 0;
   familyCount = 0;

   size = 10000;
   persons = new Person * [size];
   families = new Family * [1];
   }

// Destroys every Person and Family held by the pedigree (people first), then
// releases the two pointer tables.
Pedigree::~Pedigree()
   {
   for (Person ** p = persons; p < persons + count; p++)
      delete *p;

   for (Family ** f = families; f < families + familyCount; f++)
      delete *f;

   delete [] families;
   delete [] persons;
   }

void Pedigree::Sort()
   {
   QuickSort(persons, count, sizeof (Person *),
         COMPAREFUNC Pedigree::ComparePersons);

   haveTwins = 0;

   // Check that we have no duplicates...
   for (int i = 1; i < count; i++)
      if (ComparePersons( (const Person **) &persons[i-1],
                          (const Person **) &persons[i]) == 0)
         error ("Family %s: Person %s is duplicated",
                (const char *) persons[i]->famid,
                (const char *) persons[i]->pid);

   // Assign parents...
   for (int i = 0; i < count; i++)
      {
      persons[i]->serial = i;
      persons[i]->father = FindPerson(persons[i]->famid, persons[i]->fatid);
      persons[i]->mother = FindPerson(persons[i]->famid, persons[i]->motid);
      persons[i]->AssessStatus();

      // Check if we have any twins...
      haveTwins |= persons[i]->zygosity;
      }

   MakeSibships();
   MakeFamilies();
   }

void Pedigree::MakeSibships()
   {
   Person ** sibs = new Person * [count];
   for (int i = 0; i < count; i++)
      sibs[i] = persons[i];

   QuickSort(sibs, count, sizeof (Person *),
             COMPAREFUNC Pedigree::CompareParents);

   for (int first = 0; first < count; first++)
      if (!sibs[first]->isFounder())
         {
         int last = first + 1;
         while (last < count)
            if (sibs[first]-> mother != sibs[last]->mother ||
                sibs[first]-> father != sibs[last]->father)
               break;
            else last++;
         last --;

         for (int j = first; j <= last; j++)
            {
            if (sibs[j]->sibCount) delete [] sibs[j]->sibs;
            sibs[j]->sibCount = last - first + 1;
            sibs[j]->sibs = new Person * [sibs[j]->sibCount];
            for (int k = first; k <= last; k++)
               sibs[j]->sibs[k - first] = sibs[k];
            }
         first = last;
         }
   delete [] sibs;
   }

// Discards the current family table and builds a new one with one Family per
// run of consecutive persons sharing a family id. Each Family is constructed
// with the first and last person index of its run and its own position in
// the table.
void Pedigree::MakeFamilies()
   {
   for (int f = 0; f < familyCount; f++)
      delete families[f];
   delete [] families;

   familyCount = 0;
   // There can never be more families than people
   families = new Family * [count];

   int start = 0;
   while (start < count)
      {
      // Advance 'stop' to the first person that belongs to another family
      int stop = start;
      while (stop < count &&
             SlowCompare(persons[start]->famid, persons[stop]->famid) == 0)
         stop++;

      int lastIndex = stop - 1;
      families[familyCount] = new Family(*this, start, lastIndex, familyCount);
      familyCount++;

      start = stop;
      }
   }

// Utility functions for finding a person in a pedigree

// Search key passed to BinarySearch by the Find* functions: a family id plus
// a person id (family lookups only consult famid).
struct PedigreeKey
   {
   const char * famid, * pid;
   };

// BinarySearch callback: orders a key against a person table entry, by family
// id first and by person id when the family ids compare equal.
int CompareKeyToPerson(PedigreeKey * key, Person ** p)
   {
   Person & candidate = **p;

   int byFamily = SlowCompare(key->famid, candidate.famid);

   return (byFamily != 0) ? byFamily
                          : SlowCompare(key->pid, candidate.pid);
   }

// BinarySearch callback: orders a key against a family table entry using the
// family id only.
int CompareKeyToFamily(PedigreeKey * key, Family ** f)
   {
   Family * family = *f;

   return SlowCompare(key->famid, family->famid);
   }

// Looks a person up by family id and person id among all 'count' entries of
// the person table. Returns NULL when BinarySearch finds no match.
Person * Pedigree::FindPerson(const char * famid, const char * pid)
   {
   PedigreeKey key = { famid, pid };

   Person ** match = (Person **) BinarySearch
                     (&key, persons, count, sizeof(Person *),
                     COMPAREFUNC CompareKeyToPerson);

   if (match != NULL)
      return *match;

   return (Person *) NULL;
   }

// Same lookup as the two-argument FindPerson, but the search only covers the
// first 'universe' entries of the person table. Returns NULL when there is no
// match.
Person * Pedigree::FindPerson(const char *famid, const char *pid, int universe)
   {
   PedigreeKey wanted = { famid, pid };

   Person ** slot = (Person **) BinarySearch
                    (&wanted, persons, universe, sizeof(Person *),
                    COMPAREFUNC CompareKeyToPerson);

   if (slot == NULL)
      return (Person *) NULL;

   return *slot;
   }

// Looks a family up by its id in the family table. Returns NULL when
// BinarySearch finds no match.
Family * Pedigree::FindFamily(const char * famid)
   {
   // CompareKeyToFamily never reads the person id, so it is left empty
   PedigreeKey key = { famid, NULL };

   Family ** match = (Family **) BinarySearch
                     (&key, families, familyCount, sizeof(Family *),
                     COMPAREFUNC CompareKeyToFamily);

   if (match != NULL)
      return *match;

   return (Family *) NULL;
   }

// Member-function convenience wrapper: forwards to the global CountAlleles
// for this pedigree and the given marker, and returns its result.
int Pedigree::CountAlleles(int marker)
   {
   return ::CountAlleles(*this, marker);
   }

// Announces the threshold on stdout and then applies the global LumpAlleles
// to every marker in turn, passing 'min' through unchanged.
void Pedigree::LumpAlleles(double min)
   {
   printf("Lumping alleles with frequencies of %.2lf or less...\n\n", min);

   int marker = 0;
   while (marker < markerCount)
      {
      ::LumpAlleles(*this, min, marker);
      marker++;
      }
   }

// Runs the global EstimateFrequencies on every marker and lists, on stdout,
// the names of the markers for which it returned a non-zero result.
//
// estimator -- passed through to the global function; also used as an index
//              (0, 1 or 2) to pick the description shown in the heading. A
//              value outside that range is reported as "unknown estimator"
//              instead of reading past the end of the description table.
//
// The heading is printed once, before the first marker name. Names are
// separated by a space, indented by three columns and wrapped so that no
// output line grows beyond 79 characters. Nothing is printed when no marker
// was estimated.
void Pedigree::EstimateFrequencies(int estimator)
   {
   static const char * const estimators[] =
      { "using all genotypes", "using founder genotypes", "assumed equal" };
   const int estimatorCount = (int) (sizeof(estimators) / sizeof(estimators[0]));

   const char * label = (estimator >= 0 && estimator < estimatorCount) ?
                        estimators[estimator] : "unknown estimator";

   bool estimated = false;
   int  line = 3;      // current output column; 3 accounts for the indent

   for (int m=0; m < markerCount; m++)
      {
      if (!::EstimateFrequencies(*this, m, estimator))
         continue;

      if (!estimated)
         {
         printf("Estimating allele frequencies... [%s]\n   ", label);
         estimated = true;
         }

      // Width taken up by this name plus its trailing space
      int width = markerNames[m].Length() + 1;

      if (line + width > 79)
         {
         printf("\n   ");
         line = 3;
         }

      printf("%s ", (const char *) markerNames[m]);
      line += width;
      }

   if (estimated) printf("\n\n");
   }

int Pedigree::ComparePersons(const Person ** p1, const Person ** p2)
   {
   int result = SlowCompare((**p1).famid, (**p2).famid);

   if (result != 0) return result;

   return SlowCompare((**p1).pid, (**p2).pid);
   }

int Pedigree::CompareParents(const Person ** p1, const Person ** p2)
   {
   int result = SlowCompare((**p1).famid, (**p2).famid);

   if (result) return result;

   result = SlowCompare((**p1).fatid, (**p2).fatid);

   if (result) return result;

   return SlowCompare((**p1).motid, (**p2).motid);
   }

void Pedigree::Grow()
   {
   size *= 2;

   Person ** temp = new Person * [size];
   if (temp == NULL) error("Out of memory");

   for (int i=0; i<count; i++)
      temp[i] = persons[i];

   delete [] persons;
   persons = temp;
   }

// Writes one line per variable to 'output', each made of a one-letter code
// and the variable name: markers (M), traits (T), affections (A) and
// covariates (C), in that order, followed by a closing "E  END-OF-DATA" line.
void Pedigree::WriteDataFile(FILE * output)
   {
   int i;

   for (i = 0; i < markerCount; i++)
      fprintf(output, " M  %s \n", (const char *) markerNames[i]);

   for (i = 0; i < traitCount; i++)
      fprintf(output, " T  %s \n", (const char *) traitNames[i]);

   for (i = 0; i < affectionCount; i++)
      fprintf(output, " A  %s \n", (const char *) affectionNames[i]);

   for (i = 0; i < covariateCount; i++)
      fprintf(output, " C  %s \n", (const char *) covariateNames[i]);

   fputs(" E  END-OF-DATA \n", output);
   }

// Writes every person, in table order, to 'output' and finishes with a line
// containing the word "end".
void Pedigree::WritePedigreeFile(FILE * output)
   {
   int person = 0;
   while (person < count)
      {
      WritePerson(output, person);
      person++;
      }

   fputs("end\n", output);
   }

void Pedigree::WritePerson(FILE * output, int person)
   {
   WritePerson(output, person, persons[person]->famid,
               persons[person]->pid, persons[person]->fatid,
               persons[person]->motid);
   }

// Writes one pedigree line for the person at index 'person', labelled with
// the ids supplied by the caller rather than those stored in the record.
// Columns appear in this order: family, person, father and mother ids, sex,
// marker genotypes, traits, affections, covariates. Values for which the
// matching isPhenotyped / isDiagnosed / isControlled test fails are written
// as "x".
void Pedigree::WritePerson(FILE * output, int person, const char * famid,
                const char * pid, const char * fatid, const char * motid)
   {
   Person * p = persons[person];
   int i;

   fprintf(output, "%4s %2s %2s %2s  %d  ",
           famid, pid, fatid, motid, p->sex);

   // Marker genotypes, both alleles
   for (i = 0; i < markerCount; i++)
      fprintf(output, "%2d/%2d  ", p->markers[i][0], p->markers[i][1]);

   // Quantitative traits
   for (i = 0; i < traitCount; i++)
      {
      if (!p->isPhenotyped(i))
         {
         fputs("      x  ", output);
         continue;
         }
      fprintf(output, "%7.3f  ", p->traits[i]);
      }

   // Affection statuses
   for (i = 0; i < affectionCount; i++)
      {
      if (!p->isDiagnosed(i))
         {
         fputs(" x  ", output);
         continue;
         }
      fprintf(output, "%2d  ", p->affections[i]);
      }

   // Covariates
   for (i = 0; i < covariateCount; i++)
      {
      if (!p->isControlled(i))
         {
         fputs("      x  ", output);
         continue;
         }
      fprintf(output, "%7.3f  ", p->covariates[i]);
      }

   fputc('\n', output);
   }

void Pedigree::WriteDataFile(const char * output)
   {
   FILE * f = fopen(output, "wt");
   if (f == NULL) error("Couldn't open data file %s", output);
   WriteDataFile(f);
   fclose(f);
   }

void Pedigree::WritePedigreeFile(const char * output)
   {
   FILE * f = fopen(output, "wt");
   if (f == NULL) error("Couldn't open pedigree file %s", output);
   WritePedigreeFile(f);
   fclose(f);
   }

// Calls GetAffectionID once per trait with the trait name followed by "*",
// the same name Dichotomize() later asks for. The returned ids are ignored.
void Pedigree::PrepareDichotomization()
   {
   int t = 0;

   while (t < traitCount)
      {
      String new_affection = traitNames[t] + "*";
      GetAffectionID(new_affection);
      t++;
      }
   }

int Pedigree::Dichotomize(int t, double mean)
   {
   String new_affection = traitNames[t] + "*";

   int af = GetAffectionID(new_affection);

   if (mean == _NAN_)
      {
      mean  = 0.0;
      double dcount = 0;
      for (int i = 0; i < count; i++)
         if (persons[i]->isPhenotyped(t) &&
             !persons[i]->isFounder())
            {
            mean += persons[i]->traits[t];
            dcount ++;
            }

      if (!dcount) return af;

      mean /= dcount;
      }

   printf("Dichotomizing %s around mean of %.3f ...\n",
          (const char *) traitNames[t], mean);

   for (int i = 0; i < count; i++)
      if (persons[i]->isPhenotyped(t) && !persons[i]->isFounder())
         persons[i]->affections[af] = persons[i]->traits[t] > mean ? 2 : 1;
      else
         persons[i]->affections[af] = 0;

   Sort();

   return af;
   }

// Applies Dichotomize() to every trait, handing the same 'mean' argument to
// each call. The affection ids returned by Dichotomize() are discarded.
void Pedigree::DichotomizeAll(double mean)
   {
   int trait = 0;

   while (trait < traitCount)
      {
      Dichotomize(trait, mean);
      trait++;
      }
   }

// Checks marker genotypes for consistency with Mendelian inheritance, one
// sibship at a time and one marker at a time.
//
// Two kinds of problem are reported on stdout:
//   1. a genotyped child whose genotype cannot be explained by the known
//      genotype(s) of its parents;
//   2. a sibship whose members, taken together with their genotyped parents,
//      show more distinct alleles, homozygotes or genotypes than the limits
//      tested at the end of the sibship loop.
//
// TwinCheck() is called first when haveTwins is non-zero. After all markers
// have been examined, error() is called if any problem was reported.
void Pedigree::InheritanceCheck()
   {
   // Arrays indicating which alleles and homozygotes occur
   // haplos[allele] and genos[genotype index] hold the index i of the sibship
   // for which that allele / genotype was last counted, so no per-sibship
   // reset is needed; counts[allele] is the number of distinct genotypes
   // carrying that allele within the current sibship.
   IntArray haplos, genos, counts;

   if (haveTwins) TwinCheck();

   bool fail = false;

   // For each marker ...
   for (int m = 0; m < markerCount; m++)
      {
      // Summary for marker
      int alleleCount = CountAlleles(m);
      // Number of unordered allele pairs, homozygotes included
      int genoCount = alleleCount * (alleleCount + 1) / 2;

      // Initialize arrays
      // One extra slot each -- presumably because allele and genotype codes
      // start at 1 (TODO confirm). -1 can never equal a sibship index.
      haplos.Dimension(alleleCount + 1);
      haplos.Set(-1);

      genos.Dimension(genoCount + 1);
      genos.Set(-1);

      counts.Dimension(alleleCount + 1);

      for (int i = 0; i < count; i++)
         // Only the person listed first in their own sibs array is processed
         if  (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
            {
            // This loop runs once per sibship
            Alleles fat = persons[i]->father->markers[m];
            Alleles mot = persons[i]->mother->markers[m];
            bool    fgeno = fat.isKnown();
            bool    mgeno = mot.isKnown();

            // Number of alleles, homozygotes and genotypes in this sibship
            int haplo = 0, homo = 0, diplo = 0;

            // No. of different genotypes per allele
            counts.Zero();

            // In general, there should be no more than 3 genotypes per allele
            // (the flag below is raised once an allele has been seen in more
            // than 2 distinct genotypes, and is only consulted when exactly
            // 4 alleles were observed)
            bool too_many_genos = false;

            for (int j = 0; j < persons[i]->sibCount; j++)
               if (persons[i]->sibs[j]->isGenotyped(m))
                  {
                  Alleles geno = persons[i]->sibs[j]->markers[m];

                  // Presumably non-zero when the parent carries the given
                  // allele -- hasAllele is defined outside this file
                  int fat1 = fat.hasAllele(geno.one);
                  int fat2 = fat.hasAllele(geno.two);
                  int mot1 = mot.hasAllele(geno.one);
                  int mot2 = mot.hasAllele(geno.two);

                  // Incompatible when:
                  //  - both parents are known and the child's two alleles
                  //    cannot be split one to each parent, or
                  //  - the father is known and has neither child allele, or
                  //  - the mother is known and has neither child allele
                  if (fgeno && mgeno && !(fat1 && mot2 || fat2 && mot1) ||
                      fgeno && !(fat1 || fat2) || mgeno && !(mot1 || mot2))
                      {
                      printf("[%s %d/%d] genotype for [%s.%s] "
                         "in brood of [%d/%d]*[%d/%d]\n",
                         (const char *) markerNames[m],
                         geno.one, geno.two,
                         (const char *) persons[i]->famid,
                         (const char *) persons[i]->sibs[j]->pid,
                         fat.one, fat.two, mot.one, mot.two);
                      fail = true;
                      }
                  else
                     {
                     // Count each allele once per sibship
                     if (haplos[geno.one] != i) { haplo++; haplos[geno.one] = i;};
                     if (haplos[geno.two] != i) { haplo++; haplos[geno.two] = i;};

                     // Index of this genotype in the genos array
                     int index = geno.SequenceCoded();

                     // Count each distinct genotype once per sibship
                     if (genos[index] != i)
                        {
                        genos[index] = i;
                        diplo++;
                        counts[geno.one]++;
                        // A homozygote adds to its allele's tally only once
                        if (geno.isHomozygous())
                           homo++;
                        else
                           counts[geno.two]++;
                        if (counts[geno.one] > 2) too_many_genos = true;
                        if (counts[geno.two] > 2) too_many_genos = true;
                        }
                     }
                  }

            // Known parental genotypes add to the allele and homozygote
            // tallies, but not to diplo or counts
            if (fgeno)
               {
               if (haplos[fat.one] != i) { haplo++; haplos[fat.one] = i; }
               if (haplos[fat.two] != i) { haplo++; haplos[fat.two] = i; }
               homo += fat.isHomozygous();
               }

            if (mgeno)
               {
               if (haplos[mot.one] != i) { haplo++; haplos[mot.one] = i; }
               if (haplos[mot.two] != i) { haplo++; haplos[mot.two] = i; }
               homo += mot.isHomozygous();
               }

            // Sibship-level limits: more than 4 distinct child genotypes,
            // alleles plus homozygotes above 4, or 4 alleles with some
            // allele present in more than 2 distinct child genotypes
            if (diplo > 4 || haplo + homo > 4 || haplo == 4 && too_many_genos )
               {
               // List every sib's genotype followed by both parents
               printf("[%s", (const char *) markerNames[m]);
               for (int j = 0; j < persons[i]->sibCount; j++)
                  printf(" %d/%d", persons[i]->sibs[j]->markers[m].one,
                                   persons[i]->sibs[j]->markers[m].two);
               printf("] in brood of [%s.%s %d/%d]*[%s.%s %d/%d]\n",
                      (const char *) persons[i]->famid,
                      (const char *) persons[i]->father->pid, fat.one, fat.two,
                      (const char *) persons[i]->famid,
                      (const char *) persons[i]->mother->pid, mot.one, mot.two);
               fail = true;
               }
            }
      }
   // Report all problems first, then stop once
   if (fail) error("Mendelian inheritance errors detected");
   }




 
