////////////////////////////////////////////////////////////////////// 
// libsrc/PedigreeAlleleFreq.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "PedigreeAlleleFreq.h"
#include "Error.h"

// Returns the largest allele code seen at `marker` in either allele slot
// of any individual in `ped`. The result is 0 when the pedigree is empty
// or when no code greater than zero is present.
int CountAlleles(Pedigree & ped, int marker)
   {
   int highest = 0;

   for (int person = 0; person < ped.count; person++)
      for (int copy = 0; copy < 2; copy++)
         if (ped[person].markers[marker][copy] > highest)
            highest = ped[person].markers[marker][copy];

   return highest;
   }

// Relabels the alleles at `marker` so that infrequent alleles share a
// single pooled label.
//
// Alleles are ranked by how often they are observed (most frequent
// first). An allele whose count exceeds int(total * threshold), where
// total is the number of observed alleles with a non-zero code, is
// relabelled with its rank. Every remaining allele receives the first
// label after the retained ones. Code 0 is always left as 0. Markers
// with fewer than three alleles are not modified.
void LumpAlleles(Pedigree & ped, double threshold, int marker)
   {
   const int nAlleles = ped.CountAlleles(marker);

   // With one or two alleles there is nothing worth pooling
   if (nAlleles <= 2) return;

   const int slots = nAlleles + 1;
   int * tally = new int [slots];   // occurrences, later reused as new labels
   int * order = new int [slots];   // allele codes ranked by occurrences

   for (int a = 0; a < slots; a++)
      {
      order[a] = a;
      tally[a] = 0;
      }

   // Tally both allele slots of every individual
   for (int p = 0; p < ped.count; p++)
      {
      Alleles & geno = ped[p].markers[marker];
      tally[geno[0]]++;
      tally[geno[1]]++;
      }

   // Insertion sort of order[1..nAlleles] by decreasing tally; the number
   // of alleles is expected to be small. order[next] still equals next
   // when allele `next` is about to be inserted.
   for (int next = 2; next <= nAlleles; next++)
      {
      const int key = tally[next];
      int slot = next;

      while (slot > 1 && tally[order[slot - 1]] < key)
         {
         order[slot] = order[slot - 1];
         slot--;
         }

      order[slot] = next;
      }

   int observed = 0;
   for (int a = 1; a <= nAlleles; a++)
      observed += tally[a];
   const int cutoff = static_cast<int>(observed * threshold);

   // Turn each tally into the new label for that allele. Each entry is
   // read before it is overwritten, because order[] visits every allele
   // code exactly once.
   int pooled = 1;
   for (int rank = 1; rank <= nAlleles; rank++)
      {
      int & label = tally[order[rank]];

      if (label > cutoff)
         {
         label = rank;
         pooled++;
         }
      else
         label = pooled;
      }
   tally[0] = 0;

   // Apply the new labels to every genotype
   for (int p = 0; p < ped.count; p++)
      {
      Alleles & geno = ped[p].markers[marker];
      geno[0] = tally[geno[0]];
      geno[1] = tally[geno[1]];
      }

   delete [] tally;
   delete [] order;
   }

// Validates or estimates the allele frequencies stored for `marker`.
//
// Allele codes are counted twice: over every individual in `ped` and over
// founders only. Slot 0 of each tally collects allele code 0 and is left
// out of all frequency totals.
//
// info->freq is indexed by allele code, so a marker described with K
// alleles holds K + 1 entries (indices 0..K).
//
// When frequencies are already present (info->freq.dim > 0) they are
// checked against the observed alleles, error() is called for any
// inconsistency, the frequencies are left untouched and false is
// returned.
//
// Otherwise info->freq is filled in and true is returned:
//   - highest allele code <= 1 : two-allele placeholder (0.99999, 0.00001)
//   - FREQ_FOUNDERS            : founder counts; falls back to the counts
//                                over all individuals when no founder
//                                carries a non-zero allele code
//   - FREQ_ALL                 : counts over all individuals
//   - FREQ_EQUAL               : every entry set to 1 / alleleCount
//   - any other estimator      : frequencies stay at zero
bool EstimateFrequencies(Pedigree & ped, int marker, int estimator)
   {
   int alleleCount = CountAlleles(ped, marker);

   IntArray founder(alleleCount + 1);
   IntArray all(alleleCount + 1);

   founder.Zero();
   all.Zero();

   for (int i = 0; i < ped.count; i++)
      {
      all[ped[i].markers[marker][0]]++;
      all[ped[i].markers[marker][1]]++;
      if (!ped[i].isFounder()) continue;
      founder[ped[i].markers[marker][0]]++;
      founder[ped[i].markers[marker][1]]++;
      }

   MarkerInfo * info = ped.GetMarkerInfo(marker);

   if (info->freq.dim > 0)
      {
      // previous allele frequency information is available

      // freq has one entry per allele code plus entry 0, so the highest
      // allele code it can describe is dim - 1
      int definedAlleles = info->freq.dim - 1;

      // NOTE(review): the loop below relies on error() not returning
      // when an out-of-range allele is found -- confirm
      if (alleleCount > definedAlleles)
         error("Although marker %s has been specified with %d alleles,\n"
               "allele %d occurs in the pedigree\n",
               (const char *) info->name, definedAlleles, alleleCount);

      for (int i = 1; i <= alleleCount; i++)
         if (founder[i] > 0 && info->freq[i] <= 0.0)
            error("Although allele %d for marker %s has frequency zero,\n"
                  "it occurs %d times in the pedigree",
                  i, (const char *) info->name, founder[i]);
      return false;
      }
   else
      {
      if (alleleCount <= 1)
         {
         // No allele code above 1 was observed (nobody genotyped, or only
         // allele 1 present). Default to two alleles, one common and one
         // very rare, since some programs do not like monomorphic markers
         info->freq.Dimension(3);
         info->freq[0] = 0.0;
         info->freq[1] = 0.99999;
         info->freq[2] = 0.00001;
         return true;
         }

      info->freq.Dimension(alleleCount + 1);
      info->freq.Zero();

      if (estimator == FREQ_FOUNDERS && founder.Sum() > founder[0])
         {
         // Make sure the frequency of alleles occurring in the pedigree
         // is never zero
         for (int i = 1; i <= alleleCount; i++)
            if (founder[i] == 0 && all[i] > 0)
               founder[i] = 1;

         // To get frequencies, just multiply counts by 1 / total_counts
         double factor = 1.0 / (founder.Sum() - founder[0]);

         for (int i = 1; i <= alleleCount; i++)
            info->freq[i] = founder[i] * factor;
         }
      else if (estimator == FREQ_ALL || estimator == FREQ_FOUNDERS)
         {
         // Also reached for FREQ_FOUNDERS when no founder carries a
         // non-zero allele code.
         // To get frequencies, just multiply counts by 1 / total_counts
         double factor = 1.0 / (all.Sum() - all[0]);

         for (int i = 1; i <= alleleCount; i++)
            info->freq[i] = all[i] * factor;
         }
      else if (estimator == FREQ_EQUAL)
         // Assume all alleles have equal frequency
         // TODO -- should all occurring alleles have equal frequency instead?
         info->freq.Set(1.0 / alleleCount);
      }

   return true;
   }

 
