////////////////////////////////////////////////////////////////////// 
// haploxt/Core.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "Core.h"
#include "Error.h"

#include "stdlib.h"

// Reads the haplotype list from HAPLO.LST into hv and prints its statistics.
//
// Reports an error when the file cannot be opened and again when hv holds
// no haplotypes afterwards. Returns hv.count.
//
int ScanHaplotypes ( HaploVector & hv )
   {
   printf("Scanning haplotypes ...\n\n");

   FILE * input = fopen ("HAPLO.LST", "rb");

   if (input != NULL)
      {
      // Load first, then summarize what was loaded
      hv.ReadFromFile(input);
      hv.PrintStats();
      fclose (input);
      }
   else
      error("Couldn't find haplotype input file HAPLO.LST\n");

   // An unreadable or blank file both leave us with nothing to analyse
   if (!hv.count)
      error("Haplotype input file HAPLO.LST is empty\n");

   return hv.count;
   }


void CrossTabAll ( HaploVector & hv )
   {
   AssocChi * ac = new AssocChi[hv.length * (hv.length + 1) / 2];
   AssocEntropy * ae = new AssocEntropy[hv.length * (hv.length + 1) / 2];
   AssocLD * al = new AssocLD[hv.length * (hv.length + 1) / 2];

   int idx = 0;

   for (int i = 0; i < hv.length; i++)
      for (int j = i + 1; j < hv.length; j++)
         {
         CrossTabPair(hv, i, j, ac[idx], ae[idx], al[idx]);
         idx ++;
         }

   printf("NUMBER OF PAIRS SCORED\n");
   printf("======================\n\n");

   idx = 0;

   printf("%3s ", "");
   for (int j = 1; j < hv.length; j++)
      printf("%6d ", j+1);
   printf("\n");

   for (int i = 0; i < (hv.length - 1); i++)
      {
      printf("%3d ", i + 1);
      for (int j = 1; j < hv.length; j++)
         if (j > i)
            {
            if (ac[idx].isValid)
               printf("%6.0f ", ac[idx].sum);
            else
               printf("%6s ", "-");
            idx++;
            }
         else
            printf("%6s ", "");
      printf("\n");
      }
   printf("\n");


   printf("CHI-SQUARED BASED MEASURES OF ASSOCIATION\n"
         "=========================================\n\n");

   idx = 0;
   printf("Chi-Squared Values\n");
   printf("------------------\n");

   printf("%3s ", "");
   for (int j = 1; j < hv.length; j++)
      printf("%6d ", j+1);
   printf("\n");

   for (int i = 0; i < (hv.length - 1); i++)
      {
      printf("%3d ", i + 1);
      for (int j = 1; j < hv.length; j++)
         if (j > i)
            {
            if (ac[idx].isValid)
               printf("%6.2f ", ac[idx].chisq);
            else
               printf("%6s ", "-");
            idx++;
            }
         else
            printf("%6s ", "");
      printf("\n");
      }
   printf("\n");

   idx = 0;
   printf("Chi-Squared Degrees of Freedom\n");
   printf("------------------------------\n");

   printf("%3s ", "");
   for (int j = 1; j < hv.length; j++)
      printf("%6d ", j+1);
   printf("\n");

   for (int i = 0; i < (hv.length - 1); i++)
      {
      printf("%3d ", i + 1);
      for (int j = 1; j < hv.length; j++)
         if (j > i)
            {
            if (ac[idx].isValid)
               printf("%6.0f ", ac[idx].df);
            else
               printf("%6s ", "-");
            idx++;
            }
         else
            printf("%6s ", "");
      printf("\n");
      }
   printf("\n");

   idx = 0;
   printf("Chi-Squared Probabilities\n");
   printf("-------------------------\n");

   printf("%3s ", "");
   for (int j = 1; j < hv.length; j++)
      printf("%6d ", j+1);
   printf("\n");

   for (int i = 0; i < (hv.length - 1); i++)
      {
      printf("%3d ", i + 1);
      for (int j = 1; j < hv.length; j++)
         if (j > i)
            {
            if (ac[idx].isValid)
               printf("%6.4f ", ac[idx].prob);
            else
               printf("%6s ", "-");
            idx++;
            }
         else
            printf("%6s ", "");
      printf("\n");
      }
   printf("\n");

   idx = 0;
   printf("-Log(Chi-Squared Probabilities)\n");
   printf("-------------------------------\n");

   printf("%3s ", "");
   for (int j = 1; j < hv.length; j++)
      printf("%6d ", j+1);
   printf("\n");

   for (int i = 0; i < (hv.length - 1); i++)
      {
      printf("%3d ", i + 1);
      for (int j = 1; j < hv.length; j++)
         if (j > i)
            {
            if (ac[idx].isValid)
               printf("%6.2f ", ac[idx].lop);
            else
               printf("%6s ", "-");
            idx++;
            }
         else
            printf("%6s ", "");
      printf("\n");
      }
   printf("\n");


   idx = 0;
   printf("Cramer's V - a transformation of ChiSq into [0,1]\n");
   printf("-------------------------------------------------\n");

   printf("%3s ", "");
   for (int j = 1; j < hv.length; j++)
      printf("%6d ", j+1);
   printf("\n");

   for (int i = 0; i < (hv.length - 1); i++)
      {
      printf("%3d ", i + 1);
      for (int j = 1; j < hv.length; j++)
         if (j > i)
            {
            if (ac[idx].isValid)
               printf("%6.4f ", ac[idx].cramrv);
            else
               printf("%6s ", "-");
            idx++;
            }
         else
            printf("%6s ", "");
      printf("\n");
      }
   printf("\n");

   printf("ENTROPY BASED MEASURES OF ASSOCIATION\n"
         "=====================================\n\n");

   idx = 0;
   printf("U uncertainty coefficient (How much information on one marker given by the other)\n");
   printf("---------------------------------------------------------------------------------\n");

   printf("%3s ", "");
   for (int j = 1; j < hv.length; j++)
      printf("%6d ", j+1);
   printf("\n");

   for (int i = 0; i < (hv.length - 1); i++)
      {
      printf("%3d ", i + 1);
      for (int j = 1; j < hv.length; j++)
         if (j > i)
            {
            if (ac[idx].isValid)
               printf("%6.4f ", ae[idx].uxy);
            else
               printf("%6s ", "-");
            idx++;
            }
         else
            printf("%6s ", "");
      printf("\n");
      }
   printf("\n");

   printf("STANDARDIZED DISEQUILIBRIUM COEFFICIENTS\n"
          "========================================\n\n");

   idx = 0;

   printf("%3s ", "");
   for (int j = 1; j < hv.length; j++)
      printf("%6d ", j+1);
   printf("\n");

   for (int i = 0; i < (hv.length - 1); i++)
      {
      printf("%3d ", i + 1);
      for (int j = 1; j < hv.length; j++)
         if (j > i)
            {
            if (ac[idx].isValid)
               printf("%6.4f ", al[idx].standardD);
            else
               printf("%6s ", "-");
            idx++;
            }
         else
            printf("%6s ", "");
      printf("\n");
      }
   printf("\n");

   bool biallelic = true;
   for (int i = 0; i < (hv.length - 1) * hv.length / 2; i++)
      if (ac[i].df > 1)
         biallelic = false;

   if (biallelic)
      {
      printf("DELTA SQUARED\n"
             "=============\n\n");

      idx = 0;

      printf("%3s ", "");
      for (int j = 1; j < hv.length; j++)
         printf("%6d ", j+1);
      printf("\n");

      for (int i = 0; i < (hv.length - 1); i++)
         {
         printf("%3d ", i + 1);
         for (int j = 1; j < hv.length; j++)
            if (j > i)
               {
               if (ac[idx].isValid)
                  printf("%6.4f ", ac[idx].cramrv * ac[idx].cramrv);
               else
                  printf("%6s ", "-");
               idx++;
               }
            else
               printf("%6s ", "");
         printf("\n");
         }
      printf("\n");
      }

   FILE * f = fopen("LD.XT", "wt");
   if (f == NULL)
      error("Opening output file for summary statistics, LD.XT");

   fprintf(f, "%4s %4s %6s %4s %9s %6s %6s %6s %6s %6s %6s %6s\n", "M1", "M2",
              "N", "df", "chisq", "p", "LOP", "Cramer", "U", "D", "D'",
              biallelic ? "Delta2" : "");

   for (int i = 0; i < hv.length - 1; i++)
      for (int j = i + 1; j < hv.length; j++)
         {
         idx = i * (hv.length * 2 - 3 - i) / 2 + j - 1;
         fprintf(f,
           "%4d %4d %6.0f %4.0f %9.3f %6.4f %6.2f %6.4f %6.4f %6.3f %6.3f",
           i + 1, j + 1, ac[idx].sum, ac[idx].df, ac[idx].chisq, ac[idx].prob,
           ac[idx].lop, ac[idx].cramrv, ae[idx].uxy, al[idx].D,
           al[idx].standardD);
         fprintf(f, biallelic ? "%6.3f\n" : "\n", 
                 ac[idx].cramrv * ac[idx].cramrv);
         }

   fclose(f);

   delete [] al;
   delete [] ac;
   delete [] ae;
   }

// Row / column of the transformed table that collects the pooled (rare)
// alleles: the slot immediately after the last common allele.
//
#define XT_M1POOL (m1alleles)
#define XT_M2POOL (m2alleles)

// Cross-tabulates the alleles of markers m1 and m2 and computes the
// association statistics for that pair.
//
// hv      haplotypes to tabulate; only those for which hv.IsUseful() holds
//         and where both markers are non-zero are counted
// m1, m2  zero-based marker indices (reported one-based in the output)
// ac, ae, al
//         receive the results through their Calc() methods; they are left
//         untouched when no haplotype has both markers typed
//
// Alleles observed fewer than poolTresh percent of the time (and at least
// once) are merged into a single pool row / column before the statistics
// are calculated. When verbose is set, the table and results are printed.
//
void CrossTabPair ( HaploVector & hv, int m1, int m2,
                    AssocChi & ac, AssocEntropy & ae, AssocLD & al )
   {
   int totals[XT_MAX_ALLELES][XT_MAX_ALLELES];     // observations of [m1][m2]
   int m1Totals[XT_MAX_ALLELES];                // observations of [m1]
   int m2Totals[XT_MAX_ALLELES];                // observations of [m2]
   int grandTotal = 0;                       // total observations

   if (verbose) printf("CrossTabulating Markers %d and %d\n", m1 + 1, m2 + 1);

   int ti, ci, i;
   int tj, cj, j;

   // Initialize tables to zeros
   //

   for (i = 0; i < XT_MAX_ALLELES; i++)
      for (j = 0; j < XT_MAX_ALLELES; j++)
         totals[i][j] = 0;

   for (i = 0; i < XT_MAX_ALLELES; i++)
      {
      m1Totals[i] = 0;
      m2Totals[i] = 0;
      }

   // Score pairs when both markers are defined
   //

   for (int h = 0; h < hv.count; h ++)
      if (hv.IsUseful(h))
         {
         int a1 = hv.states[h][m1];
         int a2 = hv.states[h][m2];
         if ( a1 != 0 && a2 != 0 )       // a zero state is not scored
            {
            // Allele numbers are one-based, table indices zero-based
            a1 = a1 - 1;
            a2 = a2 - 1;

            // The tables live on the stack with a fixed size, so an allele
            // number outside 1 .. XT_MAX_ALLELES must never be used as an
            // index
            if (a1 < 0 || a1 >= XT_MAX_ALLELES ||
                a2 < 0 || a2 >= XT_MAX_ALLELES)
               {
               error("Allele number out of range in haplotype data\n");
               continue;
               }

            totals[a1][a2]++;
            m1Totals[a1]++;
            m2Totals[a2]++;
            grandTotal++;
            }
         }

   // Check that we have scored something!
   //

   if (!grandTotal)
      {
      if (verbose) printf("No observations\n\n");
      return;
      }

   // Pool alleles observed in fewer than poolTresh percent of the scored
   // haplotypes; magicNumber is the smallest count that keeps an allele
   // separate (never less than one)
   //

   int magicNumber = grandTotal * poolTresh / 100;
   if (magicNumber == 0) magicNumber++;

   // Count common alleles and pooled alleles
   //

   int m1alleles = 0, m2alleles = 0;
   int m1Pool = 0, m2Pool = 0;

   for (i = 0; i < XT_MAX_ALLELES; i++)
      {
      if (m1Totals[i] >= magicNumber)
         m1alleles++;
      else
         if (m1Totals[i]) m1Pool++;
      if (m2Totals[i] >= magicNumber)
         m2alleles++;
      else
         if (m2Totals[i]) m2Pool++;
      }

   // Save transformed table in sumTotals: common alleles are packed into
   // rows / columns 0 .. n-1 in allele order, the pool follows at index n
   //

   int * sumTotals[XT_MAX_ALLELES + 1];

   for (i = 0; i < XT_MAX_ALLELES + 1; i++)
      {
      sumTotals[i] = new int[XT_MAX_ALLELES+1];
      for (j = 0; j < XT_MAX_ALLELES + 1; j ++)
         sumTotals[i][j] = 0;
      }

   for (i = 0, ci = 0; i < XT_MAX_ALLELES; i++)
      if (m1Totals[i])
         {
         if (m1Totals[i] >= magicNumber)
            ti = ci++;
         else
            ti = XT_M1POOL;
         for (j = 0, cj = 0; j < XT_MAX_ALLELES; j++)
            if (m2Totals[j])
            {
            if (m2Totals[j] >= magicNumber)
               tj = cj++;
            else
               tj = XT_M2POOL;
            sumTotals[ti][tj] += totals[i][j];
            }
         }

   // Calculate partial totals (row sums for m1, column sums for m2)
   //

   int m1SumTotals[XT_MAX_ALLELES + 1], m2SumTotals[XT_MAX_ALLELES + 1];

   for (i = 0; i <= XT_MAX_ALLELES; i++)
      {
      m1SumTotals[i] = 0;
      m2SumTotals[i] = 0;
      for (j = 0; j <= XT_MAX_ALLELES; j++)
         {
         m1SumTotals[i] += sumTotals[i][j];
         m2SumTotals[i] += sumTotals[j][i];
         }
      }

   // Calculate Statistics from table sumTotals
   //
   // The pool row / column takes part only if smallPools is set or the
   // pool holds more than magicNumber observations.
   // NOTE(review): single alleles are kept with >=, the pool with > --
   // confirm the difference is intended.
   //
   int ni = XT_M1POOL +
      ( (smallPools || (m1SumTotals[XT_M1POOL] > magicNumber)) ? 1 : 0 );
   int nj = XT_M2POOL +
      ( (smallPools || (m2SumTotals[XT_M2POOL] > magicNumber)) ? 1 : 0 );

   ac.Calc(sumTotals, ni, nj);
   ae.Calc(sumTotals, ni, nj);
   al.Calc(sumTotals, ni, nj);

   if (verbose)
      {
      // Print out table; tj / ti walk the original allele numbers so that
      // each packed row or column is labelled with its one-based allele
      //

      printf("%6s", "");
      for (j = 0, tj = 0; j < m2alleles; j++)
         {
         while (m2Totals[tj] < magicNumber)
            tj++;
         printf("%6d", ++tj);
         }
      printf("%6s%6s\n", "Pool", "Total");

      for (i = 0, ti = 0; i < m1alleles; i++)
         {
         while (m1Totals[ti] < magicNumber)
            ti++;
         printf("%6d", ++ti);
         for (j = 0; j < m2alleles; j++)
            printf("%6d", sumTotals[i][j]);
         printf("%6d", sumTotals[i][XT_M2POOL]);
         printf("%6d\n", m1SumTotals[i]);
         }

      printf("%6s", "Pool");
      for (j = 0; j < m2alleles; j ++)
         printf("%6d", sumTotals[XT_M1POOL][j]);
      printf("%6d", sumTotals[XT_M1POOL][XT_M2POOL]);
      printf("%6d\n", m1SumTotals[XT_M1POOL]);

      printf("%6s", "Total");
      for (j = 0; j < m2alleles; j ++)
         printf("%6d", m2SumTotals[j]);
      printf("%6d", m2SumTotals[XT_M2POOL]);
      printf("%6d\n", grandTotal);

      // Print out a legend for the table, listing the alleles that were
      // observed but fell below the pooling threshold
      //

      printf("Marker %d is scored vertically", m1 + 1);
      if (m1SumTotals[XT_M1POOL])
         {
         printf(" (Alleles");
         for (i = 0, ti = 0; i < m1Pool; i++)
            {
            while ((m1Totals[ti] >= magicNumber) || (m1Totals[ti] == 0))
               ti++;
            printf(" %d", ++ti);
            }
         printf(" were pooled)");
         }
      printf("\n");

      printf("Marker %d is scored horizontally", m2 + 1);
      if (m2SumTotals[XT_M2POOL])
         {
         printf(" (Alleles");
         for (i = 0, ti = 0; i < m2Pool; i++)
            {
            while ((m2Totals[ti] >= magicNumber) || (m2Totals[ti] == 0))
               ti++;
            printf(" %d", ++ti);
            }
         printf(" were pooled)");
         }

      printf("\n");

      // Print out the statistics
      //
      printf("Chi-sq:        %10.5f Prob:   %7.5f df: %.0f\n",
             ac.chisq, ac.prob, ac.df);
      printf("Association U: %10.5f D/Dmax: %7.5f\n", ae.uxy, al.standardD);

      printf("\n");
      }

   // Free memory
   //
   for (i = 0; i < XT_MAX_ALLELES + 1; i++)
      delete [] sumTotals[i];
   }
 
