////////////////////////////////////////////////////////////////////// 
// libsrc/PedigreeDescription.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "PedigreeDescription.h"
#include "MapFunction.h"
#include "MathVector.h"
#include "Constant.h"
#include "Error.h"

#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <math.h>

// Creates a description that does not list any columns yet
PedigreeDescription::PedigreeDescription()
   {
   this->columnCount = 0;
   }

// Nothing to release explicitly in the destructor body
PedigreeDescription::~PedigreeDescription()
   {
   }

// Copies the column layout (types, hashes and count) from rhs
// and returns a reference to this object so assignments can be chained
PedigreeDescription & PedigreeDescription::operator = (PedigreeDescription & rhs)
   {
   columns     = rhs.columns;
   columnHash  = rhs.columnHash;
   columnCount = rhs.columnCount;

   return *this;
   }

void PedigreeDescription::Load(FILE * input)
   {
   // Check if we are dealing with a linkage format data file
   String      buffer;
   StringArray tokens;

   ReadLineHelper(input, buffer, tokens);
   rewind(input);

   if (tokens.Length() == 4 && isdigit(tokens[0][0]))
      {
      printf("Data file looks like a LINKAGE format file...\n\n");
      LoadLinkageDataFile(input);
      return;
      }

   columns.Clear();
   columnHash.Clear();
   columnCount = 0;

   int done = 0, line = 0;

   while (!feof(input) && !done)
      {
      int   i;

      buffer.ReadLine(input);
      line++;

      tokens.Clear();
      tokens.AddTokens(buffer, WHITESPACE);

      if (tokens.Length() < 1) continue;

      if (tokens.Length() == 1)
         error("Item #%d (of type %s) has no name", columnCount+1, tokens[0][1]);

      switch (toupper(tokens[0][0]))
         {
         case 'A' :
            columnHash.Push(GetAffectionID(tokens[1]));
            columns.Push(pcAffection);
            columnCount++;
            break;
         case 'M' :
            columnHash.Push(GetMarkerID(tokens[1]));
            columns.Push(pcMarker);
            columnCount++;
            break;
         case 'T' :
            columnHash.Push(GetTraitID(tokens[1]));
            columns.Push(pcTrait);
            columnCount++;
            break;
         case 'C' :
            columnHash.Push(GetCovariateID(tokens[1]));
            columns.Push(pcCovariate);
            columnCount++;
            break;
         case 'S' :
            i = (int) tokens[0].SubStr(1);
            i = i > 0 ? i : 1;
            while (i--)
               {
               columns.Push(pcSkip);
               columnHash.Push(0);
               columnCount++;
               }
            break;
         case 'Z' :
            columnHash.Push(0);
            columns.Push(pcZygosity);
            columnCount++;
            break;
         case 'E' :
            done = 1;
            break;
         default :
            error ("Problem in data file (line %d):\n%s\n",
                   line, (const char *) buffer);
         }
      }

   columns.Push(pcEnd);
   columnHash.Push(0);
   };

// Opens the named data file in binary mode, loads the column
// description from it and closes it again. A failure to open
// the file is reported through error().
void PedigreeDescription::Load(const char * filename)
   {
   FILE * handle = fopen(filename, "rb");

   if (!handle)
      error("Opening data file %s", filename);

   Load(handle);
   fclose(handle);
   }

// Opens the named map file in binary mode, loads the marker
// list from it and closes it again. A failure to open the
// file is reported through error().
void PedigreeDescription::LoadMap(const char * filename)
   {
   FILE * handle = fopen(filename, "rb");

   if (!handle)
      error("Opening map file %s", filename);

   LoadMap(handle);
   fclose(handle);
   }

void PedigreeDescription::LoadMap(FILE * input)
   {
   columns.Clear();
   columnHash.Clear();
   columnCount = 0;

   int         lastposition = 0;
   String      buffer;
   StringArray tokens;

   buffer.ReadLine(input);
   tokens.AddTokens(buffer, WHITESPACE);

   while (tokens.Length() == 0 && !feof(input))
      {
      buffer.ReadLine(input);
      tokens.AddTokens(buffer, WHITESPACE);
      }

   if (tokens.Length() != 3)
      error("Error reading map file header, which has %d columns.\n"
            "Three columns were expected, corresponding to\n"
            "MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION\n"
            "The offending header is transcribed below:\n\n"
            "%s", tokens.Length(), (const char *) buffer);
   else
      printf("Map file column labels\n"
             "  -- COLUMN 1, Expecting MARKER_ID, Read %s\n"
             "  -- COLUMN 2, Expecting MARKER_NAME, Read %s\n"
             "  -- COLUMN 3, Expection BASE_PAIR_POSITION, Read %s\n\n",
             (const char *) (tokens[0]), (const char *) (tokens[1]),
             (const char *) (tokens[2]));

   int line = 1;
   while (!feof(input))
      {
      int    serial;
      long   position;

      buffer.ReadLine(input);
      line++;

      tokens.Clear();
      tokens.AddTokens(buffer, WHITESPACE);

      if (tokens.Length() < 1) continue;
      if (tokens.Length() != 3)
         error("Each line in the map file should have 3 tokens, corresponding\n"
               "to MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION respectively\n"
               "However, there are %d tokens in line %d, transcribed below:\n\n"
               "%s", tokens.Length(), line, (const char *) buffer);

      serial = (int) tokens[0];
      if (serial != columnCount + 1)
         error("Reading Marker Index from Map File...\n"
               "Markers should be indexed consecutively starting at 1\n"
               "Marker %d does not fit this pattern\n", columnCount + 1);

      position = (int) tokens[2];
      if (position < lastposition)
         error("Reading Marker Position from Map File...\n"
               "Marker position should be in base-pairs\n"
               "and markers should be in map order\n");

      // TODO -- store marker locations somewhere!
      lastposition = position;

      columnHash.Push(GetMarkerID(tokens[1]));
      columns.Push(pcMarker);
      columnCount++;

      GetMarkerInfo(tokens[1])->position = position * 1e-8;
      }

   columns.Push(pcEnd);
   columnHash.Push(0);
   };

int PedigreeDescription::CountTextColumns()
   {
   int count = 0;

   for (int i = 0; i < columnCount; i++, count++)
      if (columns[i] == pcMarker)
         count++;

   return count;
   }

// Opens the named linkage data file in binary mode, loads the
// column description from it and closes it again. A failure to
// open the file is reported through error().
void PedigreeDescription::LoadLinkageDataFile(const char * filename)
   {
   FILE * handle = fopen(filename, "rb");

   if (!handle)
      error("Opening linkage data file %s", filename);

   LoadLinkageDataFile(handle);
   fclose(handle);
   }

void PedigreeDescription::LoadLinkageDataFile(FILE * input)
   {
   columns.Clear();
   columnHash.Clear();
   columnCount = 0;

   String      buffer;
   StringArray tokens;

   ReadLineHelper(input, buffer, tokens);

   if (tokens.Length() != 4 || tokens[2].AsInteger() != 0 ||
       tokens[0].AsInteger() < 0 )
      error("Cannot handle first line of data file\n\n"
            "Expecting four (4) numeric values, which correspond to:\n"
            "   num-loci   -- number of loci in the pedigree\n"
            "                 this value must be positive\n"
            "   risk-locus -- locus for which risks should be calculated\n"
            "                 this value will be ignored\n"
            "   sex-link   -- are the loci sex linked [0 - No, 1 - Yes]\n"
            "                 this value currently must be ZERO (0)\n"
            "   program    -- which LINKAGE program do you want to use?\n"
            "                 this value will also be ignored\n\n"
            "The actual input read:\n%s\n", (const char *) buffer);

   int numloci = tokens[0];

   ReadLineHelper(input, buffer, tokens);

   if (tokens.Length() != 4 ||
       tokens[0].AsInteger() != 0 ||
       tokens[3].AsInteger() != 0)
      error("Cannot handle second line of data file\n\n"
      "Expecting four (4) numeric values, which correspond to:\n"
      "   mutation-model         -- must be zero, corresponding to no mutation\n"
      "   male-mutation-rate     -- ignored\n"
      "   female-mutation-rate   -- ignored\n"
      "   linkage-disequilibrium -- must be zero, may be used in the future to\n"
      "                             read haplotype frequencies\n\n"
      "The actual input read:\n%s\n", (const char *) buffer);

   StringArray markerOrder;
   int         unknown = 0;

   ReadLineHelper(input, buffer, markerOrder);

   if (markerOrder.Length() > numloci)
      error("The third line of the data file lists marker order\n\n"
            "Although %d loci are defined [in the first line],\n"
            "this line includes %d values:\n%s\n",
            numloci, markerOrder.Length(), (const char *) buffer);

   IntArray    locus;
   bool need_blank_line = false;

   while (!feof(input) && numloci--)
      {
      if (ReadLineHelper(input, buffer, tokens) == 0)
         error("Linkage data file ends unexpectedly");

      if (tokens.Length() < 2)
         error("Incomplete locus information in data file\n"
               "Information for each locus should include 2 or more fiels\n"
               "The expected fields are:\n"
               "   field_type  -- indicator of locus type (trait, marker,...)\n"
               "   alleles     -- number of alleles\n"
               "   name        -- locus name, preceded by hash (#) sign\n\n"
               "The actual input read:\n%s\n", (const char *) buffer);

      int locus_type = (int) tokens[0];
      int alleles    = (int) tokens[1];

      String locus_name("LOCUS");
      locus_name += ++unknown;

      if (tokens.Length() > 2 && tokens[2][0] == '#')
         if (tokens[2][1] != 0)
            locus_name = tokens[2].SubStr(1);
         else if (tokens.Length() > 3)
            locus_name = tokens[3];

      if ( ReadLineHelper(input, buffer, tokens) != alleles)
         error("Expecting %d allele frequencies, but input has %d columns:\n"
               "%s\n", alleles, tokens.Length(), (const char *) buffer);

      Vector frequencies(alleles + 1);

      frequencies[0] = 0.0;
      for (int i = 1; i <= alleles; i++)
         frequencies[i] = (double) tokens[i - 1];

      double sum = frequencies.Sum();

      if (sum <= 0.0)
         error("Locus %s frequencies sum to %f, which doesn't make sense\n",
               (const char *) locus_name, sum);

      if ( fabs(sum - 1.0) > 1e-5 )
         {
         printf("Locus %s frequencies sum to %f, adjusted to 1.0\n",
                (const char *) locus_name, sum);
         need_blank_line = true;
         }

      if ( sum != 1.0)
         frequencies *= 1.0 / sum;

      switch (locus_type)
         {
         case 1 : {
            // Affection
            columnHash.Push(GetAffectionID(locus_name));
            columns.Push(pcAffection);
            columnCount++;

            // Read number of liability classes
            if (ReadLineHelper(input, buffer, tokens) == 0)
               error("Linkage data file ends unexpectedly\n");

            // Skip liability class data
            int classes = tokens[0];
            if (classes > 1)
               { columnHash.Push(0); columns.Push(pcSkip); }

            while (classes--)
               if (ReadLineHelper(input, buffer, tokens) == 0)
                  error("Linkage data file ends unexpectedly\n");

            // Ignore map location for quantitative variables
            locus.Push(-1);
            } break;
         case 3 :
            columnHash.Push(GetMarkerID(locus_name));
            columns.Push(pcMarker);
            columnCount++;

            // Store allele frequencies
            GetMarkerInfo(locus_name)->freq = frequencies;

            // Store marker id, so that we can track map location
            locus.Push(GetMarkerID(locus_name));
            break;
         case 0 : {
            // Read number of quantitative variables
            if (ReadLineHelper(input, buffer, tokens) == 0)
               error("Linkage data file ends unexpectedly\n");

            // Add each quantitative variable to pedigree
            // Discard information on means
            for (int vars = tokens[0], i = 0; i < vars; i++)
               {
               if (ReadLineHelper(input, buffer, tokens) == 0)
                  error("Linkage data file ends unexpectedly\n");

               String trait_name(locus_name);

               if (i) { trait_name += "."; trait_name += i + 1; }

               columnHash.Push(GetTraitID(trait_name));
               columns.Push(pcTrait);
               columnCount++;
               }

            // Skip var-covar matrix
            if (ReadLineHelper(input, buffer, tokens) == 0)
               error("Linkage data file ends unexpectedly\n");

            // Skip heterozygote scaling factor for var-covar matrix
            if (ReadLineHelper(input, buffer, tokens) == 0)
               error("Linkage data file ends unexpectedly\n");

            // Ignore map location for quantitative variables
            locus.Push(-1);
            } break;
         case 2 :
            error ("The data file includes binary factors\n"
                   "Regretably, loci of this type are not supported\n\n");
            break;
         default :
            error ("Unsupported locus type [%d] in data file", locus_type);
            break;
         }
      }

   if (need_blank_line) printf("\n");

   columns.Push(pcEnd);
   columnHash.Push(0);

   ReadLineHelper(input, buffer, tokens);
   if (tokens.Length() != 2 ||
       tokens[0].AsInteger() != 0 ||
       tokens[1].AsInteger() != 0)
      error("Error retrieving recombination information\n\n"
      "Expecting two (2) numeric values, which correspond to:\n"
      "   sex-difference   -- must be zero, that is, no sex difference\n"
      "   map-function     -- must be zero, that is, no interference\n"
      "The actual input read:\n%s\n", (const char *) buffer);

   ReadLineHelper(input, buffer, tokens);
   if (tokens.Length() != markerOrder.Length() - 1)
      error("Error retrieving recombination information\n\n"
      "Expecting %d recombination fractions (current map includes %d loci)\n"
      "Instead the following line was input:\n%s\n",
      markerOrder.Length() - 1, markerOrder.Length(), (const char *) buffer);

   double position = 0.0;

   for (int i = 0, moving = false; i < markerOrder.Length(); i++)
      {
      int m = markerOrder[i].AsInteger() - 1;

      if (m < 0 || m >= locus.Length())
         error("The marker order in the linkage datafile is invalid\n");

      m = locus[m];

      if (m != -1)
         {
         MarkerInfo * info = GetMarkerInfo(m);
         info->chromosome = 0;
         info->position = position;
         moving = true;
         }

      if (i < markerOrder.Length() - 1 && moving)
         position += RecombinationToDistance(tokens[i]);
      }
   }

// Reads lines from input until one produces at least one token or the
// end of the input is reached.
//
// Text from a ">>" marker onwards is discarded; when a line has no ">>",
// a "<<" marker is looked for instead. On return, buffer holds the last
// line read and tokens the whitespace delimited tokens kept from it.
// Returns the number of tokens, which is zero if the input ran out first.
int PedigreeDescription::ReadLineHelper(FILE * input,
                                         String & buffer,
                                         StringArray & tokens)
   {
   for (;;)
      {
      buffer.ReadLine(input);

      // Locate the comment marker, if any
      int cut = buffer.FastFind(">>");

      if (cut == -1)
         cut = buffer.FastFind("<<");

      // Without a marker the whole line is kept
      if (cut == -1)
         cut = buffer.Length() + 1;

      // Tokenize the portion of the line that is kept
      tokens.Clear();
      tokens.AddTokens(buffer.Left(cut - 1), WHITESPACE);

      if (tokens.Length() != 0 || feof(input))
         break;
      }

   return tokens.Length();
   }

int PedigreeDescription::CountColumns(int type)
   {
   int count = 0;

   for (int i = 0; i < columns.Length(); i++)
      if (columns[i] == type)
         count++;

   return count;
   }

// Replaces the contents of string with a comma separated summary of the
// column types that occur at least once (for example "3 markers [x2 cols],
// 1 traits") and returns the resulting text
const char * PedigreeDescription::ColumnSummary(String & string)
   {
   // Column types in the order in which they are reported
   static const struct { int type; const char * label; } entries[] =
      {
      { pcMarker,    " markers [x2 cols]" },
      { pcTrait,     " traits" },
      { pcAffection, " discrete traits" },
      { pcCovariate, " covariates" },
      { pcZygosity,  " zygosity" },
      { pcSkip,      " skipped" }
      };

   const int entryCount = (int) (sizeof(entries) / sizeof(entries[0]));

   string.Clear();

   for (int e = 0; e < entryCount; ++e)
      UpdateSummary(string, entries[e].type, entries[e].label);

   return string;
   }

// Appends "<count><label>" to string when at least one column has the
// given type, inserting ", " first if string already holds text
void PedigreeDescription::UpdateSummary(String & string, int type, const char * label)
   {
   const int matches = CountColumns(type);

   // Types that do not occur are left out of the summary
   if (matches == 0)
      return;

   if (string.Length() != 0)
      string += ", ";

   string += matches;
   string += label;
   }

 
