////////////////////////////////////////////////////////////////////// 
// libsrc/PedigreeLoader.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "Pedigree.h"
#include "Error.h"

#include <stdlib.h>
#include <ctype.h>
#include <string.h>

// Reads the pedigree description `pd` from an already-open stream by
// forwarding to pd.Load(input).
//
// `pd` is the same object that Pedigree::Load(FILE *) consults
// (pd.CountTextColumns(), pd.columns, pd.columnHash) to interpret each
// pedigree line, so this is presumably meant to be called before loading
// pedigree data -- confirm against callers.
//
// The stream is not closed here; it remains owned by the caller.
void Pedigree::Prepare(FILE * input)
   {
   pd.Load(input);
   }

void Pedigree::Load(FILE * input)
   {
   int textCols = pd.CountTextColumns() + 5;
   int oldCount = count;
   bool    warn = true;
   int     line = 0;

   String      buffer;
   StringArray tokens;

   while (!feof(input))
      {
      int field = 0;

      if (count == size)
         Grow();

      buffer.ReadLine(input);

      tokens.Clear();
      tokens.AddTokens(buffer, SEPARATORS);

      if (tokens.Length() == 0) continue;
      if (tokens[0].SlowCompare("end") == 0) break;

      line++;

      if (tokens.Length() < textCols)
         {
         if (buffer.Length() > 79)
            {
            buffer.SetLength(75);
            buffer += " ...";
            }
         error("Loading Pedigree...\n\n"
               "Line %d includes too few data items.\n"
               "Expecting %d columns, but read %d columns.\n\n"
               "The problem line is transcribed below:\n%s\n",
               line, textCols, tokens.Length(), (const char  *) buffer);
         }

      if (tokens.Length() > textCols && warn && textCols > 5)
         {
         pd.ColumnSummary(buffer);
         printf("WARNING -- Trailing columns in pedigree file will be ignored\n"
                "  Expecting %d data columns (%s)\n"
                "  However line %d, for example, has %d data coluomns\n\n",
                textCols - 5, (const char *) buffer, line, tokens.Length() - 5);
         warn = false;
         }

      Person * p;

      // create a new person if necessary
      if (oldCount==0 || (p = FindPerson(tokens[0], tokens[1], oldCount))==NULL)
         p = persons[count++] = new Person;

      p->famid = tokens[field++];         // famid
      p->pid = tokens[field++];           // pid
      p->fatid = tokens[field++];         // fatid
      p->motid = tokens[field++];         // motid
      const char * sex = tokens[field++]; // sex

      switch (sex[0])
         {
         case 'x' : case 'X' : case '?' :
            p->sex = 0; break;
         case '1' : case 'm' : case 'M' :
            p->sex = 1; break;
         case '2' : case 'f' : case 'F' :
            p->sex = 2; break;
         default :
            p->sex = atoi(sex);
            if (p->sex != 0 && p->sex != 1 && p->sex != 2)
            error("Can't interpret the sex of individual #%d\n"
                  "Family: %s  Individual: %s  Sex Code: %s", count,
                  (const char *) p->famid, (const char *) p->pid, sex);
         };

      for (int col = 0; col < pd.columnCount; col++)
         switch ( pd.columns[col] )
            {
            case pcAffection :
               {
               int a = pd.columnHash[col];
               int new_status;

               const char * affection = tokens[field++];

               switch (toupper(affection[0]))
                  {
                  case '1' : case 'N' : case 'U' :
                     new_status = 1;
                     break;
                  case '2' : case 'D' : case 'A' : case 'Y' :
                     new_status = 2;
                     break;
                  default :
                     new_status = atoi(affection);
                     if (new_status < 0 || new_status > 2)
                        error("Incorrect formating for affection status "
                              "Col %d, Affection %s\n"
                              "Family: %s  Individual: %s  Status: %s",
                              col, (const char *) affectionNames[a],
                              (const char *) p->famid, (const char *) p->pid,
                              affection);
                  }
               if (new_status != 0 && p->affections[a] != 0 &&
                   new_status != p->affections[a])
                  error("Conflict with previous affection status - "
                        "Col %d, Affection %s\n"
                        "Family: %s  Individual: %s  Old: %d New: %d",
                        col, (const char *) affectionNames[a],
                        (const char *) p->famid, (const char *) p->pid,
                        p->affections[a], new_status);
               if (new_status) p->affections[a] = new_status;
               break;
               }
            case pcMarker :
               {
               int m = pd.columnHash[col];
               Alleles new_genotype;

               new_genotype[0] = tokens[field++];
               new_genotype[1] = tokens[field++];

               if (p->markers[m].isKnown() && new_genotype.isKnown() &&
                   new_genotype != p->markers[m])
                  error("Conflict with previous genotype - Col %d, Marker %s\n"
                        "Family: %s  Individual: %s  Old: %d/%d New: %d/%d",
                        col, (const char *) markerNames[m],
                        (const char *) p->famid, (const char *) p->pid,
                        p->markers[m][0], p->markers[m][1],
                        new_genotype[0], new_genotype[1]);

               if (new_genotype.isKnown()) p->markers[m] = new_genotype;
               break;
               }
            case pcTrait :
               {
               int t = pd.columnHash[col];
               double new_pheno = _NAN_;

               const char * value = tokens[field++];
               char * flag = "\0";

               if ( missing == (const char *) NULL || strcmp(value, missing) != 0)
                  new_pheno = strtod(value, &flag);
               if ( *flag ) new_pheno = _NAN_;

               if ( p->traits[t] != _NAN_ && new_pheno != _NAN_ &&
                    new_pheno != p->traits[t])
                  error("Conflict with previous phenotype - Col %d, Trait %s\n"
                        "Family: %s  Individual: %s  Old: %f New: %f",
                        col, (const char *) traitNames[t],
                        (const char *) p->famid, (const char *) p->pid,
                        p->traits[t], new_pheno);

               if ( new_pheno != _NAN_) p->traits[t] = new_pheno;
               break;
               }
            case pcCovariate :
               {
               int c = pd.columnHash[col];
               double new_covar = _NAN_;

               const char * value = tokens[field++];
               char * flag = "\0";

               if ( missing == (const char *) NULL || strcmp(value, missing) != 0)
                  new_covar = strtod(value, &flag);
               if ( *flag ) new_covar = _NAN_;

               if ( p->covariates[c] != _NAN_ && new_covar != _NAN_ &&
                    new_covar != p->covariates[c])
                  error("Conflict with previous value - Col %d, Covariate %s\n"
                        "Family: %s  Individual: %s  Old: %f New: %f",
                        col, (const char *) covariateNames[c],
                        (const char *) p->famid, (const char *) p->pid,
                        p->covariates[c], new_covar);

               if ( new_covar != _NAN_) p->covariates[c] = new_covar;
               break;
               }
            case pcSkip :
               field++;
               break;
            case pcZygosity :
               {
               int new_zygosity;

               const char * zygosity = tokens[field++];

               switch (zygosity[0])
                  {
                  case 'D' : case 'd' :
                     new_zygosity = 2;
                     break;
                  case 'M' : case 'm' :
                     new_zygosity = 1;
                     break;
                  default :
                     new_zygosity = atoi(zygosity);
                  }
               if (p->zygosity != 0 && new_zygosity != p->zygosity)
                  error("Conflict with previous zygosity - "
                        "Column %d in pedigree\n"
                        "Family: %s  Individual: %s  Old: %d New: %d\n",
                        col, (const char *) p->famid, (const char *) p->pid,
                        p->zygosity, new_zygosity);
               p->zygosity = new_zygosity;
               break;
               }
            case pcEnd :
               break;
            default :
               error ("Inconsistent Pedigree Description -- Internal Error");
            }

      fscanf(input, " ");
      }

   Sort();
   }

// Overload of Prepare() taking a C string instead of an open stream; it
// forwards the string unchanged to pd.Load(input).
//
// `input` is presumably the name of the file holding the pedigree
// description, mirroring Pedigree::Load(const char *) which passes its
// argument to fopen() -- confirm against pd's declaration.
void Pedigree::Prepare(const char * input)
   {
   pd.Load(input);
   }

void Pedigree::Load(const char * input)
   {
   FILE * f = fopen(input, "rb");
   if (f == NULL) error("Couldn't open pedigree file %s", input);
   Load(f);
   fclose(f);
   }



 
