////////////////////////////////////////////////////////////////////// 
// haploxt/HaploVector.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "HaploVector.h"
#include "StringBasics.h"
#include "StringArray.h"

#include "string.h"
#include "stdlib.h"
#include "ctype.h"

// Percentage of loci that must have a non-zero state for a haplotype to
// be counted as useful by HaploVector::IsUseful(); starts at HV_INFOTRESH
int haploTresh = HV_INFOTRESH;

// Creates an empty vector with room for up to s haplotypes
HaploVector::HaploVector(int s)
   {
   // One state array and one label slot per haplotype
   states = new IntArray [s];
   names.Dimension(s);

   // Capacity is fixed; nothing has been loaded yet
   size = s;
   length = 0;
   count = 0;
   }

// Releases the array of per-haplotype state vectors allocated with
// new [] in the constructor
HaploVector::~HaploVector()
   {
   delete [] states;
   }

// Appends haplotypes read from a Simwalk output stream to the vector.
//
// The stream is first advanced up to the next '_' character and
// HV_SKIPLINES lines are discarded. Haplotypes are then read two lines
// at a time: the first line holds the haplotype label, one token that
// is skipped (the "=" sign) and the marker states; the second line is
// read and ignored.
//
// f     - input stream to read from
// label - optional prefix, labels are stored as "label.name"; when NULL
//         the name from the file is stored unchanged
//
// Reading stops at end of file (or on a read error), at a line with no
// tokens, at a line whose first token does not start with a letter, or
// when the vector is full (a warning is printed in that case).
//
// Returns the total number of haplotypes held in the vector.
int HaploVector::AppendSimwalkFile(FILE * f, const char * label)
   {
   char lineBuffer[BUFSIZE];

   fscanf(f, "%*[^_]");

   for (int i = 0; i < HV_SKIPLINES; i++)
      if (fgets(lineBuffer, BUFSIZE, f) == NULL)
         return count;                  // Input ended within the header

   while (count < size)
      {
      // A failed read does not refresh lineBuffer, so bail out rather
      // than parse stale (or uninitialized) contents
      if (fgets(lineBuffer, BUFSIZE, f) == NULL)
         return count;

      char * tok = strtok(lineBuffer, SEPARATORS);

      // strtok() yields NULL for a line with no tokens, and isalpha()
      // is only defined for values representable as unsigned char
      if (tok == NULL || !isalpha((unsigned char) *tok))
         return count;

      if (label != NULL)
         {
         names[count] = label;
         names[count] += ".";
         }
      else
         names[count] = "";
      names[count] += tok;              // Read Haplotype Label

      strtok ( NULL, SEPARATORS);        // Skip "=" sign

      states[count].Clear();
      length = 0;                      // Read and Count Marker States
      while ((tok = strtok(NULL, SEPARATORS)) != NULL)
         {
         states[count].Push(atoi(tok));
         length++;
         }
      count++;

      // Skip the line that follows each haplotype; a failure here is
      // caught by the checked read at the top of the next iteration
      fgets(lineBuffer, BUFSIZE, f);
      }

   if (count >= size)
      printf("WARNING - Maximum haplotype vector size %d reached,\n"
            "extra haplotypes may have been ignored.\n\n", size);

   return count;
   }
   
int HaploVector::WriteToFile(FILE * f) const
   {
   for (int i = 0; i < count; i++)
      {
      fprintf(f, "%15s",  (const char *) (names[i]));
      for (int j = 0; j < length; j++)
         fprintf(f, "%3d", states[i][j]);
      fprintf(f, "\n");
      }
      
   return count;  
   }
   
// Reads haplotypes from f, one per line, in the layout produced by
// WriteToFile(): a label followed by one integer state per marker.
//
// Lines with fewer than two tokens are skipped. Reading stops at end of
// file, on a stream error, at a line whose first token does not start
// with a letter or digit, or when the vector is full (a warning is
// printed in that case). `length` is set from the most recent line read.
//
// Returns the total number of haplotypes held in the vector.
int HaploVector::ReadFromFile(FILE * f)
   {
   String linebuffer;
   StringArray tokens;

   while (count < size)
      {
      // feof() alone is not enough: after a read error it stays false,
      // and the loop could then keep skipping short lines indefinitely
      if (feof(f) || ferror(f)) break;

      linebuffer.ReadLine(f);

      tokens.Clear();
      tokens.AddTokens(linebuffer, SEPARATORS);

      if (tokens.Length() < 2) continue;

      // isalnum() is only defined for values representable as
      // unsigned char, so plain char must be converted first
      if (!isalnum((unsigned char) tokens[0][0])) break;

      names[count] = tokens[0]; // Read Haplotype Label

      length = tokens.Length() - 1;
      states[count].Dimension(length);

      for (int i = 0; i < length; i++)
         states[count][i] = atoi(tokens[i + 1]);

      count++;
      }

   if (count >= size)
      printf("WARNING - Maximum haplotype vector size %d reached,\n"
            "extra haplotypes may have been ignored.\n\n", size);

   return count;
   }

void HaploVector::PrintStats()
   {
   int   info = 0;

   for (int i=0; i < count; i++)
      if (IsUseful(i)) info++;
      
   printf("Vector contains %d haplotypes\n"
         "       of which %d are typed at %d%% of loci or more\n\n",
         count, info, haploTresh);
   }
   
// Returns non-zero when haplotype i has a non-zero state at enough of
// its loci to meet the haploTresh percentage.
//
// Results for all haplotypes are computed together and kept in `cache`;
// they are only recomputed when the cache size differs from `count`.
int HaploVector::IsUseful(int i)
   {
   // Cache already holds one entry per haplotype
   if (cache.Length() == count)
      return cache[i];

   cache.Dimension(count);

   // Minimum number of non-zero loci required.
   // NOTE(review): the integer division truncates, so the effective
   // cutoff can fall slightly below haploTresh percent - confirm intended
   const int required = length * haploTresh / 100;

   for (int hap = 0; hap < count; hap++)
      {
      int typed = 0;

      for (int locus = 0; locus < length; locus++)
         if (states[hap][locus] != 0) typed++;

      cache[hap] = (typed >= required);
      }

   return cache[i];
   }
 
