////////////////////////////////////////////////////////////////////// 
// collect-simwalk2/HaploVector.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "HaploVector.h"
#include "StringBasics.h"
#include "StringArray.h"
#include "Sort.h"

#include "string.h"
#include "stdlib.h"
#include "ctype.h"

// Class-wide default capacity: the HaploVector constructor allocates this
// many entries when it is called with a size of zero.
int HaploVector::maxHaplotypes = 2000;

// Ordering callback for two entries: the result is whatever SlowCompare
// reports for their names. Used by HaploVector::Sort and HaploVector::Find.
int hvEntry::Compare(hvEntry * one, hvEntry * two)
   {
   hvEntry & lhs = *one;
   hvEntry & rhs = *two;

   return SlowCompare(lhs.name, rhs.name);
   }

// Formats a selection of this entry's states as a dot-delimited string,
// for example ".1.2.3.".
//
// filter - zero-terminated list of 1-based positions into states; each
//          position contributes "<state>." to the result, in list order.
//
// Returns a pointer to a static buffer that is overwritten by the next
// call. The buffer holds at most 99 characters: once the next state would
// not fit, formatting stops and the string built so far (which always ends
// on a '.') is returned, rather than writing past the end of the buffer.
char * hvEntry::AsString(int * filter)
   {
   static char str[100];
   char next[16];          // room for any 32-bit "%d." plus terminator

   const int capacity = (int) sizeof(str);
   int used = 1;

   str[0] = '.'; str[1] = 0;

   for (;*filter != 0; filter++)
      {
      int width = snprintf(next, sizeof(next), "%d.", states[*filter - 1]);

      // Formatting failure or a piece that was truncated in next
      if (width < 0 || width >= (int) sizeof(next)) break;

      // Keep one byte free for the terminating zero
      if (used + width >= capacity) break;

      strcpy(str + used, next);
      used += width;
      }
   return str;
   }

// Builds an empty haplotype vector with room for s entries. Passing zero
// selects the class-wide default, maxHaplotypes.
HaploVector::HaploVector(int s)
   {
   if (s == 0)
      size = maxHaplotypes;
   else
      size = s;

   // Nothing stored yet, and no haplotype length has been seen
   length = 0;
   count = 0;

   entries = new hvEntry[size];
   }

// Appends haplotypes parsed from the stream f to this vector.
//
// The stream is first advanced past its first '_' character; the rest of
// the stream is then read into lines and parsing starts HV_SKIPLINES lines
// in. Each haplotype record begins with a line of at least three
// whitespace-separated tokens whose second token selects the layout:
//
//   '='  horizontal: token 0 names the haplotype and tokens 2.. are its
//        states; one further line is skipped afterwards.
//   '|'  vertical: tokens 0 and 2 name two haplotypes; every following
//        line up to the first empty one supplies a state for each of them
//        (tokens 0 and 2 of that line); two further lines are skipped.
//
// Names are stored as "<label>.<token>". Parsing stops at the first line
// with fewer than three tokens, at the end of the data, or when fewer than
// two free entries remain (a vertical record needs two).
//
// f     - open input stream
// label - prefix for the stored haplotype names
//
// Returns the total number of haplotypes now held in the vector, or 0 when
// no '_' is found (end of file or read error) or a record is malformed.
// Note that entries stored before a malformed record are kept even though
// 0 is returned.
int HaploVector::AppendSimwalkFile(FILE * f, const char * label)
   {
   // fgetc reports both end of file and read errors as EOF, so testing its
   // result (rather than feof alone) cannot spin forever on a failing stream
   int ch;
   while ((ch = fgetc(f)) != '_')
      {
      if (ch == EOF) return 0;
      fscanf(f, "%*[^_]");
      }

   StringArray buffer;

   buffer.Read(f);

   int index = HV_SKIPLINES;

   // size - 1 keeps entries[count + 1] valid for the vertical layout; the
   // index test stops cleanly when the data lacks a terminating short line
   while (count < size - 1 && index < buffer.Length())
      {
      StringArray tokens;

      tokens.AddTokens(buffer[index++], WHITESPACE);

      if (tokens.Length() < 3) break;

      entries[count].name = label;        // Label haplotype
      entries[count].name += '.';
      entries[count].name += tokens[0];
      entries[count].states.Clear();

      switch (tokens[1][0])
         {
         case '=' :
            // Horizontal haplotype layout
            for (length = 0; length + 2 < tokens.Length(); length++)
               entries[count].states.Push(tokens[length + 2].AsInteger());
            count++;
            index++;
            break;
         case '|' :
            // Vertical haplotype layout
            entries[count + 1].name = label;
            entries[count + 1].name += '.';
            entries[count + 1].name += tokens[2];
            entries[count + 1].states.Clear();

            for (length = 0; index + length < buffer.Length(); length++)
               {
               tokens.Clear();

               // An empty line ends the block of state rows
               if (tokens.AddTokens(buffer[index + length]) == 0) break;

               // Each row must carry a state for both haplotypes
               if (tokens.Length() < 3)
                  {
                  printf("Error parsing haplotype file\n");
                  return 0;
                  }

               entries[count].states.Push(tokens[0].AsInteger());
               entries[count + 1].states.Push(tokens[2].AsInteger());
               }

            count += 2;
            index += length + 2;
            break;
         default :
            printf("Error parsing haplotype file\n");
            return 0;
         }
      }

   // The loop above gives up once fewer than two entries are free, so that
   // is the point from which further haplotypes may have been dropped
   if (count >= size - 1)
      printf("WARNING - Maximum haplotype vector size %d reached,\n"
            "extra haplotypes may have been ignored.\n\n", size);

   return count;
   }

int HaploVector::WriteToFile(FILE * f) const
   {
   for (int i = 0; i < count; i++)
      {
      fprintf(f, "%15s", (const char *) entries[i].name);
      for (int j = 0; j < length; j++)
         fprintf(f, "%3d", entries[i].states[j]);
      fprintf(f, "\n");
      }

   return count;  
   }

// Sorts the first `count` entries in place with QuickSort, using
// hvEntry::Compare (which compares entry names) as the ordering.
// NOTE(review): QuickSort receives the element size, so it presumably
// moves hvEntry objects as raw bytes - confirm that is safe for hvEntry.
void HaploVector::Sort()
   {
   QuickSort(entries, count, sizeof(hvEntry), COMPAREFUNC hvEntry::Compare);
   }

// Looks up the entry whose name matches key among the first `count`
// entries, using BinarySearch with hvEntry::Compare.
// NOTE(review): presumably the entries must already be ordered by Sort(),
// and a failed search presumably yields a null pointer - confirm against
// BinarySearch.
hvEntry * HaploVector::Find(char * key)
   {
   // Temporary entry that carries only the name being searched for
   hvEntry probe;
   probe.name = key;

   hvEntry * match = (hvEntry *) BinarySearch(&probe, entries, count,
                                              sizeof(hvEntry),
                                              COMPAREFUNC hvEntry::Compare);
   return match;
   }


 
