////////////////////////////////////////////////////////////////////// 
// libsrc/PedigreeGlobals.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "PedigreeGlobals.h"
#include "Sort.h"
#include "Error.h"

#include <string.h>
#include <ctype.h>

// Number of IDs handed out so far by GetTraitID(), GetAffectionID(),
// GetCovariateID() and GetMarkerID() respectively
int PedigreeGlobals::traitCount = 0;
int PedigreeGlobals::affectionCount = 0;
int PedigreeGlobals::covariateCount = 0;
int PedigreeGlobals::markerCount = 0;

// Names registered through the Get...ID() functions; a name is appended
// each time a new ID is issued. markerLookup maps a marker name to its ID.
StringArray PedigreeGlobals::traitNames;
StringArray PedigreeGlobals::markerNames;
StringArray PedigreeGlobals::covariateNames;
StringArray PedigreeGlobals::affectionNames;
StringMap   PedigreeGlobals::markerLookup;

// Entries in use and allocated capacity of the markerInfo array
int PedigreeGlobals::markerInfoCount = 0;
int PedigreeGlobals::markerInfoSize = 0;

// Array of MarkerInfo pointers, kept ordered by name by GetMarkerInfo()
// and (re)allocated by GrowMarkerInfo()
MarkerInfo ** PedigreeGlobals::markerInfo = NULL;

// Ordering callback for arrays of MarkerInfo pointers (see the QuickSort
// call in SortMarkersInMapOrder). Markers are ordered by chromosome first
// and by position second.
//
// Returns a negative value, zero or a positive value when *left sorts
// before, together with or after *right.
int MarkerInfo::ComparePosition(MarkerInfo ** left, MarkerInfo ** right)
   {
   MarkerInfo * a = *left;
   MarkerInfo * b = *right;

   // Different chromosomes: the chromosome numbers alone decide the order
   if (a->chromosome != b->chromosome)
      return a->chromosome - b->chromosome;

   // Same chromosome: reduce the floating point gap to -1 / 0 / +1
   double gap = a->position - b->position;

   if (gap == 0.0)
      return 0;

   return gap > 0.0 ? 1 : -1;
   }

// Returns the ID of the trait called `name`. A name that is not yet in
// traitNames is appended and receives the next unused ID.
int PedigreeGlobals::GetTraitID(const char * name)
   {
   int id = traitNames.SlowFind(name);

   if (id == -1)
      {
      // First time this name is seen: register it
      traitNames.Add(name);
      id = traitCount++;
      }

   return id;
   }

// Returns the ID of the affection status called `name`. A name that is not
// yet in affectionNames is appended and receives the next unused ID.
int PedigreeGlobals::GetAffectionID(const char * name)
   {
   int id = affectionNames.SlowFind(name);

   if (id == -1)
      {
      // First time this name is seen: register it
      affectionNames.Add(name);
      id = affectionCount++;
      }

   return id;
   }

// Returns the ID of the covariate called `name`. A name that is not yet in
// covariateNames is appended and receives the next unused ID.
int PedigreeGlobals::GetCovariateID(const char * name)
   {
   int id = covariateNames.SlowFind(name);

   if (id == -1)
      {
      // First time this name is seen: register it
      covariateNames.Add(name);
      id = covariateCount++;
      }

   return id;
   }

// Returns the ID of the marker called `name`. Lookups go through
// markerLookup; an unknown name is appended to markerNames, recorded in
// markerLookup and given the next unused ID.
int PedigreeGlobals::GetMarkerID(const char * name)
   {
   int id = markerLookup.Integer(name);

   if (id == -1)
      {
      // Unknown marker: register the name under a fresh ID
      id = markerCount;
      markerNames.Add(name);
      markerLookup.SetInteger(name, id);
      markerCount++;
      }

   return id;
   }

// Returns the MarkerInfo record for the marker called `name`, creating it
// if it does not exist yet.
//
// The markerInfo array is kept ordered according to String::SlowCompare on
// the marker names, so existing records are located by binary search and
// new records are inserted at their sorted position. The array is grown
// as soon as it becomes full, which guarantees a free slot for the next
// insertion.
MarkerInfo * PedigreeGlobals::GetMarkerInfo(String & name)
   {
   // Empty table: allocate storage and store the very first record
   if (markerInfoCount == 0)
      {
      GrowMarkerInfo();

      MarkerInfo * first = new MarkerInfo(name);
      markerInfo[markerInfoCount++] = first;
      return first;
      }

   // Binary search, narrowing [lo, hi] until at most one candidate is left
   int lo = 0;
   int hi = markerInfoCount - 1;

   while (lo < hi)
      {
      int mid   = (lo + hi) / 2;
      int order = name.SlowCompare(markerInfo[mid]->name);

      if (order < 0)
         hi = mid - 1;
      else if (order > 0)
         lo = mid + 1;
      else
         return markerInfo[mid];
      }

   // Check the remaining candidate; if it does not match, it tells us
   // whether the new record belongs just before or just after it
   int order = name.SlowCompare(markerInfo[lo]->name);

   if (order == 0)
      return markerInfo[lo];

   int slot = (order > 0) ? lo + 1 : lo;

   // Shift the tail of the array up by one to open a gap at `slot`
   if (slot < markerInfoCount)
      memmove(markerInfo + slot + 1, markerInfo + slot,
              (markerInfoCount - slot) * sizeof(MarkerInfo *));

   MarkerInfo * created = new MarkerInfo(name);
   markerInfo[slot] = created;

   // Keep at least one spare slot available for the next insertion
   markerInfoCount++;
   if (markerInfoCount == markerInfoSize)
      GrowMarkerInfo();

   return created;
   }

void PedigreeGlobals::GrowMarkerInfo()
   {
   int newSize = markerInfoSize ? 2 * markerInfoSize : 32;

   MarkerInfo ** newArray = new MarkerInfo * [newSize];

   if (markerInfoSize)
      {
      memcpy(newArray, markerInfo, sizeof(MarkerInfo *) * markerInfoSize);
      delete [] markerInfo;
      }

   markerInfo = newArray;
   markerInfoSize = newSize;
   }

// Replaces `markers` (a list of marker IDs) with the IDs of the markers
// that lie on a single chromosome, sorted by map position.
//
// markers    - marker IDs to consider; an empty list stands for every
//              known marker (IDs 0 .. markerCount - 1)
// chromosome - markers on chromosomes numbered below this value are
//              skipped; the first remaining chromosome in sorted order is
//              the one that is selected. When -1 is passed, markers without
//              a map position (chromosome == -1) are also listed on stdout.
//
// Returns the number of the first chromosome found after the selected one,
// or 0 if there is none.
int PedigreeGlobals::SortMarkersInMapOrder(IntArray & markers, int chromosome)
   {
   if (markers.Length() == 0)
      {
      markers.Dimension(markerCount);
      markers.SetSequence(0, 1);
      }

   MarkerInfo ** subset = new MarkerInfo * [markers.Length()];

   int count = 0;
   IntArray missingMarkers;

   for (int i = 0; i < markers.Length(); i++)
      {
      MarkerInfo * info = GetMarkerInfo(markers[i]);

      if (info->chromosome != -1)
         subset[count++] = info;
      else if (chromosome == -1)
         {
         // Remember the marker ID itself, not its index within `markers`:
         // the ID is what GetMarkerInfo() is given below, and the two only
         // coincide when `markers` is the sequence 0, 1, 2, ...
         // Each ID is recorded once so that the sorted name table built
         // below has exactly one entry per recorded marker.
         int listed = 0;

         for (int j = 0; j < missingMarkers.Length() && !listed; j++)
            listed = (missingMarkers[j] == markers[i]);

         if (!listed)
            missingMarkers.Push(markers[i]);
         }
      }

   // missingMarkers is only ever filled when chromosome == -1
   if (missingMarkers.Length())
      {
      StringMap names;

      printf("These markers couldn't be placed and won't be analysed:");

      for (int i = 0; i < missingMarkers.Length(); i++)
         names.Add(GetMarkerInfo(missingMarkers[i])->name);

      // Print the names wrapped at 79 columns with a 3 space indent;
      // starting at column 80 forces a line break before the first name
      for (int i = 0, line = 80; i < missingMarkers.Length(); i++)
         {
         if (line + names[i].Length() + 1 > 79)
            {
            printf("\n   ");
            line = 3;
            }

         printf("%s ", (const char *) names[i]);
         line += names[i].Length() + 1;
         }

      printf("\n\n");
      }

   QuickSort(subset, count, sizeof(MarkerInfo *),
             COMPAREFUNC MarkerInfo::ComparePosition);

   markers.Clear();

   int  current_chromosome = -1, next_chromosome = 0;

   // Walk the sorted markers: skip chromosomes below the requested one,
   // collect every marker on the first chromosome encountered and note
   // the number of the chromosome that follows it
   for (int i = 0; i < count; i++)
      if (subset[i]->chromosome < chromosome)
         continue;
      else if (current_chromosome == -1 ||
               subset[i]->chromosome == current_chromosome)
         {
         markers.Push(GetMarkerID(subset[i]->name));
         current_chromosome = subset[i]->chromosome;
         }
      else if (!next_chromosome)
         next_chromosome = subset[i]->chromosome;

   delete [] subset;

   return next_chromosome;
   }

// Loads allele frequencies from the file called `filename`.
// A file that cannot be opened is silently ignored.
void PedigreeGlobals::LoadAlleleFrequencies(const char * filename)
   {
   FILE * input = fopen(filename, "rb");

   if (input != NULL)
      {
      LoadAlleleFrequencies(input);
      fclose(input);
      }
   }

// Reads allele frequencies from an open stream.
//
// Each non-blank line is classified by the first character of its first
// token (case is ignored):
//   M <name>     selects (creating if necessary) the marker the following
//                frequencies belong to and discards its previous ones
//   F <f1> ...   appends frequencies to the current marker; ignored when
//                no marker has been selected yet
//   E            stops reading
// Any other line is reported through error().
void PedigreeGlobals::LoadAlleleFrequencies(FILE * input)
   {
   int         done = 0;
   String      buffer;
   StringArray tokens;
   MarkerInfo *info = NULL;

   while (!feof(input) && !done)
      {
      buffer.ReadLine(input);

      tokens.Clear();
      tokens.AddTokens(buffer, WHITESPACE);

      if (tokens.Length() < 1) continue;

      // toupper() is only defined for EOF and for values representable as
      // unsigned char; a plain char holding a non-ASCII byte may be
      // negative, so convert before the call
      switch (toupper((unsigned char) tokens[0][0]))
         {
         case 'M' :
            // NOTE(review): tokens[1] below relies on error() not returning
            if (tokens.Length() == 1)
               error("Unnamed marker in allele frequency file");
            info = GetMarkerInfo(tokens[1]);
            info->freq.Clear();
            // Slot 0 is a placeholder; WriteFreqFile() starts at index 1
            info->freq.Push(0.0);
            break;
         case 'F' :
            if (info != NULL)
               for (int i = 1; i < tokens.Length(); i++)
                  info->freq.Push(tokens[i]);
            break;
         case 'E' :
            done = 1;
            break;
         default :
            error ("Problem in allele frequency file.\n"
                   "Lines in this file should be of two types:\n"
                   "  -- Marker name lines begin with an M\n"
                   "  -- Frequency lines begin with an F\n\n"
                   "However the following line is different:\n%s\n",
                   (const char *) buffer);
         }
      }
   }

// Loads a marker map from the file called `filename`.
// A file that cannot be opened is silently ignored.
void PedigreeGlobals::LoadMarkerMap(const char * filename)
   {
   FILE * input = fopen(filename, "rb");

   if (input != NULL)
      {
      LoadMarkerMap(input);
      fclose(input);
      }
   }

// Reads a marker map from an open stream.
//
// Every non-blank line must hold exactly three whitespace separated
// fields: CHROMOSOME, MARKER_NAME and POSITION; anything else is reported
// through error(). The marker record is created if necessary, and the
// position is stored multiplied by 0.01 (WriteMapFile() multiplies by 100
// again on output).
//
// The "CHR MARKER POS" column header written by WriteMapFile() is
// recognised and skipped, so that files produced by WriteMapFile() can be
// read back without creating a bogus marker called "MARKER".
void PedigreeGlobals::LoadMarkerMap(FILE * input)
   {
   String      buffer;
   StringArray tokens;

   while (!feof(input))
      {
      buffer.ReadLine(input);

      tokens.Clear();
      tokens.AddTokens(buffer, WHITESPACE);

      if (tokens.Length() < 1) continue;

      if (tokens.Length() != 3)
         error("Error reading map file\n"
               "Each line in this file should include 3 fields:\n"
               "CHROMOSOME, MARKER_NAME, and POSITION\n\n"
               "However the following line has %d fields\n%s\n",
               tokens.Length(), (const char *) buffer);

      // Column header as emitted by WriteMapFile(): not a marker
      if (strcmp((const char *) tokens[0], "CHR") == 0 &&
          strcmp((const char *) tokens[1], "MARKER") == 0 &&
          strcmp((const char *) tokens[2], "POS") == 0)
         continue;

      MarkerInfo * info = GetMarkerInfo(tokens[1]);

      info->chromosome = tokens[0];
      info->position = (double) tokens[2] * 0.01;
      }
   }

// Instance counter; the destructor decrements it and releases the shared
// marker table when it drops to zero
int PedigreeGlobals::instanceCount = 0;

// Releases the shared marker table once the instance counter drops to
// zero: every MarkerInfo record is deleted, followed by the pointer array
// itself.
//
// The bookkeeping statics are reset afterwards. Without the reset, an
// instance created later would see a non-zero markerInfoCount together
// with a dangling markerInfo pointer, so GetMarkerInfo() would read freed
// memory and the next destructor run would delete it a second time.
PedigreeGlobals::~PedigreeGlobals()
   {
   if (--instanceCount == 0 && markerInfoSize)
      {
      for (int i = 0; i < markerInfoCount; i++)
         delete markerInfo[i];
      delete [] markerInfo;

      markerInfo = NULL;
      markerInfoCount = 0;
      markerInfoSize = 0;
      }
   }

// Writes the marker map to a new text file called `filename`.
// Failure to create the file is reported through error().
void PedigreeGlobals::WriteMapFile(const char * filename)
   {
   FILE * f = fopen(filename, "wt");

   // NOTE(review): the code below relies on error() not returning
   if (!f)
      error("Creating map file \"%s\"", filename);

   WriteMapFile(f);

   fclose(f);
   }

void PedigreeGlobals::WriteMapFile(FILE * output)
   {
   fprintf(output, "CHR  MARKER    POS\n");
   for (int i = 0; i < markerInfoCount; i++)
      fprintf(output, "%3d  % 10s %g\n",
         markerInfo[i]->chromosome,
         (const char *) markerInfo[i]->name,
         markerInfo[i]->position * 100.0);
   }

// Writes the allele frequencies to a new text file called `filename`.
// Failure to create the file is reported through error().
void PedigreeGlobals::WriteFreqFile(const char * filename)
   {
   FILE * f = fopen(filename, "wt");

   // NOTE(review): the code below relies on error() not returning
   if (!f)
      error("Creating allele frequency file \"%s\"", filename);

   WriteFreqFile(f);

   fclose(f);
   }

// Writes the allele frequencies of every entry of the markerInfo array to
// `output`, in the format read by LoadAlleleFrequencies(): an "M <name>"
// line per marker followed by "F ..." lines holding up to seven
// frequencies each. Index 0 of the frequency vector is the placeholder
// pushed by LoadAlleleFrequencies() and is not written.
//
// The "F " prefix is emitted together with the first value of each line
// rather than right after the marker name. Output for markers that have
// frequencies is unchanged, while a marker without any frequencies now
// yields a lone "M <name>" line instead of an unterminated "F " that the
// next marker record would be appended to.
void PedigreeGlobals::WriteFreqFile(FILE * output)
   {
   for (int i = 0; i < markerInfoCount; i++)
      {
      MarkerInfo * info = markerInfo[i];

      fprintf(output, "M %s\n", (const char *) info->name);

      for (int j = 1; j < info->freq.dim; j++)
         fprintf(output, "%s%.5f%s",
                 j % 7 == 1 ? "F " : "",
                 info->freq[j],
                 j == info->freq.dim - 1 || j % 7 == 0 ? "\n" : " ");
      }
   }





 
