#include "AssayInfo.h"

#include <ctype.h>
#include <stdio.h>

// Indices into columnPos[], one per header column that parseHeader() looks
// for. Each columnPos[] entry holds the zero-based token position of that
// column on a line, or -1 when the header line did not contain it.
// parseHeader() rejects the file unless the columns marked "required" exist.
#define COL_PREFERREDID    0   // required
#define COL_RSID           1   // optional
#define COL_BUILD          2   // optional
#define COL_CHROMOSOME     3   // required
#define COL_POSITION       4   // required
#define COL_QCTYPE         5   // required
#define COL_SEQUENCE       6   // required
#define COL_SOURCE         7   // optional
#define COL_STRAND         8   // optional

// Creates an empty assay table: counters at zero, no pending failure text,
// and no header parsed yet.
AssayInfo::AssayInfo()
   {
   duplicateEntries = 0;
   invalidAlleles = 0;
   failure = "";

   // parseHeader() normally sets these. Give them defined values up front so
   // that calling loadAssayInfo() without a successfully parsed header never
   // reads indeterminate memory: with numOfCols == 0 every non-empty line is
   // rejected as a column-count mismatch before columnPos[] is used as an index.
   numOfCols = 0;
   for (int col = 0; col <= COL_STRAND; col++)
      columnPos[col] = -1;
   }

// Releases every SNPInfo stored in snpArray.
// rsArray is deliberately not walked: loadAssayInfo() registers the very same
// SNPInfo pointers there, so deleting through it as well would free them twice.
AssayInfo::~AssayInfo()
   {
   for (int slot = 0; slot < snpArray.Capacity(); ++slot)
      {
      if (!snpArray.SlotInUse(slot))
         continue;

      SNPInfo * owned = (SNPInfo *) snpArray.Object(slot);
      delete owned;
      }
   }

bool AssayInfo::parseHeader(FILE * input)
{
   buffer.ReadLine(input);
   tokens.ReplaceTokens(buffer);

   for (int cnt = 0; cnt < 9; cnt++)
      columnPos[cnt] = -1;

   for (int cnt = 0; cnt < tokens.Length(); cnt++)
   {
      if (PREFERRED_ID.SlowCompare(tokens[cnt]) == 0)
      {
         if (columnPos[COL_PREFERREDID] == -1)
            columnPos[COL_PREFERREDID] = cnt;
         else
	 {
            failure.catprintf("WARNING: Multiple columns have %s header", (const char *)PREFERRED_ID);
	    logMgr.WriteToLog(failure, 3);
	    failure = "";
	 }

      }
      if (RS_ID.SlowCompare(tokens[cnt]) == 0)
      {
         if (columnPos[COL_RSID] == -1)
            columnPos[COL_RSID] = cnt;
         else
	 {
            failure.catprintf("WARNING: Multiple columns have %s header", (const char *)RS_ID);
	    logMgr.WriteToLog(failure, 3);
	    failure = "";
	 }
      }
      if (BUILD.SlowCompare(tokens[cnt]) == 0)
      {
         if (columnPos[COL_BUILD] == -1)
            columnPos[COL_BUILD] = cnt;
         else
	 {
            failure.catprintf("WARNING: Multiple columns have %s header", (const char *)BUILD);
	    logMgr.WriteToLog(failure, 3);
	    failure = "";
	 }
      }
      if (CHR.SlowCompare(tokens[cnt]) == 0)
      {
         if (columnPos[COL_CHROMOSOME] == -1)
            columnPos[COL_CHROMOSOME] = cnt;
         else
	 {
            failure.catprintf("WARNING: Multiple columns have %s header", (const char *)CHR);
	    logMgr.WriteToLog(failure, 3);
	    failure = "";
	 }
      }
      if (POSITION.SlowCompare(tokens[cnt]) == 0)
      {
         if (columnPos[COL_POSITION] == -1)
            columnPos[COL_POSITION] = cnt;
         else
	 {
            failure.catprintf("WARNING: Multiple columns have %s header", (const char *)POSITION);
	    logMgr.WriteToLog(failure, 3);
	    failure = "";
	 }
      }
      if (QC_TYPE.SlowCompare(tokens[cnt]) == 0)
      {
         if (columnPos[COL_QCTYPE] == -1)
            columnPos[COL_QCTYPE] = cnt;
         else
	 {
            failure.catprintf("WARNING: Multiple columns have %s header", (const char *)QC_TYPE);
	    logMgr.WriteToLog(failure, 3);
	    failure = "";
	 }
      }
      if (SEQUENCE.SlowCompare(tokens[cnt]) == 0)
      {
         if (columnPos[COL_SEQUENCE] == -1)
            columnPos[COL_SEQUENCE] = cnt;
         else
	 {
            failure.catprintf("WARNING: Multiple columns have %s header", (const char *)SEQUENCE);
	    logMgr.WriteToLog(failure, 3);
	    failure = "";
         }
      }
      if (SOURCE.SlowCompare(tokens[cnt]) == 0)
      {
         if (columnPos[COL_SOURCE] == -1)
            columnPos[COL_SOURCE] = cnt;
         else
	 {
            failure.catprintf("WARNING: Multiple columns have %s header", (const char *)SOURCE);
	    logMgr.WriteToLog(failure, 3);
	    failure = "";
	 }
      }

      if (STRAND.SlowCompare(tokens[cnt]) == 0)
      {
         if (columnPos[COL_STRAND] == -1)
            columnPos[COL_STRAND] = cnt;
         else
	 {
            failure.catprintf("WARNING: Multiple columns have %s header", (const char *)STRAND);
	    logMgr.WriteToLog(failure, 3);
	    failure = "";
	    
         }
      }
   }
   
   if (columnPos[COL_PREFERREDID] == -1 || columnPos[COL_CHROMOSOME] == -1 || columnPos[COL_POSITION] == -1 ||
      columnPos[COL_QCTYPE] == -1 || columnPos[COL_SEQUENCE] == -1)
   {
      if (columnPos[COL_PREFERREDID] == -1) printf("SNP Information file is missing essential column %s\n", (const char *) PREFERRED_ID);
      if (columnPos[COL_CHROMOSOME] == -1) printf("SNP Information file is missing essential column %s\n", (const char *) CHR);
      if (columnPos[COL_POSITION] == -1) printf("SNP Information file is missing essential column %s\n", (const char *) POSITION);
      if (columnPos[COL_QCTYPE] == -1) printf("SNP Information file is missing essential column %s\n", (const char *) QC_TYPE);
      if (columnPos[COL_SEQUENCE] == -1) printf("SNP Information file is missing essential column %s\n", (const char *) SEQUENCE);
      return false;
   }
   numOfCols = tokens.Length();
   return true;
}

bool AssayInfo::loadAssayInfo(FILE * input)
{
   int line = 1;
   int formattingErrors = 0;
   SNPInfo * snp;
   do
   {
      buffer.ReadLine(input);
      tokens.ReplaceTokens(buffer);
      line++;

      if (tokens.Length() == 0)
         continue;

      if (tokens.Length() != numOfCols)
      {
         // warn about insufficient num of Columns
         failure.catprintf("INVALID FORMAT: Line %d has %d columns, but header has %d columns",
                           line, tokens.Length(), numOfCols);
	 logMgr.WriteToLog(failure, 4);
	 failure = "";
         if (++formattingErrors > 1000)   return false;
         else continue;
      }

     if (snpArray.Find(tokens[columnPos[COL_PREFERREDID]]) >= 0)
       {
       // We should check that duplicate entries provide consistent information
       // if they do, no problem -- if they don't, generate an error
       // probably requires an == operator for SNPinfo
       duplicateEntries++;
       continue;
       }

      snp = new SNPInfo();

      snp->preferredID = tokens[columnPos[COL_PREFERREDID]];

      if (columnPos[COL_RSID] < 0)
         snp->rsID = "unknown_rs";
      else
         snp->rsID = isdigit(tokens[columnPos[COL_RSID]][0]) ?
                     ("rs" + tokens[columnPos[COL_RSID]]) : tokens[columnPos[COL_RSID]];

      snp->build = columnPos[COL_BUILD] == -1 ? "unknown_build" : (const char *) tokens[columnPos[COL_BUILD]];
      snp->chromosome = tokens[columnPos[COL_CHROMOSOME]];
      snp->position = (tokens[columnPos[COL_POSITION]]).AsInteger();
      snp->strand = columnPos[COL_STRAND] == -1 ? '+' : tokens[columnPos[COL_STRAND]][0];

      if (tokens[columnPos[COL_QCTYPE]].SlowCompare("A") == 0)
         snp->qcType = __ASSAY_QC_AUTOSOME__;
      else if ((tokens[columnPos[COL_QCTYPE]].SlowCompare("X") == 0) ||
               (tokens[columnPos[COL_QCTYPE]].SlowCompare("Y") == 0))
         snp->qcType = __ASSAY_QC_SEXLINKED__;
      else if ((tokens[columnPos[COL_QCTYPE]].SlowCompare("NA") == 0))
         snp->qcType = __ASSAY_QC_NONE__;
      else
      {
         // warn about invalid qcType, default to autosomal
         failure.catprintf("WARNING: Marker %s on line %d has invalid QC type. Defaulting to Autosomal", (const char *)snp->preferredID, line);
	 logMgr.WriteToLog(failure, 5);
	 failure = "";
         snp->qcType = __ASSAY_QC_NONE__;
      }
      snp->sequence = tokens[columnPos[COL_SEQUENCE]];
      snp->source = columnPos[COL_SOURCE] == -1 ? "unknown_source" : (const char *) tokens[columnPos[COL_SOURCE]];
      if(!snp->FindAlleles())
      {
         // this snp information does not have proper allele info..
         // so skip this marker
         invalidAlleles++;
         delete snp;
         continue;
      }

      snpArray.SetObject(snp->preferredID, snp);

      if (columnPos[COL_RSID] >= 0)
         rsArray.SetObject(snp->rsID, snp);

   } while(!feof(input));
   return true;
}


// Looks up a marker by its preferred ID, the key under which loadAssayInfo()
// files entries in snpArray, and returns whatever snpArray holds for it.
// The returned object stays owned by this AssayInfo.
// NOTE(review): the result for an unknown name is whatever snpArray.Object()
// yields in that case (presumably NULL) -- confirm against the hash class.
SNPInfo* AssayInfo::getSNPInfo(String & markerName)
{
   SNPInfo * match = (SNPInfo *) snpArray.Object(markerName);
   return match;
}

// Looks up a marker by rs ID, the key under which loadAssayInfo() files
// entries in rsArray (only populated when the input had an rs ID column).
// The returned object is the same one held by snpArray and stays owned by
// this AssayInfo.
// NOTE(review): the result for an unknown name is whatever rsArray.Object()
// yields in that case (presumably NULL) -- confirm against the hash class.
SNPInfo * AssayInfo::getRSIDInfo(String & markerName)
   {
   SNPInfo * match = (SNPInfo *) rsArray.Object(markerName);
   return match;
   }
