////////////////////////////////////////////////////////////////////// 
// vcfCooker/Main.cpp 
// (c) 2010 Hyun Min Kang, Matthew Flickenger, Matthew Snyder, Paul Anderson
//          Tom Blackwell, Mary Kate Trost, and Goncalo Abecasis
// 
// This file is distributed as part of the vcfCooker source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile vcfCooker
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 11th, 2010

#include <map>
#include <limits.h>

#include "SamFile.h"
#include "Cigar.h"
#include "Parameters.h"
#include "InputFile.h"
#include "Error.h"
#include "VcfFile.h"
#include "Logger.h"

// Map key identifying one read record: the read name plus a boolean that the
// caller uses to tell the two mates of a pair apart (main() passes the 0x0040
// flag bit of the record).
//
// Keys are ordered lexicographically by (readName, first).  operator< must be
// a strict weak ordering because the class is used as a std::map key.
class CigarDiffKey {
public:
  std::string readName;  // read (query) name
  bool first;            // mate discriminator supplied by the caller

  CigarDiffKey(const char* s, bool b) : readName(s), first(b) {}

  // Two keys are equal when both the read name and the mate flag agree.
  bool operator== (const CigarDiffKey& other) const {
    return ( ( readName.compare(other.readName) == 0 ) &&
             ( first == other.first ) );
  }

  // Lexicographic order: compare read names first and only fall back to the
  // mate flag when the names are identical.  (Consulting the flag when the
  // names differ would let a < b and b < a both hold, which breaks std::map.)
  bool operator< (const CigarDiffKey& other) const {
    int c = readName.compare(other.readName);
    if ( c != 0 ) return ( c < 0 );
    return ( first < other.first );
  }
};

// Alignment summary of one read record: reference ID, start position and
// CIGAR string.  cigarDiff() compares two such summaries base by base and
// logs every reference position at which the two alignments disagree.
class CigarDiffVal {
public:
  int refID;                // reference sequence ID of the alignment
  int bp;                   // start position (main() passes the 1-based position)
  std::string cigarString;  // CIGAR string of the alignment

  CigarDiffVal(int n, int p, const char* s) : refID(n), bp(p), cigarString(s) {}

  // Default constructor (required by std::map::operator[]).  Members are
  // given defined values so that a default-constructed object is never read
  // uninitialized.
  CigarDiffVal() : refID(-1), bp(0), cigarString() {}

  // Compare this alignment ("1") against other ("2").
  //
  // For every query base index j, the reference offsets reported by the two
  // CIGARs are compared:
  //   - both INDEX_NA            : nothing to report
  //   - only alignment 1 is NA   : log the position covered by alignment 2
  //   - only alignment 2 is NA   : log the position covered by alignment 1
  //   - both set but different   : log both positions
  // Mismatching reference IDs or query lengths are reported through
  // Logger::error().
  // NOTE(review): Logger::error() is presumably fatal -- confirm in Logger.h.
  void cigarDiff (const CigarDiffVal& other) const {
    if ( refID != other.refID ) {
      Logger::gLogger->error("Reads were mapped to different chromosomes");
    }

    CigarRoller roller1( cigarString.c_str() );
    CigarRoller roller2( other.cigarString.c_str() );

    int len = roller1.getExpectedQueryBaseCount();
    if ( len != roller2.getExpectedQueryBaseCount() ) {
      Logger::gLogger->error("Different # expected query base count");
      // Defensive: if error() ever returns, do not walk the two CIGARs with
      // a length that is only valid for one of them.
      return;
    }
    for(int j=0; j < len; ++j) {
      int offset1 = roller1.getRefOffset(j);
      int offset2 = roller2.getRefOffset(j);

      // Both offsets must be checked here; testing offset1 twice would hide
      // every case where only alignment 1 lacks a reference position.
      if ( ( offset1 == Cigar::INDEX_NA ) && ( offset2 == Cigar::INDEX_NA ) ) {
        // OK
      }
      else if ( offset1 == Cigar::INDEX_NA ) {
        Logger::gLogger->writeLog("Affected Position 2 %d:%d", refID, offset2 + other.bp );
      }
      else if ( offset2 == Cigar::INDEX_NA ) {
        Logger::gLogger->writeLog("Affected Position 1 %d:%d", refID, offset1 + bp );
      }
      else if ( bp + offset1 != other.bp + offset2 ) {
        Logger::gLogger->writeLog("Affected Position 1 %d:%d", refID, offset1 + bp );
        Logger::gLogger->writeLog("Affected Position 2 %d:%d", refID, offset2 + other.bp );
      }
    }
  }
};

void cigarDiff( const CigarDiffVal & v1, const CigarDiffVal & v2 ) {
  v1.cigarDiff(v2);
}

// Definition of the global logger pointer declared in the Logger class.
// It starts out NULL; main() assigns the actual Logger instance once the
// output prefix is known.
Logger* Logger::gLogger = NULL;

int main(int argc, char ** argv)
{
   printf("bamCigarDiff 0.0.1 -- Find difference in cigarStrings in a pair of BAMs\n"
          "(c) 2010 Hyun Min Kang\n\n");

   String sBam1, sBam2;
   String sFasta("/data/local/ref/karma.ref/human.g1k.v37.fa");
   String sOut("./bamCigarDiff");
   bool bVerbose = true;
   String sRegion;

   ParameterList pl;

   BEGIN_LONG_PARAMETERS(longParameters)
     LONG_PARAMETER_GROUP("Input file options")
     LONG_STRINGPARAMETER("bam1",&sBam1)
     LONG_STRINGPARAMETER("bam2",&sBam2)
     LONG_STRINGPARAMETER("region",&sRegion)

     LONG_PARAMETER_GROUP("Output file Options")
     LONG_STRINGPARAMETER("out",&sOut)

     LONG_PARAMETER_GROUP("Other options")
     LONG_PARAMETER("verbose",&bVerbose)
   END_LONG_PARAMETERS();

   pl.Add(new LongParameters("Available Options", longParameters));
   pl.Read(argc, argv);
   pl.Status();
   
   // create objects for logging
   if ( sOut.IsEmpty() ) {
     fprintf(stderr,"ERROR: output prefix is empty");
     abort();
   }
   Logger::gLogger = new Logger((sOut+".log").c_str(), bVerbose);

   time_t t;
   time(&t);
   Logger::gLogger->writeLog("Analysis started on %s", ctime(&t));

   ////////////////////////////////////////////////////////////
   // check the compatibility of arguments
   ///////////////////////////////////////////////////////////
   // Check the sanity of input file arguments
   ///////////////////////////////////////////////////////////
   if ( sBam1.IsEmpty() || sBam2.IsEmpty() ) {
     Logger::gLogger->error("Both --bam1 and --bam2 options must be specified");
   }

   SamRecord rec1, rec2;
   SamFile sam1, sam2;
   SamFileHeader header1, header2;

   // open the BAM files
   if ( ! (sam1.OpenForRead(sBam1.c_str())) ) {
     Logger::gLogger->error("Cannot open BAM file %s for reading - %s",sBam1.c_str(), SamStatus::getStatusString(sam1.GetStatus()) );
   }
   if ( ! (sam2.OpenForRead(sBam2.c_str())) ) {
     Logger::gLogger->error("Cannot open BAM file %s for reading - %s",sBam2.c_str(), SamStatus::getStatusString(sam2.GetStatus()) );
   }

   String sIndex1, sIndex2;
   sIndex1 = sBam1 + ".bai";
   sIndex2 = sBam2 + ".bai";
  
   if ( ! sam1.ReadBamIndex( sIndex1.c_str() ) ) {
     Logger::gLogger->error("Cannot open BAM file index %s for reading %s", sIndex1.c_str(),sBam1.c_str());
   }   
   if ( ! sam2.ReadBamIndex( sIndex2.c_str() ) ) {
     Logger::gLogger->error("Cannot open BAM file index %s for reading %s", sIndex2.c_str(),sBam2.c_str());
   }   

   // parse the sRegion into chr:begin-end
   int regionRefID1, regionRefID2, begin, end;
   String regionChr;
   sam1.ReadHeader(header1);
   sam2.ReadHeader(header2);

   if ( !sRegion.IsEmpty() ) {
     StringArray tok;
     tok.ReplaceTokens(sRegion,":-");
     if ( tok.Length() != 3 ) {
       Logger::gLogger->error("The --region option cannot be parsed into [chr]:[begin]-[end] format");
     }
     regionChr = tok[0];
     begin = atoi(tok[1].c_str());
     end = atoi(tok[2].c_str());

     // if sRegion is set, then read BAM headers to find refID matching
     // the sequence name
     SamHeaderRecord* hRec;
     regionRefID1 = 0;
     while( ( hRec = header1.getNextHeaderRecord() ) != NULL ) {
       if ( hRec->getType() == SamHeaderRecord::SQ ) {
	 if ( regionChr.Compare(hRec->getTagValue("SN")) == 0 ) {
	   break;
	 }
	 else {
	   ++regionRefID1;
	 }
       }
     }
     Logger::gLogger->writeLog("Found refID=%d as matching to chromosome Name %s",regionRefID1,regionChr.c_str());

     // do the same thing for the next header
     regionRefID2 = 0;
     while( ( hRec = header2.getNextHeaderRecord() ) != NULL ) {
       if ( hRec->getType() == SamHeaderRecord::SQ ) {
	 if ( regionChr.Compare(hRec->getTagValue("SN")) == 0 ) {
	   break;
	 }
	 else {
	   ++regionRefID2;
	 }
       }
     }
     Logger::gLogger->writeLog("Found refID=%d as matching to chromosome Name %s",regionRefID2,regionChr.c_str());
   }
   else {
     regionRefID1 = regionRefID2 = begin = end = 01;
   }

   if ( regionRefID1 >= 0 ) {
     sam1.SetReadSection( regionRefID1, begin-1, end );
     sam2.SetReadSection( regionRefID2, begin-1, end );
   }


   // store unresolved cigar, position, and readnames
   std::map<CigarDiffKey,CigarDiffVal> dict1;
   std::map<CigarDiffKey,CigarDiffVal> dict2;

   // read the first records
   sam1.ReadRecord(header1,rec1);
   sam2.ReadRecord(header2,rec2);
   bool adv1 = true, adv2 = true;
   bool valid1 = true, valid2 = true;

   while( valid1 && valid2 ) {
     // check if the record has same position and flags
     bool first1 = ((rec1.getFlag() & 0x0040) > 0);
     bool first2 = ((rec2.getFlag() & 0x0040) > 0);

     CigarDiffKey key1(rec1.getReadName(), first1);
     CigarDiffKey key2(rec2.getReadName(), first2);
     CigarDiffVal val1(rec1.getReferenceID(), rec1.get1BasedPosition(), rec1.getCigar());
     CigarDiffVal val2(rec1.getReferenceID(), rec2.get1BasedPosition(), rec2.getCigar());

     if ( key1 == key2 ) {
       cigarDiff( val1, val2 );
       adv1 = adv2 = true;
       //Logger::gLogger->writeLog("foo");
     }
     else {
       // if key1 matches to dict2
       if ( dict2.find(key1) != dict2.end() ) {
	 cigarDiff( val1, dict2[key1] );
	 dict2.erase(key1);
	 adv1 = true;
       }
       // if key2 matches to dict1
       else if ( dict1.find(key2) != dict1.end() ) {
	 cigarDiff( val2, dict1[key2] );
	 dict1.erase(key2);
	 adv2 = true;
       }
       // if neither matches
       else {
	 dict1[key1] = val1;
	 dict2[key2] = val2;
	 adv1 = adv2 = true;
       }
     }

     if ( adv1 ) {
       valid1 = sam1.ReadRecord( header1, rec1 );
       //Logger::gLogger->writeLog("adv1");
     }
     
     if ( adv2 ) {
       valid2 = sam2.ReadRecord( header2, rec2 );
       //Logger::gLogger->writeLog("adv2");
     }
   }

   time(&t);
   Logger::gLogger->writeLog("Analysis finished on %s", ctime(&t));

   return 0;
}


