////////////////////////////////////////////////////////////////////// 
// setup-simwalk2/setup-simwalk2.cpp 
// (c) 2000-2001 Goncalo Abecasis
// 
// This file is distributed as part of the GOLD source code package   
// and may not be redistributed in any form, without prior written    
// permission from the author. Permission is granted for you to       
// modify this file for your own personal use, but modified versions  
// must retain this copyright notice and must not be distributed.     
// 
// Permission is granted for you to use this file to compile GOLD.    
// 
// All computer programs have bugs. Use this file at your own risk.   
// 
// Thursday November 08, 2001
// 
 
#include "Constant.h"
#include "Parameters.h"
#include "Pedigree.h"
#include "Error.h"

#include "stdio.h"
#include <stdlib.h>

// Choices offered by the 'a' ("Allele Frequencies") command line parameter
// registered in main(). Each entry pairs a selector character and a label
// with the FREQ_* constant that is stored in the estimator variable.
// NOTE(review): the label passed to END_OPTION_LIST is presumably the text
// shown for an unrecognized or default selection - confirm in Parameters.h
BEGIN_OPTION_LIST(freqOptions)
   {'a', "ALL INDIVIDUALS", FREQ_ALL},
   {'e', "EQUIFREQUENT", FREQ_EQUAL},
   {'f', "FOUNDERS ONLY", FREQ_FOUNDERS}
END_OPTION_LIST("ALL INDIVIDUALS");

// Forward declarations for the three output routines defined below main()

// Estimates allele frequencies and writes the locus description (LOCUS.DAT)
void WriteSimwalkDataFile(Pedigree & ped, int estimator);
// Writes family and genotype records (PEDIGREE.DAT)
void WriteSimwalkPedigreeFile(Pedigree & ped);
// Writes the run specification (BATCH2.DAT)
void WriteSimwalkBatchFile();

// Program entry point.
//
// Reads the command line options, loads the marker map and the pedigree,
// and then writes the three SimWalk2 input files (BATCH2.DAT, LOCUS.DAT and
// PEDIGREE.DAT) into the current directory.
//
// Options:
//    p - pedigree file name           (default "makeped.ped")
//    m - map file name                (default "map.gm")
//    a - allele frequency estimator   (one of freqOptions)
//
// Returns 0 once all files have been written. The return type is int, as
// required by the C++ standard (void main is not portable).
int main(int argc, char * argv[])
   {
   printf("setup-simwalk2 1.1 - Sets up input files for SimWalk2 haplotyping run\n"
          "(c) 1999-2001 Goncalo Abecasis\n\n");

   String pedName = "makeped.ped";
   String dataName = "map.gm";
   int    estimator = 0;

   ParameterList pl;

   // NOTE(review): the parameter list presumably takes ownership of the
   // objects added here - confirm in Parameters.h
   pl.Add(new StringParameter('p', "Pre-Makeped Pedigree", pedName));
   pl.Add(new StringParameter('m', "Map File", dataName));
   pl.Add(new ListParameter('a', "Allele Frequencies", estimator, freqOptions));

   pl.Read(argc, argv);
   pl.Status();

   Pedigree ped;

   // Load the marker map first and the pedigree second
   ped.pd.LoadMap(dataName);
   ped.Load(pedName);

   // The output routines read the marker count and marker information, so
   // they run only after the input files have been loaded
   WriteSimwalkBatchFile();
   WriteSimwalkDataFile(ped, estimator);
   WriteSimwalkPedigreeFile(ped);

   printf("\n\nYou may now run SimWalk2 to estimate haplotypes.\n\n");

   return 0;
   }

// Writes the SimWalk2 run specification, BATCH2.DAT, in the current
// directory. Every batch item is written as a blank line, a six digit item
// number and then the values for that item.
//
// Reads Person::markerCount and the marker positions, so the marker map
// must already be loaded when this is called.
//
// Terminates the program with a failure status if the file cannot be
// created, rather than writing through a NULL stream.
void WriteSimwalkBatchFile()
   {
   printf("Preparing SimWalk2 Run Specification...\n");

   FILE * f = fopen("BATCH2.DAT", "wt");

   if (f == NULL)
      {
      fprintf(stderr, "\nFATAL ERROR - Unable to create file BATCH2.DAT\n");
      exit(EXIT_FAILURE);
      }

   // Select the type of analysis to perform
   // NOTE(review): this comment used to say "IBD analysis", but the program
   // banner describes a haplotyping run - confirm the meaning of value 1
   // against the SimWalk2 documentation
   fprintf(f, "\n000001\n1\n");

   // Specify a descriptive run title
   fprintf(f, "\n000003\nGOLD Haplotype Analysis\n");

   // No trait locus in the pedigree file
   fprintf(f, "\n000013\nN\n");

   // Recombination frequencies between markers: the marker count followed
   // by one value per pair of adjacent markers. The value written is simply
   // the difference between consecutive map positions.
   // NOTE(review): the units of position are not visible here - the text
   // placed in the file assumes 1,000,000 bp equals 1 cM
   fprintf(f, "\n000015    "
              "*** Recombination fractions below assume "
              "1,000,000 bp equals 1 cM ***\n"
              "%d\n", Person::markerCount);
   for (int i = 0; i < Person::markerCount - 1; i++)
      fprintf(f, "%8.6f\n",
                Person::GetMarkerInfo(i + 1)->position -
                Person::GetMarkerInfo(i)->position);

   // Output format: Horizontal (Not Vertical), Founder Labels,
   // Do not Use Common Alleles as Default
   fprintf(f, "\n000046\nN\nY\nN\n");

   // Do not calculate IBD between markers
   fprintf(f, "\n000049\n1\n");

   // End of batch file
   fprintf(f, "\n000050\n\n");

   fclose(f);
   }

// Writes the SimWalk2 locus description, LOCUS.DAT, in the current
// directory.
//
// ped       - pedigree whose marker information is described; its allele
//             frequencies are (re)estimated before anything is written
// estimator - frequency estimation method passed to EstimateFrequencies
//             (one of the FREQ_* constants selected on the command line)
//
// For each marker a header line is written (name padded or truncated to 8
// characters, the word AUTOSOME and the number of alleles), followed by one
// line per allele holding the allele number and its frequency.
//
// Terminates the program with a failure status if the file cannot be
// created, rather than writing through a NULL stream.
void WriteSimwalkDataFile(Pedigree & ped, int estimator)
   {
   ped.EstimateFrequencies(estimator);

   printf("Preparing SimWalk2 Data Description...\n");

   FILE * f = fopen("LOCUS.DAT", "wt");

   if (f == NULL)
      {
      fprintf(stderr, "\nFATAL ERROR - Unable to create file LOCUS.DAT\n");
      exit(EXIT_FAILURE);
      }

   // The actual markers, in the same order as in the data file
   for (int m = 0; m < Person::markerCount; m++)
      {
      MarkerInfo * info = ped.GetMarkerInfo(m);

      // The name column is exactly 8 characters wide, so let the user know
      // when a longer name is about to be cut short
      if (info->name.Length() > 8)
         printf("   WARNING - Marker name %s truncated to %-8.8s\n",
                (const char *) info->name, (const char *) info->name);

      // Alleles are numbered from 1, so entry 0 of the frequency vector is
      // not counted and not written out
      fprintf(f, "%-8.8sAUTOSOME%2d 0\n",
               (const char *) info->name, info->freq.Length() - 1);

      for (int i = 1; i < info->freq.Length(); i++)
         fprintf(f, "%8d%8.6f\n", i, info->freq[i]);
      }

   fclose(f);
   }

// Writes the SimWalk2 pedigree data, PEDIGREE.DAT, in the current directory.
//
// ped - pedigree whose families, individuals and marker genotypes are
//       written out
//
// The file starts with two Fortran-style format strings describing the
// family header record and the individual record. Each family then
// contributes one header line (member count and family id) followed by one
// record per individual: id, father id, mother id, sex and the genotypes,
// four markers per line with continuation lines indented to column 28.
//
// Terminates the program with a failure status if the file cannot be
// created, rather than writing through a NULL stream.
void WriteSimwalkPedigreeFile(Pedigree & ped)
   {
   printf("Preparing SimWalk2 Pedigree Data...\n");

   FILE * f = fopen("PEDIGREE.DAT", "wt");

   if (f == NULL)
      {
      fprintf(stderr, "\nFATAL ERROR - Unable to create file PEDIGREE.DAT\n");
      exit(EXIT_FAILURE);
      }

   // Specify the record formats used below
   fprintf(f, "(I5,1X,A8)\n(3A8,1X,2A1,(T28,4(A8),:))\n");

   // Sex codes, indexed by the sex field: blank, then M, then F
   const char * key = " MF";

   // The major hazards are exact column widths and
   // converting missing data to empty space
   for (int i = 0; i < ped.familyCount; i++)
      {
      // The member count is printed with %5d, so it must be passed as an
      // integer (it was previously cast to a character pointer by mistake)
      fprintf(f, "%5d %8.8s\n", (int) ped.families[i]->count,
                                (const char *) ped.families[i]->famid);

      for (int j = ped.families[i]->first; j <= ped.families[i]->last; j++)
         {
         // Founders have blank parent columns
         bool founder = ped[j].isFounder();
         const char * father = founder ? "" : (const char *) ped[j].fatid;
         const char * mother = founder ? "" : (const char *) ped[j].motid;

         fprintf(f, "%-8.8s%-8.8s%-8.8s %c ",
                 (const char *) ped[j].pid, father, mother, key[ped[j].sex]);

         for (int m = 0; m < Person::markerCount; m++)
            {
            // Start a continuation line after every fourth genotype; the 27
            // blanks line the next genotype up with column 28
            if ((m != 0) && (m % 4 == 0))
               fprintf(f,"\n%27s", "");

            // Each genotype occupies exactly 8 columns, blank when missing
            if (ped[j].markers[m].isKnown())
               fprintf(f," %3d/%3d", ped[j].markers[m][0], ped[j].markers[m][1]);
            else
               fprintf(f,"        ");
            }
         fprintf(f, "\n");
         }
      }

   fclose(f);
   }
 
