libStatGen Software  1
FortranFormat.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "FortranFormat.h"
00019 #include "Error.h"
00020 
00021 FortranFormat::FortranFormat()
00022 {
00023     inputPos = -1;
00024     endOfPattern = false;
00025 }
00026 
00027 void FortranFormat::SetInputFile(IFILE & file)
00028 {
00029     input = file;
00030     inputPos = -1;
00031     endOfPattern = false;
00032 }
00033 
00034 void FortranFormat::SetFormat(const String & formatString)
00035 {
00036     format = formatString;
00037 
00038     inputPos = -1;
00039     endOfPattern = false;
00040 
00041     repeatCount = 0;
00042 
00043     format.Clear();
00044 
00045     // Remove blank spaces from format statement and extract
00046     // the first bracketed expression
00047     int level = 0;
00048     for (int i = 0; i < formatString.Length(); i++)
00049     {
00050         if (formatString[i] == ' '  || formatString[i] == '\t' ||
00051                 formatString[i] == '\n' || formatString[i] == '\r')
00052             continue;
00053 
00054         if (formatString[i] == '(')
00055             level++;
00056 
00057         if (formatString[i] == ')')
00058             level--;
00059 
00060         format += formatString[i];
00061 
00062         if (level == 0) break;
00063     }
00064 
00065     if (format[0] != '(' || format[format.Length() - 1] != ')')
00066         error("Invalid FORTRAN format statement\n\n"
00067               "The statement \"%s\" is not bracketed correctly.\n",
00068               (const char *) formatString);
00069 
00070     lastBracket = 1;
00071     lastCount = 0;
00072 
00073     formatPos = 1;
00074     repeatCount = 0;
00075 
00076     bracketStack.Clear();
00077     bracketCounter.Clear();
00078     bracketCount.Clear();
00079 }
00080 
00081 int FortranFormat::GetNextInteger()
00082 {
00083     GetNextField(buffer);
00084 
00085     return buffer.AsInteger();
00086 }
00087 
00088 char FortranFormat::GetNextCharacter()
00089 {
00090     GetNextField(buffer);
00091 
00092     return buffer[0];
00093 }
00094 
00095 void FortranFormat::GetNextField(String & field)
00096 {
00097     while (!ProcessToken(field))
00098         ;
00099 }
00100 
00101 bool FortranFormat::ProcessToken(String & field)
00102 {
00103     // This flag only gets set if we encounter the final bracket or a ':'
00104     endOfPattern = false;
00105 
00106     // Read input from file, if appropriate
00107     if (inputPos == -1)
00108     {
00109         inputLine.ReadLine(input);
00110         inputPos = 0;
00111     }
00112 
00113     // First read repeat count specifier
00114     if (repeatCount == 0)
00115         repeatCount = GetIntegerFromFormat();
00116 
00117     // By default, the repeat count should be 1
00118     if (repeatCount == 0)
00119         repeatCount = 1;
00120 
00121     int repeatPos = formatPos;
00122 
00123     // Check if this is a new bracketed grouping
00124     if (format[formatPos] == '(')
00125     {
00126         formatPos++;
00127 
00128         bracketStack.Push(formatPos);
00129         bracketCounter.Push(repeatCount);
00130         bracketCount.Push(repeatCount);
00131 
00132         repeatCount = 0;
00133 
00134         return false;
00135     }
00136 
00137     // Check if this an 'X' field
00138     if (format[formatPos] == 'X')
00139     {
00140         formatPos++;
00141 
00142         // No width specifier allowed for these fields
00143         RejectWidth('X');
00144 
00145         // Skip appropriate number of characters
00146         inputPos += repeatCount;
00147 
00148         // Reset repeat count
00149         repeatCount = 0;
00150 
00151         FinishField();
00152 
00153         return false;
00154     }
00155 
00156     // Check if this is a '/' (vertical tab field)
00157     if (format[formatPos] == '/')
00158     {
00159         formatPos++;
00160 
00161         // No width specifier allowed for these fields
00162         RejectWidth('/');
00163 
00164         // Skip the appropriate number of lines
00165         while (repeatCount--)
00166             inputLine.ReadLine(input);
00167 
00168         inputPos = 0;
00169 
00170         // Separators are optional, so we might already be at the next field
00171         if (format[formatPos] == ',' || format[formatPos] || ')')
00172             FinishField();
00173 
00174         return false;
00175     }
00176 
00177     // Check that we haven't encountered a rare, but unsupported input type
00178     if (format[formatPos] == 'Q' || format[formatPos] == 'P' || format[formatPos] == 'B')
00179     {
00180         formatPos++;
00181 
00182         int problemStart = formatPos;
00183 
00184         while (format[formatPos] != ',' && format[formatPos] != ')' && format[formatPos] != '/')
00185             formatPos++;
00186 
00187         error("Unsupported pattern in FORMAT statement\n\n"
00188               "Statement \"%s\" includes unsupporterd pattern '%s'\n",
00189               (const char *) format,
00190               (const char *) format.SubStr(problemStart, formatPos - problemStart));
00191     }
00192 
00193     if (format[formatPos] == ':')
00194     {
00195         formatPos++;
00196 
00197         if (format[formatPos] == ',' || format[formatPos] || ')')
00198             FinishField();
00199 
00200         repeatCount = 0;
00201 
00202         endOfPattern = true;
00203 
00204         return false;
00205     }
00206 
00207     // All the other types we recognize include a width specifier
00208 
00209     // Identify the location of the type specifier
00210     int typeStart = formatPos;
00211 
00212     while (CharacterFollows())
00213         formatPos++;
00214 
00215     int typeLen = formatPos - typeStart;
00216 
00217     // Retrieve the field width
00218     int width = GetIntegerFromFormat();
00219 
00220     if (width == 0)
00221         error("Unrecognized FORMAT statement\n\n"
00222               "Statement \"%s\" is missing a width specifier for a field of type '%s'\n",
00223               (const char *) format, (const char *) format.SubStr(typeStart, typeLen));
00224 
00225     // Check for horizontal tab character
00226     if (format[typeStart] == 'T')
00227     {
00228         // Move left by a specified number of characters
00229         if (format[typeStart + 1] == 'L')
00230             inputPos = width > inputPos ? 0 : inputPos - width;
00231         // Move right by a specified number of characters
00232         else if (format[typeStart + 1] == 'R')
00233             inputPos += width;
00234         // Or simply set the appropriate horizontal position
00235         else
00236             inputPos = width;
00237 
00238         repeatCount--;
00239 
00240         if (repeatCount)
00241             formatPos = repeatPos;
00242         else
00243             FinishField();
00244 
00245         return false;
00246     }
00247 
00248     // Assume that if we got here, we are looking at a data field!
00249     field.Copy(inputLine, inputPos, width);
00250     field.Trim();
00251 
00252     inputPos += width;
00253 
00254     repeatCount--;
00255 
00256     if (repeatCount)
00257         formatPos = repeatPos;
00258     else
00259         FinishField();
00260 
00261     return true;
00262 }
00263 
00264 int FortranFormat::GetIntegerFromFormat()
00265 {
00266     int result = 0;
00267 
00268     while (DigitFollows())
00269         result = result * 10 + (int)(format[formatPos++] - '0');
00270 
00271     return result;
00272 }
00273 
00274 bool FortranFormat::DigitFollows()
00275 {
00276     return (format[formatPos] >= '0') && (format[formatPos] <= '9');
00277 }
00278 
00279 bool FortranFormat::CharacterFollows()
00280 {
00281     return (format[formatPos] >= 'A') && (format[formatPos] <= 'Z');
00282 }
00283 
00284 void FortranFormat::RejectWidth(char ch)
00285 {
00286     // No width allowed for field types 'X' and '\'
00287     if (DigitFollows())
00288         error("Unrecognized FORTRAN format statement\n\n"
00289               "The statement \"%s\" includes width specifier for field of type '%c'.\n",
00290               (const char *) format, ch);
00291 }
00292 
00293 void FortranFormat::FinishField(bool)
00294 {
00295     // Find the next field separator
00296     while (format[formatPos] != ',' && format[formatPos] != ')')
00297     {
00298         if (format[formatPos] == '/')
00299             return;
00300 
00301         formatPos++;
00302     }
00303 
00304     // Skip commas
00305     if (format[formatPos] == ',')
00306     {
00307         formatPos++;
00308         return;
00309     }
00310 
00311     // If we found a bracket, then it is either the end of the statement
00312     // (if bracketStack is empty) or we finish an internal grouping
00313     if (bracketStack.Length())
00314     {
00315         // Retrieve information about this grouping
00316         lastBracket = bracketStack.Pop();
00317         lastCount = bracketCount.Pop();
00318         int lastCounter = bracketCounter.Pop() - 1;
00319 
00320         // Loop if required
00321         if (lastCounter)
00322         {
00323             bracketStack.Push(lastBracket);
00324             bracketCount.Push(lastCount);
00325             bracketCounter.Push(lastCounter);
00326 
00327             formatPos = lastBracket;
00328         }
00329         else
00330             // Otherwise find the next separator
00331         {
00332             formatPos++;
00333             FinishField();
00334             return;
00335         }
00336     }
00337     else
00338     {
00339         // If we finished the input line, then activate reset input counter
00340         inputPos = -1;
00341         endOfPattern = true;
00342 
00343         // And re-use input tokens starting at the last bracket
00344         formatPos = lastBracket;
00345 
00346         if (lastBracket == 1)
00347             return;
00348 
00349         // With appropriate repeat counts
00350         bracketStack.Push(lastBracket);
00351         bracketCounter.Push(lastCount);
00352         bracketCount.Push(lastCount);
00353     }
00354 }
00355 
00356 void FortranFormat::Flush()
00357 {
00358     while (!endOfPattern)
00359         ProcessToken(buffer);
00360 
00361     inputPos = -1;
00362 
00363     lastBracket = 1;
00364     lastCount = 0;
00365 
00366     formatPos = 1;
00367     repeatCount = 0;
00368 
00369     bracketStack.Clear();
00370     bracketCounter.Clear();
00371     bracketCount.Clear();
00372 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends