libStatGen Software  1
STLUtilities.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "STLUtilities.h"
00019 
00020 namespace STLUtilities
00021 {
00022 
//
// Split the C string input into words separated by the character
// delimiter, storing the words in result.
//
// Every delimiter starts a new word, even when that word is empty, so
// for a given number of delimiters in input, result.size() is always
// (number of delimiters + 1), regardless of the data in between them.
//
// Strings already present in result are cleared and refilled instead
// of being replaced, and result is truncated if it held more elements
// than there are words.
//
// A null input pointer is handled the same way as an empty string.
//
// Returns 0 when input is empty (or null), although result then holds
// exactly one empty word; otherwise returns result.size().
//
// TODO: refactor this to pre-allocate the word that we place data
// into, so that data copying is minimal.
//
int Tokenize(std::vector<std::string> &result, const char *input, char delimiter)
{
    // Guard against a null pointer before the first dereference and
    // treat it like an empty string.
    if (!input || *input=='\0')
    {
        result.clear();
        result.resize(1);   // one word, and it is empty
        return 0;
    }

    size_t wordCount = 1;

    // input is non-empty, so there is at least one word: make sure
    // slot 0 exists and starts out empty before filling it in.
    if (result.empty()) result.resize(1);
    else result[0].clear();

    std::string *word = &result[0];

    for (; *input != '\0'; ++input)
    {
        if (*input != delimiter)
        {
            // ordinary character: append it to the current word
            word->push_back(*input);
            continue;
        }

        // A delimiter always opens a new word, because an empty word
        // following a delimiter still counts as a word.
        ++wordCount;
        if (result.size() < wordCount) result.resize(wordCount);
        else result[wordCount-1].clear();

        // resize() may have moved the vector's storage, so the word
        // pointer has to be re-taken after it.
        word = &result[wordCount-1];
    }

    // drop any leftover elements from a previous, longer use of result
    if (wordCount < result.size()) result.resize(wordCount);

    return static_cast<int>(result.size());
}
00075 
00076 } // end of namespace STLUtilities
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends