LCOV - tmp.zDYK9MVh93 - third_party/protobuf/src/google/protobuf/stubs/strutil.cc

LCOV - code coverage report

Current view:	top level - third_party/protobuf/src/google/protobuf/stubs - strutil.cc (source / functions)		Hit	Total	Coverage
Test:	tmp.zDYK9MVh93	Lines:	333	906	36.8 %
Date:	2015-10-10	Functions:	33	96	34.4 %

          Line data    Source code

       1             : // Protocol Buffers - Google's data interchange format
       2             : // Copyright 2008 Google Inc.  All rights reserved.
       3             : // https://developers.google.com/protocol-buffers/
       4             : //
       5             : // Redistribution and use in source and binary forms, with or without
       6             : // modification, are permitted provided that the following conditions are
       7             : // met:
       8             : //
       9             : //     * Redistributions of source code must retain the above copyright
      10             : // notice, this list of conditions and the following disclaimer.
      11             : //     * Redistributions in binary form must reproduce the above
      12             : // copyright notice, this list of conditions and the following disclaimer
      13             : // in the documentation and/or other materials provided with the
      14             : // distribution.
      15             : //     * Neither the name of Google Inc. nor the names of its
      16             : // contributors may be used to endorse or promote products derived from
      17             : // this software without specific prior written permission.
      18             : //
      19             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      20             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      21             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      22             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      23             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      24             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      25             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      26             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      27             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      28             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      29             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             : 
      31             : // from google3/strings/strutil.cc
      32             : 
      33             : #include <google/protobuf/stubs/strutil.h>
      34             : #include <google/protobuf/stubs/mathlimits.h>
      35             : 
      36             : #include <errno.h>
      37             : #include <float.h>    // FLT_DIG and DBL_DIG
      38             : #include <limits>
      39             : #include <limits.h>
      40             : #include <stdio.h>
      41             : #include <iterator>
      42             : 
      43             : #include <google/protobuf/stubs/stl_util.h>
      44             : 
      45             : #ifdef _WIN32
      46             : // MSVC has only _snprintf, not snprintf.
      47             : //
      48             : // MinGW has both snprintf and _snprintf, but they appear to be different
      49             : // functions.  The former is buggy.  When invoked like so:
      50             : //   char buffer[32];
      51             : //   snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
      52             : // it prints "1.23000e+10".  This is plainly wrong:  %g should never print
      53             : // trailing zeros after the decimal point.  For some reason this bug only
      54             : // occurs with some input values, not all.  In any case, _snprintf does the
      55             : // right thing, so we use it.
      56             : #define snprintf _snprintf
      57             : #endif
      58             : 
      59             : namespace google {
      60             : namespace protobuf {
      61             : 
      62             : // These are defined as macros on some platforms.  #undef them so that we can
      63             : // redefine them.
      64             : #undef isxdigit
      65             : #undef isprint
      66             : 
      67             : // The definitions of these in ctype.h change based on locale.  Since our
      68             : // string manipulation is all in relation to the protocol buffer and C++
      69             : // languages, we always want to use the C locale.  So, we re-define these
      70             : // exactly as we want them.
      71             : inline bool isxdigit(char c) {
      72           0 :   return ('0' <= c && c <= '9') ||
      73           0 :          ('a' <= c && c <= 'f') ||
      74           0 :          ('A' <= c && c <= 'F');
      75             : }
      76             : 
      77             : inline bool isprint(char c) {
      78      749599 :   return c >= 0x20 && c <= 0x7E;
      79             : }
      80             : 
      81             : // ----------------------------------------------------------------------
      82             : // StripString
      83             : //    Replaces any occurrence of the character 'remove' (or the characters
      84             : //    in 'remove') with the character 'replacewith'.
      85             : // ----------------------------------------------------------------------
      86           0 : void StripString(string* s, const char* remove, char replacewith) {
      87           0 :   const char * str_start = s->c_str();
      88           0 :   const char * str = str_start;
      89           0 :   for (str = strpbrk(str, remove);
      90             :        str != NULL;
      91           0 :        str = strpbrk(str + 1, remove)) {
      92           0 :     (*s)[str - str_start] = replacewith;
      93             :   }
      94           0 : }
      95             : 
      96           0 : void StripWhitespace(string* str) {
      97           0 :   int str_length = str->length();
      98             : 
      99             :   // Strip off leading whitespace.
     100           0 :   int first = 0;
     101           0 :   while (first < str_length && ascii_isspace(str->at(first))) {
     102           0 :     ++first;
     103             :   }
     104             :   // If entire string is white space.
     105           0 :   if (first == str_length) {
     106             :     str->clear();
     107           0 :     return;
     108             :   }
     109           0 :   if (first > 0) {
     110           0 :     str->erase(0, first);
     111           0 :     str_length -= first;
     112             :   }
     113             : 
     114             :   // Strip off trailing whitespace.
     115           0 :   int last = str_length - 1;
     116           0 :   while (last >= 0 && ascii_isspace(str->at(last))) {
     117           0 :     --last;
     118             :   }
     119           0 :   if (last != (str_length - 1) && last >= 0) {
     120           0 :     str->erase(last + 1, string::npos);
     121             :   }
     122             : }
     123             : 
     124             : // ----------------------------------------------------------------------
     125             : // StringReplace()
     126             : //    Replace the "old" pattern with the "new" pattern in a string,
     127             : //    and append the result to "res".  If replace_all is false,
     128             : //    it only replaces the first instance of "old."
     129             : // ----------------------------------------------------------------------
     130             : 
     131       26891 : void StringReplace(const string& s, const string& oldsub,
     132             :                    const string& newsub, bool replace_all,
     133             :                    string* res) {
     134       26891 :   if (oldsub.empty()) {
     135           0 :     res->append(s);  // if empty, append the given string.
     136       26891 :     return;
     137             :   }
     138             : 
     139             :   string::size_type start_pos = 0;
     140             :   string::size_type pos;
     141        5643 :   do {
     142       32534 :     pos = s.find(oldsub, start_pos);
     143       32534 :     if (pos == string::npos) {
     144             :       break;
     145             :     }
     146        5643 :     res->append(s, start_pos, pos - start_pos);
     147        5643 :     res->append(newsub);
     148        5643 :     start_pos = pos + oldsub.size();  // start searching again after the "old"
     149             :   } while (replace_all);
     150       26891 :   res->append(s, start_pos, s.length() - start_pos);
     151             : }
     152             : 
     153             : // ----------------------------------------------------------------------
     154             : // StringReplace()
     155             : //    Give me a string and two patterns "old" and "new", and I replace
     156             : //    the first instance of "old" in the string with "new", if it
     157             : //    exists.  If "global" is true; call this repeatedly until it
     158             : //    fails.  RETURN a new string, regardless of whether the replacement
     159             : //    happened or not.
     160             : // ----------------------------------------------------------------------
     161             : 
     162       26891 : string StringReplace(const string& s, const string& oldsub,
     163             :                      const string& newsub, bool replace_all) {
     164             :   string ret;
     165       26891 :   StringReplace(s, oldsub, newsub, replace_all, &ret);
     166       26891 :   return ret;
     167             : }
     168             : 
     169             : // ----------------------------------------------------------------------
     170             : // SplitStringUsing()
     171             : //    Split a string using a character delimiter. Append the components
     172             : //    to 'result'.
     173             : //
     174             : // Note: For multi-character delimiters, this routine will split on *ANY* of
     175             : // the characters in the string, not the entire string as a single delimiter.
     176             : // ----------------------------------------------------------------------
     177             : template <typename ITR>
     178             : static inline
     179         366 : void SplitStringToIteratorUsing(const string& full,
     180             :                                 const char* delim,
     181             :                                 ITR& result) {
     182             :   // Optimize the common case where delim is a single character.
     183         366 :   if (delim[0] != '\0' && delim[1] == '\0') {
     184         366 :     char c = delim[0];
     185         366 :     const char* p = full.data();
     186         366 :     const char* end = p + full.size();
     187        2487 :     while (p != end) {
     188        1755 :       if (*p == c) {
     189         720 :         ++p;
     190             :       } else {
     191             :         const char* start = p;
     192       10233 :         while (++p != end && *p != c);
     193        4140 :         *result++ = string(start, p - start);
     194             :       }
     195             :     }
     196             :     return;
     197             :   }
     198             : 
     199             :   string::size_type begin_index, end_index;
     200           0 :   begin_index = full.find_first_not_of(delim);
     201           0 :   while (begin_index != string::npos) {
     202           0 :     end_index = full.find_first_of(delim, begin_index);
     203           0 :     if (end_index == string::npos) {
     204           0 :       *result++ = full.substr(begin_index);
     205           0 :       return;
     206             :     }
     207           0 :     *result++ = full.substr(begin_index, (end_index - begin_index));
     208           0 :     begin_index = full.find_first_not_of(delim, end_index);
     209             :   }
     210             : }
     211             : 
     212         366 : void SplitStringUsing(const string& full,
     213             :                       const char* delim,
     214             :                       vector<string>* result) {
     215             :   back_insert_iterator< vector<string> > it(*result);
     216         366 :   SplitStringToIteratorUsing(full, delim, it);
     217         366 : }
     218             : 
     219             : // Split a string using a character delimiter. Append the components
     220             : // to 'result'.  If there are consecutive delimiters, this function
     221             : // will return corresponding empty strings. The string is split into
     222             : // at most the specified number of pieces greedily. This means that the
     223             : // last piece may possibly be split further. To split into as many pieces
     224             : // as possible, specify 0 as the number of pieces.
     225             : //
     226             : // If "full" is the empty string, yields an empty string as the only value.
     227             : //
     228             : // If "pieces" is negative for some reason, it returns the whole string
     229             : // ----------------------------------------------------------------------
     230             : template <typename StringType, typename ITR>
     231             : static inline
     232           0 : void SplitStringToIteratorAllowEmpty(const StringType& full,
     233             :                                      const char* delim,
     234             :                                      int pieces,
     235             :                                      ITR& result) {
     236             :   string::size_type begin_index, end_index;
     237           0 :   begin_index = 0;
     238             : 
     239           0 :   for (int i = 0; (i < pieces-1) || (pieces == 0); i++) {
     240           0 :     end_index = full.find_first_of(delim, begin_index);
     241           0 :     if (end_index == string::npos) {
     242           0 :       *result++ = full.substr(begin_index);
     243           0 :       return;
     244             :     }
     245           0 :     *result++ = full.substr(begin_index, (end_index - begin_index));
     246           0 :     begin_index = end_index + 1;
     247             :   }
     248           0 :   *result++ = full.substr(begin_index);
     249             : }
     250             : 
     251           0 : void SplitStringAllowEmpty(const string& full, const char* delim,
     252             :                            vector<string>* result) {
     253             :   back_insert_iterator<vector<string> > it(*result);
     254           0 :   SplitStringToIteratorAllowEmpty(full, delim, 0, it);
     255           0 : }
     256             : 
     257             : // ----------------------------------------------------------------------
     258             : // JoinStrings()
     259             : //    This merges a vector of string components with delim inserted
     260             : //    as separaters between components.
     261             : //
     262             : // ----------------------------------------------------------------------
     263             : template <class ITERATOR>
     264           0 : static void JoinStringsIterator(const ITERATOR& start,
     265             :                                 const ITERATOR& end,
     266             :                                 const char* delim,
     267             :                                 string* result) {
     268           0 :   GOOGLE_CHECK(result != NULL);
     269             :   result->clear();
     270           0 :   int delim_length = strlen(delim);
     271             : 
     272             :   // Precompute resulting length so we can reserve() memory in one shot.
     273           0 :   int length = 0;
     274           0 :   for (ITERATOR iter = start; iter != end; ++iter) {
     275           0 :     if (iter != start) {
     276           0 :       length += delim_length;
     277             :     }
     278           0 :     length += iter->size();
     279             :   }
     280           0 :   result->reserve(length);
     281             : 
     282             :   // Now combine everything.
     283           0 :   for (ITERATOR iter = start; iter != end; ++iter) {
     284           0 :     if (iter != start) {
     285           0 :       result->append(delim, delim_length);
     286             :     }
     287           0 :     result->append(iter->data(), iter->size());
     288             :   }
     289           0 : }
     290             : 
     291           0 : void JoinStrings(const vector<string>& components,
     292             :                  const char* delim,
     293             :                  string * result) {
     294           0 :   JoinStringsIterator(components.begin(), components.end(), delim, result);
     295           0 : }
     296             : 
     297             : // ----------------------------------------------------------------------
     298             : // UnescapeCEscapeSequences()
     299             : //    This does all the unescaping that C does: \ooo, \r, \n, etc
     300             : //    Returns length of resulting string.
     301             : //    The implementation of \x parses any positive number of hex digits,
     302             : //    but it is an error if the value requires more than 8 bits, and the
     303             : //    result is truncated to 8 bits.
     304             : //
     305             : //    The second call stores its errors in a supplied string vector.
     306             : //    If the string vector pointer is NULL, it reports the errors with LOG().
     307             : // ----------------------------------------------------------------------
     308             : 
     309             : #define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7'))
     310             : 
     311             : // Protocol buffers doesn't ever care about errors, but I don't want to remove
     312             : // the code.
     313             : #define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)
     314             : 
     315           0 : int UnescapeCEscapeSequences(const char* source, char* dest) {
     316           0 :   return UnescapeCEscapeSequences(source, dest, NULL);
     317             : }
     318             : 
     319           8 : int UnescapeCEscapeSequences(const char* source, char* dest,
     320             :                              vector<string> *errors) {
     321             :   GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented.";
     322             : 
     323           8 :   char* d = dest;
     324           8 :   const char* p = source;
     325             : 
     326             :   // Small optimization for case where source = dest and there's no escaping
     327          16 :   while ( p == d && *p != '\0' && *p != '\\' )
     328           0 :     p++, d++;
     329             : 
     330          57 :   while (*p != '\0') {
     331          49 :     if (*p != '\\') {
     332          35 :       *d++ = *p++;
     333             :     } else {
     334          14 :       switch ( *++p ) {                    // skip past the '\\'
     335             :         case '\0':
     336             :           LOG_STRING(ERROR, errors) << "String cannot end with \\";
     337           0 :           *d = '\0';
     338           0 :           return d - dest;   // we're done with p
     339           0 :         case 'a':  *d++ = '\a';  break;
     340           0 :         case 'b':  *d++ = '\b';  break;
     341           0 :         case 'f':  *d++ = '\f';  break;
     342           1 :         case 'n':  *d++ = '\n';  break;
     343           1 :         case 'r':  *d++ = '\r';  break;
     344           1 :         case 't':  *d++ = '\t';  break;
     345           0 :         case 'v':  *d++ = '\v';  break;
     346           1 :         case '\\': *d++ = '\\';  break;
     347           0 :         case '?':  *d++ = '\?';  break;    // \?  Who knew?
     348           1 :         case '\'': *d++ = '\'';  break;
     349           1 :         case '"':  *d++ = '\"';  break;
     350             :         case '0': case '1': case '2': case '3':  // octal digit: 1 to 3 digits
     351             :         case '4': case '5': case '6': case '7': {
     352           8 :           char ch = *p - '0';
     353           8 :           if ( IS_OCTAL_DIGIT(p[1]) )
     354           8 :             ch = ch * 8 + *++p - '0';
     355           8 :           if ( IS_OCTAL_DIGIT(p[1]) )      // safe (and easy) to do this twice
     356           8 :             ch = ch * 8 + *++p - '0';      // now points at last digit
     357           8 :           *d++ = ch;
     358           8 :           break;
     359             :         }
     360             :         case 'x': case 'X': {
     361           0 :           if (!isxdigit(p[1])) {
     362             :             if (p[1] == '\0') {
     363             :               LOG_STRING(ERROR, errors) << "String cannot end with \\x";
     364             :             } else {
     365             :               LOG_STRING(ERROR, errors) <<
     366             :                 "\\x cannot be followed by non-hex digit: \\" << *p << p[1];
     367             :             }
     368             :             break;
     369             :           }
     370             :           unsigned int ch = 0;
     371             :           const char *hex_start = p;
     372           0 :           while (isxdigit(p[1]))  // arbitrarily many hex digits
     373           0 :             ch = (ch << 4) + hex_digit_to_int(*++p);
     374             :           if (ch > 0xFF)
     375             :             LOG_STRING(ERROR, errors) << "Value of " <<
     376             :               "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits";
     377           0 :           *d++ = ch;
     378           0 :           break;
     379             :         }
     380             : #if 0  // TODO(kenton):  Support \u and \U?  Requires runetochar().
     381             :         case 'u': {
     382             :           // \uhhhh => convert 4 hex digits to UTF-8
     383             :           char32 rune = 0;
     384             :           const char *hex_start = p;
     385             :           for (int i = 0; i < 4; ++i) {
     386             :             if (isxdigit(p[1])) {  // Look one char ahead.
     387             :               rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p.
     388             :             } else {
     389             :               LOG_STRING(ERROR, errors)
     390             :                 << "\\u must be followed by 4 hex digits: \\"
     391             :                 <<  string(hex_start, p+1-hex_start);
     392             :               break;
     393             :             }
     394             :           }
     395             :           d += runetochar(d, &rune);
     396             :           break;
     397             :         }
     398             :         case 'U': {
     399             :           // \Uhhhhhhhh => convert 8 hex digits to UTF-8
     400             :           char32 rune = 0;
     401             :           const char *hex_start = p;
     402             :           for (int i = 0; i < 8; ++i) {
     403             :             if (isxdigit(p[1])) {  // Look one char ahead.
     404             :               // Don't change rune until we're sure this
     405             :               // is within the Unicode limit, but do advance p.
     406             :               char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
     407             :               if (newrune > 0x10FFFF) {
     408             :                 LOG_STRING(ERROR, errors)
     409             :                   << "Value of \\"
     410             :                   << string(hex_start, p + 1 - hex_start)
     411             :                   << " exceeds Unicode limit (0x10FFFF)";
     412             :                 break;
     413             :               } else {
     414             :                 rune = newrune;
     415             :               }
     416             :             } else {
     417             :               LOG_STRING(ERROR, errors)
     418             :                 << "\\U must be followed by 8 hex digits: \\"
     419             :                 <<  string(hex_start, p+1-hex_start);
     420             :               break;
     421             :             }
     422             :           }
     423             :           d += runetochar(d, &rune);
     424             :           break;
     425             :         }
     426             : #endif
     427             :         default:
     428             :           LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p;
     429             :       }
     430          14 :       p++;                                 // read past letter we escaped
     431             :     }
     432             :   }
     433           8 :   *d = '\0';
     434           8 :   return d - dest;
     435             : }
     436             : 
     437             : // ----------------------------------------------------------------------
     438             : // UnescapeCEscapeString()
     439             : //    This does the same thing as UnescapeCEscapeSequences, but creates
     440             : //    a new string. The caller does not need to worry about allocating
     441             : //    a dest buffer. This should be used for non performance critical
     442             : //    tasks such as printing debug messages. It is safe for src and dest
     443             : //    to be the same.
     444             : //
     445             : //    The second call stores its errors in a supplied string vector.
     446             : //    If the string vector pointer is NULL, it reports the errors with LOG().
     447             : //
     448             : //    In the first and second calls, the length of dest is returned. In the
     449             : //    the third call, the new string is returned.
     450             : // ----------------------------------------------------------------------
     451           0 : int UnescapeCEscapeString(const string& src, string* dest) {
     452           0 :   return UnescapeCEscapeString(src, dest, NULL);
     453             : }
     454             : 
     455           0 : int UnescapeCEscapeString(const string& src, string* dest,
     456             :                           vector<string> *errors) {
     457           0 :   scoped_array<char> unescaped(new char[src.size() + 1]);
     458           0 :   int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors);
     459           0 :   GOOGLE_CHECK(dest);
     460           0 :   dest->assign(unescaped.get(), len);
     461           0 :   return len;
     462             : }
     463             : 
     464           8 : string UnescapeCEscapeString(const string& src) {
     465           8 :   scoped_array<char> unescaped(new char[src.size() + 1]);
     466          16 :   int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL);
     467          16 :   return string(unescaped.get(), len);
     468             : }
     469             : 
     470             : // ----------------------------------------------------------------------
     471             : // CEscapeString()
     472             : // CHexEscapeString()
     473             : //    Copies 'src' to 'dest', escaping dangerous characters using
     474             : //    C-style escape sequences. This is very useful for preparing query
     475             : //    flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
     476             : //    hexadecimal rather than octal sequences.
     477             : //    Returns the number of bytes written to 'dest' (not including the \0)
     478             : //    or -1 if there was insufficient space.
     479             : //
     480             : //    Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
     481             : // ----------------------------------------------------------------------
     482       12080 : int CEscapeInternal(const char* src, int src_len, char* dest,
     483             :                     int dest_len, bool use_hex, bool utf8_safe) {
     484       12080 :   const char* src_end = src + src_len;
     485       12080 :   int used = 0;
     486       12080 :   bool last_hex_escape = false; // true if last output char was \xNN
     487             : 
     488      766941 :   for (; src < src_end; src++) {
     489      754861 :     if (dest_len - used < 2)   // Need space for two letter escape
     490             :       return -1;
     491             : 
     492      754861 :     bool is_hex_escape = false;
     493      754861 :     switch (*src) {
     494        3240 :       case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
     495         288 :       case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
     496        1342 :       case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
     497         283 :       case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
     498          74 :       case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
     499          35 :       case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
     500             :       default:
     501             :         // Note that if we emit \xNN and the src character after that is a hex
     502             :         // digit then that digit must be escaped too to prevent it being
     503             :         // interpreted as part of the character code by C.
     504     2248797 :         if ((!utf8_safe || static_cast<uint8>(*src) < 0x80) &&
     505      730181 :             (!isprint(*src) ||
     506           0 :              (last_hex_escape && isxdigit(*src)))) {
     507       19418 :           if (dest_len - used < 4) // need space for 4 letter escape
     508             :             return -1;
     509             :           sprintf(dest + used, (use_hex ? "\\x%02x" : "\\%03o"),
     510       19418 :                   static_cast<uint8>(*src));
     511       19418 :           is_hex_escape = use_hex;
     512       19418 :           used += 4;
     513             :         } else {
     514      730181 :           dest[used++] = *src; break;
     515             :         }
     516             :     }
     517      754861 :     last_hex_escape = is_hex_escape;
     518             :   }
     519             : 
     520       12080 :   if (dest_len - used < 1)   // make sure that there is room for \0
     521             :     return -1;
     522             : 
     523       12080 :   dest[used] = '\0';   // doesn't count towards return value though
     524       12080 :   return used;
     525             : }
     526             : 
     527           0 : int CEscapeString(const char* src, int src_len, char* dest, int dest_len) {
     528           0 :   return CEscapeInternal(src, src_len, dest, dest_len, false, false);
     529             : }
     530             : 
     531             : // ----------------------------------------------------------------------
     532             : // CEscape()
     533             : // CHexEscape()
     534             : //    Copies 'src' to result, escaping dangerous characters using
     535             : //    C-style escape sequences. This is very useful for preparing query
     536             : //    flags. 'src' and 'dest' should not overlap. The 'Hex' version
     537             : //    uses hexadecimal rather than octal sequences.
     538             : //
     539             : //    Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
     540             : // ----------------------------------------------------------------------
     541       12080 : string CEscape(const string& src) {
     542       12080 :   const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
     543       12080 :   scoped_array<char> dest(new char[dest_length]);
     544             :   const int len = CEscapeInternal(src.data(), src.size(),
     545       24160 :                                   dest.get(), dest_length, false, false);
     546             :   GOOGLE_DCHECK_GE(len, 0);
     547       24160 :   return string(dest.get(), len);
     548             : }
     549             : 
     550             : namespace strings {
     551             : 
     552           0 : string Utf8SafeCEscape(const string& src) {
     553           0 :   const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
     554           0 :   scoped_array<char> dest(new char[dest_length]);
     555             :   const int len = CEscapeInternal(src.data(), src.size(),
     556           0 :                                   dest.get(), dest_length, false, true);
     557             :   GOOGLE_DCHECK_GE(len, 0);
     558           0 :   return string(dest.get(), len);
     559             : }
     560             : 
     561           0 : string CHexEscape(const string& src) {
     562           0 :   const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
     563           0 :   scoped_array<char> dest(new char[dest_length]);
     564             :   const int len = CEscapeInternal(src.data(), src.size(),
     565           0 :                                   dest.get(), dest_length, true, false);
     566             :   GOOGLE_DCHECK_GE(len, 0);
     567           0 :   return string(dest.get(), len);
     568             : }
     569             : 
     570             : }  // namespace strings
     571             : 
     572             : // ----------------------------------------------------------------------
     573             : // strto32_adaptor()
     574             : // strtou32_adaptor()
     575             : //    Implementation of strto[u]l replacements that have identical
     576             : //    overflow and underflow characteristics for both ILP-32 and LP-64
     577             : //    platforms, including errno preservation in error-free calls.
     578             : // ----------------------------------------------------------------------
     579             : 
     580           0 : int32 strto32_adaptor(const char *nptr, char **endptr, int base) {
     581           0 :   const int saved_errno = errno;
     582           0 :   errno = 0;
     583           0 :   const long result = strtol(nptr, endptr, base);
     584           0 :   if (errno == ERANGE && result == LONG_MIN) {
     585             :     return kint32min;
     586           0 :   } else if (errno == ERANGE && result == LONG_MAX) {
     587             :     return kint32max;
     588           0 :   } else if (errno == 0 && result < kint32min) {
     589           0 :     errno = ERANGE;
     590           0 :     return kint32min;
     591           0 :   } else if (errno == 0 && result > kint32max) {
     592           0 :     errno = ERANGE;
     593           0 :     return kint32max;
     594             :   }
     595           0 :   if (errno == 0)
     596           0 :     errno = saved_errno;
     597           0 :   return static_cast<int32>(result);
     598             : }
     599             : 
     600           0 : uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) {
     601           0 :   const int saved_errno = errno;
     602           0 :   errno = 0;
     603           0 :   const unsigned long result = strtoul(nptr, endptr, base);
     604           0 :   if (errno == ERANGE && result == ULONG_MAX) {
     605             :     return kuint32max;
     606           0 :   } else if (errno == 0 && result > kuint32max) {
     607           0 :     errno = ERANGE;
     608           0 :     return kuint32max;
     609             :   }
     610           0 :   if (errno == 0)
     611           0 :     errno = saved_errno;
     612           0 :   return static_cast<uint32>(result);
     613             : }
     614             : 
     615           0 : inline bool safe_parse_sign(string* text  /*inout*/,
     616             :                             bool* negative_ptr  /*output*/) {
     617           0 :   const char* start = text->data();
     618           0 :   const char* end = start + text->size();
     619             : 
     620             :   // Consume leading and trailing spaces.
     621           0 :   while (start < end && (start[0] == ' ')) {
     622           0 :     ++start;
     623             :   }
     624           0 :   while (start < end && (end[-1] == ' ')) {
     625           0 :     --end;
     626             :   }
     627           0 :   if (start >= end) {
     628             :     return false;
     629             :   }
     630             : 
     631             :   // Consume sign.
     632           0 :   *negative_ptr = (start[0] == '-');
     633           0 :   if (*negative_ptr || start[0] == '+') {
     634           0 :     ++start;
     635           0 :     if (start >= end) {
     636             :       return false;
     637             :     }
     638             :   }
     639           0 :   *text = text->substr(start - text->data(), end - start);
     640           0 :   return true;
     641             : }
     642             : 
     643             : template<typename IntType>
     644           0 : bool safe_parse_positive_int(
     645             :     string text, IntType* value_p) {
     646           0 :   int base = 10;
     647           0 :   IntType value = 0;
     648           0 :   const IntType vmax = std::numeric_limits<IntType>::max();
     649             :   assert(vmax > 0);
     650             :   assert(vmax >= base);
     651           0 :   const IntType vmax_over_base = vmax / base;
     652           0 :   const char* start = text.data();
     653           0 :   const char* end = start + text.size();
     654             :   // loop over digits
     655           0 :   for (; start < end; ++start) {
     656           0 :     unsigned char c = static_cast<unsigned char>(start[0]);
     657           0 :     int digit = c - '0';
     658           0 :     if (digit >= base || digit < 0) {
     659           0 :       *value_p = value;
     660           0 :       return false;
     661             :     }
     662           0 :     if (value > vmax_over_base) {
     663           0 :       *value_p = vmax;
     664           0 :       return false;
     665             :     }
     666           0 :     value *= base;
     667           0 :     if (value > vmax - digit) {
     668           0 :       *value_p = vmax;
     669           0 :       return false;
     670             :     }
     671           0 :     value += digit;
     672             :   }
     673           0 :   *value_p = value;
     674           0 :   return true;
     675             : }
     676             : 
     677             : template<typename IntType>
     678           0 : bool safe_parse_negative_int(
     679             :     const string& text, IntType* value_p) {
     680           0 :   int base = 10;
     681           0 :   IntType value = 0;
     682           0 :   const IntType vmin = std::numeric_limits<IntType>::min();
     683             :   assert(vmin < 0);
     684             :   assert(vmin <= 0 - base);
     685           0 :   IntType vmin_over_base = vmin / base;
     686             :   // The 2003 C++ standard [expr.mul] says:
     687             :   // "... the sign of the remainder is implementation-defined."
     688             :   // Either way, (vmin/base)*base + vmin%base is always vmin.
     689             :   // The 2011 C++ standard tightens the spec, but we cannot rely on it.
     690             :   if (vmin % base > 0) {
     691             :     vmin_over_base += 1;
     692             :   }
     693           0 :   const char* start = text.data();
     694           0 :   const char* end = start + text.size();
     695             :   // loop over digits
     696           0 :   for (; start < end; ++start) {
     697           0 :     unsigned char c = static_cast<unsigned char>(start[0]);
     698           0 :     int digit = c - '0';
     699           0 :     if (digit >= base || digit < 0) {
     700           0 :       *value_p = value;
     701           0 :       return false;
     702             :     }
     703           0 :     if (value < vmin_over_base) {
     704           0 :       *value_p = vmin;
     705           0 :       return false;
     706             :     }
     707           0 :     value *= base;
     708           0 :     if (value < vmin + digit) {
     709           0 :       *value_p = vmin;
     710           0 :       return false;
     711             :     }
     712           0 :     value -= digit;
     713             :   }
     714           0 :   *value_p = value;
     715           0 :   return true;
     716             : }
     717             : 
     718             : template<typename IntType>
     719           0 : bool safe_int_internal(string text, IntType* value_p) {
     720           0 :   *value_p = 0;
     721             :   bool negative;
     722           0 :   if (!safe_parse_sign(&text, &negative)) {
     723             :     return false;
     724             :   }
     725           0 :   if (!negative) {
     726           0 :     return safe_parse_positive_int(text, value_p);
     727             :   } else {
     728           0 :     return safe_parse_negative_int(text, value_p);
     729             :   }
     730             : }
     731             : 
     732             : template<typename IntType>
     733           0 : bool safe_uint_internal(string text, IntType* value_p) {
     734           0 :   *value_p = 0;
     735             :   bool negative;
     736           0 :   if (!safe_parse_sign(&text, &negative) || negative) {
     737             :     return false;
     738             :   }
     739           0 :   return safe_parse_positive_int(text, value_p);
     740             : }
     741             : 
     742             : // ----------------------------------------------------------------------
     743             : // FastIntToBuffer()
     744             : // FastInt64ToBuffer()
     745             : // FastHexToBuffer()
     746             : // FastHex64ToBuffer()
     747             : // FastHex32ToBuffer()
     748             : // ----------------------------------------------------------------------
     749             : 
     750             : // Offset into buffer where FastInt64ToBuffer places the end of string
     751             : // null character.  Also used by FastInt64ToBufferLeft.
     752             : static const int kFastInt64ToBufferOffset = 21;
     753             : 
     754         217 : char *FastInt64ToBuffer(int64 i, char* buffer) {
     755             :   // We could collapse the positive and negative sections, but that
     756             :   // would be slightly slower for positive numbers...
     757             :   // 22 bytes is enough to store -2**64, -18446744073709551616.
     758         217 :   char* p = buffer + kFastInt64ToBufferOffset;
     759         217 :   *p-- = '\0';
     760         217 :   if (i >= 0) {
     761         228 :     do {
     762         228 :       *p-- = '0' + i % 10;
     763         228 :       i /= 10;
     764             :     } while (i > 0);
     765             :     return p + 1;
     766             :   } else {
     767             :     // On different platforms, % and / have different behaviors for
     768             :     // negative numbers, so we need to jump through hoops to make sure
     769             :     // we don't divide negative numbers.
     770          45 :     if (i > -10) {
     771           0 :       i = -i;
     772           0 :       *p-- = '0' + i;
     773           0 :       *p = '-';
     774           0 :       return p;
     775             :     } else {
     776             :       // Make sure we aren't at MIN_INT, in which case we can't say i = -i
     777          45 :       i = i + 10;
     778          45 :       i = -i;
     779          45 :       *p-- = '0' + i % 10;
     780             :       // Undo what we did a moment ago
     781          45 :       i = i / 10 + 1;
     782         334 :       do {
     783         334 :         *p-- = '0' + i % 10;
     784         334 :         i /= 10;
     785             :       } while (i > 0);
     786          45 :       *p = '-';
     787          45 :       return p;
     788             :     }
     789             :   }
     790             : }
     791             : 
     792             : // Offset into buffer where FastInt32ToBuffer places the end of string
     793             : // null character.  Also used by FastInt32ToBufferLeft.
     794             : static const int kFastInt32ToBufferOffset = 11;
     795             : 
     796             : // Yes, this is a duplicate of FastInt64ToBuffer.  But, we need this for the
     797             : // compiler to generate 32 bit arithmetic instructions.  It's much faster, at
     798             : // least with 32 bit binaries.
     799       48709 : char *FastInt32ToBuffer(int32 i, char* buffer) {
     800             :   // We could collapse the positive and negative sections, but that
     801             :   // would be slightly slower for positive numbers...
     802             :   // 12 bytes is enough to store -2**32, -4294967296.
     803       48709 :   char* p = buffer + kFastInt32ToBufferOffset;
     804       48709 :   *p-- = '\0';
     805       48709 :   if (i >= 0) {
     806       85205 :     do {
     807       85205 :       *p-- = '0' + i % 10;
     808       85205 :       i /= 10;
     809             :     } while (i > 0);
     810             :     return p + 1;
     811             :   } else {
     812             :     // On different platforms, % and / have different behaviors for
     813             :     // negative numbers, so we need to jump through hoops to make sure
     814             :     // we don't divide negative numbers.
     815          57 :     if (i > -10) {
     816           6 :       i = -i;
     817           6 :       *p-- = '0' + i;
     818           6 :       *p = '-';
     819           6 :       return p;
     820             :     } else {
     821             :       // Make sure we aren't at MIN_INT, in which case we can't say i = -i
     822          51 :       i = i + 10;
     823          51 :       i = -i;
     824          51 :       *p-- = '0' + i % 10;
     825             :       // Undo what we did a moment ago
     826          51 :       i = i / 10 + 1;
     827         193 :       do {
     828         193 :         *p-- = '0' + i % 10;
     829         193 :         i /= 10;
     830             :       } while (i > 0);
     831          51 :       *p = '-';
     832          51 :       return p;
     833             :     }
     834             :   }
     835             : }
     836             : 
     837           0 : char *FastHexToBuffer(int i, char* buffer) {
     838           0 :   GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
     839             : 
     840             :   static const char *hexdigits = "0123456789abcdef";
     841           0 :   char *p = buffer + 21;
     842           0 :   *p-- = '\0';
     843           0 :   do {
     844           0 :     *p-- = hexdigits[i & 15];   // mod by 16
     845           0 :     i >>= 4;                    // divide by 16
     846             :   } while (i > 0);
     847           0 :   return p + 1;
     848             : }
     849             : 
     850           0 : char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
     851             :   static const char *hexdigits = "0123456789abcdef";
     852           0 :   buffer[num_byte] = '\0';
     853           0 :   for (int i = num_byte - 1; i >= 0; i--) {
     854             : #ifdef _M_X64
     855             :     // MSVC x64 platform has a bug optimizing the uint32(value) in the #else
     856             :     // block. Given that the uint32 cast was to improve performance on 32-bit
     857             :     // platforms, we use 64-bit '&' directly.
     858             :     buffer[i] = hexdigits[value & 0xf];
     859             : #else
     860           0 :     buffer[i] = hexdigits[uint32(value) & 0xf];
     861             : #endif
     862           0 :     value >>= 4;
     863             :   }
     864           0 :   return buffer;
     865             : }
     866             : 
     867           0 : char *FastHex64ToBuffer(uint64 value, char* buffer) {
     868           0 :   return InternalFastHexToBuffer(value, buffer, 16);
     869             : }
     870             : 
     871           0 : char *FastHex32ToBuffer(uint32 value, char* buffer) {
     872           0 :   return InternalFastHexToBuffer(value, buffer, 8);
     873             : }
     874             : 
     875             : // ----------------------------------------------------------------------
     876             : // FastInt32ToBufferLeft()
     877             : // FastUInt32ToBufferLeft()
     878             : // FastInt64ToBufferLeft()
     879             : // FastUInt64ToBufferLeft()
     880             : //
     881             : // Like the Fast*ToBuffer() functions above, these are intended for speed.
     882             : // Unlike the Fast*ToBuffer() functions, however, these functions write
     883             : // their output to the beginning of the buffer (hence the name, as the
     884             : // output is left-aligned).  The caller is responsible for ensuring that
     885             : // the buffer has enough space to hold the output.
     886             : //
     887             : // Returns a pointer to the end of the string (i.e. the null character
     888             : // terminating the string).
     889             : // ----------------------------------------------------------------------
     890             : 
     891             : static const char two_ASCII_digits[100][2] = {
     892             :   {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'},
     893             :   {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
     894             :   {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'},
     895             :   {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
     896             :   {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'},
     897             :   {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
     898             :   {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'},
     899             :   {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
     900             :   {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'},
     901             :   {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
     902             :   {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'},
     903             :   {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
     904             :   {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'},
     905             :   {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
     906             :   {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'},
     907             :   {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
     908             :   {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'},
     909             :   {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
     910             :   {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'},
     911             :   {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
     912             : };
     913             : 
     914        6062 : char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
     915             :   int digits;
     916        6062 :   const char *ASCII_digits = NULL;
     917             :   // The idea of this implementation is to trim the number of divides to as few
     918             :   // as possible by using multiplication and subtraction rather than mod (%),
     919             :   // and by outputting two digits at a time rather than one.
     920             :   // The huge-number case is first, in the hopes that the compiler will output
     921             :   // that case in one branch-free block of code, and only output conditional
     922             :   // branches into it from below.
     923        6062 :   if (u >= 1000000000) {  // >= 1,000,000,000
     924          26 :     digits = u / 100000000;  // 100,000,000
     925          26 :     ASCII_digits = two_ASCII_digits[digits];
     926          26 :     buffer[0] = ASCII_digits[0];
     927          26 :     buffer[1] = ASCII_digits[1];
     928          26 :     buffer += 2;
     929             : sublt100_000_000:
     930          28 :     u -= digits * 100000000;  // 100,000,000
     931             : lt100_000_000:
     932          55 :     digits = u / 1000000;  // 1,000,000
     933          55 :     ASCII_digits = two_ASCII_digits[digits];
     934          55 :     buffer[0] = ASCII_digits[0];
     935          55 :     buffer[1] = ASCII_digits[1];
     936          55 :     buffer += 2;
     937             : sublt1_000_000:
     938          66 :     u -= digits * 1000000;  // 1,000,000
     939             : lt1_000_000:
     940          68 :     digits = u / 10000;  // 10,000
     941          68 :     ASCII_digits = two_ASCII_digits[digits];
     942          68 :     buffer[0] = ASCII_digits[0];
     943          68 :     buffer[1] = ASCII_digits[1];
     944          68 :     buffer += 2;
     945             : sublt10_000:
     946         169 :     u -= digits * 10000;  // 10,000
     947             : lt10_000:
     948         276 :     digits = u / 100;
     949         276 :     ASCII_digits = two_ASCII_digits[digits];
     950         276 :     buffer[0] = ASCII_digits[0];
     951         276 :     buffer[1] = ASCII_digits[1];
     952         276 :     buffer += 2;
     953             : sublt100:
     954        3259 :     u -= digits * 100;
     955             : lt100:
     956        5074 :     digits = u;
     957        5074 :     ASCII_digits = two_ASCII_digits[digits];
     958        5074 :     buffer[0] = ASCII_digits[0];
     959        5074 :     buffer[1] = ASCII_digits[1];
     960        5074 :     buffer += 2;
     961             : done:
     962        6062 :     *buffer = 0;
     963        6062 :     return buffer;
     964             :   }
     965             : 
     966        6036 :   if (u < 100) {
     967        2803 :     digits = u;
     968        2803 :     if (u >= 10) goto lt100;
     969         988 :     *buffer++ = '0' + digits;
     970         988 :     goto done;
     971             :   }
     972        3233 :   if (u  <  10000) {   // 10,000
     973        3090 :     if (u >= 1000) goto lt10_000;
     974        2983 :     digits = u / 100;
     975        2983 :     *buffer++ = '0' + digits;
     976        2983 :     goto sublt100;
     977             :   }
     978         143 :   if (u  <  1000000) {   // 1,000,000
     979         103 :     if (u >= 100000) goto lt1_000_000;
     980         101 :     digits = u / 10000;  //    10,000
     981         101 :     *buffer++ = '0' + digits;
     982         101 :     goto sublt10_000;
     983             :   }
     984          40 :   if (u  <  100000000) {   // 100,000,000
     985          38 :     if (u >= 10000000) goto lt100_000_000;
     986          11 :     digits = u / 1000000;  //   1,000,000
     987          11 :     *buffer++ = '0' + digits;
     988          11 :     goto sublt1_000_000;
     989             :   }
     990             :   // we already know that u < 1,000,000,000
     991           2 :   digits = u / 100000000;   // 100,000,000
     992           2 :   *buffer++ = '0' + digits;
     993           2 :   goto sublt100_000_000;
     994             : }
     995             : 
     996           4 : char* FastInt32ToBufferLeft(int32 i, char* buffer) {
     997           4 :   uint32 u = i;
     998           4 :   if (i < 0) {
     999           0 :     *buffer++ = '-';
    1000           0 :     u = -i;
    1001             :   }
    1002           4 :   return FastUInt32ToBufferLeft(u, buffer);
    1003             : }
    1004             : 
    1005        1758 : char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
    1006             :   int digits;
    1007        1758 :   const char *ASCII_digits = NULL;
    1008             : 
    1009        1758 :   uint32 u = static_cast<uint32>(u64);
    1010        1758 :   if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
    1011             : 
    1012          24 :   uint64 top_11_digits = u64 / 1000000000;
    1013          24 :   buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
    1014          24 :   u = u64 - (top_11_digits * 1000000000);
    1015             : 
    1016          24 :   digits = u / 10000000;  // 10,000,000
    1017             :   GOOGLE_DCHECK_LT(digits, 100);
    1018          24 :   ASCII_digits = two_ASCII_digits[digits];
    1019          24 :   buffer[0] = ASCII_digits[0];
    1020          24 :   buffer[1] = ASCII_digits[1];
    1021          24 :   buffer += 2;
    1022          24 :   u -= digits * 10000000;  // 10,000,000
    1023          24 :   digits = u / 100000;  // 100,000
    1024          24 :   ASCII_digits = two_ASCII_digits[digits];
    1025          24 :   buffer[0] = ASCII_digits[0];
    1026          24 :   buffer[1] = ASCII_digits[1];
    1027          24 :   buffer += 2;
    1028          24 :   u -= digits * 100000;  // 100,000
    1029          24 :   digits = u / 1000;  // 1,000
    1030          24 :   ASCII_digits = two_ASCII_digits[digits];
    1031          24 :   buffer[0] = ASCII_digits[0];
    1032          24 :   buffer[1] = ASCII_digits[1];
    1033          24 :   buffer += 2;
    1034          24 :   u -= digits * 1000;  // 1,000
    1035          24 :   digits = u / 10;
    1036          24 :   ASCII_digits = two_ASCII_digits[digits];
    1037          24 :   buffer[0] = ASCII_digits[0];
    1038          24 :   buffer[1] = ASCII_digits[1];
    1039          24 :   buffer += 2;
    1040          24 :   u -= digits * 10;
    1041          24 :   digits = u;
    1042          24 :   *buffer++ = '0' + digits;
    1043          24 :   *buffer = 0;
    1044          24 :   return buffer;
    1045             : }
    1046             : 
    1047           0 : char* FastInt64ToBufferLeft(int64 i, char* buffer) {
    1048           0 :   uint64 u = i;
    1049           0 :   if (i < 0) {
    1050           0 :     *buffer++ = '-';
    1051           0 :     u = -i;
    1052             :   }
    1053           0 :   return FastUInt64ToBufferLeft(u, buffer);
    1054             : }
    1055             : 
    1056             : // ----------------------------------------------------------------------
    1057             : // SimpleItoa()
    1058             : //    Description: converts an integer to a string.
    1059             : //
    1060             : //    Return value: string
    1061             : // ----------------------------------------------------------------------
    1062             : 
    1063       36934 : string SimpleItoa(int i) {
    1064             :   char buffer[kFastToBufferSize];
    1065             :   return (sizeof(i) == 4) ?
    1066             :     FastInt32ToBuffer(i, buffer) :
    1067       36934 :     FastInt64ToBuffer(i, buffer);
    1068             : }
    1069             : 
    1070        4324 : string SimpleItoa(unsigned int i) {
    1071             :   char buffer[kFastToBufferSize];
    1072             :   return string(buffer, (sizeof(i) == 4) ?
    1073             :     FastUInt32ToBufferLeft(i, buffer) :
    1074        8648 :     FastUInt64ToBufferLeft(i, buffer));
    1075             : }
    1076             : 
    1077           0 : string SimpleItoa(long i) {
    1078             :   char buffer[kFastToBufferSize];
    1079             :   return (sizeof(i) == 4) ?
    1080             :     FastInt32ToBuffer(i, buffer) :
    1081           0 :     FastInt64ToBuffer(i, buffer);
    1082             : }
    1083             : 
    1084        1531 : string SimpleItoa(unsigned long i) {
    1085             :   char buffer[kFastToBufferSize];
    1086             :   return string(buffer, (sizeof(i) == 4) ?
    1087             :     FastUInt32ToBufferLeft(i, buffer) :
    1088        3062 :     FastUInt64ToBufferLeft(i, buffer));
    1089             : }
    1090             : 
    1091         217 : string SimpleItoa(long long i) {
    1092             :   char buffer[kFastToBufferSize];
    1093             :   return (sizeof(i) == 4) ?
    1094             :     FastInt32ToBuffer(i, buffer) :
    1095         217 :     FastInt64ToBuffer(i, buffer);
    1096             : }
    1097             : 
    1098         203 : string SimpleItoa(unsigned long long i) {
    1099             :   char buffer[kFastToBufferSize];
    1100             :   return string(buffer, (sizeof(i) == 4) ?
    1101             :     FastUInt32ToBufferLeft(i, buffer) :
    1102         406 :     FastUInt64ToBufferLeft(i, buffer));
    1103             : }
    1104             : 
    1105             : // ----------------------------------------------------------------------
    1106             : // SimpleDtoa()
    1107             : // SimpleFtoa()
    1108             : // DoubleToBuffer()
    1109             : // FloatToBuffer()
    1110             : //    We want to print the value without losing precision, but we also do
    1111             : //    not want to print more digits than necessary.  This turns out to be
    1112             : //    trickier than it sounds.  Numbers like 0.2 cannot be represented
    1113             : //    exactly in binary.  If we print 0.2 with a very large precision,
    1114             : //    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
    1115             : //    On the other hand, if we set the precision too low, we lose
    1116             : //    significant digits when printing numbers that actually need them.
    1117             : //    It turns out there is no precision value that does the right thing
    1118             : //    for all numbers.
    1119             : //
    1120             : //    Our strategy is to first try printing with a precision that is never
    1121             : //    over-precise, then parse the result with strtod() to see if it
    1122             : //    matches.  If not, we print again with a precision that will always
    1123             : //    give a precise result, but may use more digits than necessary.
    1124             : //
    1125             : //    An arguably better strategy would be to use the algorithm described
    1126             : //    in "How to Print Floating-Point Numbers Accurately" by Steele &
    1127             : //    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
    1128             : //    however, that the following implementation is about as fast as
    1129             : //    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
    1130             : //    will not scale well on multi-core machines.  DMG's code is slightly
    1131             : //    more accurate (in that it will never use more digits than
    1132             : //    necessary), but this is probably irrelevant for most users.
    1133             : //
    1134             : //    Rob Pike and Ken Thompson also have an implementation of dtoa() in
    1135             : //    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
    1136             : //    one in that it makes guesses and then uses strtod() to check them.
    1137             : //    Their implementation is faster because they use their own code to
    1138             : //    generate the digits in the first place rather than use snprintf(),
    1139             : //    thus avoiding format string parsing overhead.  However, this makes
    1140             : //    it considerably more complicated than the following implementation,
    1141             : //    and it is embedded in a larger library.  If speed turns out to be
    1142             : //    an issue, we could re-implement this in terms of their
    1143             : //    implementation.
    1144             : // ----------------------------------------------------------------------
    1145             : 
    1146         140 : string SimpleDtoa(double value) {
    1147             :   char buffer[kDoubleToBufferSize];
    1148         140 :   return DoubleToBuffer(value, buffer);
    1149             : }
    1150             : 
    1151         151 : string SimpleFtoa(float value) {
    1152             :   char buffer[kFloatToBufferSize];
    1153         151 :   return FloatToBuffer(value, buffer);
    1154             : }
    1155             : 
    // True for the characters accepted here as part of a printed number:
    // ASCII digits, the exponent markers 'e'/'E', and the signs '+'/'-'.
    // The decimal point is deliberately not in this set.
    static inline bool IsValidFloatChar(char c) {
      switch (c) {
        case 'e':
        case 'E':
        case '+':
        case '-':
          return true;
        default:
          return c >= '0' && c <= '9';
      }
    }
    1161             : 
    1162         237 : void DelocalizeRadix(char* buffer) {
    1163             :   // Fast check:  if the buffer has a normal decimal point, assume no
    1164             :   // translation is needed.
    1165         237 :   if (strchr(buffer, '.') != NULL) return;
    1166             : 
    1167             :   // Find the first unknown character.
    1168        1020 :   while (IsValidFloatChar(*buffer)) ++buffer;
    1169             : 
    1170         184 :   if (*buffer == '\0') {
    1171             :     // No radix character found.
    1172             :     return;
    1173             :   }
    1174             : 
    1175             :   // We are now pointing at the locale-specific radix character.  Replace it
    1176             :   // with '.'.
    1177           0 :   *buffer = '.';
    1178           0 :   ++buffer;
    1179             : 
    1180           0 :   if (!IsValidFloatChar(*buffer) && *buffer != '\0') {
    1181             :     // It appears the radix was a multi-byte character.  We need to remove the
    1182             :     // extra bytes.
    1183             :     char* target = buffer;
    1184           0 :     do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0');
    1185           0 :     memmove(target, buffer, strlen(buffer) + 1);
    1186             :   }
    1187             : }
    1188             : 
    1189         140 : char* DoubleToBuffer(double value, char* buffer) {
    1190             :   // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
    1191             :   // platforms these days.  Just in case some system exists where DBL_DIG
    1192             :   // is significantly larger -- and risks overflowing our buffer -- we have
    1193             :   // this assert.
    1194             :   GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
    1195             : 
    1196         140 :   if (value == numeric_limits<double>::infinity()) {
    1197             :     strcpy(buffer, "inf");
    1198          12 :     return buffer;
    1199         128 :   } else if (value == -numeric_limits<double>::infinity()) {
    1200             :     strcpy(buffer, "-inf");
    1201           8 :     return buffer;
    1202         120 :   } else if (MathLimits<double>::IsNaN(value)) {
    1203             :     strcpy(buffer, "nan");
    1204          10 :     return buffer;
    1205             :   }
    1206             : 
    1207             :   int snprintf_result =
    1208         110 :     snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
    1209             : 
    1210             :   // The snprintf should never overflow because the buffer is significantly
    1211             :   // larger than the precision we asked for.
    1212             :   GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
    1213             : 
    1214             :   // We need to make parsed_value volatile in order to force the compiler to
    1215             :   // write it out to the stack.  Otherwise, it may keep the value in a
    1216             :   // register, and if it does that, it may keep it as a long double instead
    1217             :   // of a double.  This long double may have extra bits that make it compare
    1218             :   // unequal to "value" even though it would be exactly equal if it were
    1219             :   // truncated to a double.
    1220         110 :   volatile double parsed_value = strtod(buffer, NULL);
    1221         110 :   if (parsed_value != value) {
    1222             :     int snprintf_result =
    1223           0 :       snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
    1224             : 
    1225             :     // Should never overflow; see above.
    1226             :     GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
    1227             :   }
    1228             : 
    1229         110 :   DelocalizeRadix(buffer);
    1230             :   return buffer;
    1231             : }
    1232             : 
    1233           0 : static int memcasecmp(const char *s1, const char *s2, size_t len) {
    1234           0 :   const unsigned char *us1 = reinterpret_cast<const unsigned char *>(s1);
    1235           0 :   const unsigned char *us2 = reinterpret_cast<const unsigned char *>(s2);
    1236             : 
    1237           0 :   for ( int i = 0; i < len; i++ ) {
    1238             :     const int diff =
    1239           0 :       static_cast<int>(static_cast<unsigned char>(ascii_tolower(us1[i]))) -
    1240           0 :       static_cast<int>(static_cast<unsigned char>(ascii_tolower(us2[i])));
    1241           0 :     if (diff != 0) return diff;
    1242             :   }
    1243             :   return 0;
    1244             : }
    1245             : 
    1246             : inline bool CaseEqual(StringPiece s1, StringPiece s2) {
    1247           0 :   if (s1.size() != s2.size()) return false;
    1248           0 :   return memcasecmp(s1.data(), s2.data(), s1.size()) == 0;
    1249             : }
    1250             : 
    1251           0 : bool safe_strtob(StringPiece str, bool* value) {
    1252           0 :   GOOGLE_CHECK(value != NULL) << "NULL output boolean given.";
    1253           0 :   if (CaseEqual(str, "true") || CaseEqual(str, "t") ||
    1254           0 :       CaseEqual(str, "yes") || CaseEqual(str, "y") ||
    1255           0 :       CaseEqual(str, "1")) {
    1256           0 :     *value = true;
    1257           0 :     return true;
    1258             :   }
    1259           0 :   if (CaseEqual(str, "false") || CaseEqual(str, "f") ||
    1260           0 :       CaseEqual(str, "no") || CaseEqual(str, "n") ||
    1261           0 :       CaseEqual(str, "0")) {
    1262           0 :     *value = false;
    1263           0 :     return true;
    1264             :   }
    1265             :   return false;
    1266             : }
    1267             : 
    // Parses `str` as a float into *value.  Succeeds only when the input is
    // non-empty, the parser consumed it up to the terminating NUL, and errno
    // stayed 0.  *value is written even when the function returns false.
    bool safe_strtof(const char* str, float* value) {
      char* stop;
      errno = 0;  // cleared first because the parser only sets it on failure
    #if defined(_WIN32) || defined (__hpux)  // has no strtof()
      *value = strtod(str, &stop);
    #else
      *value = strtof(str, &stop);
    #endif
      if (*str == 0) return false;   // empty input
      if (*stop != 0) return false;  // unparsed characters remain
      return errno == 0;
    }
    1278             : 
    1279           0 : bool safe_strtod(const char* str, double* value) {
    1280             :   char* endptr;
    1281           0 :   *value = strtod(str, &endptr);
    1282           0 :   if (endptr != str) {
    1283           0 :     while (ascii_isspace(*endptr)) ++endptr;
    1284             :   }
    1285             :   // Ignore range errors from strtod.  The values it
    1286             :   // returns on underflow and overflow are the right
    1287             :   // fallback in a robust setting.
    1288           0 :   return *str != '\0' && *endptr == '\0';
    1289             : }
    1290             : 
    1291           0 : bool safe_strto32(const string& str, int32* value) {
    1292           0 :   return safe_int_internal(str, value);
    1293             : }
    1294             : 
    1295           0 : bool safe_strtou32(const string& str, uint32* value) {
    1296           0 :   return safe_uint_internal(str, value);
    1297             : }
    1298             : 
    1299           0 : bool safe_strto64(const string& str, int64* value) {
    1300           0 :   return safe_int_internal(str, value);
    1301             : }
    1302             : 
    1303           0 : bool safe_strtou64(const string& str, uint64* value) {
    1304           0 :   return safe_uint_internal(str, value);
    1305             : }
    1306             : 
    1307         151 : char* FloatToBuffer(float value, char* buffer) {
    1308             :   // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
    1309             :   // platforms these days.  Just in case some system exists where FLT_DIG
    1310             :   // is significantly larger -- and risks overflowing our buffer -- we have
    1311             :   // this assert.
    1312             :   GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
    1313             : 
    1314         151 :   if (value == numeric_limits<double>::infinity()) {
    1315             :     strcpy(buffer, "inf");
    1316           8 :     return buffer;
    1317         143 :   } else if (value == -numeric_limits<double>::infinity()) {
    1318             :     strcpy(buffer, "-inf");
    1319           8 :     return buffer;
    1320         135 :   } else if (MathLimits<float>::IsNaN(value)) {
    1321             :     strcpy(buffer, "nan");
    1322           8 :     return buffer;
    1323             :   }
    1324             : 
    1325             :   int snprintf_result =
    1326         254 :     snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
    1327             : 
    1328             :   // The snprintf should never overflow because the buffer is significantly
    1329             :   // larger than the precision we asked for.
    1330             :   GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
    1331             : 
    1332             :   float parsed_value;
    1333         127 :   if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
    1334             :     int snprintf_result =
    1335           0 :       snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value);
    1336             : 
    1337             :     // Should never overflow; see above.
    1338             :     GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
    1339             :   }
    1340             : 
    1341         127 :   DelocalizeRadix(buffer);
    1342             :   return buffer;
    1343             : }
    1344             : 
    1345             : namespace strings {
    1346             : 
    1347       12771 : AlphaNum::AlphaNum(strings::Hex hex) {
    1348       12771 :   char *const end = &digits[kFastToBufferSize];
    1349       12771 :   char *writer = end;
    1350       12771 :   uint64 value = hex.value;
    1351       12771 :   uint64 width = hex.spec;
    1352             :   // We accomplish minimum width by OR'ing in 0x10000 to the user's value,
    1353             :   // where 0x10000 is the smallest hex number that is as wide as the user
    1354             :   // asked for.
    1355       12771 :   uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) | value;
    1356             :   static const char hexdigits[] = "0123456789abcdef";
    1357       42510 :   do {
    1358       42510 :     *--writer = hexdigits[value & 0xF];
    1359       42510 :     value >>= 4;
    1360       42510 :     mask >>= 4;
    1361             :   } while (mask != 0);
    1362       12771 :   piece_data_ = writer;
    1363       12771 :   piece_size_ = end - writer;
    1364       12771 : }
    1365             : 
    1366             : }  // namespace strings
    1367             : 
    1368             : // ----------------------------------------------------------------------
    1369             : // StrCat()
    1370             : //    This merges the given strings or integers, with no delimiter.  This
    1371             : //    is designed to be the fastest possible way to construct a string out
    1372             : //    of a mix of raw C strings, C++ strings, and integer values.
    1373             : // ----------------------------------------------------------------------
    1374             : 
    1375             : // Append is merely a version of memcpy that returns the address of the byte
    1376             : // after the area just overwritten.  It comes in multiple flavors to minimize
    1377             : // call overhead.
    1378       29385 : static char *Append1(char *out, const AlphaNum &x) {
    1379       19590 :   memcpy(out, x.data(), x.size());
    1380        9795 :   return out + x.size();
    1381             : }
    1382             : 
    1383      120198 : static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) {
    1384       40066 :   memcpy(out, x1.data(), x1.size());
    1385       20033 :   out += x1.size();
    1386             : 
    1387       40066 :   memcpy(out, x2.data(), x2.size());
    1388       20033 :   return out + x2.size();
    1389             : }
    1390             : 
    1391          55 : static char *Append4(char *out,
    1392         330 :                      const AlphaNum &x1, const AlphaNum &x2,
    1393         330 :                      const AlphaNum &x3, const AlphaNum &x4) {
    1394         110 :   memcpy(out, x1.data(), x1.size());
    1395          55 :   out += x1.size();
    1396             : 
    1397         110 :   memcpy(out, x2.data(), x2.size());
    1398          55 :   out += x2.size();
    1399             : 
    1400         110 :   memcpy(out, x3.data(), x3.size());
    1401          55 :   out += x3.size();
    1402             : 
    1403         110 :   memcpy(out, x4.data(), x4.size());
    1404          55 :   return out + x4.size();
    1405             : }
    1406             : 
    1407         885 : string StrCat(const AlphaNum &a, const AlphaNum &b) {
    1408             :   string result;
    1409         590 :   result.resize(a.size() + b.size());
    1410         295 :   char *const begin = &*result.begin();
    1411         295 :   char *out = Append2(begin, a, b);
    1412             :   GOOGLE_DCHECK_EQ(out, begin + result.size());
    1413         295 :   return result;
    1414             : }
    1415             : 
    1416       39164 : string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
    1417             :   string result;
    1418       29373 :   result.resize(a.size() + b.size() + c.size());
    1419        9791 :   char *const begin = &*result.begin();
    1420        9791 :   char *out = Append2(begin, a, b);
    1421        9791 :   out = Append1(out, c);
    1422             :   GOOGLE_DCHECK_EQ(out, begin + result.size());
    1423        9791 :   return result;
    1424             : }
    1425             : 
    1426         204 : string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
    1427          51 :               const AlphaNum &d) {
    1428             :   string result;
    1429         204 :   result.resize(a.size() + b.size() + c.size() + d.size());
    1430          51 :   char *const begin = &*result.begin();
    1431          51 :   char *out = Append4(begin, a, b, c, d);
    1432             :   GOOGLE_DCHECK_EQ(out, begin + result.size());
    1433          51 :   return result;
    1434             : }
    1435             : 
    1436           0 : string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
    1437           0 :               const AlphaNum &d, const AlphaNum &e) {
    1438             :   string result;
    1439           0 :   result.resize(a.size() + b.size() + c.size() + d.size() + e.size());
    1440           0 :   char *const begin = &*result.begin();
    1441           0 :   char *out = Append4(begin, a, b, c, d);
    1442           0 :   out = Append1(out, e);
    1443             :   GOOGLE_DCHECK_EQ(out, begin + result.size());
    1444           0 :   return result;
    1445             : }
    1446             : 
    1447           0 : string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
    1448           0 :               const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) {
    1449             :   string result;
    1450           0 :   result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
    1451           0 :                 f.size());
    1452           0 :   char *const begin = &*result.begin();
    1453           0 :   char *out = Append4(begin, a, b, c, d);
    1454           0 :   out = Append2(out, e, f);
    1455             :   GOOGLE_DCHECK_EQ(out, begin + result.size());
    1456           0 :   return result;
    1457             : }
    1458             : 
    1459          16 : string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
    1460          12 :               const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
    1461           4 :               const AlphaNum &g) {
    1462             :   string result;
    1463          24 :   result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
    1464           8 :                 f.size() + g.size());
    1465           4 :   char *const begin = &*result.begin();
    1466           4 :   char *out = Append4(begin, a, b, c, d);
    1467           4 :   out = Append2(out, e, f);
    1468           4 :   out = Append1(out, g);
    1469             :   GOOGLE_DCHECK_EQ(out, begin + result.size());
    1470           4 :   return result;
    1471             : }
    1472             : 
    1473           0 : string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
    1474           0 :               const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
    1475           0 :               const AlphaNum &g, const AlphaNum &h) {
    1476             :   string result;
    1477           0 :   result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
    1478           0 :                 f.size() + g.size() + h.size());
    1479           0 :   char *const begin = &*result.begin();
    1480           0 :   char *out = Append4(begin, a, b, c, d);
    1481           0 :   out = Append4(out, e, f, g, h);
    1482             :   GOOGLE_DCHECK_EQ(out, begin + result.size());
    1483           0 :   return result;
    1484             : }
    1485             : 
    1486           0 : string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
    1487           0 :               const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
    1488           0 :               const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) {
    1489             :   string result;
    1490           0 :   result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
    1491           0 :                 f.size() + g.size() + h.size() + i.size());
    1492           0 :   char *const begin = &*result.begin();
    1493           0 :   char *out = Append4(begin, a, b, c, d);
    1494           0 :   out = Append4(out, e, f, g, h);
    1495           0 :   out = Append1(out, i);
    1496             :   GOOGLE_DCHECK_EQ(out, begin + result.size());
    1497           0 :   return result;
    1498             : }
    1499             : 
    // StrAppend can be handed a char * that points partway into the very
    // string being appended to, and the outcome of that is unpredictable.
    // This debug-mode check catches it.  The distance from the destination's
    // data to the source's data is taken as an unsigned value so that a
    // single greater-than comparison against the destination's size suffices.
    #define GOOGLE_DCHECK_NO_OVERLAP(dest, src)                     \
        GOOGLE_DCHECK_GT(uintptr_t((src).data() - (dest).data()),   \
                         uintptr_t((dest).size()))
    1507             : 
    1508        3048 : void StrAppend(string *result, const AlphaNum &a) {
    1509             :   GOOGLE_DCHECK_NO_OVERLAP(*result, a);
    1510        3048 :   result->append(a.data(), a.size());
    1511        1524 : }
    1512             : 
    1513       29829 : void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) {
    1514             :   GOOGLE_DCHECK_NO_OVERLAP(*result, a);
    1515             :   GOOGLE_DCHECK_NO_OVERLAP(*result, b);
    1516        9943 :   string::size_type old_size = result->size();
    1517       19886 :   result->resize(old_size + a.size() + b.size());
    1518        9943 :   char *const begin = &*result->begin();
    1519        9943 :   char *out = Append2(begin + old_size, a, b);
    1520             :   GOOGLE_DCHECK_EQ(out, begin + result->size());
    1521        9943 : }
    1522             : 
    1523           0 : void StrAppend(string *result,
    1524           0 :                const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
    1525             :   GOOGLE_DCHECK_NO_OVERLAP(*result, a);
    1526             :   GOOGLE_DCHECK_NO_OVERLAP(*result, b);
    1527             :   GOOGLE_DCHECK_NO_OVERLAP(*result, c);
    1528           0 :   string::size_type old_size = result->size();
    1529           0 :   result->resize(old_size + a.size() + b.size() + c.size());
    1530           0 :   char *const begin = &*result->begin();
    1531           0 :   char *out = Append2(begin + old_size, a, b);
    1532           0 :   out = Append1(out, c);
    1533             :   GOOGLE_DCHECK_EQ(out, begin + result->size());
    1534           0 : }
    1535             : 
    1536           0 : void StrAppend(string *result,
    1537           0 :                const AlphaNum &a, const AlphaNum &b,
    1538           0 :                const AlphaNum &c, const AlphaNum &d) {
    1539             :   GOOGLE_DCHECK_NO_OVERLAP(*result, a);
    1540             :   GOOGLE_DCHECK_NO_OVERLAP(*result, b);
    1541             :   GOOGLE_DCHECK_NO_OVERLAP(*result, c);
    1542             :   GOOGLE_DCHECK_NO_OVERLAP(*result, d);
    1543           0 :   string::size_type old_size = result->size();
    1544           0 :   result->resize(old_size + a.size() + b.size() + c.size() + d.size());
    1545           0 :   char *const begin = &*result->begin();
    1546           0 :   char *out = Append4(begin + old_size, a, b, c, d);
    1547             :   GOOGLE_DCHECK_EQ(out, begin + result->size());
    1548           0 : }
    1549             : 
    1550           0 : int GlobalReplaceSubstring(const string& substring,
    1551             :                            const string& replacement,
    1552             :                            string* s) {
    1553           0 :   GOOGLE_CHECK(s != NULL);
    1554           0 :   if (s->empty() || substring.empty())
    1555             :     return 0;
    1556             :   string tmp;
    1557           0 :   int num_replacements = 0;
    1558           0 :   int pos = 0;
    1559           0 :   for (int match_pos = s->find(substring.data(), pos, substring.length());
    1560             :        match_pos != string::npos;
    1561             :        pos = match_pos + substring.length(),
    1562           0 :            match_pos = s->find(substring.data(), pos, substring.length())) {
    1563           0 :     ++num_replacements;
    1564             :     // Append the original content before the match.
    1565           0 :     tmp.append(*s, pos, match_pos - pos);
    1566             :     // Append the replacement for the match.
    1567           0 :     tmp.append(replacement.begin(), replacement.end());
    1568             :   }
    1569             :   // Append the content after the last match. If no replacements were made, the
    1570             :   // original string is left untouched.
    1571           0 :   if (num_replacements > 0) {
    1572           0 :     tmp.append(*s, pos, s->length() - pos);
    1573           0 :     s->swap(tmp);
    1574             :   }
    1575           0 :   return num_replacements;
    1576             : }
    1577             : 
    // Returns the number of output characters for base64-encoding `input_len`
    // bytes.  `do_padding` selects whether the '=' characters that complete
    // the final four-character group are counted.
    int CalculateBase64EscapedLen(int input_len, bool do_padding) {
      // Every complete group of three input bytes becomes four output
      // characters.
      int len = (input_len / 3) * 4;

      // The leftover bytes give the cases listed in RFC 3548
      // (http://tools.ietf.org/html/rfc3548).
      switch (input_len % 3) {
        case 0:
          // (1) The input is a whole number of 24-bit groups: nothing extra
          // and no "=" padding.
          break;
        case 1:
          // (2) Exactly 8 bits remain: two characters, then two "=" padding
          // characters when padding is requested.
          len += do_padding ? 4 : 2;
          break;
        default:
          // (3) Exactly 16 bits remain: three characters, then one "="
          // padding character when padding is requested.
          len += do_padding ? 4 : 3;
          break;
      }

      assert(len >= input_len);  // make sure we didn't overflow
      return len;
    }
    1624             : 
    1625             : // Base64Escape does padding, so this calculation includes padding.
    1626           0 : int CalculateBase64EscapedLen(int input_len) {
    1627           0 :   return CalculateBase64EscapedLen(input_len, true);
    1628             : }
    1629             : 
    1630             : // ----------------------------------------------------------------------
    1631             : // int Base64Unescape() - base64 decoder
    1632             : // int Base64Escape() - base64 encoder
    1633             : // int WebSafeBase64Unescape() - Google's variation of base64 decoder
    1634             : // int WebSafeBase64Escape() - Google's variation of base64 encoder
    1635             : //
    1636             : // Check out
    1637             : // http://tools.ietf.org/html/rfc2045 for formal description, but what we
    1638             : // care about is that...
    1639             : //   Take the encoded stuff in groups of 4 characters and turn each
    1640             : //   character into a code 0 to 63 thus:
    1641             : //           A-Z map to 0 to 25
    1642             : //           a-z map to 26 to 51
    1643             : //           0-9 map to 52 to 61
    1644             : //           +(- for WebSafe) maps to 62
    1645             : //           /(_ for WebSafe) maps to 63
    1646             : //   There will be four numbers, all less than 64 which can be represented
    1647             : //   by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
    1648             : //   Arrange the 6 digit binary numbers into three bytes as such:
    1649             : //   aaaaaabb bbbbcccc ccdddddd
    1650             : //   Equals signs (one or two) are used at the end of the encoded block to
    1651             : //   indicate that the text was not an integer multiple of three bytes long.
    1652             : // ----------------------------------------------------------------------
    1653             : // Core base64 decoder shared by the standard and web-safe variants.  Returns the number of decoded bytes, or -1 on malformed input or if dest is too small.
    1654           0 : int Base64UnescapeInternal(const char *src_param, int szsrc,  // input text and its length in bytes
    1655             :                            char *dest, int szdest,  // output buffer and its capacity; dest may be null to validate and count only
    1656             :                            const signed char* unbase64) {  // maps an input byte to its 6-bit value; negative entries are not data characters
    1657             :   static const char kPad64Equals = '=';  // standard pad character
    1658             :   static const char kPad64Dot = '.';  // alternate pad character, accepted as well
    1659             : 
    1660           0 :   int decode = 0;  // table value of the last character read; < 0 means "not a data character"
    1661           0 :   int destidx = 0;  // number of output bytes produced (or merely counted when dest is null)
    1662           0 :   int state = 0;  // number of data characters of the current group already folded into temp
    1663           0 :   unsigned int ch = 0;  // last input character read
    1664           0 :   unsigned int temp = 0;  // accumulator for the 6-bit values of the current group
    1665             : 
    1666             :   // If "char" is signed by default, using *src as an array index results in
    1667             :   // accessing negative array elements. Treat the input as a pointer to
    1668             :   // unsigned char to avoid this.
    1669           0 :   const unsigned char *src = reinterpret_cast<const unsigned char*>(src_param);
    1670             : 
    1671             :   // The GET_INPUT macro gets the next input character, skipping
    1672             :   // over any whitespace, and stopping when we reach the end of the
    1673             :   // string or when we read any non-data character.  The arguments are
    1674             :   // an arbitrary identifier (used as a label for goto) and the number
    1675             :   // of data bytes that must remain in the input to avoid aborting the
    1676             :   // loop.  On abort it records the number of data characters already
    1677             : #define GET_INPUT(label, remain)                 \
    1678             :   label:                                         \
    1679             :     --szsrc;                                     \
    1680             :     ch = *src++;                                 \
    1681             :     decode = unbase64[ch];                       \
    1682             :     if (decode < 0) {                            \
    1683             :       if (ascii_isspace(ch) && szsrc >= remain)  \
    1684             :         goto label;                              \
    1685             :       state = 4 - remain;                        \
    1686             :       break;                                     \
    1687             :     }
    1688             : 
    1689             :   // if dest is null, we're just checking to see if it's legal input
    1690             :   // rather than producing output.  (I suspect this could just be done
    1691             :   // with a regexp...).  We duplicate the loop so this test can be
    1692             :   // outside it instead of in every iteration.
    1693             : 
    1694           0 :   if (dest) {
    1695             :     // This loop consumes 4 input bytes and produces 3 output bytes
    1696             :     // per iteration.  We can't know at the start that there is enough
    1697             :     // data left in the string for a full iteration, so the loop may
    1698             :     // break out in the middle; if so 'state' will be set to the
    1699             :     // number of input bytes read.
    1700             : 
    1701           0 :     while (szsrc >= 4)  {
    1702             :       // We'll start by optimistically assuming that the next four
    1703             :       // bytes of the string (src[0..3]) are four good data bytes
    1704             :       // (that is, no nulls, whitespace, padding chars, or illegal
    1705             :       // chars).  We need to test src[0..2] for nulls individually
    1706             :       // before constructing temp to preserve the property that we
    1707             :       // never read past a null in the string (no matter how long
    1708             :       // szsrc claims the string is).
    1709             : 
    1710           0 :       if (!src[0] || !src[1] || !src[2] ||
    1711           0 :           (temp = ((unsigned(unbase64[src[0]]) << 18) |
    1712           0 :                    (unsigned(unbase64[src[1]]) << 12) |
    1713           0 :                    (unsigned(unbase64[src[2]]) << 6) |
    1714           0 :                    (unsigned(unbase64[src[3]])))) & 0x80000000) {
    1715             :         // Iff any of those four characters was bad (null, illegal,
    1716             :         // whitespace, padding), then temp's high bit will be set
    1717             :         // (because unbase64[] is -1 for all bad characters).
    1718             :         //
    1719             :         // We'll back up and resort to the slower decoder, which knows
    1720             :         // how to handle those cases.
    1721             : 
    1722           0 :         GET_INPUT(first, 4);
    1723           0 :         temp = decode;
    1724           0 :         GET_INPUT(second, 3);
    1725           0 :         temp = (temp << 6) | decode;
    1726           0 :         GET_INPUT(third, 2);
    1727           0 :         temp = (temp << 6) | decode;
    1728           0 :         GET_INPUT(fourth, 1);
    1729           0 :         temp = (temp << 6) | decode;
    1730             :       } else {
    1731             :         // We really did have four good data bytes, so advance four
    1732             :         // characters in the string.
    1733             : 
    1734           0 :         szsrc -= 4;
    1735           0 :         src += 4;
    1736           0 :         decode = -1;  // together with ch == '\0' below, the bad-character test after the loop cannot fire for this group
    1737           0 :         ch = '\0';
    1738             :       }
    1739             : 
    1740             :       // temp has 24 bits of input, so write that out as three bytes.
    1741             : 
    1742           0 :       if (destidx+3 > szdest) return -1;  // no room for three more output bytes
    1743           0 :       dest[destidx+2] = temp;  // last byte first; each store keeps only the low 8 bits of temp
    1744           0 :       temp >>= 8;
    1745           0 :       dest[destidx+1] = temp;
    1746           0 :       temp >>= 8;
    1747           0 :       dest[destidx] = temp;
    1748           0 :       destidx += 3;
    1749             :     }
    1750             :   } else {
    1751           0 :     while (szsrc >= 4)  {
    1752           0 :       if (!src[0] || !src[1] || !src[2] ||
    1753           0 :           (temp = ((unsigned(unbase64[src[0]]) << 18) |
    1754           0 :                    (unsigned(unbase64[src[1]]) << 12) |
    1755           0 :                    (unsigned(unbase64[src[2]]) << 6) |
    1756           0 :                    (unsigned(unbase64[src[3]])))) & 0x80000000) {
    1757           0 :         GET_INPUT(first_no_dest, 4);
    1758           0 :         GET_INPUT(second_no_dest, 3);
    1759           0 :         GET_INPUT(third_no_dest, 2);
    1760           0 :         GET_INPUT(fourth_no_dest, 1);
    1761             :       } else {
    1762           0 :         szsrc -= 4;
    1763           0 :         src += 4;
    1764           0 :         decode = -1;
    1765           0 :         ch = '\0';
    1766             :       }
    1767           0 :       destidx += 3;  // count the three bytes that would have been written
    1768             :     }
    1769             :   }
    1770             : 
    1771             : #undef GET_INPUT
    1772             : 
    1773             :   // if the loop terminated because we read a bad character, return
    1774             :   // now.
    1775           0 :   if (decode < 0 && ch != '\0' &&
    1776           0 :       ch != kPad64Equals && ch != kPad64Dot && !ascii_isspace(ch))
    1777             :     return -1;
    1778             : 
    1779           0 :   if (ch == kPad64Equals || ch == kPad64Dot) {
    1780             :     // if we stopped by hitting an '=' or '.', un-read that character -- we'll
    1781             :     // look at it again when we count to check for the proper number of
    1782             :     // equals signs at the end.
    1783           0 :     ++szsrc;
    1784           0 :     --src;
    1785             :   } else {
    1786             :     // This loop consumes 1 input byte per iteration.  It's used to
    1787             :     // clean up the 0-3 input bytes remaining when the first, faster
    1788             :     // loop finishes.  'temp' contains the data from 'state' input
    1789             :     // characters read by the first loop.
    1790           0 :     while (szsrc > 0)  {
    1791           0 :       --szsrc;
    1792           0 :       ch = *src++;
    1793           0 :       decode = unbase64[ch];
    1794           0 :       if (decode < 0) {
    1795           0 :         if (ascii_isspace(ch)) {
    1796             :           continue;
    1797           0 :         } else if (ch == '\0') {
    1798             :           break;
    1799           0 :         } else if (ch == kPad64Equals || ch == kPad64Dot) {
    1800             :           // back up one character; we'll read it again when we check
    1801             :           // for the correct number of pad characters at the end.
    1802             :           ++szsrc;
    1803             :           --src;
    1804             :           break;
    1805             :         } else {
    1806             :           return -1;
    1807             :         }
    1808             :       }
    1809             : 
    1810             :       // Each input character gives us six bits of output.
    1811           0 :       temp = (temp << 6) | decode;
    1812           0 :       ++state;
    1813           0 :       if (state == 4) {
    1814             :         // If we've accumulated 24 bits of output, write that out as
    1815             :         // three bytes.
    1816           0 :         if (dest) {
    1817           0 :           if (destidx+3 > szdest) return -1;
    1818           0 :           dest[destidx+2] = temp;
    1819           0 :           temp >>= 8;
    1820           0 :           dest[destidx+1] = temp;
    1821           0 :           temp >>= 8;
    1822           0 :           dest[destidx] = temp;
    1823             :         }
    1824           0 :         destidx += 3;
    1825           0 :         state = 0;
    1826           0 :         temp = 0;
    1827             :       }
    1828             :     }
    1829             :   }
    1830             : 
    1831             :   // Process the leftover data contained in 'temp' at the end of the input.
    1832           0 :   int expected_equals = 0;  // pad characters a strictly padded input would carry for this tail
    1833           0 :   switch (state) {
    1834             :     case 0:
    1835             :       // Nothing left over; output is a multiple of 3 bytes.
    1836             :       break;
    1837             : 
    1838             :     case 1:
    1839             :       // Bad input; we have 6 bits left over.
    1840             :       return -1;
    1841             : 
    1842             :     case 2:
    1843             :       // Produce one more output byte from the 12 input bits we have left.
    1844           0 :       if (dest) {
    1845           0 :         if (destidx+1 > szdest) return -1;
    1846           0 :         temp >>= 4;
    1847           0 :         dest[destidx] = temp;
    1848             :       }
    1849           0 :       ++destidx;
    1850           0 :       expected_equals = 2;
    1851           0 :       break;
    1852             : 
    1853             :     case 3:
    1854             :       // Produce two more output bytes from the 18 input bits we have left.
    1855           0 :       if (dest) {
    1856           0 :         if (destidx+2 > szdest) return -1;
    1857           0 :         temp >>= 2;
    1858           0 :         dest[destidx+1] = temp;
    1859           0 :         temp >>= 8;
    1860           0 :         dest[destidx] = temp;
    1861             :       }
    1862           0 :       destidx += 2;
    1863           0 :       expected_equals = 1;
    1864           0 :       break;
    1865             : 
    1866             :     default:
    1867             :       // state should have no other values at this point.
    1868           0 :       GOOGLE_LOG(FATAL) << "This can't happen; base64 decoder state = " << state;
    1869             :   }
    1870             : 
    1871             :   // The remainder of the string should be all whitespace, mixed with
    1872             :   // exactly 0 equals signs, or exactly 'expected_equals' equals
    1873             :   // signs.  (Always accepting 0 equals signs is a google extension
    1874             :   // not covered in the RFC, as is accepting dot as the pad character.)
    1875             : 
    1876           0 :   int equals = 0;  // pad characters ('=' or '.') seen in the remainder
    1877           0 :   while (szsrc > 0 && *src) {
    1878           0 :     if (*src == kPad64Equals || *src == kPad64Dot)
    1879           0 :       ++equals;
    1880           0 :     else if (!ascii_isspace(*src))
    1881             :       return -1;
    1882           0 :     --szsrc;
    1883           0 :     ++src;
    1884             :   }
    1885             : 
    1886           0 :   return (equals == 0 || equals == expected_equals) ? destidx : -1;
    1887             : }
    1888             : 
    1889             : // The arrays below were generated by the following code
    1890             : // #include <sys/time.h>
    1891             : // #include <stdlib.h>
    1892             : // #include <string.h>
    1893             : // main()
    1894             : // {
    1895             : //   static const char Base64[] =
    1896             : //     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    1897             : //   char *pos;
    1898             : //   int idx, i, j;
    1899             : //   printf("    ");
    1900             : //   for (i = 0; i < 255; i += 8) {
    1901             : //     for (j = i; j < i + 8; j++) {
    1902             : //       pos = strchr(Base64, j);
    1903             : //       if ((pos == NULL) || (j == 0))
    1904             : //         idx = -1;
    1905             : //       else
    1906             : //         idx = pos - Base64;
    1907             : //       if (idx == -1)
    1908             : //         printf(" %2d,     ", idx);
    1909             : //       else
    1910             : //         printf(" %2d/*%c*/,", idx, j);
    1911             : //     }
    1912             : //     printf("\n    ");
    1913             : //   }
    1914             : // }
    1915             : //
    1916             : // where the value of "Base64[]" was replaced by one of the base-64 conversion
    1917             : // tables from the functions below.
    1918             : static const signed char kUnBase64[] = {  // 256 entries indexed by byte value; -1 marks a byte that is not in the standard alphabet
    1919             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1920             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1921             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1922             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1923             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1924             :   -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
    1925             :   52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
    1926             :   60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
    1927             :   -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
    1928             :   07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
    1929             :   15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
    1930             :   23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
    1931             :   -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
    1932             :   33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
    1933             :   41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
    1934             :   49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
    1935             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1936             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1937             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1938             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1939             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1940             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1941             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1942             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1943             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1944             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1945             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1946             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1947             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1948             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1949             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1950             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
    1951             : };
    1952             : static const signed char kUnWebSafeBase64[] = {  // 256 entries indexed by byte value; here '-' maps to 62 and '_' to 63, and -1 marks every other non-alphabet byte
    1953             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1954             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1955             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1956             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1957             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1958             :   -1,      -1,      -1,      -1,      -1,      62/*-*/, -1,      -1,
    1959             :   52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
    1960             :   60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
    1961             :   -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
    1962             :   07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
    1963             :   15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
    1964             :   23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      63/*_*/,
    1965             :   -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
    1966             :   33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
    1967             :   41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
    1968             :   49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
    1969             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1970             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1971             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1972             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1973             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1974             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1975             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1976             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1977             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1978             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1979             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1980             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1981             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1982             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1983             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    1984             :   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
    1985             : };
    1986             : // Decodes web-safe base64 (kUnWebSafeBase64 table) from src into the caller's buffer; returns whatever Base64UnescapeInternal returns (decoded length, or -1 on error).
    1987           0 : int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) {
    1988           0 :   return Base64UnescapeInternal(src, szsrc, dest, szdest, kUnWebSafeBase64);
    1989             : }
    1990             : // string-output overload: decodes src[0..slen) into *dest using table unbase64; on malformed input it clears *dest and returns false.
    1991           0 : static bool Base64UnescapeInternal(const char* src, int slen, string* dest,
    1992             :                                    const signed char* unbase64) {
    1993             :   // Size the output buffer.  Base64 encodes every 3 bytes into 4 characters,
    1994             :   // so 3 * (slen / 4) bytes cover the full groups; the slen % 4 leftover
    1995             :   // characters are counted one-for-one.  See http://tools.ietf.org/html/rfc3548
    1996           0 :   const int dest_len = 3 * (slen / 4) + (slen % 4);
    1997             : 
    1998           0 :   dest->resize(dest_len);
    1999             : 
    2000             :   // Decode straight into the string: string_as_array(dest) is used as the
    2001             :   // char* output buffer, with dest_len as its capacity.
    2002             :   const int len = Base64UnescapeInternal(src, slen, string_as_array(dest),
    2003           0 :                                          dest_len, unbase64);
    2004           0 :   if (len < 0) {
    2005             :     dest->clear();
    2006           0 :     return false;
    2007             :   }
    2008             : 
    2009             :   // len may be smaller than dest_len (e.g. padding); drop the unused tail.
    2010             :   GOOGLE_DCHECK_LE(len, dest_len);
    2011           0 :   dest->erase(len);
    2012             : 
    2013           0 :   return true;
    2014             : }
    2015             : // Decodes standard-alphabet base64 (kUnBase64 table) from src into *dest; returns false on malformed input.
    2016           0 : bool Base64Unescape(StringPiece src, string* dest) {
    2017           0 :   return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
    2018             : }
    2019             : // Decodes web-safe base64 (kUnWebSafeBase64 table) from src into *dest; returns false on malformed input.
    2020           0 : bool WebSafeBase64Unescape(StringPiece src, string* dest) {
    2021           0 :   return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
    2022             : }
    2023             : // Core base64 encoder: encodes src[0..szsrc) into dest and returns the number of characters written, or 0 if szsrc <= 0 or dest is too small.
    2024           0 : int Base64EscapeInternal(const unsigned char *src, int szsrc,  // input bytes and their count
    2025             :                          char *dest, int szdest, const char *base64,  // output buffer, its capacity, and the 64-character alphabet
    2026             :                          bool do_padding) {  // when true, a short final group is padded with '=' to four characters
    2027             :   static const char kPad64 = '=';
    2028             : 
    2029           0 :   if (szsrc <= 0) return 0;
    2030             : 
    2031           0 :   if (szsrc * 4 > szdest * 3) return 0;  // dest cannot hold 4 characters per 3 input bytes; NOTE(review): these int products can overflow for very large sizes
    2032             : 
    2033           0 :   char *cur_dest = dest;
    2034           0 :   const unsigned char *cur_src = src;
    2035             : 
    2036           0 :   char *limit_dest = dest + szdest;
    2037           0 :   const unsigned char *limit_src = src + szsrc;
    2038             : 
    2039             :   // Three bytes of data encodes to four characters of cyphertext.
    2040             :   // So we can pump through three-byte chunks atomically.
    2041           0 :   while (cur_src < limit_src - 3) {  // loop while at least 4 input bytes remain (Load32 reads 4); NOTE(review): limit_src - 3 lies before src when szsrc < 3
    2042           0 :     uint32 in = BigEndian::Load32(cur_src) >> 8;
    2043             : 
    2044           0 :     cur_dest[0] = base64[in >> 18];
    2045           0 :     in &= 0x3FFFF;
    2046           0 :     cur_dest[1] = base64[in >> 12];
    2047           0 :     in &= 0xFFF;
    2048           0 :     cur_dest[2] = base64[in >> 6];
    2049           0 :     in &= 0x3F;
    2050           0 :     cur_dest[3] = base64[in];
    2051             : 
    2052           0 :     cur_dest += 4;
    2053           0 :     cur_src += 3;
    2054             :   }
    2055             :   // To save time, we didn't update szdest or szsrc in the loop.  So do it now.
    2056           0 :   szdest = limit_dest - cur_dest;
    2057           0 :   szsrc = limit_src - cur_src;
    2058             : 
    2059             :   /* now deal with the tail (<=3 bytes) */
    2060           0 :   switch (szsrc) {
    2061             :     case 0:
    2062             :       // Nothing left; nothing more to do.
    2063             :       break;
    2064             :     case 1: {
    2065             :       // One byte left: this encodes to two characters, and (optionally)
    2066             :       // two pad characters to round out the four-character cypherblock.
    2067           0 :       if ((szdest -= 2) < 0) return 0;
    2068           0 :       uint32 in = cur_src[0];
    2069           0 :       cur_dest[0] = base64[in >> 2];
    2070           0 :       in &= 0x3;
    2071           0 :       cur_dest[1] = base64[in << 4];
    2072           0 :       cur_dest += 2;
    2073           0 :       if (do_padding) {
    2074           0 :         if ((szdest -= 2) < 0) return 0;
    2075           0 :         cur_dest[0] = kPad64;
    2076           0 :         cur_dest[1] = kPad64;
    2077           0 :         cur_dest += 2;
    2078             :       }
    2079             :       break;
    2080             :     }
    2081             :     case 2: {
    2082             :       // Two bytes left: this encodes to three characters, and (optionally)
    2083             :       // one pad character to round out the four-character cypherblock.
    2084           0 :       if ((szdest -= 3) < 0) return 0;
    2085           0 :       uint32 in = BigEndian::Load16(cur_src);
    2086           0 :       cur_dest[0] = base64[in >> 10];
    2087           0 :       in &= 0x3FF;
    2088           0 :       cur_dest[1] = base64[in >> 4];
    2089           0 :       in &= 0x00F;
    2090           0 :       cur_dest[2] = base64[in << 2];
    2091           0 :       cur_dest += 3;
    2092           0 :       if (do_padding) {
    2093           0 :         if ((szdest -= 1) < 0) return 0;
    2094           0 :         cur_dest[0] = kPad64;
    2095           0 :         cur_dest += 1;
    2096             :       }
    2097             :       break;
    2098             :     }
    2099             :     case 3: {
    2100             :       // Three bytes left: same as in the big loop above.  We can't do this in
    2101             :       // the loop because the loop above always reads 4 bytes, and the fourth
    2102             :       // byte is past the end of the input.
    2103           0 :       if ((szdest -= 4) < 0) return 0;
    2104           0 :       uint32 in = (cur_src[0] << 16) + BigEndian::Load16(cur_src + 1);
    2105           0 :       cur_dest[0] = base64[in >> 18];
    2106           0 :       in &= 0x3FFFF;
    2107           0 :       cur_dest[1] = base64[in >> 12];
    2108           0 :       in &= 0xFFF;
    2109           0 :       cur_dest[2] = base64[in >> 6];
    2110           0 :       in &= 0x3F;
    2111           0 :       cur_dest[3] = base64[in];
    2112           0 :       cur_dest += 4;
    2113           0 :       break;
    2114             :     }
    2115             :     default:
    2116             :       // Should not be reached: blocks of 4 bytes are handled
    2117             :       // in the while loop before this switch statement.
    2118           0 :       GOOGLE_LOG(FATAL) << "Logic problem? szsrc = " << szsrc;
    2119           0 :       break;
    2120             :   }
    2121           0 :   return (cur_dest - dest);  // number of characters written to dest
    2122             : }
    2123             : // Standard base64 alphabet: the character at index v encodes the 6-bit value v ('+' is 62, '/' is 63).
    2124             : static const char kBase64Chars[] =
    2125             : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    2126             : // Web-safe base64 alphabet: same as the standard one except that '-' is 62 and '_' is 63.
    2127             : static const char kWebSafeBase64Chars[] =
    2128             : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
    2129             : // Encodes src into the caller's buffer with the standard alphabet, always padded; returns the Base64EscapeInternal result (characters written, 0 on failure).
    2130           0 : int Base64Escape(const unsigned char *src, int szsrc, char *dest, int szdest) {
    2131           0 :   return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true);
    2132             : }
    2133           0 : int WebSafeBase64Escape(const unsigned char *src, int szsrc, char *dest,  // encodes into the caller's buffer with the web-safe alphabet
    2134             :                         int szdest, bool do_padding) {  // do_padding selects '=' padding; returns the Base64EscapeInternal result
    2135             :   return Base64EscapeInternal(src, szsrc, dest, szdest,
    2136           0 :                               kWebSafeBase64Chars, do_padding);
    2137             : }
    2138             : // string-output overload: resizes *dest to the computed escaped length, encodes into it with base64_chars, then trims *dest to the length actually written.
    2139           0 : void Base64EscapeInternal(const unsigned char* src, int szsrc,
    2140             :                           string* dest, bool do_padding,
    2141             :                           const char* base64_chars) {
    2142             :   const int calc_escaped_size =
    2143           0 :     CalculateBase64EscapedLen(szsrc, do_padding);
    2144           0 :   dest->resize(calc_escaped_size);
    2145             :   const int escaped_len = Base64EscapeInternal(src, szsrc,
    2146             :                                                string_as_array(dest),
    2147             :                                                dest->size(),
    2148             :                                                base64_chars,
    2149           0 :                                                do_padding);
    2150             :   GOOGLE_DCHECK_EQ(calc_escaped_size, escaped_len);
    2151           0 :   dest->erase(escaped_len);
    2152           0 : }
    2153             : // Encodes src[0..szsrc) into *dest with the standard alphabet (kBase64Chars); do_padding selects '=' padding.
    2154           0 : void Base64Escape(const unsigned char *src, int szsrc,
    2155             :                   string* dest, bool do_padding) {
    2156           0 :   Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars);
    2157           0 : }
    2158             : // Encodes src[0..szsrc) into *dest with the web-safe alphabet (kWebSafeBase64Chars); do_padding selects '=' padding.
    2159           0 : void WebSafeBase64Escape(const unsigned char *src, int szsrc,
    2160             :                          string *dest, bool do_padding) {
    2161           0 :   Base64EscapeInternal(src, szsrc, dest, do_padding, kWebSafeBase64Chars);
    2162           0 : }
    2163             : // Encodes src into *dest with the standard alphabet, always with padding.
    2164           0 : void Base64Escape(StringPiece src, string* dest) {
    2165           0 :   Base64Escape(reinterpret_cast<const unsigned char*>(src.data()),
    2166           0 :                src.size(), dest, true);
    2167           0 : }
    2168             : // Encodes src into *dest with the web-safe alphabet, without padding.
    2169           0 : void WebSafeBase64Escape(StringPiece src, string* dest) {
    2170           0 :   WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),
    2171           0 :                       src.size(), dest, false);
    2172           0 : }
    2173             : // Encodes src into *dest with the web-safe alphabet, with padding.
    2174           0 : void WebSafeBase64EscapeWithPadding(StringPiece src, string* dest) {
    2175           0 :   WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),
    2176           0 :                       src.size(), dest, true);
    2177           0 : }
    2178             : 
    2179             : // Encodes code_point as UTF-8 into output, without bringing in any external
    2180             : // dependencies.  Returns the number of bytes written (1 to 4); no terminator is added.
    2181           0 : int EncodeAsUTF8Char(uint32 code_point, char* output) {
    2182           0 :   uint32 tmp = 0;  // encoded bytes packed into the low-order len bytes of the value
    2183           0 :   int len = 0;  // length of the encoding in bytes
    2184           0 :   if (code_point <= 0x7f) {
    2185           0 :     tmp = code_point;
    2186           0 :     len = 1;
    2187           0 :   } else if (code_point <= 0x07ff) {
    2188             :     tmp = 0x0000c080 |
    2189           0 :         ((code_point & 0x07c0) << 2) |
    2190           0 :         (code_point & 0x003f);
    2191           0 :     len = 2;
    2192           0 :   } else if (code_point <= 0xffff) {
    2193             :     tmp = 0x00e08080 |
    2194           0 :         ((code_point & 0xf000) << 4) |
    2195           0 :         ((code_point & 0x0fc0) << 2) |
    2196           0 :         (code_point & 0x003f);
    2197           0 :     len = 3;
    2198             :   } else {
    2199             :     // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
    2200             :     // normally only defined up to there as well.  Larger values are not
    2201             :     tmp = 0xf0808080 |
    2202           0 :         ((code_point & 0x1c0000) << 6) |
    2203           0 :         ((code_point & 0x03f000) << 4) |
    2204           0 :         ((code_point & 0x000fc0) << 2) |
    2205           0 :         (code_point & 0x003f);
    2206           0 :     len = 4;
    2207             :   }
    2208           0 :   tmp = ghtonl(tmp);  // presumably host-to-network (big-endian) order, so the encoded bytes sit last in memory -- confirm against ghtonl
    2209           0 :   memcpy(output, reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);  // copy the final len bytes of tmp
    2210           0 :   return len;
    2211             : }
    2212             : 
    2213             : // Table of UTF-8 character lengths, based on first byte
    2214             : static const unsigned char kUTF8LenTbl[256] = {
    2215             :   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    2216             :   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    2217             :   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    2218             :   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    2219             : 
    2220             :   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    2221             :   1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    2222             :   2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
    2223             :   3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4
    2224             : };
    2225             : 
    2226             : // Return length of a single UTF-8 source character
    2227           0 : int UTF8FirstLetterNumBytes(const char* src, int len) {
    2228           0 :   if (len == 0) {
    2229             :     return 0;
    2230             :   }
    2231           0 :   return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];
    2232             : }
    2233             : 
    2234             : }  // namespace protobuf
    2235             : }  // namespace google

Generated by: LCOV version 1.10