Line data Source code
1 : // Protocol Buffers - Google's data interchange format
2 : // Copyright 2008 Google Inc. All rights reserved.
3 : // https://developers.google.com/protocol-buffers/
4 : //
5 : // Redistribution and use in source and binary forms, with or without
6 : // modification, are permitted provided that the following conditions are
7 : // met:
8 : //
9 : // * Redistributions of source code must retain the above copyright
10 : // notice, this list of conditions and the following disclaimer.
11 : // * Redistributions in binary form must reproduce the above
12 : // copyright notice, this list of conditions and the following disclaimer
13 : // in the documentation and/or other materials provided with the
14 : // distribution.
15 : // * Neither the name of Google Inc. nor the names of its
16 : // contributors may be used to endorse or promote products derived from
17 : // this software without specific prior written permission.
18 : //
19 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 :
31 : // A StringPiece points to part or all of a string, Cord, double-quoted string
32 : // literal, or other string-like object. A StringPiece does *not* own the
33 : // string to which it points. A StringPiece is not null-terminated.
34 : //
35 : // You can use StringPiece as a function or method parameter. A StringPiece
36 : // parameter can receive a double-quoted string literal argument, a "const
37 : // char*" argument, a string argument, or a StringPiece argument with no data
38 : // copying. Systematic use of StringPiece for arguments reduces data
39 : // copies and strlen() calls.
40 : //
41 : // Prefer passing StringPieces by value:
42 : // void MyFunction(StringPiece arg);
43 : // If circumstances require, you may also pass by const reference:
44 : // void MyFunction(const StringPiece& arg); // not preferred
45 : // Both of these have the same lifetime semantics. Passing by value
46 : // generates slightly smaller code. For more discussion, see the thread
47 : // go/stringpiecebyvalue on c-users.
48 : //
49 : // StringPiece is also suitable for local variables if you know that
50 : // the lifetime of the underlying object is longer than the lifetime
51 : // of your StringPiece variable.
52 : //
53 : // Beware of binding a StringPiece to a temporary:
54 : // StringPiece sp = obj.MethodReturningString(); // BAD: lifetime problem
55 : //
56 : // This code is okay:
57 : // string str = obj.MethodReturningString(); // str owns its contents
58 : // StringPiece sp(str); // GOOD, because str outlives sp
59 : //
60 : // StringPiece is sometimes a poor choice for a return value and usually a poor
61 : // choice for a data member. If you do use a StringPiece this way, it is your
62 : // responsibility to ensure that the object pointed to by the StringPiece
63 : // outlives the StringPiece.
64 : //
65 : // A StringPiece may represent just part of a string; thus the name "Piece".
66 : // For example, when splitting a string, vector<StringPiece> is a natural data
67 : // type for the output. For another example, a Cord is a non-contiguous,
68 : // potentially very long string-like object. The Cord class has an interface
69 : // that iteratively provides StringPiece objects that point to the
70 : // successive pieces of a Cord object.
71 : //
72 : // A StringPiece is not null-terminated. If you write code that scans a
73 : // StringPiece, you must check its length before reading any characters.
74 : // Common idioms that work on null-terminated strings do not work on
75 : // StringPiece objects.
76 : //
77 : // There are several ways to create a null StringPiece:
78 : // StringPiece()
79 : // StringPiece(NULL)
80 : // StringPiece(NULL, 0)
81 : // For all of the above, sp.data() == NULL, sp.length() == 0,
82 : // and sp.empty() == true. Also, if you create a StringPiece with
83 : // a non-NULL pointer then sp.data() != NULL. Once created,
84 : // sp.data() will stay either NULL or not-NULL, except if you call
85 : // sp.clear() or sp.set().
86 : //
87 : // Thus, you can use StringPiece(NULL) to signal an out-of-band value
88 : // that is different from other StringPiece values. This is similar
89 : // to the way that const char* p1 = NULL; is different from
90 : // const char* p2 = "";.
91 : //
92 : // There are many ways to create an empty StringPiece:
93 : // StringPiece()
94 : // StringPiece(NULL)
95 : // StringPiece(NULL, 0)
96 : // StringPiece("")
97 : // StringPiece("", 0)
98 : // StringPiece("abcdef", 0)
99 : // StringPiece("abcdef"+6, 0)
100 : // For all of the above, sp.length() will be 0 and sp.empty() will be true.
101 : // For some empty StringPiece values, sp.data() will be NULL.
102 : // For some empty StringPiece values, sp.data() will not be NULL.
103 : //
104 : // Be careful not to confuse: null StringPiece and empty StringPiece.
105 : // The set of empty StringPieces properly includes the set of null StringPieces.
106 : // That is, every null StringPiece is an empty StringPiece,
107 : // but some non-null StringPieces are empty StringPieces too.
108 : //
109 : // All empty StringPiece values compare equal to each other.
110 : // Even a null StringPiece compares equal to a non-null empty StringPiece:
111 : // StringPiece() == StringPiece("", 0)
112 : // StringPiece(NULL) == StringPiece("abc", 0)
113 : // StringPiece(NULL, 0) == StringPiece("abcdef"+6, 0)
114 : //
115 : // Look carefully at this example:
116 : // StringPiece("") == NULL
117 : // True or false? TRUE, because StringPiece::operator== converts
118 : // the right-hand side from NULL to StringPiece(NULL),
119 : // and then compares two zero-length spans of characters.
120 : // However, we are working to make this example produce a compile error.
121 : //
122 : // Suppose you want to write:
123 : // bool TestWhat?(StringPiece sp) { return sp == NULL; } // BAD
124 : // Do not do that. Write one of these instead:
125 : // bool TestNull(StringPiece sp) { return sp.data() == NULL; }
126 : // bool TestEmpty(StringPiece sp) { return sp.empty(); }
127 : // The intent of TestWhat? is unclear. Did you mean TestNull or TestEmpty?
128 : // Right now, TestWhat? behaves like TestEmpty.
129 : // We are working to make TestWhat? produce a compile error.
130 : // TestNull is good to test for an out-of-band signal.
131 : // TestEmpty is good to test for an empty StringPiece.
132 : //
133 : // Caveats (again):
134 : // (1) The lifetime of the pointed-to string (or piece of a string)
135 : // must be longer than the lifetime of the StringPiece.
136 : // (2) There may or may not be a '\0' character after the end of
137 : // StringPiece data.
138 : // (3) A null StringPiece is empty.
139 : // An empty StringPiece may or may not be a null StringPiece.
140 :
141 : #ifndef GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
142 : #define GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
143 :
144 : #include <assert.h>
145 : #include <stddef.h>
146 : #include <string.h>
147 : #include <iosfwd>
148 : #include <limits>
149 : #include <string>
150 :
151 : #include <google/protobuf/stubs/common.h>
152 :
153 : namespace google {
154 : namespace protobuf {
155 : // StringPiece has *two* size types.
156 : // StringPiece::size_type
157 : // is unsigned
158 : // is 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
159 : // no future changes intended
160 : // stringpiece_ssize_type
161 : // is signed
162 : // is 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
163 : // future changes intended: http://go/64BitStringPiece
164 : //
165 : typedef string::difference_type stringpiece_ssize_type;
166 :
// STRINGPIECE_CHECK_SIZE protects us from 32-bit overflows.
// It is 1 in debug builds (NDEBUG not defined) and in fortified builds
// (_FORTIFY_SOURCE defined to a positive value); otherwise it is 0.
// TODO(mec): delete this after stringpiece_ssize_type goes 64 bit.
#if !defined(NDEBUG) || (defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0)
#define STRINGPIECE_CHECK_SIZE 1
#else
#define STRINGPIECE_CHECK_SIZE 0
#endif
176 :
177 : class LIBPROTOBUF_EXPORT StringPiece {
178 : private:
179 : const char* ptr_;
180 : stringpiece_ssize_type length_;
181 :
182 : // Prevent overflow in debug mode or fortified mode.
183 : // sizeof(stringpiece_ssize_type) may be smaller than sizeof(size_t).
184 : static stringpiece_ssize_type CheckedSsizeTFromSizeT(size_t size) {
185 : #if STRINGPIECE_CHECK_SIZE > 0
186 : #ifdef max
187 : #undef max
188 : #endif
189 92 : if (size > static_cast<size_t>(
190 : std::numeric_limits<stringpiece_ssize_type>::max())) {
191 : // Some people grep for this message in logs
192 : // so take care if you ever change it.
193 0 : LogFatalSizeTooBig(size, "size_t to int conversion");
194 : }
195 : #endif
196 92 : return static_cast<stringpiece_ssize_type>(size);
197 : }
198 :
199 : // Out-of-line error path.
200 : static void LogFatalSizeTooBig(size_t size, const char* details);
201 :
202 : public:
203 : // We provide non-explicit singleton constructors so users can pass
204 : // in a "const char*" or a "string" wherever a "StringPiece" is
205 : // expected.
206 : //
207 : // Style guide exception granted:
208 : // http://goto/style-guide-exception-20978288
209 0 : StringPiece() : ptr_(NULL), length_(0) {}
210 :
211 92 : StringPiece(const char* str) // NOLINT(runtime/explicit)
212 92 : : ptr_(str), length_(0) {
213 92 : if (str != NULL) {
214 184 : length_ = CheckedSsizeTFromSizeT(strlen(str));
215 : }
216 92 : }
217 :
218 : template <class Allocator>
219 0 : StringPiece( // NOLINT(runtime/explicit)
220 : const std::basic_string<char, std::char_traits<char>, Allocator>& str)
221 0 : : ptr_(str.data()), length_(0) {
222 0 : length_ = CheckedSsizeTFromSizeT(str.size());
223 0 : }
224 : #if defined(HAS_GLOBAL_STRING)
225 : template <class Allocator>
226 : StringPiece( // NOLINT(runtime/explicit)
227 : const basic_string<char, std::char_traits<char>, Allocator>& str)
228 : : ptr_(str.data()), length_(0) {
229 : length_ = CheckedSsizeTFromSizeT(str.size());
230 : }
231 : #endif
232 :
233 : StringPiece(const char* offset, stringpiece_ssize_type len)
234 0 : : ptr_(offset), length_(len) {
235 : assert(len >= 0);
236 : }
237 :
238 : // Substring of another StringPiece.
239 : // pos must be non-negative and <= x.length().
240 : StringPiece(StringPiece x, stringpiece_ssize_type pos);
241 : // Substring of another StringPiece.
242 : // pos must be non-negative and <= x.length().
243 : // len must be non-negative and will be pinned to at most x.length() - pos.
244 : StringPiece(StringPiece x,
245 : stringpiece_ssize_type pos,
246 : stringpiece_ssize_type len);
247 :
248 : // data() may return a pointer to a buffer with embedded NULs, and the
249 : // returned buffer may or may not be null terminated. Therefore it is
250 : // typically a mistake to pass data() to a routine that expects a NUL
251 : // terminated string.
252 : const char* data() const { return ptr_; }
253 : stringpiece_ssize_type size() const { return length_; }
254 : stringpiece_ssize_type length() const { return length_; }
255 0 : bool empty() const { return length_ == 0; }
256 :
257 : void clear() {
258 : ptr_ = NULL;
259 : length_ = 0;
260 : }
261 :
262 : void set(const char* data, stringpiece_ssize_type len) {
263 : assert(len >= 0);
264 : ptr_ = data;
265 : length_ = len;
266 : }
267 :
268 : void set(const char* str) {
269 : ptr_ = str;
270 : if (str != NULL)
271 : length_ = CheckedSsizeTFromSizeT(strlen(str));
272 : else
273 : length_ = 0;
274 : }
275 :
276 : void set(const void* data, stringpiece_ssize_type len) {
277 : ptr_ = reinterpret_cast<const char*>(data);
278 : length_ = len;
279 : }
280 :
281 : char operator[](stringpiece_ssize_type i) const {
282 : assert(0 <= i);
283 : assert(i < length_);
284 0 : return ptr_[i];
285 : }
286 :
287 : void remove_prefix(stringpiece_ssize_type n) {
288 : assert(length_ >= n);
289 0 : ptr_ += n;
290 0 : length_ -= n;
291 : }
292 :
293 : void remove_suffix(stringpiece_ssize_type n) {
294 : assert(length_ >= n);
295 0 : length_ -= n;
296 : }
297 :
298 : // returns {-1, 0, 1}
299 : int compare(StringPiece x) const {
300 : const stringpiece_ssize_type min_size =
301 : length_ < x.length_ ? length_ : x.length_;
302 : int r = memcmp(ptr_, x.ptr_, min_size);
303 : if (r < 0) return -1;
304 : if (r > 0) return 1;
305 : if (length_ < x.length_) return -1;
306 : if (length_ > x.length_) return 1;
307 : return 0;
308 : }
309 :
310 : string as_string() const {
311 : return ToString();
312 : }
313 : // We also define ToString() here, since many other string-like
314 : // interfaces name the routine that converts to a C++ string
315 : // "ToString", and it's confusing to have the method that does that
316 : // for a StringPiece be called "as_string()". We also leave the
317 : // "as_string()" method defined here for existing code.
318 184 : string ToString() const {
319 92 : if (ptr_ == NULL) return string();
320 92 : return string(data(), size());
321 : }
322 :
323 : operator string() const {
324 : return ToString();
325 : }
326 :
327 : void CopyToString(string* target) const;
328 : void AppendToString(string* target) const;
329 :
330 : bool starts_with(StringPiece x) const {
331 0 : return (length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0);
332 : }
333 :
334 0 : bool ends_with(StringPiece x) const {
335 0 : return ((length_ >= x.length_) &&
336 0 : (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
337 : }
338 :
339 : // Checks whether StringPiece starts with x and if so advances the beginning
340 : // of it to past the match. It's basically a shortcut for starts_with
341 : // followed by remove_prefix.
342 : bool Consume(StringPiece x);
343 : // Like above but for the end of the string.
344 : bool ConsumeFromEnd(StringPiece x);
345 :
346 : // standard STL container boilerplate
347 : typedef char value_type;
348 : typedef const char* pointer;
349 : typedef const char& reference;
350 : typedef const char& const_reference;
351 : typedef size_t size_type;
352 : typedef ptrdiff_t difference_type;
353 : static const size_type npos;
354 : typedef const char* const_iterator;
355 : typedef const char* iterator;
356 : typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
357 : typedef std::reverse_iterator<iterator> reverse_iterator;
358 : iterator begin() const { return ptr_; }
359 : iterator end() const { return ptr_ + length_; }
360 : const_reverse_iterator rbegin() const {
361 : return const_reverse_iterator(ptr_ + length_);
362 : }
363 : const_reverse_iterator rend() const {
364 : return const_reverse_iterator(ptr_);
365 : }
366 : stringpiece_ssize_type max_size() const { return length_; }
367 : stringpiece_ssize_type capacity() const { return length_; }
368 :
369 : // cpplint.py emits a false positive [build/include_what_you_use]
370 : stringpiece_ssize_type copy(char* buf, size_type n, size_type pos = 0) const; // NOLINT
371 :
372 : bool contains(StringPiece s) const;
373 :
374 : stringpiece_ssize_type find(StringPiece s, size_type pos = 0) const;
375 : stringpiece_ssize_type find(char c, size_type pos = 0) const;
376 : stringpiece_ssize_type rfind(StringPiece s, size_type pos = npos) const;
377 : stringpiece_ssize_type rfind(char c, size_type pos = npos) const;
378 :
379 : stringpiece_ssize_type find_first_of(StringPiece s, size_type pos = 0) const;
380 : stringpiece_ssize_type find_first_of(char c, size_type pos = 0) const {
381 0 : return find(c, pos);
382 : }
383 : stringpiece_ssize_type find_first_not_of(StringPiece s,
384 : size_type pos = 0) const;
385 : stringpiece_ssize_type find_first_not_of(char c, size_type pos = 0) const;
386 : stringpiece_ssize_type find_last_of(StringPiece s,
387 : size_type pos = npos) const;
388 : stringpiece_ssize_type find_last_of(char c, size_type pos = npos) const {
389 0 : return rfind(c, pos);
390 : }
391 : stringpiece_ssize_type find_last_not_of(StringPiece s,
392 : size_type pos = npos) const;
393 : stringpiece_ssize_type find_last_not_of(char c, size_type pos = npos) const;
394 :
395 : StringPiece substr(size_type pos, size_type n = npos) const;
396 : };
397 :
398 : // This large function is defined inline so that in a fairly common case where
399 : // one of the arguments is a literal, the compiler can elide a lot of the
400 : // following comparisons.
401 : inline bool operator==(StringPiece x, StringPiece y) {
402 : stringpiece_ssize_type len = x.size();
403 : if (len != y.size()) {
404 : return false;
405 : }
406 :
407 : return x.data() == y.data() || len <= 0 ||
408 : memcmp(x.data(), y.data(), len) == 0;
409 : }
410 :
411 : inline bool operator!=(StringPiece x, StringPiece y) {
412 : return !(x == y);
413 : }
414 :
415 : inline bool operator<(StringPiece x, StringPiece y) {
416 : const stringpiece_ssize_type min_size =
417 : x.size() < y.size() ? x.size() : y.size();
418 : const int r = memcmp(x.data(), y.data(), min_size);
419 : return (r < 0) || (r == 0 && x.size() < y.size());
420 : }
421 :
422 : inline bool operator>(StringPiece x, StringPiece y) {
423 : return y < x;
424 : }
425 :
426 : inline bool operator<=(StringPiece x, StringPiece y) {
427 : return !(x > y);
428 : }
429 :
430 : inline bool operator>=(StringPiece x, StringPiece y) {
431 : return !(x < y);
432 : }
433 :
434 : // allow StringPiece to be logged
435 : extern std::ostream& operator<<(std::ostream& o, StringPiece piece);
436 :
437 : } // namespace protobuf
438 : } // namespace google
439 :
440 : #endif // GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
|