Line data Source code
1 : // Protocol Buffers - Google's data interchange format
2 : // Copyright 2008 Google Inc. All rights reserved.
3 : // https://developers.google.com/protocol-buffers/
4 : //
5 : // Redistribution and use in source and binary forms, with or without
6 : // modification, are permitted provided that the following conditions are
7 : // met:
8 : //
9 : // * Redistributions of source code must retain the above copyright
10 : // notice, this list of conditions and the following disclaimer.
11 : // * Redistributions in binary form must reproduce the above
12 : // copyright notice, this list of conditions and the following disclaimer
13 : // in the documentation and/or other materials provided with the
14 : // distribution.
15 : // * Neither the name of Google Inc. nor the names of its
16 : // contributors may be used to endorse or promote products derived from
17 : // this software without specific prior written permission.
18 : //
19 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 :
31 : // Author: kenton@google.com (Kenton Varda)
32 : // Based on original Protocol Buffers design by
33 : // Sanjay Ghemawat, Jeff Dean, and others.
34 : //
35 : // Interface for manipulating databases of descriptors.
36 :
37 : #ifndef GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__
38 : #define GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__
39 :
40 : #include <map>
41 : #include <string>
42 : #include <utility>
43 : #include <vector>
44 : #include <google/protobuf/stubs/common.h>
45 : #include <google/protobuf/descriptor.h>
46 :
47 : namespace google {
48 : namespace protobuf {
49 :
50 : // Defined in this file.
51 : class DescriptorDatabase;
52 : class SimpleDescriptorDatabase;
53 : class EncodedDescriptorDatabase;
54 : class DescriptorPoolDatabase;
55 : class MergedDescriptorDatabase;
56 :
57 : // Abstract interface for a database of descriptors.
58 : //
59 : // This is useful if you want to create a DescriptorPool which loads
60 : // descriptors on-demand from some sort of large database. If the database
61 : // is large, it may be inefficient to enumerate every .proto file inside it
62 : // calling DescriptorPool::BuildFile() for each one. Instead, a DescriptorPool
63 : // can be created which wraps a DescriptorDatabase and only builds particular
64 : // descriptors when they are needed.
65 : class LIBPROTOBUF_EXPORT DescriptorDatabase {
66 : public:
67 63 : inline DescriptorDatabase() {}
68 : virtual ~DescriptorDatabase();
69 :
70 : // Find a file by file name. Fills in *output and returns true if found.
71 : // Otherwise, returns false, leaving the contents of *output undefined.
72 : virtual bool FindFileByName(const string& filename,
73 : FileDescriptorProto* output) = 0;
74 :
75 : // Find the file that declares the given fully-qualified symbol name.
76 : // If found, fills in *output and returns true, otherwise returns false
77 : // and leaves *output undefined.
78 : virtual bool FindFileContainingSymbol(const string& symbol_name,
79 : FileDescriptorProto* output) = 0;
80 :
81 : // Find the file which defines an extension extending the given message type
82 : // with the given field number. If found, fills in *output and returns true,
83 : // otherwise returns false and leaves *output undefined. containing_type
84 : // must be a fully-qualified type name.
85 : virtual bool FindFileContainingExtension(const string& containing_type,
86 : int field_number,
87 : FileDescriptorProto* output) = 0;
88 :
89 : // Finds the tag numbers used by all known extensions of
90 : // extendee_type, and appends them to output in an undefined
91 : // order. This method is best-effort: it's not guaranteed that the
92 : // database will find all extensions, and it's not guaranteed that
93 : // FindFileContainingExtension will return true on all of the found
94 : // numbers. Returns true if the search was successful, otherwise
95 : // returns false and leaves output unchanged.
96 : //
97 : // This method has a default implementation that always returns
98 : // false.
99 0 : virtual bool FindAllExtensionNumbers(const string& /* extendee_type */,
100 : vector<int>* /* output */) {
101 0 : return false;
102 : }
103 :
104 :
105 : private:
106 : GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DescriptorDatabase);
107 : };
108 :
109 : // A DescriptorDatabase into which you can insert files manually.
110 : //
111 : // FindFileContainingSymbol() is fully-implemented. When you add a file, its
112 : // symbols will be indexed for this purpose. Note that the implementation
113 : // may return false positives, but only if it isn't possible for the symbol
114 : // to be defined in any other file. In particular, if a file defines a symbol
115 : // "Foo", then searching for "Foo.[anything]" will match that file. This way,
116 : // the database does not need to aggressively index all children of a symbol.
117 : //
118 : // FindFileContainingExtension() is mostly-implemented. It works if and only
119 : // if the original FieldDescriptorProto defining the extension has a
120 : // fully-qualified type name in its "extendee" field (i.e. starts with a '.').
121 : // If the extendee is a relative name, SimpleDescriptorDatabase will not
122 : // attempt to resolve the type, so it will not know what type the extension is
123 : // extending. Therefore, calling FindFileContainingExtension() with the
124 : // extension's containing type will never actually find that extension. Note
125 : // that this is an unlikely problem, as all FileDescriptorProtos created by the
126 : // protocol compiler (as well as ones created by calling
127 : // FileDescriptor::CopyTo()) will always use fully-qualified names for all
128 : // types. You only need to worry if you are constructing FileDescriptorProtos
129 : // yourself, or are calling compiler::Parser directly.
130 : class LIBPROTOBUF_EXPORT SimpleDescriptorDatabase : public DescriptorDatabase {
131 : public:
132 : SimpleDescriptorDatabase();
133 : ~SimpleDescriptorDatabase();
134 :
135 : // Adds the FileDescriptorProto to the database, making a copy. The object
136 : // can be deleted after Add() returns. Returns false if the file conflicted
137 : // with a file already in the database, in which case an error will have
138 : // been written to GOOGLE_LOG(ERROR).
139 : bool Add(const FileDescriptorProto& file);
140 :
141 : // Adds the FileDescriptorProto to the database and takes ownership of it.
142 : bool AddAndOwn(const FileDescriptorProto* file);
143 :
144 : // implements DescriptorDatabase -----------------------------------
145 : bool FindFileByName(const string& filename,
146 : FileDescriptorProto* output);
147 : bool FindFileContainingSymbol(const string& symbol_name,
148 : FileDescriptorProto* output);
149 : bool FindFileContainingExtension(const string& containing_type,
150 : int field_number,
151 : FileDescriptorProto* output);
152 : bool FindAllExtensionNumbers(const string& extendee_type,
153 : vector<int>* output);
154 :
155 : private:
156 : // So that it can use DescriptorIndex.
157 : friend class EncodedDescriptorDatabase;
158 :
159 : // An index mapping file names, symbol names, and extension numbers to
160 : // some sort of values.
161 : template <typename Value>
162 138 : class DescriptorIndex {
163 : public:
164 : // Helpers to recursively add particular descriptors and all their contents
165 : // to the index.
166 103 : bool AddFile(const FileDescriptorProto& file,
167 : Value value);
168 : bool AddSymbol(const string& name, Value value);
169 : bool AddNestedExtensions(const DescriptorProto& message_type,
170 : Value value);
171 0 : bool AddExtension(const FieldDescriptorProto& field,
172 : Value value);
173 :
174 : Value FindFile(const string& filename);
175 : Value FindSymbol(const string& name);
176 : Value FindExtension(const string& containing_type, int field_number);
177 : bool FindAllExtensionNumbers(const string& containing_type,
178 : vector<int>* output);
179 :
180 : private:
181 : map<string, Value> by_name_;
182 : map<string, Value> by_symbol_;
183 : map<pair<string, int>, Value> by_extension_;
184 :
185 : // Invariant: The by_symbol_ map does not contain any symbols which are
186 : // prefixes of other symbols in the map. For example, "foo.bar" is a
187 : // prefix of "foo.bar.baz" (but is not a prefix of "foo.barbaz").
188 : //
189 : // This invariant is important because it means that given a symbol name,
190 : // we can find a key in the map which is a prefix of the symbol in O(lg n)
191 : // time, and we know that there is at most one such key.
192 : //
193 : // The prefix lookup algorithm works like so:
194 : // 1) Find the last key in the map which is less than or equal to the
195 : // search key.
196 : // 2) If the found key is a prefix of the search key, then return it.
197 : // Otherwise, there is no match.
198 : //
199 : // I am sure this algorithm has been described elsewhere, but since I
200 : // wasn't able to find it quickly I will instead prove that it works
201 : // myself. The key to the algorithm is that if a match exists, step (1)
202 : // will find it. Proof:
203 : // 1) Define the "search key" to be the key we are looking for, the "found
204 : // key" to be the key found in step (1), and the "match key" to be the
205 : // key which actually matches the search key (i.e. the key we're trying
206 : // to find).
207 : // 2) The found key must be less than or equal to the search key by
208 : // definition.
209 : // 3) The match key must also be less than or equal to the search key
210 : // (because it is a prefix).
211 : // 4) The match key cannot be greater than the found key, because if it
212 : // were, then step (1) of the algorithm would have returned the match
213 : // key instead (since it finds the *greatest* key which is less than or
214 : // equal to the search key).
215 : // 5) Therefore, the found key must be between the match key and the search
216 : // key, inclusive.
217 : // 6) Since the search key must be a sub-symbol of the match key, if it is
218 : // not equal to the match key, then search_key[match_key.size()] must
219 : // be '.'.
220 : // 7) Since '.' sorts before any other character that is valid in a symbol
221 : // name, then if the found key is not equal to the match key, then
222 : // found_key[match_key.size()] must also be '.', because any other value
223 : // would make it sort after the search key.
224 : // 8) Therefore, if the found key is not equal to the match key, then the
225 : // found key must be a sub-symbol of the match key. However, this would
226 : // contradict our map invariant which says that no symbol in the map is
227 : // a sub-symbol of any other.
228 : // 9) Therefore, the found key must match the match key.
229 : //
230 : // The above proof assumes the match key exists. In the case that the
231 : // match key does not exist, then step (1) will return some other symbol.
232 : // That symbol cannot be a super-symbol of the search key since if it were,
233 : // then it would be a match, and we're assuming the match key doesn't exist.
234 : // Therefore, step 2 will correctly return no match.
235 :
236 : // Find the last entry in the by_symbol_ map whose key is less than or
237 : // equal to the given name.
238 : typename map<string, Value>::iterator FindLastLessOrEqual(
239 : const string& name);
240 :
241 : // True if either the arguments are equal or super_symbol identifies a
242 : // parent symbol of sub_symbol (e.g. "foo.bar" is a parent of
243 : // "foo.bar.baz", but not a parent of "foo.barbaz").
244 : bool IsSubSymbol(const string& sub_symbol, const string& super_symbol);
245 :
246 : // Returns true if and only if all characters in the name are alphanumerics,
247 : // underscores, or periods.
248 : bool ValidateSymbolName(const string& name);
249 : };
250 :
251 :
252 : DescriptorIndex<const FileDescriptorProto*> index_;
253 : vector<const FileDescriptorProto*> files_to_delete_;  // NOTE(review): presumably the protos this database owns (Add() copies, AddAndOwn() arguments) -- confirm in the .cc
254 :
255 : // If file is non-NULL, copy it into *output and return true, otherwise
256 : // return false.
257 : bool MaybeCopy(const FileDescriptorProto* file,
258 : FileDescriptorProto* output);
259 :
260 : GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(SimpleDescriptorDatabase);
261 : };
262 :
263 : // Very similar to SimpleDescriptorDatabase, but stores all the descriptors
264 : // as raw bytes and generally tries to use as little memory as possible.
265 : //
266 : // The same caveats regarding FindFileContainingExtension() apply as with
267 : // SimpleDescriptorDatabase.
268 : class LIBPROTOBUF_EXPORT EncodedDescriptorDatabase : public DescriptorDatabase {
269 : public:
270 : EncodedDescriptorDatabase();
271 : ~EncodedDescriptorDatabase();
272 :
273 : // Adds the FileDescriptorProto to the database. The descriptor is provided
274 : // in encoded form. The database does not make a copy of the bytes, nor
275 : // does it take ownership; it's up to the caller to make sure the bytes
276 : // remain valid for the life of the database. Returns false and logs an error
277 : // if the bytes are not a valid FileDescriptorProto or if the file conflicted
278 : // with a file already in the database.
279 : bool Add(const void* encoded_file_descriptor, int size);
280 :
281 : // Like Add(), but makes a copy of the data, so that the caller does not
282 : // need to keep it around.
283 : bool AddCopy(const void* encoded_file_descriptor, int size);
284 :
285 : // Like FindFileContainingSymbol but returns only the name of the file.
286 : bool FindNameOfFileContainingSymbol(const string& symbol_name,
287 : string* output);
288 :
289 : // implements DescriptorDatabase -----------------------------------
290 : bool FindFileByName(const string& filename,
291 : FileDescriptorProto* output);
292 : bool FindFileContainingSymbol(const string& symbol_name,
293 : FileDescriptorProto* output);
294 : bool FindFileContainingExtension(const string& containing_type,
295 : int field_number,
296 : FileDescriptorProto* output);
297 : bool FindAllExtensionNumbers(const string& extendee_type,
298 : vector<int>* output);
299 :
300 : private:
301 : SimpleDescriptorDatabase::DescriptorIndex<pair<const void*, int> > index_;
302 : vector<void*> files_to_delete_;  // NOTE(review): presumably the buffers copied by AddCopy() -- confirm in the .cc
303 :
304 : // If encoded_file.first is non-NULL, parse the data into *output and return
305 : // true, otherwise return false.
306 : bool MaybeParse(pair<const void*, int> encoded_file,
307 : FileDescriptorProto* output);
308 :
309 : GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(EncodedDescriptorDatabase);
310 : };
311 :
312 : // A DescriptorDatabase that fetches files from a given pool.
313 : class LIBPROTOBUF_EXPORT DescriptorPoolDatabase : public DescriptorDatabase {
314 : public:
315 : explicit DescriptorPoolDatabase(const DescriptorPool& pool);
316 : ~DescriptorPoolDatabase();
317 :
318 : // implements DescriptorDatabase -----------------------------------
319 : bool FindFileByName(const string& filename,
320 : FileDescriptorProto* output);
321 : bool FindFileContainingSymbol(const string& symbol_name,
322 : FileDescriptorProto* output);
323 : bool FindFileContainingExtension(const string& containing_type,
324 : int field_number,
325 : FileDescriptorProto* output);
326 : bool FindAllExtensionNumbers(const string& extendee_type,
327 : vector<int>* output);
328 :
329 : private:
330 : const DescriptorPool& pool_;  // Reference member: the pool is not owned here. NOTE(review): presumably bound to the constructor argument, so that pool must outlive this database -- confirm in the .cc
331 : GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DescriptorPoolDatabase);
332 : };
333 :
334 : // A DescriptorDatabase that wraps two or more others. It first searches the
335 : // first database and, if that fails, tries the second, and so on.
336 : class LIBPROTOBUF_EXPORT MergedDescriptorDatabase : public DescriptorDatabase {
337 : public:
338 : // Merge just two databases. The sources remain property of the caller.
339 : MergedDescriptorDatabase(DescriptorDatabase* source1,
340 : DescriptorDatabase* source2);
341 : // Merge more than two databases. The sources remain property of the caller.
342 : // The vector may be deleted after the constructor returns but the
343 : // DescriptorDatabases need to stick around.
344 : explicit MergedDescriptorDatabase(const vector<DescriptorDatabase*>& sources);
345 : ~MergedDescriptorDatabase();
346 :
347 : // implements DescriptorDatabase -----------------------------------
348 : bool FindFileByName(const string& filename,
349 : FileDescriptorProto* output);
350 : bool FindFileContainingSymbol(const string& symbol_name,
351 : FileDescriptorProto* output);
352 : bool FindFileContainingExtension(const string& containing_type,
353 : int field_number,
354 : FileDescriptorProto* output);
355 : // Merges the results of calling all databases. Returns true iff any
356 : // of the databases returned true.
357 : bool FindAllExtensionNumbers(const string& extendee_type,
358 : vector<int>* output);
359 :
360 :
361 : private:
362 : vector<DescriptorDatabase*> sources_;  // Not owned: per the constructor comments, the sources remain property of the caller.
363 : GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MergedDescriptorDatabase);
364 : };
365 :
366 : } // namespace protobuf
367 :
368 : } // namespace google
369 : #endif // GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__
|