Line data Source code
1 : /*
2 : *
3 : * Copyright 2015, Google Inc.
4 : * All rights reserved.
5 : *
6 : * Redistribution and use in source and binary forms, with or without
7 : * modification, are permitted provided that the following conditions are
8 : * met:
9 : *
10 : * * Redistributions of source code must retain the above copyright
11 : * notice, this list of conditions and the following disclaimer.
12 : * * Redistributions in binary form must reproduce the above
13 : * copyright notice, this list of conditions and the following disclaimer
14 : * in the documentation and/or other materials provided with the
15 : * distribution.
16 : * * Neither the name of Google Inc. nor the names of its
17 : * contributors may be used to endorse or promote products derived from
18 : * this software without specific prior written permission.
19 : *
20 : * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 : * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 : * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 : * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 : * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 : * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 : * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 : * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 : * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 : * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 : * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 : *
32 : */
33 :
34 : #include "src/core/client_config/uri_parser.h"
35 :
36 : #include <string.h>
37 :
38 : #include <grpc/support/alloc.h>
39 : #include <grpc/support/log.h>
40 : #include <grpc/support/port_platform.h>
41 : #include <grpc/support/string_util.h>
42 :
43 : /** a size_t default value... maps to all 1's */
44 : #define NOT_SET (~(size_t)0)
45 :
46 905 : static grpc_uri *bad_uri(const char *uri_text, size_t pos, const char *section,
47 : int suppress_errors) {
48 : char *line_prefix;
49 : size_t pfx_len;
50 :
51 905 : if (!suppress_errors) {
52 5 : gpr_asprintf(&line_prefix, "bad uri.%s: '", section);
53 5 : pfx_len = strlen(line_prefix) + pos;
54 5 : gpr_log(GPR_ERROR, "%s%s'", line_prefix, uri_text);
55 5 : gpr_free(line_prefix);
56 :
57 5 : line_prefix = gpr_malloc(pfx_len + 1);
58 5 : memset(line_prefix, ' ', pfx_len);
59 5 : line_prefix[pfx_len] = 0;
60 5 : gpr_log(GPR_ERROR, "%s^ here", line_prefix);
61 5 : gpr_free(line_prefix);
62 : }
63 :
64 905 : return NULL;
65 : }
66 :
67 : /** Returns a copy of \a src[begin, end) */
68 45475 : static char *copy_component(const char *src, size_t begin, size_t end) {
69 45475 : char *out = gpr_malloc(end - begin + 1);
70 45475 : memcpy(out, src + begin, end - begin);
71 45475 : out[end - begin] = 0;
72 45475 : return out;
73 : }
74 :
75 : /** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar
76 : * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent
77 : * sign not followed by two hex digits), NOT_SET is returned. */
78 326 : static size_t parse_pchar(const char *uri_text, size_t i) {
79 : /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
80 : * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
81 : * pct-encoded = "%" HEXDIG HEXDIG
82 : * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
83 : / "*" / "+" / "," / ";" / "=" */
84 326 : char c = uri_text[i];
85 326 : if (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) ||
86 34 : ((c >= '0') && (c <= '9')) ||
87 42 : (c == '-' || c == '.' || c == '_' || c == '~') || /* unreserved */
88 22 : (c == '!' || c == '$' || c == '&' || c == '\'' || c == '$' || c == '&' ||
89 21 : c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' ||
90 : c == '=') /* sub-delims */) {
91 315 : return 1;
92 : }
93 11 : if (c == '%') { /* pct-encoded */
94 : size_t j;
95 1 : if (uri_text[i + 1] == 0 || uri_text[i + 2] == 0) {
96 1 : return NOT_SET;
97 : }
98 0 : for (j = i + 1; j < 2; j++) {
99 0 : c = uri_text[j];
100 0 : if (!(((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) ||
101 0 : ((c >= 'A') && (c <= 'F')))) {
102 0 : return NOT_SET;
103 : }
104 : }
105 0 : return 2;
106 : }
107 10 : return 0;
108 : }
109 :
110 : /* *( pchar / "?" / "/" ) */
111 25 : static int parse_fragment_or_query(const char *uri_text, size_t *i) {
112 : char c;
113 368 : while ((c = uri_text[*i]) != 0) {
114 326 : const size_t advance = parse_pchar(uri_text, *i); /* pchar */
115 326 : switch (advance) {
116 : case 0: /* uri_text[i] isn't in pchar */
117 : /* maybe it's ? or / */
118 10 : if (uri_text[*i] == '?' || uri_text[*i] == '/') {
119 3 : (*i)++;
120 3 : break;
121 : } else {
122 7 : return 1;
123 : }
124 : GPR_UNREACHABLE_CODE(return 0);
125 : default:
126 315 : (*i) += advance;
127 315 : break;
128 : case NOT_SET: /* uri_text[i] introduces an invalid URI */
129 1 : return 0;
130 : }
131 : }
132 : /* *i is the first uri_text position past the \a query production, maybe \0 */
133 17 : return 1;
134 : }
135 :
136 10000 : grpc_uri *grpc_uri_parse(const char *uri_text, int suppress_errors) {
137 : grpc_uri *uri;
138 9804 : size_t scheme_begin = 0;
139 9804 : size_t scheme_end = NOT_SET;
140 9804 : size_t authority_begin = NOT_SET;
141 9804 : size_t authority_end = NOT_SET;
142 9804 : size_t path_begin = NOT_SET;
143 9804 : size_t path_end = NOT_SET;
144 9804 : size_t query_begin = NOT_SET;
145 9804 : size_t query_end = NOT_SET;
146 9804 : size_t fragment_begin = NOT_SET;
147 9804 : size_t fragment_end = NOT_SET;
148 : size_t i;
149 :
150 69938 : for (i = scheme_begin; uri_text[i] != 0; i++) {
151 69263 : if (uri_text[i] == ':') {
152 9099 : scheme_end = i;
153 9099 : break;
154 : }
155 60164 : if (uri_text[i] >= 'a' && uri_text[i] <= 'z') continue;
156 1765 : if (uri_text[i] >= 'A' && uri_text[i] <= 'Z') continue;
157 1765 : if (i != scheme_begin) {
158 1541 : if (uri_text[i] >= '0' && uri_text[i] <= '9') continue;
159 713 : if (uri_text[i] == '+') continue;
160 712 : if (uri_text[i] == '-') continue;
161 75 : if (uri_text[i] == '.') continue;
162 : }
163 224 : break;
164 : }
165 10000 : if (scheme_end == NOT_SET) {
166 901 : return bad_uri(uri_text, i, "scheme", suppress_errors);
167 : }
168 :
169 9099 : if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') {
170 4116 : authority_begin = scheme_end + 3;
171 12444 : for (i = authority_begin; uri_text[i] != 0 && authority_end == NOT_SET;
172 4212 : i++) {
173 4212 : if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') {
174 4008 : authority_end = i;
175 : }
176 : }
177 4116 : if (authority_end == NOT_SET && uri_text[i] == 0) {
178 10 : authority_end = i;
179 : }
180 4116 : if (authority_end == NOT_SET) {
181 0 : return bad_uri(uri_text, i, "authority", suppress_errors);
182 : }
183 : /* TODO(ctiller): parse the authority correctly */
184 4018 : path_begin = authority_end;
185 : } else {
186 4915 : path_begin = scheme_end + 1;
187 : }
188 :
189 128516 : for (i = path_begin; uri_text[i] != 0; i++) {
190 119438 : if (uri_text[i] == '?' || uri_text[i] == '#') {
191 21 : path_end = i;
192 21 : break;
193 : }
194 : }
195 9099 : if (path_end == NOT_SET && uri_text[i] == 0) {
196 9078 : path_end = i;
197 : }
198 9099 : if (path_end == NOT_SET) {
199 0 : return bad_uri(uri_text, i, "path", suppress_errors);
200 : }
201 :
202 9099 : if (uri_text[i] == '?') {
203 19 : query_begin = ++i;
204 19 : if (!parse_fragment_or_query(uri_text, &i)) {
205 1 : return bad_uri(uri_text, i, "query", suppress_errors);
206 18 : } else if (uri_text[i] != 0 && uri_text[i] != '#') {
207 : /* We must be at the end or at the beginning of a fragment */
208 2 : return bad_uri(uri_text, i, "query", suppress_errors);
209 : }
210 16 : query_end = i;
211 : }
212 9096 : if (uri_text[i] == '#') {
213 6 : fragment_begin = ++i;
214 6 : if (!parse_fragment_or_query(uri_text, &i)) {
215 0 : return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors);
216 6 : } else if (uri_text[i] != 0) {
217 : /* We must be at the end */
218 1 : return bad_uri(uri_text, i, "fragment", suppress_errors);
219 : }
220 5 : fragment_end = i;
221 : }
222 :
223 9095 : uri = gpr_malloc(sizeof(*uri));
224 9095 : memset(uri, 0, sizeof(*uri));
225 9095 : uri->scheme = copy_component(uri_text, scheme_begin, scheme_end);
226 9095 : uri->authority = copy_component(uri_text, authority_begin, authority_end);
227 9095 : uri->path = copy_component(uri_text, path_begin, path_end);
228 9095 : uri->query = copy_component(uri_text, query_begin, query_end);
229 9095 : uri->fragment = copy_component(uri_text, fragment_begin, fragment_end);
230 :
231 9095 : return uri;
232 : }
233 :
234 9995 : void grpc_uri_destroy(grpc_uri *uri) {
235 19990 : if (!uri) return;
236 9095 : gpr_free(uri->scheme);
237 9095 : gpr_free(uri->authority);
238 9095 : gpr_free(uri->path);
239 9095 : gpr_free(uri->query);
240 9095 : gpr_free(uri->fragment);
241 9095 : gpr_free(uri);
242 : }
|