File: | libsofia-sip-ua/http/http_parser.c |
Warning: | line 283, column 20 Although the value stored to 'bsiz' is used in the enclosing expression, the value is never actually read from 'bsiz' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* |
2 | * This file is part of the Sofia-SIP package |
3 | * |
4 | * Copyright (C) 2005 Nokia Corporation. |
5 | * |
6 | * Contact: Pekka Pessi <pekka.pessi@nokia.com> |
7 | * |
8 | * This library is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public License |
10 | * as published by the Free Software Foundation; either version 2.1 of |
11 | * the License, or (at your option) any later version. |
12 | * |
13 | * This library is distributed in the hope that it will be useful, but |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with this library; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
21 | * 02110-1301 USA |
22 | * |
23 | */ |
24 | |
25 | /**@CFILE http_parser.c |
26 | * |
27 | * HTTP parser. |
28 | * |
29 | * @author Pekka Pessi <Pekka.Pessi@nokia.com> |
30 | * |
31 | * @date Created: Thu Oct 5 14:01:24 2000 ppessi |
32 | */ |
33 | |
34 | #include "config.h" |
35 | |
/* Avoid casting http_t to msg_pub_t and http_header_t to msg_header_t.
 *
 * Keep these two definitions ahead of the #include lines that follow
 * (presumably the generic msg headers pick them up - confirm).
 */
#define MSG_PUB_T       struct http_s
#define MSG_HDR_T       union http_header_u
39 | |
40 | #include <sofia-sip/su_alloc.h> |
41 | #include <sofia-sip/su_string.h> |
42 | #include "sofia-sip/http_parser.h" |
43 | #include <sofia-sip/msg_parser.h> |
44 | #include <sofia-sip/http_header.h> |
45 | #include <sofia-sip/http_status.h> |
46 | #include <sofia-sip/msg_mclass.h> |
47 | |
48 | #include <sofia-sip/su_tagarg.h> |
49 | |
50 | #include <stddef.h> |
51 | #include <stdlib.h> |
52 | #include <stdio.h> |
53 | #include <assert.h> |
54 | #include <limits.h> |
55 | #include <stdarg.h> |
56 | |
/* Canonical version strings. Other code in this file (http_version_xtra(),
 * http_version_dup()) recognises them by address, not by content.
 */

/** HTTP version 1.1. */
char const http_version_1_1[] = "HTTP/1.1";
/** HTTP version 1.0. */
char const http_version_1_0[] = "HTTP/1.0";
/** HTTP version 0.9 is an empty string. */
char const http_version_0_9[] = "";
63 | |
64 | msg_mclass_t const *http_default_mclass(void) |
65 | { |
66 | extern msg_mclass_t const http_mclass[]; |
67 | |
68 | return http_mclass; |
69 | } |
70 | |
71 | static |
72 | issize_t http_extract_chunk(msg_t *, http_t *, char b[], isize_t bsiz, int eos); |
73 | |
/** Calculate length of line ending at @a s (0, 1 or 2).
 *
 * Yields 2 for CR LF, 1 for a CR not followed by LF or for a lone LF,
 * and 0 when @a s does not start with a line ending.
 *
 * @note @a s is evaluated more than once, and s[1] is read whenever
 * s[0] is CR, so the caller must make sure that byte is readable.
 */
#define CRLF_TEST(s) \
  (((s)[0]) == '\r' ? (((s)[1]) == '\n') + 1 : ((s)[0])=='\n')
77 | |
78 | /** Extract the HTTP message body, including separator line. |
79 | * |
80 | * @retval -1 error |
81 | * @retval 0 cannot proceed |
82 | * @retval other number of bytes extracted |
83 | */ |
84 | issize_t http_extract_body(msg_t *msg, http_t *http, char b[], isize_t bsiz, int eos) |
85 | { |
86 | issize_t m = 0; |
87 | size_t body_len; |
88 | |
89 | int flags = http->http_flags; |
90 | |
91 | if (eos && bsiz == 0) { |
92 | msg_mark_as_complete(msg, MSG_FLG_COMPLETE); |
93 | return 0; |
94 | } |
95 | |
96 | if (flags & MSG_FLG_TRAILERS) { |
97 | /* The empty line after trailers */ |
98 | if (!eos && (bsiz == 0 || (bsiz == 1 && b[0] == '\r'))) |
99 | return 0; |
100 | |
101 | m = CRLF_TEST(b)(((b)[0]) == '\r' ? (((b)[1]) == '\n') + 1 : ((b)[0])=='\n'); |
102 | |
103 | assert(m > 0 || eos)((void) sizeof ((m > 0 || eos) ? 1 : 0), __extension__ ({ if (m > 0 || eos) ; else __assert_fail ("m > 0 || eos", "http_parser.c" , 103, __extension__ __PRETTY_FUNCTION__); })); /* We should be looking at an empty line */ |
104 | |
105 | /* We have completed trailers */ |
106 | msg_mark_as_complete(msg, MSG_FLG_COMPLETE); |
107 | |
108 | return m; |
109 | } |
110 | |
111 | if (flags & MSG_FLG_CHUNKS) |
112 | return http_extract_chunk(msg, http, b, bsiz, eos); |
113 | |
114 | if (!(flags & MSG_FLG_BODY)) { |
115 | /* We are looking at a potential empty line */ |
116 | m = msg_extract_separator(msg, http, b, bsiz, eos); |
117 | |
118 | if (m == 0) /* Not yet */ |
119 | return 0; |
120 | |
121 | http->http_flags |= MSG_FLG_BODY; |
122 | b += m, bsiz -= m; |
123 | } |
124 | |
125 | /* body_len is determined by rules in RFC2616 sections 4.3 and 4.4 */ |
126 | |
127 | /* 1XX, 204, 304 do not have message-body, ever */ |
128 | if (http->http_status) { |
129 | int status = http->http_status->st_status; |
130 | |
131 | if (status < 200 || status == 204 || status == 304) |
132 | flags |= HTTP_FLG_NO_BODY; |
133 | } |
134 | |
135 | if (flags & HTTP_FLG_NO_BODY) { |
136 | msg_mark_as_complete(msg, MSG_FLG_COMPLETE); |
137 | return m; |
138 | } |
139 | |
140 | if (http->http_transfer_encoding) { |
141 | if (/* NOTE - there is really no Transfer-Encoding: identity in RFC 2616 |
142 | * but it was used in drafts... |
143 | */ |
144 | http->http_transfer_encoding->k_items && |
145 | http->http_transfer_encoding->k_items[0] && |
146 | !su_casematch(http->http_transfer_encoding->k_items[0], "identity")) { |
147 | http->http_flags |= MSG_FLG_CHUNKS; |
148 | |
149 | if (http->http_flags & MSG_FLG_STREAMING) |
150 | msg_set_streaming(msg, msg_start_streaming); |
151 | |
152 | if (m) |
153 | return m; |
154 | |
155 | return http_extract_chunk(msg, http, b, bsiz, eos); |
156 | } |
157 | } |
158 | |
159 | |
160 | if (http->http_content_length) |
161 | body_len = http->http_content_length->l_length; |
162 | /* We cannot parse multipart/byteranges ... */ |
163 | else if (http->http_content_type && http->http_content_type->c_type && |
164 | su_casematch(http->http_content_type->c_type, "multipart/byteranges")) |
165 | return -1; |
166 | else if (MSG_IS_MAILBOX(flags)((((flags)) & (MSG_FLG_MAILBOX)) == MSG_FLG_MAILBOX)) /* message fragments */ |
167 | body_len = 0; |
168 | else if (http->http_request) |
169 | body_len = 0; |
170 | else if (eos) |
171 | body_len = bsiz; |
172 | else |
173 | return 0; /* XXX */ |
174 | |
175 | if (body_len == 0) { |
176 | msg_mark_as_complete(msg, MSG_FLG_COMPLETE); |
177 | return m; |
178 | } |
179 | |
180 | if (http->http_flags & MSG_FLG_STREAMING) |
181 | msg_set_streaming(msg, msg_start_streaming); |
182 | |
183 | if (m) |
184 | return m; |
185 | |
186 | m = msg_extract_payload(msg, http, NULL((void*)0), body_len, b, bsiz, eos); |
187 | if (m == -1) |
188 | return -1; |
189 | |
190 | /* We have now all message fragments in place */ |
191 | http->http_flags |= MSG_FLG_FRAGS; |
192 | if (bsiz >= body_len) { |
193 | msg_mark_as_complete(msg, MSG_FLG_COMPLETE); |
194 | } |
195 | |
196 | return m; |
197 | } |
198 | |
199 | /** Extract a chunk. |
200 | * |
201 | * @retval -1 error |
202 | * @retval 0 cannot proceed |
203 | * @retval other number of bytes extracted |
204 | */ |
205 | issize_t http_extract_chunk(msg_t *msg, http_t *http, char b[], isize_t bsiz, int eos) |
206 | { |
207 | size_t n; |
208 | unsigned crlf, chunk_len; |
209 | char *b0 = b, *s; |
210 | union { |
211 | msg_header_t *header; |
212 | msg_payload_t *chunk; |
213 | } h = { NULL((void*)0) }; |
214 | size_t bsiz0 = bsiz; |
215 | |
216 | if (bsiz == 0) |
217 | return 0; |
218 | |
219 | /* We should be looking at an empty line followed by the chunk header */ |
220 | while ((crlf = CRLF_TEST(b)(((b)[0]) == '\r' ? (((b)[1]) == '\n') + 1 : ((b)[0])=='\n'))) { |
221 | if (bsiz == 1 && crlf == 1 && b[0] == '\r' && !eos) |
222 | return 0; |
223 | |
224 | if (crlf == bsiz) { |
225 | if (eos) { |
226 | msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS); |
227 | return (b - b0) + crlf; |
228 | } |
229 | else |
230 | return 0; |
231 | } |
232 | assert(crlf < bsiz)((void) sizeof ((crlf < bsiz) ? 1 : 0), __extension__ ({ if (crlf < bsiz) ; else __assert_fail ("crlf < bsiz", "http_parser.c" , 232, __extension__ __PRETTY_FUNCTION__); })); |
233 | |
234 | /* Skip crlf */ |
235 | b += crlf; bsiz -= crlf; |
236 | } |
237 | |
238 | /* Now, looking at the chunk header */ |
239 | n = strcspn(b, CRLF"\r" "\n"); |
240 | if (!eos && n == bsiz) |
241 | return 0; |
242 | crlf = CRLF_TEST(b + n)(((b + n)[0]) == '\r' ? (((b + n)[1]) == '\n') + 1 : ((b + n) [0])=='\n'); |
243 | |
244 | if (n == 0) { |
245 | if (crlf == bsiz && eos) { |
246 | msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS); |
247 | return crlf; |
248 | } |
249 | else |
250 | return -1; /* XXX - should we be more liberal? */ |
251 | } |
252 | |
253 | if (!eos && n + crlf == bsiz && (crlf == 0 || (crlf == 1 && b[n] == '\r'))) |
254 | return 0; |
255 | |
256 | chunk_len = strtoul(b, &s, 16); |
257 | if (s == b) |
258 | return -1; |
259 | skip_ws(&s)(*(&s) += strspn(*(&s), " " "\t")); |
260 | if (s != b + n && s[0] != ';') /* Extra stuff that is not parameter */ |
261 | return -1; |
262 | |
263 | if (chunk_len == 0) { /* We found last-chunk */ |
264 | b += n + crlf, bsiz -= n + crlf; |
265 | |
266 | crlf = bsiz > 0 ? CRLF_TEST(b)(((b)[0]) == '\r' ? (((b)[1]) == '\n') + 1 : ((b)[0])=='\n') : 0; |
267 | |
268 | if ((eos && bsiz == 0) || crlf == 2 || |
269 | (crlf == 1 && (bsiz > 1 || b[0] == '\n'))) { |
270 | /* Shortcut - We got empty trailers */ |
271 | b += crlf; |
272 | msg_mark_as_complete(msg, MSG_FLG_COMPLETE | MSG_FLG_FRAGS); |
273 | } else { |
274 | /* We have to parse trailers */ |
275 | http->http_flags |= MSG_FLG_TRAILERS; |
276 | } |
277 | |
278 | return b - b0; |
279 | } |
280 | else { |
281 | issize_t chunk; |
282 | |
283 | b += n + crlf, bsiz -= n + crlf; |
Although the value stored to 'bsiz' is used in the enclosing expression, the value is never actually read from 'bsiz' | |
284 | |
285 | /* Extract chunk */ |
286 | chunk = msg_extract_payload(msg, http, |
287 | &h.header, chunk_len + (b - b0), |
288 | b0, bsiz0, eos); |
289 | |
290 | if (chunk != -1 && h.header) { |
291 | assert(h.chunk->pl_data)((void) sizeof ((h.chunk->pl_data) ? 1 : 0), __extension__ ({ if (h.chunk->pl_data) ; else __assert_fail ("h.chunk->pl_data" , "http_parser.c", 291, __extension__ __PRETTY_FUNCTION__); } )); |
292 | h.chunk->pl_data += (b - b0); |
293 | h.chunk->pl_len -= (b - b0); |
294 | } |
295 | |
296 | return chunk; |
297 | } |
298 | } |
299 | |
300 | /** Parse HTTP version. |
301 | * |
302 | * The function http_version_d() parses a HTTP method. |
303 | * |
304 | * @retval 0 when successful, |
305 | * @retval -1 upon an error. |
306 | */ |
307 | int http_version_d(char **ss, char const **ver) |
308 | { |
309 | char *s = *ss; |
310 | char const *result; |
311 | int const version_size = sizeof(http_version_1_1) - 1; |
312 | |
313 | if (su_casenmatch(s, http_version_1_1, version_size) && |
314 | !IS_TOKEN(s[version_size])((_bnf_table[(unsigned char)s[version_size]] & bnf_token) )) { |
315 | result = http_version_1_1; |
316 | s += version_size; |
317 | } |
318 | else if (su_casenmatch(s, http_version_1_0, version_size) && |
319 | !IS_TOKEN(s[version_size])((_bnf_table[(unsigned char)s[version_size]] & bnf_token) )) { |
320 | result = http_version_1_0; |
321 | s += version_size; |
322 | } |
323 | else if (s[0] == '\0') { |
324 | result = http_version_0_9; |
325 | } else { |
326 | /* Version consists of one or two tokens, separated by / */ |
327 | size_t l1 = 0, l2 = 0, n; |
328 | |
329 | result = s; |
330 | |
331 | l1 = span_token(s); |
332 | for (n = l1; IS_LWS(s[n])((s[n]) == ' ' || (s[n]) == '\t' || (s[n]) == '\r' || (s[n]) == '\n'); n++) |
333 | s[n] = '\0'; |
334 | if (s[n] == '/') { |
335 | for (n = n + 1; IS_LWS(s[n])((s[n]) == ' ' || (s[n]) == '\t' || (s[n]) == '\r' || (s[n]) == '\n'); n++) |
336 | {} |
337 | l2 = span_token(s + n); |
338 | n += l2; |
339 | } |
340 | |
341 | if (l1 == 0) |
342 | return -1; |
343 | |
344 | /* If there is extra ws between tokens, compact version */ |
345 | if (l2 > 0 && n > l1 + 1 + l2) { |
346 | s[l1] = '/'; |
347 | memmove(s + l1 + 1, s + n - l2, l2); |
348 | s[l1 + 1 + l2] = 0; |
349 | |
350 | /* Compare again with compacted version */ |
351 | if (su_casematch(s, http_version_1_1)) |
352 | result = http_version_1_1; |
353 | else if (su_casematch(s, http_version_1_0)) |
354 | result = http_version_1_0; |
355 | } |
356 | |
357 | s += n; |
358 | } |
359 | |
360 | while (IS_LWS(*s)((*s) == ' ' || (*s) == '\t' || (*s) == '\r' || (*s) == '\n')) *s++ = '\0'; |
361 | |
362 | *ss = s; |
363 | |
364 | if (ver) |
365 | *ver = result; |
366 | |
367 | return 0; |
368 | } |
369 | |
370 | /** Calculate extra space required by version string */ |
371 | isize_t http_version_xtra(char const *version) |
372 | { |
373 | if (version == http_version_1_1) |
374 | return 0; |
375 | else if (version == http_version_1_0) |
376 | return 0; |
377 | else |
378 | return MSG_STRING_SIZE(version)((version) ? (strlen(version) + 1) : 0); |
379 | } |
380 | |
381 | /** Duplicate a transport string */ |
382 | void http_version_dup(char **pp, char const **dd, char const *s) |
383 | { |
384 | if (s == http_version_1_1) |
385 | *dd = s; |
386 | else if (s == http_version_1_0) |
387 | *dd = s; |
388 | else |
389 | MSG_STRING_DUP(*pp, *dd, s)(void)((s)?((*pp)=(char*)memccpy((void *)((*dd)=(char*)*pp),( s),0,2147483647)) :((*dd)=((void*)0))); |
390 | } |
391 | |
392 | /** Well-known HTTP method names. */ |
393 | static char const * const methods[] = { |
394 | "<UNKNOWN>", |
395 | http_method_name_get, |
396 | http_method_name_post, |
397 | http_method_name_head, |
398 | http_method_name_options, |
399 | http_method_name_put, |
400 | http_method_name_delete, |
401 | http_method_name_trace, |
402 | http_method_name_connect, |
403 | NULL((void*)0), |
404 | /* If you add something here, add also them to http_method_d! */ |
405 | }; |
406 | |
/* Names of the well-known HTTP methods. */
char const http_method_name_get[] = "GET";
char const http_method_name_post[] = "POST";
char const http_method_name_head[] = "HEAD";
char const http_method_name_options[] = "OPTIONS";
char const http_method_name_put[] = "PUT";
char const http_method_name_delete[] = "DELETE";
char const http_method_name_trace[] = "TRACE";
char const http_method_name_connect[] = "CONNECT";
415 | |
416 | char const *http_method_name(http_method_t method, char const *name) |
417 | { |
418 | if (method > 0 && (size_t)method < sizeof(methods)/sizeof(methods[0])) |
419 | return methods[method]; |
420 | else if (method == 0) |
421 | return name; |
422 | else |
423 | return NULL((void*)0); |
424 | } |
425 | |
426 | /**Parse a HTTP method name. |
427 | * |
428 | * The function @c http_method_d() parses a HTTP method, and returns a code |
429 | * corresponding to the method. It stores the address of the first non-LWS |
430 | * character after method name in @c *ss. |
431 | * |
432 | * @param ss pointer to pointer to string to be parsed |
433 | * @param nname pointer to value-result parameter formethod name |
434 | * |
435 | * @note |
436 | * If there is no whitespace after method name, the value in @a *nname |
437 | * may not be NUL-terminated. The calling function @b must NUL terminate |
438 | * the value by setting the @a **ss to NUL after first examining its value. |
439 | * |
440 | * @return The function @c http_method_d returns the method code if method |
441 | * was identified, 0 (@c http_method_unknown) if method is not known, or @c -1 |
442 | * (@c http_method_invalid) if an error occurred. |
443 | * |
444 | * If the value-result argument @a nname is not @c NULL, http_method_d() |
445 | * stores a pointer to the method name to it. |
446 | */ |
447 | http_method_t http_method_d(char **ss, char const **nname) |
448 | { |
449 | char *s = *ss, c = *s; |
450 | char const *name; |
451 | int code = http_method_unknown; |
452 | size_t n = 0; |
453 | |
454 | #define MATCH(s, m) (su_casenmatch(s, m, n = sizeof(m) - 1)) |
455 | |
456 | if (c >= 'a' && c <= 'z') |
457 | c += 'A' - 'a'; |
458 | |
459 | switch (c) { |
460 | case 'C': if (MATCH(s, "CONNECT")) code = http_method_connect; break; |
461 | case 'D': if (MATCH(s, "DELETE")) code = http_method_delete; break; |
462 | case 'G': if (MATCH(s, "GET")) code = http_method_get; break; |
463 | case 'H': if (MATCH(s, "HEAD")) code = http_method_head; break; |
464 | case 'O': if (MATCH(s, "OPTIONS")) code = http_method_options; break; |
465 | case 'P': if (MATCH(s, "POST")) code = http_method_post; |
466 | else |
467 | if (MATCH(s, "PUT")) code = http_method_put; |
468 | break; |
469 | case 'T': if (MATCH(s, "TRACE")) code = http_method_trace; break; |
470 | } |
471 | |
472 | #undef MATCH |
473 | |
474 | if (!code || IS_NON_WS(s[n])(s[n] && !((s[n]) == ' ' || (s[n]) == '\t'))) { |
475 | /* Unknown method */ |
476 | code = http_method_unknown; |
477 | name = s; |
478 | for (n = 0; IS_UNRESERVED(s[n])((_bnf_table[(unsigned char)s[n]] & bnf_unreserved)); n++) |
479 | ; |
480 | if (s[n]) { |
481 | if (!IS_LWS(s[n])((s[n]) == ' ' || (s[n]) == '\t' || (s[n]) == '\r' || (s[n]) == '\n')) |
482 | return http_method_invalid; |
483 | if (nname) |
484 | s[n++] = '\0'; |
485 | } |
486 | } |
487 | else { |
488 | name = methods[code]; |
489 | } |
490 | |
491 | while (IS_LWS(s[n])((s[n]) == ' ' || (s[n]) == '\t' || (s[n]) == '\r' || (s[n]) == '\n')) |
492 | n++; |
493 | |
494 | *ss = (s + n); |
495 | if (nname) *nname = name; |
496 | |
497 | return (http_method_t)code; |
498 | } |
499 | |
500 | /** Get method enum corresponding to method name */ |
501 | http_method_t http_method_code(char const *name) |
502 | { |
503 | /* Note that http_method_d() does not change string if nname is NULL */ |
504 | return http_method_d((char **)&name, NULL((void*)0)); |
505 | } |
506 | |
507 | /**Parse HTTP query string. |
508 | * |
509 | * The function http_query_parse() searches for the given keys in HTTP @a |
510 | * query. For each key, a query element (in the form name=value) is searched |
511 | * from the query string. If a query element has a beginning matching with |
512 | * the key, a copy of the rest of the element is returned in corresponding |
513 | * return_value argument. |
514 | * |
515 | * @note The @a query string will be modified. |
516 | * |
517 | * @return |
518 | * The function http_query_parse() returns number keys that matched within |
519 | * the @a query string. |
520 | */ |
521 | issize_t http_query_parse(char *query, |
522 | /* char const *key, char **return_value, */ |
523 | ...) |
524 | { |
525 | va_list ap; |
526 | char *q, *q_next; |
527 | char *name, *value, **return_value; |
528 | char const *key; |
529 | size_t namelen, valuelen, keylen; |
530 | isize_t N; |
531 | int has_value; |
532 | |
533 | if (!query) |
534 | return -1; |
535 | |
536 | for (q = query, N = 0; *q; q = q_next) { |
537 | namelen = strcspn(q, "=&"); |
538 | valuelen = namelen + strcspn(q + namelen, "&"); |
539 | |
540 | q_next = q + valuelen; |
541 | if (*q_next) |
542 | *q_next++ = '\0'; |
543 | |
544 | value = q + namelen; |
545 | has_value = (*value) != '\0'; /* is the part in form of name=value? */ |
546 | if (has_value) |
547 | *value++ = '\0'; |
548 | |
549 | name = url_unescape(q, q); |
550 | |
551 | if (has_value) { |
552 | namelen = strlen(name); |
553 | name[namelen] = '='; |
554 | url_unescape(name + namelen + 1, value); |
555 | } |
556 | |
557 | va_start(ap, query)__builtin_va_start(ap, query); |
558 | |
559 | while ((key = va_arg(ap, char const *)__builtin_va_arg(ap, char const *))) { |
560 | return_value = va_arg(ap, char **)__builtin_va_arg(ap, char **); |
561 | keylen = strlen(key); |
562 | |
563 | if (strncmp(key, name, keylen) == 0) { |
564 | *return_value = name + keylen; |
565 | N++; |
566 | } |
567 | } |
568 | |
569 | va_end(ap)__builtin_va_end(ap); |
570 | } |
571 | |
572 | return N; |
573 | } |