Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/curl/lib/urlapi.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*************************************************************************** | |
| 2 * _ _ ____ _ | |
| 3 * Project ___| | | | _ \| | | |
| 4 * / __| | | | |_) | | | |
| 5 * | (__| |_| | _ <| |___ | |
| 6 * \___|\___/|_| \_\_____| | |
| 7 * | |
| 8 * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al. | |
| 9 * | |
| 10 * This software is licensed as described in the file COPYING, which | |
| 11 * you should have received as part of this distribution. The terms | |
| 12 * are also available at https://curl.haxx.se/docs/copyright.html. | |
| 13 * | |
| 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell | |
| 15 * copies of the Software, and permit persons to whom the Software is | |
| 16 * furnished to do so, under the terms of the COPYING file. | |
| 17 * | |
| 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | |
| 19 * KIND, either express or implied. | |
| 20 * | |
| 21 ***************************************************************************/ | |
| 22 | |
| 23 #include "curl_setup.h" | |
| 24 | |
| 25 #include "urldata.h" | |
| 26 #include "urlapi-int.h" | |
| 27 #include "strcase.h" | |
| 28 #include "dotdot.h" | |
| 29 #include "url.h" | |
| 30 #include "escape.h" | |
| 31 #include "curl_ctype.h" | |
| 32 #include "inet_pton.h" | |
| 33 | |
| 34 /* The last 3 #include files should be in this order */ | |
| 35 #include "curl_printf.h" | |
| 36 #include "curl_memory.h" | |
| 37 #include "memdebug.h" | |
| 38 | |
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when the first character of 'str' is an ASCII letter
 * and the second one is a colon. Every use of the argument is parenthesized
 * so that the macro also works when handed an expression such as 'ptr + 1'.
 * The argument is evaluated more than once, so it must not have side
 * effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
| 44 | |
/* Drive prefix the way it may show up inside a URL: one ASCII letter, then
 * either ':' or '|', then a forward slash, a backslash or the end of the
 * string. The argument is evaluated several times. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') || \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
   ((str)[1] == '|' || (str)[1] == ':') && \
   ((str)[2] == 0 || (str)[2] == '\\' || (str)[2] == '/'))
| 52 | |
/* Internal representation of CURLU. The string members point to URL-encoded
   strings that are released with free() when the handle is torn down. */
struct Curl_URL {
  /* the individual URL components */
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;

  /* bookkeeping */
  char *scratch; /* temporary scratch area */
  long portnum; /* the numerical version */
};
| 69 | |
| 70 #define DEFAULT_SCHEME "https" | |
| 71 | |
| 72 static void free_urlhandle(struct Curl_URL *u) | |
| 73 { | |
| 74 free(u->scheme); | |
| 75 free(u->user); | |
| 76 free(u->password); | |
| 77 free(u->options); | |
| 78 free(u->host); | |
| 79 free(u->zoneid); | |
| 80 free(u->port); | |
| 81 free(u->path); | |
| 82 free(u->query); | |
| 83 free(u->fragment); | |
| 84 free(u->scratch); | |
| 85 } | |
| 86 | |
| 87 /* move the full contents of one handle onto another and | |
| 88 free the original */ | |
| 89 static void mv_urlhandle(struct Curl_URL *from, | |
| 90 struct Curl_URL *to) | |
| 91 { | |
| 92 free_urlhandle(to); | |
| 93 *to = *from; | |
| 94 free(from); | |
| 95 } | |
| 96 | |
/*
 * Return a pointer to whatever ends the host name part of 'url': the first
 * '/' or the first '?' found after the "//" (or after the start of the
 * string when there is no "//"), whichever comes first. This covers URLs
 * such as http://www.url.com?id=2380. When neither is present the returned
 * pointer addresses the terminating zero byte.
 */
static const char *find_host_sep(const char *url)
{
  const char *host;
  const char *slash;
  const char *qmark;
  const char *end = url + strlen(url);

  /* the host name starts right after "//" when that is present */
  host = strstr(url, "//");
  host = host ? host + 2 : url;

  qmark = strchr(host, '?');
  slash = strchr(host, '/');

  /* a missing separator counts as sitting at the end of the string */
  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  return (slash < qmark) ? slash : qmark;
}
| 124 | |
| 125 /* | |
| 126 * Decide in an encoding-independent manner whether a character in an | |
| 127 * URL must be escaped. The same criterion must be used in strlen_url() | |
| 128 * and strcpy_url(). | |
| 129 */ | |
| 130 static bool urlchar_needs_escaping(int c) | |
| 131 { | |
| 132 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); | |
| 133 } | |
| 134 | |
| 135 /* | |
| 136 * strlen_url() returns the length of the given URL if the spaces within the | |
| 137 * URL were properly URL encoded. | |
| 138 * URL encoding should be skipped for host names, otherwise IDN resolution | |
| 139 * will fail. | |
| 140 */ | |
| 141 static size_t strlen_url(const char *url, bool relative) | |
| 142 { | |
| 143 const unsigned char *ptr; | |
| 144 size_t newlen = 0; | |
| 145 bool left = TRUE; /* left side of the ? */ | |
| 146 const unsigned char *host_sep = (const unsigned char *) url; | |
| 147 | |
| 148 if(!relative) | |
| 149 host_sep = (const unsigned char *) find_host_sep(url); | |
| 150 | |
| 151 for(ptr = (unsigned char *)url; *ptr; ptr++) { | |
| 152 | |
| 153 if(ptr < host_sep) { | |
| 154 ++newlen; | |
| 155 continue; | |
| 156 } | |
| 157 | |
| 158 switch(*ptr) { | |
| 159 case '?': | |
| 160 left = FALSE; | |
| 161 /* FALLTHROUGH */ | |
| 162 default: | |
| 163 if(urlchar_needs_escaping(*ptr)) | |
| 164 newlen += 2; | |
| 165 newlen++; | |
| 166 break; | |
| 167 case ' ': | |
| 168 if(left) | |
| 169 newlen += 3; | |
| 170 else | |
| 171 newlen++; | |
| 172 break; | |
| 173 } | |
| 174 } | |
| 175 return newlen; | |
| 176 } | |
| 177 | |
| 178 /* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in | |
| 179 * the source URL accordingly. | |
| 180 * URL encoding should be skipped for host names, otherwise IDN resolution | |
| 181 * will fail. | |
| 182 */ | |
| 183 static void strcpy_url(char *output, const char *url, bool relative) | |
| 184 { | |
| 185 /* we must add this with whitespace-replacing */ | |
| 186 bool left = TRUE; | |
| 187 const unsigned char *iptr; | |
| 188 char *optr = output; | |
| 189 const unsigned char *host_sep = (const unsigned char *) url; | |
| 190 | |
| 191 if(!relative) | |
| 192 host_sep = (const unsigned char *) find_host_sep(url); | |
| 193 | |
| 194 for(iptr = (unsigned char *)url; /* read from here */ | |
| 195 *iptr; /* until zero byte */ | |
| 196 iptr++) { | |
| 197 | |
| 198 if(iptr < host_sep) { | |
| 199 *optr++ = *iptr; | |
| 200 continue; | |
| 201 } | |
| 202 | |
| 203 switch(*iptr) { | |
| 204 case '?': | |
| 205 left = FALSE; | |
| 206 /* FALLTHROUGH */ | |
| 207 default: | |
| 208 if(urlchar_needs_escaping(*iptr)) { | |
| 209 msnprintf(optr, 4, "%%%02x", *iptr); | |
| 210 optr += 3; | |
| 211 } | |
| 212 else | |
| 213 *optr++=*iptr; | |
| 214 break; | |
| 215 case ' ': | |
| 216 if(left) { | |
| 217 *optr++='%'; /* add a '%' */ | |
| 218 *optr++='2'; /* add a '2' */ | |
| 219 *optr++='0'; /* add a '0' */ | |
| 220 } | |
| 221 else | |
| 222 *optr++='+'; /* add a '+' here */ | |
| 223 break; | |
| 224 } | |
| 225 } | |
| 226 *optr = 0; /* zero terminate output buffer */ | |
| 227 | |
| 228 } | |
| 229 | |
| 230 /* | |
| 231 * Returns true if the given URL is absolute (as opposed to relative) within | |
| 232 * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is | |
| 233 * non-NULL. | |
| 234 */ | |
| 235 bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen) | |
| 236 { | |
| 237 size_t i; | |
| 238 #ifdef WIN32 | |
| 239 if(STARTS_WITH_DRIVE_PREFIX(url)) | |
| 240 return FALSE; | |
| 241 #endif | |
| 242 for(i = 0; i < buflen && url[i]; ++i) { | |
| 243 char s = url[i]; | |
| 244 if((s == ':') && (url[i + 1] == '/')) { | |
| 245 if(buf) | |
| 246 buf[i] = 0; | |
| 247 return TRUE; | |
| 248 } | |
| 249 /* RFC 3986 3.1 explains: | |
| 250 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) | |
| 251 */ | |
| 252 else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) { | |
| 253 if(buf) | |
| 254 buf[i] = (char)TOLOWER(s); | |
| 255 } | |
| 256 else | |
| 257 break; | |
| 258 } | |
| 259 return FALSE; | |
| 260 } | |
| 261 | |
| 262 /* | |
| 263 * Concatenate a relative URL to a base URL making it absolute. | |
| 264 * URL-encodes any spaces. | |
| 265 * The returned pointer must be freed by the caller unless NULL | |
| 266 * (returns NULL on out of memory). | |
| 267 */ | |
| 268 static char *concat_url(const char *base, const char *relurl) | |
| 269 { | |
| 270 /*** | |
| 271 TRY to append this new path to the old URL | |
| 272 to the right of the host part. Oh crap, this is doomed to cause | |
| 273 problems in the future... | |
| 274 */ | |
| 275 char *newest; | |
| 276 char *protsep; | |
| 277 char *pathsep; | |
| 278 size_t newlen; | |
| 279 bool host_changed = FALSE; | |
| 280 | |
| 281 const char *useurl = relurl; | |
| 282 size_t urllen; | |
| 283 | |
| 284 /* we must make our own copy of the URL to play with, as it may | |
| 285 point to read-only data */ | |
| 286 char *url_clone = strdup(base); | |
| 287 | |
| 288 if(!url_clone) | |
| 289 return NULL; /* skip out of this NOW */ | |
| 290 | |
| 291 /* protsep points to the start of the host name */ | |
| 292 protsep = strstr(url_clone, "//"); | |
| 293 if(!protsep) | |
| 294 protsep = url_clone; | |
| 295 else | |
| 296 protsep += 2; /* pass the slashes */ | |
| 297 | |
| 298 if('/' != relurl[0]) { | |
| 299 int level = 0; | |
| 300 | |
| 301 /* First we need to find out if there's a ?-letter in the URL, | |
| 302 and cut it and the right-side of that off */ | |
| 303 pathsep = strchr(protsep, '?'); | |
| 304 if(pathsep) | |
| 305 *pathsep = 0; | |
| 306 | |
| 307 /* we have a relative path to append to the last slash if there's one | |
| 308 available, or if the new URL is just a query string (starts with a | |
| 309 '?') we append the new one at the end of the entire currently worked | |
| 310 out URL */ | |
| 311 if(useurl[0] != '?') { | |
| 312 pathsep = strrchr(protsep, '/'); | |
| 313 if(pathsep) | |
| 314 *pathsep = 0; | |
| 315 } | |
| 316 | |
| 317 /* Check if there's any slash after the host name, and if so, remember | |
| 318 that position instead */ | |
| 319 pathsep = strchr(protsep, '/'); | |
| 320 if(pathsep) | |
| 321 protsep = pathsep + 1; | |
| 322 else | |
| 323 protsep = NULL; | |
| 324 | |
| 325 /* now deal with one "./" or any amount of "../" in the newurl | |
| 326 and act accordingly */ | |
| 327 | |
| 328 if((useurl[0] == '.') && (useurl[1] == '/')) | |
| 329 useurl += 2; /* just skip the "./" */ | |
| 330 | |
| 331 while((useurl[0] == '.') && | |
| 332 (useurl[1] == '.') && | |
| 333 (useurl[2] == '/')) { | |
| 334 level++; | |
| 335 useurl += 3; /* pass the "../" */ | |
| 336 } | |
| 337 | |
| 338 if(protsep) { | |
| 339 while(level--) { | |
| 340 /* cut off one more level from the right of the original URL */ | |
| 341 pathsep = strrchr(protsep, '/'); | |
| 342 if(pathsep) | |
| 343 *pathsep = 0; | |
| 344 else { | |
| 345 *protsep = 0; | |
| 346 break; | |
| 347 } | |
| 348 } | |
| 349 } | |
| 350 } | |
| 351 else { | |
| 352 /* We got a new absolute path for this server */ | |
| 353 | |
| 354 if((relurl[0] == '/') && (relurl[1] == '/')) { | |
| 355 /* the new URL starts with //, just keep the protocol part from the | |
| 356 original one */ | |
| 357 *protsep = 0; | |
| 358 useurl = &relurl[2]; /* we keep the slashes from the original, so we | |
| 359 skip the new ones */ | |
| 360 host_changed = TRUE; | |
| 361 } | |
| 362 else { | |
| 363 /* cut off the original URL from the first slash, or deal with URLs | |
| 364 without slash */ | |
| 365 pathsep = strchr(protsep, '/'); | |
| 366 if(pathsep) { | |
| 367 /* When people use badly formatted URLs, such as | |
| 368 "http://www.url.com?dir=/home/daniel" we must not use the first | |
| 369 slash, if there's a ?-letter before it! */ | |
| 370 char *sep = strchr(protsep, '?'); | |
| 371 if(sep && (sep < pathsep)) | |
| 372 pathsep = sep; | |
| 373 *pathsep = 0; | |
| 374 } | |
| 375 else { | |
| 376 /* There was no slash. Now, since we might be operating on a badly | |
| 377 formatted URL, such as "http://www.url.com?id=2380" which doesn't | |
| 378 use a slash separator as it is supposed to, we need to check for a | |
| 379 ?-letter as well! */ | |
| 380 pathsep = strchr(protsep, '?'); | |
| 381 if(pathsep) | |
| 382 *pathsep = 0; | |
| 383 } | |
| 384 } | |
| 385 } | |
| 386 | |
| 387 /* If the new part contains a space, this is a mighty stupid redirect | |
| 388 but we still make an effort to do "right". To the left of a '?' | |
| 389 letter we replace each space with %20 while it is replaced with '+' | |
| 390 on the right side of the '?' letter. | |
| 391 */ | |
| 392 newlen = strlen_url(useurl, !host_changed); | |
| 393 | |
| 394 urllen = strlen(url_clone); | |
| 395 | |
| 396 newest = malloc(urllen + 1 + /* possible slash */ | |
| 397 newlen + 1 /* zero byte */); | |
| 398 | |
| 399 if(!newest) { | |
| 400 free(url_clone); /* don't leak this */ | |
| 401 return NULL; | |
| 402 } | |
| 403 | |
| 404 /* copy over the root url part */ | |
| 405 memcpy(newest, url_clone, urllen); | |
| 406 | |
| 407 /* check if we need to append a slash */ | |
| 408 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0])) | |
| 409 ; | |
| 410 else | |
| 411 newest[urllen++]='/'; | |
| 412 | |
| 413 /* then append the new piece on the right side */ | |
| 414 strcpy_url(&newest[urllen], useurl, !host_changed); | |
| 415 | |
| 416 free(url_clone); | |
| 417 | |
| 418 return newest; | |
| 419 } | |
| 420 | |
| 421 /* | |
| 422 * parse_hostname_login() | |
| 423 * | |
| 424 * Parse the login details (user name, password and options) from the URL and | |
| 425 * strip them out of the host name | |
| 426 * | |
| 427 */ | |
| 428 static CURLUcode parse_hostname_login(struct Curl_URL *u, | |
| 429 const struct Curl_handler *h, | |
| 430 char **hostname, | |
| 431 unsigned int flags) | |
| 432 { | |
| 433 CURLUcode result = CURLUE_OK; | |
| 434 CURLcode ccode; | |
| 435 char *userp = NULL; | |
| 436 char *passwdp = NULL; | |
| 437 char *optionsp = NULL; | |
| 438 | |
| 439 /* At this point, we're hoping all the other special cases have | |
| 440 * been taken care of, so conn->host.name is at most | |
| 441 * [user[:password][;options]]@]hostname | |
| 442 * | |
| 443 * We need somewhere to put the embedded details, so do that first. | |
| 444 */ | |
| 445 | |
| 446 char *ptr = strchr(*hostname, '@'); | |
| 447 char *login = *hostname; | |
| 448 | |
| 449 if(!ptr) | |
| 450 goto out; | |
| 451 | |
| 452 /* We will now try to extract the | |
| 453 * possible login information in a string like: | |
| 454 * ftp://user:password@ftp.my.site:8021/README */ | |
| 455 *hostname = ++ptr; | |
| 456 | |
| 457 /* We could use the login information in the URL so extract it. Only parse | |
| 458 options if the handler says we should. Note that 'h' might be NULL! */ | |
| 459 ccode = Curl_parse_login_details(login, ptr - login - 1, | |
| 460 &userp, &passwdp, | |
| 461 (h && (h->flags & PROTOPT_URLOPTIONS)) ? | |
| 462 &optionsp:NULL); | |
| 463 if(ccode) { | |
| 464 result = CURLUE_MALFORMED_INPUT; | |
| 465 goto out; | |
| 466 } | |
| 467 | |
| 468 if(userp) { | |
| 469 if(flags & CURLU_DISALLOW_USER) { | |
| 470 /* Option DISALLOW_USER is set and url contains username. */ | |
| 471 result = CURLUE_USER_NOT_ALLOWED; | |
| 472 goto out; | |
| 473 } | |
| 474 | |
| 475 u->user = userp; | |
| 476 } | |
| 477 | |
| 478 if(passwdp) | |
| 479 u->password = passwdp; | |
| 480 | |
| 481 if(optionsp) | |
| 482 u->options = optionsp; | |
| 483 | |
| 484 return CURLUE_OK; | |
| 485 out: | |
| 486 | |
| 487 free(userp); | |
| 488 free(passwdp); | |
| 489 free(optionsp); | |
| 490 | |
| 491 return result; | |
| 492 } | |
| 493 | |
| 494 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname) | |
| 495 { | |
| 496 char *portptr = NULL; | |
| 497 char endbracket; | |
| 498 int len; | |
| 499 | |
| 500 /* | |
| 501 * Find the end of an IPv6 address, either on the ']' ending bracket or | |
| 502 * a percent-encoded zone index. | |
| 503 */ | |
| 504 if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n", | |
| 505 &endbracket, &len)) { | |
| 506 if(']' == endbracket) | |
| 507 portptr = &hostname[len]; | |
| 508 else if('%' == endbracket) { | |
| 509 int zonelen = len; | |
| 510 if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) { | |
| 511 if(']' != endbracket) | |
| 512 return CURLUE_MALFORMED_INPUT; | |
| 513 portptr = &hostname[--zonelen + len + 1]; | |
| 514 } | |
| 515 else | |
| 516 return CURLUE_MALFORMED_INPUT; | |
| 517 } | |
| 518 else | |
| 519 return CURLUE_MALFORMED_INPUT; | |
| 520 | |
| 521 /* this is a RFC2732-style specified IP-address */ | |
| 522 if(portptr && *portptr) { | |
| 523 if(*portptr != ':') | |
| 524 return CURLUE_MALFORMED_INPUT; | |
| 525 } | |
| 526 else | |
| 527 portptr = NULL; | |
| 528 } | |
| 529 else | |
| 530 portptr = strchr(hostname, ':'); | |
| 531 | |
| 532 if(portptr) { | |
| 533 char *rest; | |
| 534 long port; | |
| 535 char portbuf[7]; | |
| 536 | |
| 537 /* Browser behavior adaptation. If there's a colon with no digits after, | |
| 538 just cut off the name there which makes us ignore the colon and just | |
| 539 use the default port. Firefox, Chrome and Safari all do that. */ | |
| 540 if(!portptr[1]) { | |
| 541 *portptr = '\0'; | |
| 542 return CURLUE_OK; | |
| 543 } | |
| 544 | |
| 545 if(!ISDIGIT(portptr[1])) | |
| 546 return CURLUE_BAD_PORT_NUMBER; | |
| 547 | |
| 548 port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */ | |
| 549 | |
| 550 if((port <= 0) || (port > 0xffff)) | |
| 551 /* Single unix standard says port numbers are 16 bits long, but we don't | |
| 552 treat port zero as OK. */ | |
| 553 return CURLUE_BAD_PORT_NUMBER; | |
| 554 | |
| 555 if(rest[0]) | |
| 556 return CURLUE_BAD_PORT_NUMBER; | |
| 557 | |
| 558 *portptr++ = '\0'; /* cut off the name there */ | |
| 559 *rest = 0; | |
| 560 /* generate a new port number string to get rid of leading zeroes etc */ | |
| 561 msnprintf(portbuf, sizeof(portbuf), "%ld", port); | |
| 562 u->portnum = port; | |
| 563 u->port = strdup(portbuf); | |
| 564 if(!u->port) | |
| 565 return CURLUE_OUT_OF_MEMORY; | |
| 566 } | |
| 567 | |
| 568 return CURLUE_OK; | |
| 569 } | |
| 570 | |
| 571 /* scan for byte values < 31 or 127 */ | |
| 572 static CURLUcode junkscan(char *part) | |
| 573 { | |
| 574 if(part) { | |
| 575 static const char badbytes[]={ | |
| 576 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |
| 577 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | |
| 578 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | |
| 579 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, | |
| 580 0x7f, | |
| 581 0x00 /* zero terminate */ | |
| 582 }; | |
| 583 size_t n = strlen(part); | |
| 584 size_t nfine = strcspn(part, badbytes); | |
| 585 if(nfine != n) | |
| 586 /* since we don't know which part is scanned, return a generic error | |
| 587 code */ | |
| 588 return CURLUE_MALFORMED_INPUT; | |
| 589 } | |
| 590 return CURLUE_OK; | |
| 591 } | |
| 592 | |
| 593 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) | |
| 594 { | |
| 595 size_t len; | |
| 596 size_t hlen = strlen(hostname); | |
| 597 | |
| 598 if(hostname[0] == '[') { | |
| 599 char dest[16]; /* fits a binary IPv6 address */ | |
| 600 const char *l = "0123456789abcdefABCDEF:."; | |
| 601 hostname++; | |
| 602 hlen -= 2; | |
| 603 | |
| 604 if(hostname[hlen] != ']') | |
| 605 return CURLUE_MALFORMED_INPUT; | |
| 606 | |
| 607 /* only valid letters are ok */ | |
| 608 len = strspn(hostname, l); | |
| 609 if(hlen != len) { | |
| 610 hlen = len; | |
| 611 if(hostname[len] == '%') { | |
| 612 /* this could now be '%[zone id]' */ | |
| 613 char zoneid[16]; | |
| 614 int i = 0; | |
| 615 char *h = &hostname[len + 1]; | |
| 616 /* pass '25' if present and is a url encoded percent sign */ | |
| 617 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']')) | |
| 618 h += 2; | |
| 619 while(*h && (*h != ']') && (i < 15)) | |
| 620 zoneid[i++] = *h++; | |
| 621 if(!i || (']' != *h)) | |
| 622 return CURLUE_MALFORMED_INPUT; | |
| 623 zoneid[i] = 0; | |
| 624 u->zoneid = strdup(zoneid); | |
| 625 if(!u->zoneid) | |
| 626 return CURLUE_OUT_OF_MEMORY; | |
| 627 hostname[len] = ']'; /* insert end bracket */ | |
| 628 hostname[len + 1] = 0; /* terminate the hostname */ | |
| 629 } | |
| 630 else | |
| 631 return CURLUE_MALFORMED_INPUT; | |
| 632 /* hostname is fine */ | |
| 633 } | |
| 634 #ifdef ENABLE_IPV6 | |
| 635 hostname[hlen] = 0; /* end the address there */ | |
| 636 if(1 != Curl_inet_pton(AF_INET6, hostname, dest)) | |
| 637 return CURLUE_MALFORMED_INPUT; | |
| 638 hostname[hlen] = ']'; /* restore ending bracket */ | |
| 639 #endif | |
| 640 } | |
| 641 else { | |
| 642 /* letters from the second string is not ok */ | |
| 643 len = strcspn(hostname, " "); | |
| 644 if(hlen != len) | |
| 645 /* hostname with bad content */ | |
| 646 return CURLUE_MALFORMED_INPUT; | |
| 647 } | |
| 648 if(!hostname[0]) | |
| 649 return CURLUE_NO_HOST; | |
| 650 return CURLUE_OK; | |
| 651 } | |
| 652 | |
/* non-zero for a character that terminates the host name part of a URL;
   the argument is evaluated up to three times */
#define HOSTNAME_END(x) \
  (((x) == '/') || ((x) == '?') || ((x) == '#'))
| 654 | |
| 655 static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) | |
| 656 { | |
| 657 char *path; | |
| 658 bool path_alloced = FALSE; | |
| 659 char *hostname; | |
| 660 char *query = NULL; | |
| 661 char *fragment = NULL; | |
| 662 CURLUcode result; | |
| 663 bool url_has_scheme = FALSE; | |
| 664 char schemebuf[MAX_SCHEME_LEN + 1]; | |
| 665 char *schemep = NULL; | |
| 666 size_t schemelen = 0; | |
| 667 size_t urllen; | |
| 668 const struct Curl_handler *h = NULL; | |
| 669 | |
| 670 if(!url) | |
| 671 return CURLUE_MALFORMED_INPUT; | |
| 672 | |
| 673 /************************************************************* | |
| 674 * Parse the URL. | |
| 675 ************************************************************/ | |
| 676 /* allocate scratch area */ | |
| 677 urllen = strlen(url); | |
| 678 if(urllen > CURL_MAX_INPUT_LENGTH) | |
| 679 /* excessive input length */ | |
| 680 return CURLUE_MALFORMED_INPUT; | |
| 681 | |
| 682 path = u->scratch = malloc(urllen * 2 + 2); | |
| 683 if(!path) | |
| 684 return CURLUE_OUT_OF_MEMORY; | |
| 685 | |
| 686 hostname = &path[urllen + 1]; | |
| 687 hostname[0] = 0; | |
| 688 | |
| 689 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) { | |
| 690 url_has_scheme = TRUE; | |
| 691 schemelen = strlen(schemebuf); | |
| 692 } | |
| 693 | |
| 694 /* handle the file: scheme */ | |
| 695 if(url_has_scheme && strcasecompare(schemebuf, "file")) { | |
| 696 /* path has been allocated large enough to hold this */ | |
| 697 strcpy(path, &url[5]); | |
| 698 | |
| 699 hostname = NULL; /* no host for file: URLs */ | |
| 700 u->scheme = strdup("file"); | |
| 701 if(!u->scheme) | |
| 702 return CURLUE_OUT_OF_MEMORY; | |
| 703 | |
| 704 /* Extra handling URLs with an authority component (i.e. that start with | |
| 705 * "file://") | |
| 706 * | |
| 707 * We allow omitted hostname (e.g. file:/<path>) -- valid according to | |
| 708 * RFC 8089, but not the (current) WHAT-WG URL spec. | |
| 709 */ | |
| 710 if(path[0] == '/' && path[1] == '/') { | |
| 711 /* swallow the two slashes */ | |
| 712 char *ptr = &path[2]; | |
| 713 | |
| 714 /* | |
| 715 * According to RFC 8089, a file: URL can be reliably dereferenced if: | |
| 716 * | |
| 717 * o it has no/blank hostname, or | |
| 718 * | |
| 719 * o the hostname matches "localhost" (case-insensitively), or | |
| 720 * | |
| 721 * o the hostname is a FQDN that resolves to this machine. | |
| 722 * | |
| 723 * For brevity, we only consider URLs with empty, "localhost", or | |
| 724 * "127.0.0.1" hostnames as local. | |
| 725 * | |
| 726 * Additionally, there is an exception for URLs with a Windows drive | |
| 727 * letter in the authority (which was accidentally omitted from RFC 8089 | |
| 728 * Appendix E, but believe me, it was meant to be there. --MK) | |
| 729 */ | |
| 730 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) { | |
| 731 /* the URL includes a host name, it must match "localhost" or | |
| 732 "127.0.0.1" to be valid */ | |
| 733 if(!checkprefix("localhost/", ptr) && | |
| 734 !checkprefix("127.0.0.1/", ptr)) { | |
| 735 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or | |
| 736 none */ | |
| 737 return CURLUE_MALFORMED_INPUT; | |
| 738 } | |
| 739 ptr += 9; /* now points to the slash after the host */ | |
| 740 } | |
| 741 | |
| 742 path = ptr; | |
| 743 } | |
| 744 | |
| 745 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__) | |
| 746 /* Don't allow Windows drive letters when not in Windows. | |
| 747 * This catches both "file:/c:" and "file:c:" */ | |
| 748 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) || | |
| 749 STARTS_WITH_URL_DRIVE_PREFIX(path)) { | |
| 750 /* File drive letters are only accepted in MSDOS/Windows */ | |
| 751 return CURLUE_MALFORMED_INPUT; | |
| 752 } | |
| 753 #else | |
| 754 /* If the path starts with a slash and a drive letter, ditch the slash */ | |
| 755 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) { | |
| 756 /* This cannot be done with strcpy, as the memory chunks overlap! */ | |
| 757 memmove(path, &path[1], strlen(&path[1]) + 1); | |
| 758 } | |
| 759 #endif | |
| 760 | |
| 761 } | |
| 762 else { | |
| 763 /* clear path */ | |
| 764 const char *p; | |
| 765 const char *hostp; | |
| 766 size_t len; | |
| 767 path[0] = 0; | |
| 768 | |
| 769 if(url_has_scheme) { | |
| 770 int i = 0; | |
| 771 p = &url[schemelen + 1]; | |
| 772 while(p && (*p == '/') && (i < 4)) { | |
| 773 p++; | |
| 774 i++; | |
| 775 } | |
| 776 if((i < 1) || (i>3)) | |
| 777 /* less than one or more than three slashes */ | |
| 778 return CURLUE_MALFORMED_INPUT; | |
| 779 | |
| 780 schemep = schemebuf; | |
| 781 if(!Curl_builtin_scheme(schemep) && | |
| 782 !(flags & CURLU_NON_SUPPORT_SCHEME)) | |
| 783 return CURLUE_UNSUPPORTED_SCHEME; | |
| 784 | |
| 785 if(junkscan(schemep)) | |
| 786 return CURLUE_MALFORMED_INPUT; | |
| 787 } | |
| 788 else { | |
| 789 /* no scheme! */ | |
| 790 | |
| 791 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) | |
| 792 return CURLUE_MALFORMED_INPUT; | |
| 793 if(flags & CURLU_DEFAULT_SCHEME) | |
| 794 schemep = (char *) DEFAULT_SCHEME; | |
| 795 | |
| 796 /* | |
| 797 * The URL was badly formatted, let's try without scheme specified. | |
| 798 */ | |
| 799 p = url; | |
| 800 } | |
| 801 hostp = p; /* host name starts here */ | |
| 802 | |
| 803 while(*p && !HOSTNAME_END(*p)) /* find end of host name */ | |
| 804 p++; | |
| 805 | |
| 806 len = p - hostp; | |
| 807 if(!len) | |
| 808 return CURLUE_MALFORMED_INPUT; | |
| 809 | |
| 810 memcpy(hostname, hostp, len); | |
| 811 hostname[len] = 0; | |
| 812 | |
| 813 if((flags & CURLU_GUESS_SCHEME) && !schemep) { | |
| 814 /* legacy curl-style guess based on host name */ | |
| 815 if(checkprefix("ftp.", hostname)) | |
| 816 schemep = (char *)"ftp"; | |
| 817 else if(checkprefix("dict.", hostname)) | |
| 818 schemep = (char *)"dict"; | |
| 819 else if(checkprefix("ldap.", hostname)) | |
| 820 schemep = (char *)"ldap"; | |
| 821 else if(checkprefix("imap.", hostname)) | |
| 822 schemep = (char *)"imap"; | |
| 823 else if(checkprefix("smtp.", hostname)) | |
| 824 schemep = (char *)"smtp"; | |
| 825 else if(checkprefix("pop3.", hostname)) | |
| 826 schemep = (char *)"pop3"; | |
| 827 else | |
| 828 schemep = (char *)"http"; | |
| 829 } | |
| 830 | |
| 831 len = strlen(p); | |
| 832 memcpy(path, p, len); | |
| 833 path[len] = 0; | |
| 834 | |
| 835 u->scheme = strdup(schemep); | |
| 836 if(!u->scheme) | |
| 837 return CURLUE_OUT_OF_MEMORY; | |
| 838 } | |
| 839 | |
| 840 /* if this is a known scheme, get some details */ | |
| 841 h = Curl_builtin_scheme(u->scheme); | |
| 842 | |
| 843 if(junkscan(path)) | |
| 844 return CURLUE_MALFORMED_INPUT; | |
| 845 | |
| 846 query = strchr(path, '?'); | |
| 847 if(query) | |
| 848 *query++ = 0; | |
| 849 | |
| 850 fragment = strchr(query?query:path, '#'); | |
| 851 if(fragment) | |
| 852 *fragment++ = 0; | |
| 853 | |
| 854 if(!path[0]) | |
| 855 /* if there's no path set, unset */ | |
| 856 path = NULL; | |
| 857 else if(!(flags & CURLU_PATH_AS_IS)) { | |
| 858 /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */ | |
| 859 char *newp = Curl_dedotdotify(path); | |
| 860 if(!newp) | |
| 861 return CURLUE_OUT_OF_MEMORY; | |
| 862 | |
| 863 if(strcmp(newp, path)) { | |
| 864 /* if we got a new version */ | |
| 865 path = newp; | |
| 866 path_alloced = TRUE; | |
| 867 } | |
| 868 else | |
| 869 free(newp); | |
| 870 } | |
| 871 if(path) { | |
| 872 u->path = path_alloced?path:strdup(path); | |
| 873 if(!u->path) | |
| 874 return CURLUE_OUT_OF_MEMORY; | |
| 875 } | |
| 876 | |
| 877 if(hostname) { | |
| 878 /* | |
| 879 * Parse the login details and strip them out of the host name. | |
| 880 */ | |
| 881 if(junkscan(hostname)) | |
| 882 return CURLUE_MALFORMED_INPUT; | |
| 883 | |
| 884 result = parse_hostname_login(u, h, &hostname, flags); | |
| 885 if(result) | |
| 886 return result; | |
| 887 | |
| 888 result = Curl_parse_port(u, hostname); | |
| 889 if(result) | |
| 890 return result; | |
| 891 | |
| 892 result = hostname_check(u, hostname); | |
| 893 if(result) | |
| 894 return result; | |
| 895 | |
| 896 u->host = strdup(hostname); | |
| 897 if(!u->host) | |
| 898 return CURLUE_OUT_OF_MEMORY; | |
| 899 } | |
| 900 | |
| 901 if(query) { | |
| 902 u->query = strdup(query); | |
| 903 if(!u->query) | |
| 904 return CURLUE_OUT_OF_MEMORY; | |
| 905 } | |
| 906 if(fragment && fragment[0]) { | |
| 907 u->fragment = strdup(fragment); | |
| 908 if(!u->fragment) | |
| 909 return CURLUE_OUT_OF_MEMORY; | |
| 910 } | |
| 911 | |
| 912 free(u->scratch); | |
| 913 u->scratch = NULL; | |
| 914 | |
| 915 return CURLUE_OK; | |
| 916 } | |
| 917 | |
| 918 /* | |
| 919 * Parse the URL and set the relevant members of the Curl_URL struct. | |
| 920 */ | |
| 921 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) | |
| 922 { | |
| 923 CURLUcode result = seturl(url, u, flags); | |
| 924 if(result) { | |
| 925 free_urlhandle(u); | |
| 926 memset(u, 0, sizeof(struct Curl_URL)); | |
| 927 } | |
| 928 return result; | |
| 929 } | |
| 930 | |
| 931 /* | |
| 932 */ | |
| 933 CURLU *curl_url(void) | |
| 934 { | |
| 935 return calloc(sizeof(struct Curl_URL), 1); | |
| 936 } | |
| 937 | |
| 938 void curl_url_cleanup(CURLU *u) | |
| 939 { | |
| 940 if(u) { | |
| 941 free_urlhandle(u); | |
| 942 free(u); | |
| 943 } | |
| 944 } | |
| 945 | |
/* Duplicate the string member 'name' from 'src' into 'dest' if it is set in
   'src'. Jumps to the label 'fail', which must exist in the invoking
   function, when the copy cannot be allocated.

   Wrapped in do { } while(0) so that the expansion is a single statement
   and stays safe inside an unbraced if/else. */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
| 952 | |
| 953 CURLU *curl_url_dup(CURLU *in) | |
| 954 { | |
| 955 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1); | |
| 956 if(u) { | |
| 957 DUP(u, in, scheme); | |
| 958 DUP(u, in, user); | |
| 959 DUP(u, in, password); | |
| 960 DUP(u, in, options); | |
| 961 DUP(u, in, host); | |
| 962 DUP(u, in, port); | |
| 963 DUP(u, in, path); | |
| 964 DUP(u, in, query); | |
| 965 DUP(u, in, fragment); | |
| 966 u->portnum = in->portnum; | |
| 967 } | |
| 968 return u; | |
| 969 fail: | |
| 970 curl_url_cleanup(u); | |
| 971 return NULL; | |
| 972 } | |
| 973 | |
| 974 CURLUcode curl_url_get(CURLU *u, CURLUPart what, | |
| 975 char **part, unsigned int flags) | |
| 976 { | |
| 977 char *ptr; | |
| 978 CURLUcode ifmissing = CURLUE_UNKNOWN_PART; | |
| 979 char portbuf[7]; | |
| 980 bool urldecode = (flags & CURLU_URLDECODE)?1:0; | |
| 981 bool plusdecode = FALSE; | |
| 982 (void)flags; | |
| 983 if(!u) | |
| 984 return CURLUE_BAD_HANDLE; | |
| 985 if(!part) | |
| 986 return CURLUE_BAD_PARTPOINTER; | |
| 987 *part = NULL; | |
| 988 | |
| 989 switch(what) { | |
| 990 case CURLUPART_SCHEME: | |
| 991 ptr = u->scheme; | |
| 992 ifmissing = CURLUE_NO_SCHEME; | |
| 993 urldecode = FALSE; /* never for schemes */ | |
| 994 break; | |
| 995 case CURLUPART_USER: | |
| 996 ptr = u->user; | |
| 997 ifmissing = CURLUE_NO_USER; | |
| 998 break; | |
| 999 case CURLUPART_PASSWORD: | |
| 1000 ptr = u->password; | |
| 1001 ifmissing = CURLUE_NO_PASSWORD; | |
| 1002 break; | |
| 1003 case CURLUPART_OPTIONS: | |
| 1004 ptr = u->options; | |
| 1005 ifmissing = CURLUE_NO_OPTIONS; | |
| 1006 break; | |
| 1007 case CURLUPART_HOST: | |
| 1008 ptr = u->host; | |
| 1009 ifmissing = CURLUE_NO_HOST; | |
| 1010 break; | |
| 1011 case CURLUPART_ZONEID: | |
| 1012 ptr = u->zoneid; | |
| 1013 break; | |
| 1014 case CURLUPART_PORT: | |
| 1015 ptr = u->port; | |
| 1016 ifmissing = CURLUE_NO_PORT; | |
| 1017 urldecode = FALSE; /* never for port */ | |
| 1018 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) { | |
| 1019 /* there's no stored port number, but asked to deliver | |
| 1020 a default one for the scheme */ | |
| 1021 const struct Curl_handler *h = | |
| 1022 Curl_builtin_scheme(u->scheme); | |
| 1023 if(h) { | |
| 1024 msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport); | |
| 1025 ptr = portbuf; | |
| 1026 } | |
| 1027 } | |
| 1028 else if(ptr && u->scheme) { | |
| 1029 /* there is a stored port number, but ask to inhibit if | |
| 1030 it matches the default one for the scheme */ | |
| 1031 const struct Curl_handler *h = | |
| 1032 Curl_builtin_scheme(u->scheme); | |
| 1033 if(h && (h->defport == u->portnum) && | |
| 1034 (flags & CURLU_NO_DEFAULT_PORT)) | |
| 1035 ptr = NULL; | |
| 1036 } | |
| 1037 break; | |
| 1038 case CURLUPART_PATH: | |
| 1039 ptr = u->path; | |
| 1040 if(!ptr) { | |
| 1041 ptr = u->path = strdup("/"); | |
| 1042 if(!u->path) | |
| 1043 return CURLUE_OUT_OF_MEMORY; | |
| 1044 } | |
| 1045 break; | |
| 1046 case CURLUPART_QUERY: | |
| 1047 ptr = u->query; | |
| 1048 ifmissing = CURLUE_NO_QUERY; | |
| 1049 plusdecode = urldecode; | |
| 1050 break; | |
| 1051 case CURLUPART_FRAGMENT: | |
| 1052 ptr = u->fragment; | |
| 1053 ifmissing = CURLUE_NO_FRAGMENT; | |
| 1054 break; | |
| 1055 case CURLUPART_URL: { | |
| 1056 char *url; | |
| 1057 char *scheme; | |
| 1058 char *options = u->options; | |
| 1059 char *port = u->port; | |
| 1060 char *allochost = NULL; | |
| 1061 if(u->scheme && strcasecompare("file", u->scheme)) { | |
| 1062 url = aprintf("file://%s%s%s", | |
| 1063 u->path, | |
| 1064 u->fragment? "#": "", | |
| 1065 u->fragment? u->fragment : ""); | |
| 1066 } | |
| 1067 else if(!u->host) | |
| 1068 return CURLUE_NO_HOST; | |
| 1069 else { | |
| 1070 const struct Curl_handler *h = NULL; | |
| 1071 if(u->scheme) | |
| 1072 scheme = u->scheme; | |
| 1073 else if(flags & CURLU_DEFAULT_SCHEME) | |
| 1074 scheme = (char *) DEFAULT_SCHEME; | |
| 1075 else | |
| 1076 return CURLUE_NO_SCHEME; | |
| 1077 | |
| 1078 if(scheme) { | |
| 1079 h = Curl_builtin_scheme(scheme); | |
| 1080 if(!port && (flags & CURLU_DEFAULT_PORT)) { | |
| 1081 /* there's no stored port number, but asked to deliver | |
| 1082 a default one for the scheme */ | |
| 1083 if(h) { | |
| 1084 msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport); | |
| 1085 port = portbuf; | |
| 1086 } | |
| 1087 } | |
| 1088 else if(port) { | |
| 1089 /* there is a stored port number, but asked to inhibit if it matches | |
| 1090 the default one for the scheme */ | |
| 1091 if(h && (h->defport == u->portnum) && | |
| 1092 (flags & CURLU_NO_DEFAULT_PORT)) | |
| 1093 port = NULL; | |
| 1094 } | |
| 1095 } | |
| 1096 if(h && !(h->flags & PROTOPT_URLOPTIONS)) | |
| 1097 options = NULL; | |
| 1098 | |
| 1099 if((u->host[0] == '[') && u->zoneid) { | |
| 1100 /* make it '[ host %25 zoneid ]' */ | |
| 1101 size_t hostlen = strlen(u->host); | |
| 1102 size_t alen = hostlen + 3 + strlen(u->zoneid) + 1; | |
| 1103 allochost = malloc(alen); | |
| 1104 if(!allochost) | |
| 1105 return CURLUE_OUT_OF_MEMORY; | |
| 1106 memcpy(allochost, u->host, hostlen - 1); | |
| 1107 msnprintf(&allochost[hostlen - 1], alen - hostlen + 1, | |
| 1108 "%%25%s]", u->zoneid); | |
| 1109 } | |
| 1110 | |
| 1111 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", | |
| 1112 scheme, | |
| 1113 u->user ? u->user : "", | |
| 1114 u->password ? ":": "", | |
| 1115 u->password ? u->password : "", | |
| 1116 options ? ";" : "", | |
| 1117 options ? options : "", | |
| 1118 (u->user || u->password || options) ? "@": "", | |
| 1119 allochost ? allochost : u->host, | |
| 1120 port ? ":": "", | |
| 1121 port ? port : "", | |
| 1122 (u->path && (u->path[0] != '/')) ? "/": "", | |
| 1123 u->path ? u->path : "/", | |
| 1124 (u->query && u->query[0]) ? "?": "", | |
| 1125 (u->query && u->query[0]) ? u->query : "", | |
| 1126 u->fragment? "#": "", | |
| 1127 u->fragment? u->fragment : ""); | |
| 1128 free(allochost); | |
| 1129 } | |
| 1130 if(!url) | |
| 1131 return CURLUE_OUT_OF_MEMORY; | |
| 1132 *part = url; | |
| 1133 return CURLUE_OK; | |
| 1134 } | |
| 1135 default: | |
| 1136 ptr = NULL; | |
| 1137 break; | |
| 1138 } | |
| 1139 if(ptr) { | |
| 1140 *part = strdup(ptr); | |
| 1141 if(!*part) | |
| 1142 return CURLUE_OUT_OF_MEMORY; | |
| 1143 if(plusdecode) { | |
| 1144 /* convert + to space */ | |
| 1145 char *plus; | |
| 1146 for(plus = *part; *plus; ++plus) { | |
| 1147 if(*plus == '+') | |
| 1148 *plus = ' '; | |
| 1149 } | |
| 1150 } | |
| 1151 if(urldecode) { | |
| 1152 char *decoded; | |
| 1153 size_t dlen; | |
| 1154 CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen, TRUE); | |
| 1155 free(*part); | |
| 1156 if(res) { | |
| 1157 *part = NULL; | |
| 1158 return CURLUE_URLDECODE; | |
| 1159 } | |
| 1160 *part = decoded; | |
| 1161 } | |
| 1162 return CURLUE_OK; | |
| 1163 } | |
| 1164 else | |
| 1165 return ifmissing; | |
| 1166 } | |
| 1167 | |
| 1168 CURLUcode curl_url_set(CURLU *u, CURLUPart what, | |
| 1169 const char *part, unsigned int flags) | |
| 1170 { | |
| 1171 char **storep = NULL; | |
| 1172 long port = 0; | |
| 1173 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0; | |
| 1174 bool plusencode = FALSE; | |
| 1175 bool urlskipslash = FALSE; | |
| 1176 bool appendquery = FALSE; | |
| 1177 bool equalsencode = FALSE; | |
| 1178 | |
| 1179 if(!u) | |
| 1180 return CURLUE_BAD_HANDLE; | |
| 1181 if(!part) { | |
| 1182 /* setting a part to NULL clears it */ | |
| 1183 switch(what) { | |
| 1184 case CURLUPART_URL: | |
| 1185 break; | |
| 1186 case CURLUPART_SCHEME: | |
| 1187 storep = &u->scheme; | |
| 1188 break; | |
| 1189 case CURLUPART_USER: | |
| 1190 storep = &u->user; | |
| 1191 break; | |
| 1192 case CURLUPART_PASSWORD: | |
| 1193 storep = &u->password; | |
| 1194 break; | |
| 1195 case CURLUPART_OPTIONS: | |
| 1196 storep = &u->options; | |
| 1197 break; | |
| 1198 case CURLUPART_HOST: | |
| 1199 storep = &u->host; | |
| 1200 break; | |
| 1201 case CURLUPART_ZONEID: | |
| 1202 storep = &u->zoneid; | |
| 1203 break; | |
| 1204 case CURLUPART_PORT: | |
| 1205 u->portnum = 0; | |
| 1206 storep = &u->port; | |
| 1207 break; | |
| 1208 case CURLUPART_PATH: | |
| 1209 storep = &u->path; | |
| 1210 break; | |
| 1211 case CURLUPART_QUERY: | |
| 1212 storep = &u->query; | |
| 1213 break; | |
| 1214 case CURLUPART_FRAGMENT: | |
| 1215 storep = &u->fragment; | |
| 1216 break; | |
| 1217 default: | |
| 1218 return CURLUE_UNKNOWN_PART; | |
| 1219 } | |
| 1220 if(storep && *storep) { | |
| 1221 free(*storep); | |
| 1222 *storep = NULL; | |
| 1223 } | |
| 1224 return CURLUE_OK; | |
| 1225 } | |
| 1226 | |
| 1227 switch(what) { | |
| 1228 case CURLUPART_SCHEME: | |
| 1229 if(strlen(part) > MAX_SCHEME_LEN) | |
| 1230 /* too long */ | |
| 1231 return CURLUE_MALFORMED_INPUT; | |
| 1232 if(!(flags & CURLU_NON_SUPPORT_SCHEME) && | |
| 1233 /* verify that it is a fine scheme */ | |
| 1234 !Curl_builtin_scheme(part)) | |
| 1235 return CURLUE_UNSUPPORTED_SCHEME; | |
| 1236 storep = &u->scheme; | |
| 1237 urlencode = FALSE; /* never */ | |
| 1238 break; | |
| 1239 case CURLUPART_USER: | |
| 1240 storep = &u->user; | |
| 1241 break; | |
| 1242 case CURLUPART_PASSWORD: | |
| 1243 storep = &u->password; | |
| 1244 break; | |
| 1245 case CURLUPART_OPTIONS: | |
| 1246 storep = &u->options; | |
| 1247 break; | |
| 1248 case CURLUPART_HOST: | |
| 1249 storep = &u->host; | |
| 1250 free(u->zoneid); | |
| 1251 u->zoneid = NULL; | |
| 1252 break; | |
| 1253 case CURLUPART_ZONEID: | |
| 1254 storep = &u->zoneid; | |
| 1255 break; | |
| 1256 case CURLUPART_PORT: | |
| 1257 { | |
| 1258 char *endp; | |
| 1259 urlencode = FALSE; /* never */ | |
| 1260 port = strtol(part, &endp, 10); /* Port number must be decimal */ | |
| 1261 if((port <= 0) || (port > 0xffff)) | |
| 1262 return CURLUE_BAD_PORT_NUMBER; | |
| 1263 if(*endp) | |
| 1264 /* weirdly provided number, not good! */ | |
| 1265 return CURLUE_MALFORMED_INPUT; | |
| 1266 storep = &u->port; | |
| 1267 } | |
| 1268 break; | |
| 1269 case CURLUPART_PATH: | |
| 1270 urlskipslash = TRUE; | |
| 1271 storep = &u->path; | |
| 1272 break; | |
| 1273 case CURLUPART_QUERY: | |
| 1274 plusencode = urlencode; | |
| 1275 appendquery = (flags & CURLU_APPENDQUERY)?1:0; | |
| 1276 equalsencode = appendquery; | |
| 1277 storep = &u->query; | |
| 1278 break; | |
| 1279 case CURLUPART_FRAGMENT: | |
| 1280 storep = &u->fragment; | |
| 1281 break; | |
| 1282 case CURLUPART_URL: { | |
| 1283 /* | |
| 1284 * Allow a new URL to replace the existing (if any) contents. | |
| 1285 * | |
| 1286 * If the existing contents is enough for a URL, allow a relative URL to | |
| 1287 * replace it. | |
| 1288 */ | |
| 1289 CURLUcode result; | |
| 1290 char *oldurl; | |
| 1291 char *redired_url; | |
| 1292 CURLU *handle2; | |
| 1293 | |
| 1294 if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN + 1)) { | |
| 1295 handle2 = curl_url(); | |
| 1296 if(!handle2) | |
| 1297 return CURLUE_OUT_OF_MEMORY; | |
| 1298 result = parseurl(part, handle2, flags); | |
| 1299 if(!result) | |
| 1300 mv_urlhandle(handle2, u); | |
| 1301 else | |
| 1302 curl_url_cleanup(handle2); | |
| 1303 return result; | |
| 1304 } | |
| 1305 /* extract the full "old" URL to do the redirect on */ | |
| 1306 result = curl_url_get(u, CURLUPART_URL, &oldurl, flags); | |
| 1307 if(result) { | |
| 1308 /* couldn't get the old URL, just use the new! */ | |
| 1309 handle2 = curl_url(); | |
| 1310 if(!handle2) | |
| 1311 return CURLUE_OUT_OF_MEMORY; | |
| 1312 result = parseurl(part, handle2, flags); | |
| 1313 if(!result) | |
| 1314 mv_urlhandle(handle2, u); | |
| 1315 else | |
| 1316 curl_url_cleanup(handle2); | |
| 1317 return result; | |
| 1318 } | |
| 1319 | |
| 1320 /* apply the relative part to create a new URL */ | |
| 1321 redired_url = concat_url(oldurl, part); | |
| 1322 free(oldurl); | |
| 1323 if(!redired_url) | |
| 1324 return CURLUE_OUT_OF_MEMORY; | |
| 1325 | |
| 1326 /* now parse the new URL */ | |
| 1327 handle2 = curl_url(); | |
| 1328 if(!handle2) { | |
| 1329 free(redired_url); | |
| 1330 return CURLUE_OUT_OF_MEMORY; | |
| 1331 } | |
| 1332 result = parseurl(redired_url, handle2, flags); | |
| 1333 free(redired_url); | |
| 1334 if(!result) | |
| 1335 mv_urlhandle(handle2, u); | |
| 1336 else | |
| 1337 curl_url_cleanup(handle2); | |
| 1338 return result; | |
| 1339 } | |
| 1340 default: | |
| 1341 return CURLUE_UNKNOWN_PART; | |
| 1342 } | |
| 1343 if(storep) { | |
| 1344 const char *newp = part; | |
| 1345 size_t nalloc = strlen(part); | |
| 1346 | |
| 1347 if(nalloc > CURL_MAX_INPUT_LENGTH) | |
| 1348 /* excessive input length */ | |
| 1349 return CURLUE_MALFORMED_INPUT; | |
| 1350 | |
| 1351 if(urlencode) { | |
| 1352 const unsigned char *i; | |
| 1353 char *o; | |
| 1354 bool free_part = FALSE; | |
| 1355 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */ | |
| 1356 if(!enc) | |
| 1357 return CURLUE_OUT_OF_MEMORY; | |
| 1358 if(plusencode) { | |
| 1359 /* space to plus */ | |
| 1360 i = (const unsigned char *)part; | |
| 1361 for(o = enc; *i; ++o, ++i) | |
| 1362 *o = (*i == ' ') ? '+' : *i; | |
| 1363 *o = 0; /* zero terminate */ | |
| 1364 part = strdup(enc); | |
| 1365 if(!part) { | |
| 1366 free(enc); | |
| 1367 return CURLUE_OUT_OF_MEMORY; | |
| 1368 } | |
| 1369 free_part = TRUE; | |
| 1370 } | |
| 1371 for(i = (const unsigned char *)part, o = enc; *i; i++) { | |
| 1372 if(Curl_isunreserved(*i) || | |
| 1373 ((*i == '/') && urlskipslash) || | |
| 1374 ((*i == '=') && equalsencode) || | |
| 1375 ((*i == '+') && plusencode)) { | |
| 1376 if((*i == '=') && equalsencode) | |
| 1377 /* only skip the first equals sign */ | |
| 1378 equalsencode = FALSE; | |
| 1379 *o = *i; | |
| 1380 o++; | |
| 1381 } | |
| 1382 else { | |
| 1383 msnprintf(o, 4, "%%%02x", *i); | |
| 1384 o += 3; | |
| 1385 } | |
| 1386 } | |
| 1387 *o = 0; /* zero terminate */ | |
| 1388 newp = enc; | |
| 1389 if(free_part) | |
| 1390 free((char *)part); | |
| 1391 } | |
| 1392 else { | |
| 1393 char *p; | |
| 1394 newp = strdup(part); | |
| 1395 if(!newp) | |
| 1396 return CURLUE_OUT_OF_MEMORY; | |
| 1397 p = (char *)newp; | |
| 1398 while(*p) { | |
| 1399 /* make sure percent encoded are lower case */ | |
| 1400 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) && | |
| 1401 (ISUPPER(p[1]) || ISUPPER(p[2]))) { | |
| 1402 p[1] = (char)TOLOWER(p[1]); | |
| 1403 p[2] = (char)TOLOWER(p[2]); | |
| 1404 p += 3; | |
| 1405 } | |
| 1406 else | |
| 1407 p++; | |
| 1408 } | |
| 1409 } | |
| 1410 | |
| 1411 if(appendquery) { | |
| 1412 /* Append the string onto the old query. Add a '&' separator if none is | |
| 1413 present at the end of the exsting query already */ | |
| 1414 size_t querylen = u->query ? strlen(u->query) : 0; | |
| 1415 bool addamperand = querylen && (u->query[querylen -1] != '&'); | |
| 1416 if(querylen) { | |
| 1417 size_t newplen = strlen(newp); | |
| 1418 char *p = malloc(querylen + addamperand + newplen + 1); | |
| 1419 if(!p) { | |
| 1420 free((char *)newp); | |
| 1421 return CURLUE_OUT_OF_MEMORY; | |
| 1422 } | |
| 1423 strcpy(p, u->query); /* original query */ | |
| 1424 if(addamperand) | |
| 1425 p[querylen] = '&'; /* ampersand */ | |
| 1426 strcpy(&p[querylen + addamperand], newp); /* new suffix */ | |
| 1427 free((char *)newp); | |
| 1428 free(*storep); | |
| 1429 *storep = p; | |
| 1430 return CURLUE_OK; | |
| 1431 } | |
| 1432 } | |
| 1433 | |
| 1434 if(what == CURLUPART_HOST) { | |
| 1435 if(hostname_check(u, (char *)newp)) { | |
| 1436 free((char *)newp); | |
| 1437 return CURLUE_MALFORMED_INPUT; | |
| 1438 } | |
| 1439 } | |
| 1440 | |
| 1441 free(*storep); | |
| 1442 *storep = (char *)newp; | |
| 1443 } | |
| 1444 /* set after the string, to make it not assigned if the allocation above | |
| 1445 fails */ | |
| 1446 if(port) | |
| 1447 u->portnum = port; | |
| 1448 return CURLUE_OK; | |
| 1449 } |
