1.\"- 2.\" Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav 3.\" All rights reserved. 4.\" 5.\" Redistribution and use in source and binary forms, with or without 6.\" modification, are permitted provided that the following conditions 7.\" are met: 8.\" 1. Redistributions of source code must retain the above copyright 9.\" notice, this list of conditions and the following disclaimer. 10.\" 2. Redistributions in binary form must reproduce the above copyright 11.\" notice, this list of conditions and the following disclaimer in the 12.\" documentation and/or other materials provided with the distribution. 13.\" 14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24.\" SUCH DAMAGE. 
25.\" 26.\" $FreeBSD: fetch.3,v 1.64 2007/12/18 11:03:26 des Exp $ 27.\" $NetBSD: fetch.3,v 1.1.1.2 2008/10/07 15:55:20 joerg Exp $ 28.\" 29.Dd April 25, 2008 30.Dt FETCH 3 31.Os 32.Sh NAME 33.Nm fetchMakeURL , 34.Nm fetchParseURL , 35.Nm fetchCopyURL , 36.Nm fetchFreeURL , 37.Nm fetchXGetURL , 38.Nm fetchGetURL , 39.Nm fetchPutURL , 40.Nm fetchStatURL , 41.Nm fetchListURL , 42.Nm fetchXGet , 43.Nm fetchGet , 44.Nm fetchPut , 45.Nm fetchStat , 46.Nm fetchList , 47.Nm fetchXGetFile , 48.Nm fetchGetFile , 49.Nm fetchPutFile , 50.Nm fetchStatFile , 51.Nm fetchListFile , 52.Nm fetchXGetHTTP , 53.Nm fetchGetHTTP , 54.Nm fetchPutHTTP , 55.Nm fetchStatHTTP , 56.Nm fetchListHTTP , 57.Nm fetchXGetFTP , 58.Nm fetchGetFTP , 59.Nm fetchPutFTP , 60.Nm fetchStatFTP , 61.Nm fetchListFTP , 62.Nm fetchInitURLList , 63.Nm fetchFreeURLList , 64.Nm fetchUnquotePath , 65.Nm fetchUnquoteFilename , 66.Nm fetchStringifyURL , 67.Nm fetch 68.Nd file transfer functions 69.Sh LIBRARY 70.Lb libfetch 71.Sh SYNOPSIS 72.In stdio.h 73.In fetch.h 74.Ft struct url * 75.Fn fetchMakeURL "const char *scheme" "const char *host" "int port" "const char *doc" "const char *user" "const char *pwd" 76.Ft struct url * 77.Fn fetchParseURL "const char *URL" 78.Ft struct url * 79.Fn fetchCopyURL "const struct url *u" 80.Ft void 81.Fn fetchFreeURL "struct url *u" 82.Ft fetchIO * 83.Fn fetchXGetURL "const char *URL" "struct url_stat *us" "const char *flags" 84.Ft fetchIO * 85.Fn fetchGetURL "const char *URL" "const char *flags" 86.Ft fetchIO * 87.Fn fetchPutURL "const char *URL" "const char *flags" 88.Ft int 89.Fn fetchStatURL "const char *URL" "struct url_stat *us" "const char *flags" 90.Ft int 91.Fn fetchListURL "struct url_list *list" "const char *URL" "const char *flags" 92.Ft fetchIO * 93.Fn fetchXGet "struct url *u" "struct url_stat *us" "const char *flags" 94.Ft fetchIO * 95.Fn fetchGet "struct url *u" "const char *flags" 96.Ft fetchIO * 97.Fn fetchPut "struct url *u" "const char *flags" 98.Ft int 99.Fn 
fetchStat "struct url *u" "struct url_stat *us" "const char *flags" 100.Ft int 101.Fn fetchList "struct url_list *list" "struct url *u" "const char *flags" 102.Ft fetchIO * 103.Fn fetchXGetFile "struct url *u" "struct url_stat *us" "const char *flags" 104.Ft fetchIO * 105.Fn fetchGetFile "struct url *u" "const char *flags" 106.Ft fetchIO * 107.Fn fetchPutFile "struct url *u" "const char *flags" 108.Ft int 109.Fn fetchStatFile "struct url *u" "struct url_stat *us" "const char *flags" 110.Ft int 111.Fn fetchListFile "struct url_list *list" "struct url *u" "const char *flags" 112.Ft fetchIO * 113.Fn fetchXGetHTTP "struct url *u" "struct url_stat *us" "const char *flags" 114.Ft fetchIO * 115.Fn fetchGetHTTP "struct url *u" "const char *flags" 116.Ft fetchIO * 117.Fn fetchPutHTTP "struct url *u" "const char *flags" 118.Ft int 119.Fn fetchStatHTTP "struct url *u" "struct url_stat *us" "const char *flags" 120.Ft int 121.Fn fetchListHTTP "struct url_list *list" "struct url *u" "const char *flags" 122.Ft fetchIO * 123.Fn fetchXGetFTP "struct url *u" "struct url_stat *us" "const char *flags" 124.Ft fetchIO * 125.Fn fetchGetFTP "struct url *u" "const char *flags" 126.Ft fetchIO * 127.Fn fetchPutFTP "struct url *u" "const char *flags" 128.Ft int 129.Fn fetchStatFTP "struct url *u" "struct url_stat *us" "const char *flags" 130.Ft int 131.Fn fetchListFTP "struct url_list *list" "struct url *u" "const char *flags" 132.Ft void 133.Fn fetchInitURLList "struct url_list *ul" 134.Ft void 135.Fn fetchFreeURLList "struct url_list *ul" 136.Ft char * 137.Fn fetchUnquotePath "struct url *u" 138.Ft char * 139.Fn fetchUnquoteFilename "struct url *u" 140.Ft char * 141.Fn fetchStringifyURL "const struct url *u" 142.Sh DESCRIPTION 143These functions implement a high-level library for retrieving and 144uploading files using Uniform Resource Locators (URLs). 
145.Pp 146.Fn fetchParseURL 147takes a URL in the form of a null-terminated string and splits it into 148its components according to the Common Internet Scheme Syntax 149detailed in RFC 1738. 150A regular expression which produces this syntax is: 151.Bd -literal 152 \*[Lt]scheme\*[Gt]:(//(\*[Lt]user\*[Gt](:\*[Lt]pwd\*[Gt])?@)?\*[Lt]host\*[Gt](:\*[Lt]port\*[Gt])?)?/(\*[Lt]document\*[Gt])? 153.Ed 154.Pp 155If the URL does not seem to begin with a scheme name, it is assumed to be a local path. 156Only absolute path names are accepted. 157.Pp 158Note that some components of the URL are not necessarily relevant to 159all URL schemes. 160For instance, the file scheme only needs the 161.Aq scheme 162and 163.Aq document 164components. 165.Fn fetchParseURL 166quotes any unsafe character in the URL automatically. 167This is not done by 168.Fn fetchMakeURL . 169.Fn fetchCopyURL 170copies an existing 171.Vt url 172structure. 173.Pp 174.Fn fetchMakeURL , 175.Fn fetchParseURL , 176and 177.Fn fetchCopyURL 178return a pointer to a 179.Vt url 180structure, which is defined as follows in 181.In fetch.h : 182.Bd -literal 183#define URL_SCHEMELEN 16 184#define URL_USERLEN 256 185#define URL_PWDLEN 256 186#define URL_HOSTLEN 255 187 188struct url { 189 char scheme[URL_SCHEMELEN + 1]; 190 char user[URL_USERLEN + 1]; 191 char pwd[URL_PWDLEN + 1]; 192 char host[URL_HOSTLEN + 1]; 193 int port; 194 char *doc; 195 off_t offset; 196 size_t length; 197}; 198.Ed 199.Pp 200The pointer returned by 201.Fn fetchMakeURL , 202.Fn fetchCopyURL , 203and 204.Fn fetchParseURL 205should be freed using 206.Fn fetchFreeURL . 207.Pp 208.Fn fetchXGetURL , 209.Fn fetchGetURL , 210and 211.Fn fetchPutURL 212constitute the recommended interface to the 213.Nm fetch 214library. 215They examine the URL passed to them to determine the transfer 216method, and call the appropriate lower-level functions to perform the 217actual transfer. 
218.Fn fetchXGetURL 219also returns the remote document's metadata in the 220.Vt url_stat 221structure pointed to by the 222.Fa us 223argument. 224.Pp 225The 226.Fa flags 227argument is a string of characters which specify transfer options. 228The 229meaning of the individual flags is scheme-dependent, and is detailed 230in the appropriate section below. 231.Pp 232.Fn fetchStatURL 233attempts to obtain the requested document's metadata and fill in the 234structure pointed to by its second argument. 235The 236.Vt url_stat 237structure is defined as follows in 238.In fetch.h : 239.Bd -literal 240struct url_stat { 241 off_t size; 242 time_t atime; 243 time_t mtime; 244}; 245.Ed 246.Pp 247If the size could not be obtained from the server, the 248.Fa size 249field is set to \-1. 250If the modification time could not be obtained from the server, the 251.Fa mtime 252field is set to the epoch. 253If the access time could not be obtained from the server, the 254.Fa atime 255field is set to the modification time. 256.Pp 257.Fn fetchListURL 258attempts to list the contents of the directory pointed to by the URL provided. 259The pattern can be a simple glob-like expression as a hint. 260Callers should not depend on the server to filter names. 261If successful, it appends the list of entries to the 262.Vt url_list 263structure. 264The 265.Vt url_list 266structure is defined as follows in 267.In fetch.h : 268.Bd -literal 269struct url_list { 270 size_t length; 271 size_t alloc_size; 272 struct url *urls; 273}; 274.Ed 275.Pp 276The list should be initialized by calling 277.Fn fetchInitURLList 278and the entries should be freed by calling 279.Fn fetchFreeURLList . 280.Pp 281.Fn fetchStringifyURL 282returns the URL as a string. 283.Fn fetchUnquotePath 284returns the path name part of the URL with any quoting undone. 285Query arguments and fragment identifiers are not included. 286.Fn fetchUnquoteFilename 287returns the last component of the path name as returned by 288.Fn fetchUnquotePath . 
289.Fn fetchStringifyURL , 290.Fn fetchUnquotePath , 291and 292.Fn fetchUnquoteFilename 293return a string that should be deallocated with 294.Fn free 295after use. 296.Pp 297.Fn fetchXGet , 298.Fn fetchGet , 299.Fn fetchPut , 300and 301.Fn fetchStat 302are similar to 303.Fn fetchXGetURL , 304.Fn fetchGetURL , 305.Fn fetchPutURL , 306and 307.Fn fetchStatURL , 308except that they expect a pre-parsed URL in the form of a pointer to 309a 310.Vt struct url 311rather than a string. 312.Pp 313All of the 314.Fn fetchXGetXXX , 315.Fn fetchGetXXX , 316and 317.Fn fetchPutXXX 318functions return a pointer to a stream which can be used to read or 319write data from or to the requested document, respectively. 320Note that 321although the implementation details of the individual access methods 322vary, it can generally be assumed that a stream returned by one of the 323.Fn fetchXGetXXX 324or 325.Fn fetchGetXXX 326functions is read-only, and that a stream returned by one of the 327.Fn fetchPutXXX 328functions is write-only. 329.Sh FILE SCHEME 330.Fn fetchXGetFile , 331.Fn fetchGetFile , 332and 333.Fn fetchPutFile 334provide access to documents which are files in a locally mounted file 335system. 336Only the 337.Aq document 338component of the URL is used. 339.Pp 340.Fn fetchXGetFile 341and 342.Fn fetchGetFile 343do not accept any flags. 344.Pp 345.Fn fetchPutFile 346accepts the 347.Ql a 348(append to file) flag. 349If that flag is specified, the data written to 350the stream returned by 351.Fn fetchPutFile 352will be appended to the previous contents of the file, instead of 353replacing them. 354.Sh FTP SCHEME 355.Fn fetchXGetFTP , 356.Fn fetchGetFTP , 357and 358.Fn fetchPutFTP 359implement the FTP protocol as described in RFC 959. 360.Pp 361If the 362.Ql p 363(passive) flag is specified, a passive (rather than active) connection 364will be attempted. 
365.Pp 366If the 367.Ql l 368(low) flag is specified, data sockets will be allocated in the low (or 369default) port range instead of the high port range (see 370.Xr ip 4 ) . 371.Pp 372If the 373.Ql d 374(direct) flag is specified, 375.Fn fetchXGetFTP , 376.Fn fetchGetFTP , 377and 378.Fn fetchPutFTP 379will use a direct connection even if a proxy server is defined. 380.Pp 381If no user name or password is given, the 382.Nm fetch 383library will attempt an anonymous login, with user name "anonymous" 384and password "anonymous@\*[Lt]hostname\*[Gt]". 385.Sh HTTP SCHEME 386The 387.Fn fetchXGetHTTP , 388.Fn fetchGetHTTP , 389and 390.Fn fetchPutHTTP 391functions implement the HTTP/1.1 protocol. 392With a little luck, there is 393even a chance that they comply with RFC 2616 and RFC 2617. 394.Pp 395If the 396.Ql d 397(direct) flag is specified, 398.Fn fetchXGetHTTP , 399.Fn fetchGetHTTP , 400and 401.Fn fetchPutHTTP 402will use a direct connection even if a proxy server is defined. 403.Pp 404Since there seems to be no good way of implementing the HTTP PUT 405method in a manner consistent with the rest of the 406.Nm fetch 407library, 408.Fn fetchPutHTTP 409is currently unimplemented. 410.Sh AUTHENTICATION 411Apart from setting the appropriate environment variables and 412specifying the user name and password in the URL or the 413.Vt struct url , 414the calling program has the option of defining an authentication 415function with the following prototype: 416.Pp 417.Ft int 418.Fn myAuthMethod "struct url *u" 419.Pp 420The callback function should fill in the 421.Fa user 422and 423.Fa pwd 424fields in the provided 425.Vt struct url 426and return 0 on success, or any other value to indicate failure. 427.Pp 428To register the authentication callback, simply set 429.Va fetchAuthMethod 430to point at it. 431The callback will be used whenever a site requires authentication and 432the appropriate environment variables are not set. 
433.Pp 434This interface is experimental and may be subject to change. 435.Sh RETURN VALUES 436.Fn fetchParseURL 437returns a pointer to a 438.Vt struct url 439containing the individual components of the URL. 440If it is 441unable to allocate memory, or the URL is syntactically incorrect, 442.Fn fetchParseURL 443returns a 444.Dv NULL 445pointer. 446.Pp 447The 448.Fn fetchStat 449functions return 0 on success and \-1 on failure. 450.Pp 451All other functions return a stream pointer which may be used to 452access the requested document, or 453.Dv NULL 454if an error occurred. 455.Pp 456The following error codes are defined in 457.In fetch.h : 458.Bl -tag -width 18n 459.It Bq Er FETCH_ABORT 460Operation aborted 461.It Bq Er FETCH_AUTH 462Authentication failed 463.It Bq Er FETCH_DOWN 464Service unavailable 465.It Bq Er FETCH_EXISTS 466File exists 467.It Bq Er FETCH_FULL 468File system full 469.It Bq Er FETCH_INFO 470Informational response 471.It Bq Er FETCH_MEMORY 472Insufficient memory 473.It Bq Er FETCH_MOVED 474File has moved 475.It Bq Er FETCH_NETWORK 476Network error 477.It Bq Er FETCH_OK 478No error 479.It Bq Er FETCH_PROTO 480Protocol error 481.It Bq Er FETCH_RESOLV 482Resolver error 483.It Bq Er FETCH_SERVER 484Server error 485.It Bq Er FETCH_TEMP 486Temporary error 487.It Bq Er FETCH_TIMEOUT 488Operation timed out 489.It Bq Er FETCH_UNAVAIL 490File is not available 491.It Bq Er FETCH_UNKNOWN 492Unknown error 493.It Bq Er FETCH_URL 494Invalid URL 495.El 496.Pp 497The accompanying error message includes a protocol-specific error code 498and message, e.g.\& "File is not available (404 Not Found)". 499.Sh ENVIRONMENT 500.Bl -tag -width ".Ev FETCH_BIND_ADDRESS" 501.It Ev FETCH_BIND_ADDRESS 502Specifies a host name or IP address to which sockets used for outgoing 503connections will be bound. 504.It Ev FTP_LOGIN 505Default FTP login if none was provided in the URL. 
506.It Ev FTP_PASSIVE_MODE 507If set to anything but 508.Ql no , 509forces the FTP code to use passive mode. 510.It Ev FTP_PASSWORD 511Default FTP password if the remote server requests one and none was 512provided in the URL. 513.It Ev FTP_PROXY 514URL of the proxy to use for FTP requests. 515The document part is ignored. 516FTP and HTTP proxies are supported; if no scheme is specified, FTP is 517assumed. 518If the proxy is an FTP proxy, 519.Nm libfetch 520will send 521.Ql user@host 522as user name to the proxy, where 523.Ql user 524is the real user name, and 525.Ql host 526is the name of the FTP server. 527.Pp 528If this variable is set to an empty string, no proxy will be used for 529FTP requests, even if the 530.Ev HTTP_PROXY 531variable is set. 532.It Ev ftp_proxy 533Same as 534.Ev FTP_PROXY , 535for compatibility. 536.It Ev HTTP_AUTH 537Specifies HTTP authorization parameters as a colon-separated list of 538items. 539The first and second item are the authorization scheme and realm 540respectively; further items are scheme-dependent. 541Currently, only basic authorization is supported. 542.Pp 543Basic authorization requires two parameters: the user name and 544password, in that order. 545.Pp 546This variable is only used if the server requires authorization and 547no user name or password was specified in the URL. 548.It Ev HTTP_PROXY 549URL of the proxy to use for HTTP requests. 550The document part is ignored. 551Only HTTP proxies are supported for HTTP requests. 552If no port number is specified, the default is 3128. 553.Pp 554Note that this proxy will also be used for FTP documents, unless the 555.Ev FTP_PROXY 556variable is set. 557.It Ev http_proxy 558Same as 559.Ev HTTP_PROXY , 560for compatibility. 561.It Ev HTTP_PROXY_AUTH 562Specifies authorization parameters for the HTTP proxy in the same 563format as the 564.Ev HTTP_AUTH 565variable. 
566.Pp 567This variable is used if and only if connected to an HTTP proxy, and 568is ignored if a user and/or a password were specified in the proxy 569URL. 570.It Ev HTTP_REFERER 571Specifies the referrer URL to use for HTTP requests. 572If set to 573.Dq auto , 574the document URL will be used as referrer URL. 575.It Ev HTTP_USER_AGENT 576Specifies the User-Agent string to use for HTTP requests. 577This can be useful when working with HTTP origin or proxy servers that 578differentiate between user agents. 579.It Ev NETRC 580Specifies a file to use instead of 581.Pa ~/.netrc 582to look up login names and passwords for FTP sites. 583See 584.Xr ftp 1 585for a description of the file format. 586This feature is experimental. 587.It Ev NO_PROXY 588Either a single asterisk, which disables the use of proxies 589altogether, or a comma- or whitespace-separated list of hosts for 590which proxies should not be used. 591.It Ev no_proxy 592Same as 593.Ev NO_PROXY , 594for compatibility. 595.El 596.Sh EXAMPLES 597To access a proxy server on 598.Pa proxy.example.com 599port 8080, set the 600.Ev HTTP_PROXY 601environment variable in a manner similar to this: 602.Pp 603.Dl HTTP_PROXY=http://proxy.example.com:8080 604.Pp 605If the proxy server requires authentication, there are 606two options available for passing the authentication data. 
607The first method is by using the proxy URL: 608.Pp 609.Dl HTTP_PROXY=http://\*[Lt]user\*[Gt]:\*[Lt]pwd\*[Gt]@proxy.example.com:8080 610.Pp 611The second method is by using the 612.Ev HTTP_PROXY_AUTH 613environment variable: 614.Bd -literal -offset indent 615HTTP_PROXY=http://proxy.example.com:8080 616HTTP_PROXY_AUTH=basic:*:\*[Lt]user\*[Gt]:\*[Lt]pwd\*[Gt] 617.Ed 618.Pp 619To disable the use of a proxy for an HTTP server running on the local 620host, define 621.Ev NO_PROXY 622as follows: 623.Bd -literal -offset indent 624NO_PROXY=localhost,127.0.0.1 625.Ed 626.Sh SEE ALSO 627.\" .Xr fetch 1 , 628.\" .Xr ftpio 3 , 629.Xr ftp 1 , 630.Xr ip 4 631.Rs 632.%A J. Postel 633.%A J. K. Reynolds 634.%D October 1985 635.%B File Transfer Protocol 636.%O RFC 959 637.Re 638.Rs 639.%A P. Deutsch 640.%A A. Emtage 641.%A A. Marine 642.%D May 1994 643.%T How to Use Anonymous FTP 644.%O RFC 1635 645.Re 646.Rs 647.%A T. Berners-Lee 648.%A L. Masinter 649.%A M. McCahill 650.%D December 1994 651.%T Uniform Resource Locators (URL) 652.%O RFC 1738 653.Re 654.Rs 655.%A R. Fielding 656.%A J. Gettys 657.%A J. Mogul 658.%A H. Frystyk 659.%A L. Masinter 660.%A P. Leach 661.%A T. Berners-Lee 662.%D January 1999 663.%B Hypertext Transfer Protocol -- HTTP/1.1 664.%O RFC 2616 665.Re 666.Rs 667.%A J. Franks 668.%A P. Hallam-Baker 669.%A J. Hostetler 670.%A S. Lawrence 671.%A P. Leach 672.%A A. Luotonen 673.%A L. Stewart 674.%D June 1999 675.%B HTTP Authentication: Basic and Digest Access Authentication 676.%O RFC 2617 677.Re 678.Sh HISTORY 679The 680.Nm fetch 681library first appeared in 682.Fx 3.0 . 683.Sh AUTHORS 684.An -nosplit 685The 686.Nm fetch 687library was mostly written by 688.An Dag-Erling Sm\(/orgrav Aq des@FreeBSD.org 689with numerous suggestions from 690.An Jordan K. Hubbard Aq jkh@FreeBSD.org , 691.An Eugene Skepner Aq eu@qub.com 692and other 693.Fx 694developers. 
695It replaces the older 696.Nm ftpio 697library written by 698.An Poul-Henning Kamp Aq phk@FreeBSD.org 699and 700.An Jordan K. Hubbard Aq jkh@FreeBSD.org . 701.Pp 702This manual page was written by 703.An Dag-Erling Sm\(/orgrav Aq des@FreeBSD.org . 704.Sh BUGS 705Some parts of the library are not yet implemented. 706The most notable 707examples of this are 708.Fn fetchPutHTTP 709and FTP proxy support. 710.Pp 711There is no way to select a proxy at run-time other than setting the 712.Ev HTTP_PROXY 713or 714.Ev FTP_PROXY 715environment variables as appropriate. 716.Pp 717.Nm libfetch 718does not understand or obey 305 (Use Proxy) replies. 719.Pp 720Error numbers are unique only within a certain context; the error 721codes used for FTP and HTTP overlap, as do those used for resolver and 722system errors. 723For instance, error code 202 means "Command not 724implemented, superfluous at this site" in an FTP context and 725"Accepted" in an HTTP context. 726.Pp 727.Fn fetchStatFTP 728does not check that the result of an MDTM command is a valid date. 729.Pp 730The man page is incomplete, poorly written and produces badly 731formatted text. 732.Pp 733The error reporting mechanism is unsatisfactory. 734.Pp 735Some parts of the code are not fully reentrant. 736