- 论坛徽章:
- 0
|
本帖最后由 雨过白鹭洲 于 2011-10-27 16:50 编辑
CU源码阅读8: curl之大块头operate函数(4)
接上一篇,我们继续来阅读和分析curl-7.22.0这个工具,src/main.c文件
1. 现在curl开始要调用libcurl的"curl_easy_setopt"函数来设置网络通信的各种选项,首先看下curl_easy_setopt的使用方式:- CURLcode curl_easy_setopt(CURL *handle, CURLoption option, parameter); // 来自于libcurl man page
复制代码 实际上curl_easy_setopt的函数定义使用了可变数量参数:- CURLcode curl_easy_setopt(CURL *curl, CURLoption tag, ...)
复制代码 但是在include/curl/curl.h中暴露接口时,却又定义了如下宏:- #define curl_easy_setopt(handle,opt,param) curl_easy_setopt(handle,opt,param)
复制代码 这个宏替换成为完全一样的函数调用,作用是确保应用代码使用三个参数来调用curl_easy_setopt函数。
curl_easy_setopt用来设置curl easy handle的各种选项,从而控制和修改libcurl的行为。每次只能设置一个选项,因此典型的应用需要调用许多次这个函数。libcurl定义了非常多的option,不同选项输入的parameter类型不相同,目前有四种类型:long, 函数指针, 对象指针, curl_off_t。
详细的用法和选项列表请参考:http://curl.haxx.se/libcurl/c/curl_easy_setopt.html
2. 回到curl的代码,curl封装了_my_setopt函数来调用curl_easy_setopt,里面根据不同的选项类型进行一些处理- static CURLcode _my_setopt(CURL *curl, bool str, struct Configurable *config,
- const char *name, CURLoption tag, ...)
复制代码 作者写这个辅助函数,主要目的也是调试和日志。这个函数根据选项的四种类型,分别取出相应的parameter,然后调用curl_easy_setopt进行设置。同时会把设置的选项和参数信息生成字符串(形如一条curl_easy_setopt(hnd, ...)调用语句)并保存起来,既便于调试,也用于--libcurl选项输出等价的C代码。
然后定义了以下两个宏以方便使用:- #define my_setopt(x,y,z) _my_setopt(x, FALSE, config, #y, y, z) // 参数不是字符串
- #define my_setopt_str(x,y,z) _my_setopt(x, TRUE, config, #y, y, z) // 参数是字符串
复制代码 3. curl在调用curl_easy_setopt函数上真是毫不吝啬!几乎把所有选项都调了个遍,差不多用了400行代码。- // 设置(1)或清除(0) TCP_NODELAY选项,连接建立后设置没有作用
- // 设置该选项将禁用TCP的Nagle算法
- if(config->tcp_nodelay)
- my_setopt(curl, CURLOPT_TCP_NODELAY, 1);
- // 传递给文件写入函数的数据指针
- // 如果我们设置了CURLOPT_WRITEFUNCTION选项,这个指针就是回调函数的输入
- // 如果我们没有设置CURLOPT_WRITEFUNCTION选项,就使用libcurl内部的写入函数,这个指针必须是FILE *
- my_setopt(curl, CURLOPT_WRITEDATA, &outs);
- // 文件写入回调函数
- // size_t function( char *ptr, size_t size, size_t nmemb, void *userdata);
- // libcurl接收到数据需要存储时,就会自动调用这个函数
- my_setopt(curl, CURLOPT_WRITEFUNCTION, my_fwrite);
- /* for uploads */
- input.fd = infd;
- input.config = config;
- // 传递给文件读取函数的数据指针
- // 如果我们设置了CURLOPT_READFUNCTION选项,这个指针就是回调函数的输入
- // 如果我们没有设置CURLOPT_READFUNCTION选项,就使用libcurl内部的读取函数,这个指针必须是FILE *
- my_setopt(curl, CURLOPT_READDATA, &input);
- /* what call to read */
- if((outfile && !curlx_strequal("-", outfile)) ||
- !checkprefix("telnet:", url))
- // 同上,这里指定了文件读取回调函数
- my_setopt(curl, CURLOPT_READFUNCTION, my_fread);
- /* in 7.18.0, the CURLOPT_SEEKFUNCTION/DATA pair is taking over what
- CURLOPT_IOCTLFUNCTION/DATA pair previously provided for seeking */
- // 这两个和上面也是类似的,当libcurl需要定位到文件的特定位置时
- // 就会调用相应的回调函数,并传递这里设置的输入
- my_setopt(curl, CURLOPT_SEEKDATA, &input);
- my_setopt(curl, CURLOPT_SEEKFUNCTION, my_seek);
- if(config->recvpersecond)
- /* tell libcurl to use a smaller sized buffer as it allows us to
- make better sleeps! 7.9.9 stuff! */
- // 设置libcurl的接收缓冲区大小,小的缓冲区会导致更频繁地调用写入回调函数
- my_setopt(curl, CURLOPT_BUFFERSIZE, config->recvpersecond);
- /* size of uploaded file: */
- if(uploadfilesize != -1)
- // 上传文件时,这个选项告诉libcurl上传文件的大小
- my_setopt(curl, CURLOPT_INFILESIZE_LARGE, uploadfilesize);
- // 要处理的URL地址
- my_setopt_str(curl, CURLOPT_URL, url); /* what to fetch */
- // 要使用的代理
- my_setopt_str(curl, CURLOPT_PROXY, config->proxy); /* proxy to use */
- if(config->proxy)
- // 代理类型
- my_setopt(curl, CURLOPT_PROXYTYPE, config->proxyver);
- // 是否关闭libcurl的进度显示(progress meter),参数非0表示关闭
- my_setopt(curl, CURLOPT_NOPROGRESS, config->noprogress);
- if(config->no_body) {
- // 在输出中不包含body部分,只对header和body分离的协议有效
- // 对HTTP服务器来说,libcurl将发起HEAD请求
- my_setopt(curl, CURLOPT_NOBODY, 1);
- // 在输出中包含header
- my_setopt(curl, CURLOPT_HEADER, 1);
- }
- else
- my_setopt(curl, CURLOPT_HEADER, config->include_headers);
- // 返回的HTTP code大于等于400时,自动失败;默认是忽略错误,正常返回页面内容
- my_setopt(curl, CURLOPT_FAILONERROR, config->failonerror);
- // 告诉libcurl准备上传
- my_setopt(curl, CURLOPT_UPLOAD, uploadfile?TRUE:FALSE);
- // 告诉libcurl只列出目录的文件名列表,而不包含文件大小、数据等
- my_setopt(curl, CURLOPT_DIRLISTONLY, config->dirlistonly);
- // 添加到远程文件末尾,而不是覆盖远程文件
- my_setopt(curl, CURLOPT_APPEND, config->ftp_append);
- if(config->netrc_opt)
- // 使用~/.netrc中的用户和密码,还是URL中的用户和密码
- my_setopt(curl, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
- else if(config->netrc || config->netrc_file)
- my_setopt(curl, CURLOPT_NETRC, CURL_NETRC_REQUIRED);
- else
- my_setopt(curl, CURLOPT_NETRC, CURL_NETRC_IGNORED);
- if(config->netrc_file)
- // 指定自定义的.netrc文件
- my_setopt(curl, CURLOPT_NETRC_FILE, config->netrc_file);
- my_setopt(curl, CURLOPT_FOLLOWLOCATION, config->followlocation);
- my_setopt(curl, CURLOPT_UNRESTRICTED_AUTH, config->unrestricted_auth);
- my_setopt(curl, CURLOPT_TRANSFERTEXT, config->use_ascii);
- my_setopt_str(curl, CURLOPT_USERPWD, config->userpwd);
- my_setopt_str(curl, CURLOPT_PROXYUSERPWD, config->proxyuserpwd);
- my_setopt(curl, CURLOPT_NOPROXY, config->noproxy);
- my_setopt_str(curl, CURLOPT_RANGE, config->range);
- my_setopt(curl, CURLOPT_ERRORBUFFER, errorbuffer);
- my_setopt(curl, CURLOPT_TIMEOUT, config->timeout);
- switch(config->httpreq) {
- case HTTPREQ_SIMPLEPOST:
- my_setopt_str(curl, CURLOPT_POSTFIELDS, config->postfields);
- my_setopt(curl, CURLOPT_POSTFIELDSIZE_LARGE, config->postfieldsize);
- break;
- case HTTPREQ_POST:
- my_setopt(curl, CURLOPT_HTTPPOST, config->httppost);
- break;
- default:
- break;
- }
- my_setopt_str(curl, CURLOPT_REFERER, config->referer);
- my_setopt(curl, CURLOPT_AUTOREFERER, config->autoreferer);
- my_setopt_str(curl, CURLOPT_USERAGENT, config->useragent);
- my_setopt_str(curl, CURLOPT_FTPPORT, config->ftpport);
- my_setopt(curl, CURLOPT_LOW_SPEED_LIMIT,
- config->low_speed_limit);
- my_setopt(curl, CURLOPT_LOW_SPEED_TIME, config->low_speed_time);
- my_setopt(curl, CURLOPT_MAX_SEND_SPEED_LARGE,
- config->sendpersecond);
- my_setopt(curl, CURLOPT_MAX_RECV_SPEED_LARGE,
- config->recvpersecond);
- my_setopt(curl, CURLOPT_RESUME_FROM_LARGE,
- config->use_resume?config->resume_from:0);
- my_setopt_str(curl, CURLOPT_COOKIE, config->cookie);
- my_setopt(curl, CURLOPT_HTTPHEADER, config->headers);
- my_setopt(curl, CURLOPT_SSLCERT, config->cert);
- my_setopt_str(curl, CURLOPT_SSLCERTTYPE, config->cert_type);
- my_setopt(curl, CURLOPT_SSLKEY, config->key);
- my_setopt_str(curl, CURLOPT_SSLKEYTYPE, config->key_type);
- my_setopt_str(curl, CURLOPT_KEYPASSWD, config->key_passwd);
- ............................
- ............................
- /* new in curl 7.16.1 */
- if(config->ftp_ssl_ccc)
- my_setopt(curl, CURLOPT_FTP_SSL_CCC, config->ftp_ssl_ccc_mode);
- /* new in curl 7.11.1, modified in 7.15.2 */
- if(config->socksproxy) {
- my_setopt_str(curl, CURLOPT_PROXY, config->socksproxy);
- my_setopt(curl, CURLOPT_PROXYTYPE, config->socksver);
- }
- #if defined(HAVE_GSSAPI) || defined(USE_WINDOWS_SSPI)
- /* new in curl 7.19.4 */
- if(config->socks5_gssapi_service)
- my_setopt_str(curl, CURLOPT_SOCKS5_GSSAPI_SERVICE,
- config->socks5_gssapi_service);
- /* new in curl 7.19.4 */
- if(config->socks5_gssapi_nec)
- my_setopt_str(curl, CURLOPT_SOCKS5_GSSAPI_NEC,
- config->socks5_gssapi_nec);
- #endif
- /* curl 7.13.0 */
- my_setopt_str(curl, CURLOPT_FTP_ACCOUNT, config->ftp_account);
- my_setopt(curl, CURLOPT_IGNORE_CONTENT_LENGTH, config->ignorecl);
- /* curl 7.14.2 */
- my_setopt(curl, CURLOPT_FTP_SKIP_PASV_IP, config->ftp_skip_ip);
- /* curl 7.15.1 */
- my_setopt(curl, CURLOPT_FTP_FILEMETHOD, config->ftp_filemethod);
- /* curl 7.15.2 */
- if(config->localport) {
- my_setopt(curl, CURLOPT_LOCALPORT, config->localport);
- my_setopt_str(curl, CURLOPT_LOCALPORTRANGE,
- config->localportrange);
- }
- /* curl 7.15.5 */
- my_setopt_str(curl, CURLOPT_FTP_ALTERNATIVE_TO_USER,
- config->ftp_alternative_to_user);
- /* curl 7.16.0 */
- if(config->disable_sessionid)
- my_setopt(curl, CURLOPT_SSL_SESSIONID_CACHE,
- !config->disable_sessionid);
- /* curl 7.16.2 */
- if(config->raw) {
- my_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, FALSE);
- my_setopt(curl, CURLOPT_HTTP_TRANSFER_DECODING, FALSE);
- }
- /* curl 7.17.1 */
- if(!config->nokeepalive) {
- my_setopt(curl, CURLOPT_SOCKOPTFUNCTION, sockoptcallback);
- my_setopt(curl, CURLOPT_SOCKOPTDATA, config);
- }
- /* curl 7.19.1 (the 301 version existed in 7.18.2) */
- my_setopt(curl, CURLOPT_POSTREDIR, config->post301 |
- (config->post302 ? CURL_REDIR_POST_302 : FALSE));
- /* curl 7.20.0 */
- if(config->tftp_blksize)
- my_setopt(curl, CURLOPT_TFTP_BLKSIZE, config->tftp_blksize);
- if(config->mail_from)
- my_setopt_str(curl, CURLOPT_MAIL_FROM, config->mail_from);
- if(config->mail_rcpt)
- my_setopt(curl, CURLOPT_MAIL_RCPT, config->mail_rcpt);
- /* curl 7.20.x */
- if(config->ftp_pret)
- my_setopt(curl, CURLOPT_FTP_USE_PRET, TRUE);
- if(config->proto_present)
- my_setopt(curl, CURLOPT_PROTOCOLS, config->proto);
- if(config->proto_redir_present)
- my_setopt(curl, CURLOPT_REDIR_PROTOCOLS, config->proto_redir);
- if((urlnode->flags & GETOUT_USEREMOTE)
- && config->content_disposition) {
- my_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback);
- my_setopt(curl, CURLOPT_HEADERDATA, &outs);
- }
- else {
- /* if HEADERFUNCTION was set to something in the previous loop, it
- is important that we set it (back) to NULL now */
- my_setopt(curl, CURLOPT_HEADERFUNCTION, NULL);
- my_setopt(curl, CURLOPT_HEADERDATA, config->headerfile?&heads:NULL);
- }
- if(config->resolve)
- /* new in 7.21.3 */
- my_setopt(curl, CURLOPT_RESOLVE, config->resolve);
- /* new in 7.21.4 */
- my_setopt_str(curl, CURLOPT_TLSAUTH_USERNAME, config->tls_username);
- my_setopt_str(curl, CURLOPT_TLSAUTH_PASSWORD, config->tls_password);
- /* new in 7.22.0 */
- if(config->gssapi_delegation)
- my_setopt_str(curl, CURLOPT_GSSAPI_DELEGATION,
- config->gssapi_delegation);
复制代码 我的那个亲娘嘞,这段设置选项的代码实在是太多了,大家可以参考http://curl.haxx.se/libcurl/c/curl_easy_setopt.html,对应起来看,我们这里就不多加注释了。
4. 我们接着往下看,curl使用了一个 for( ; ; ) 循环处理一个URL,循环一进来就是调用curl_easy_perform(),我们看看文档怎么说的:- CURLcode curl_easy_perform(CURL * handle );
复制代码 CURL handle初始化之后,调用curl_easy_setopt设置选项完成,就可以使用curl_easy_perform执行传输。
我们可以使用相同handle调用curl_easy_perform多次,如果我们需要传输多个文件,这是推荐的做法。因为libcurl会为接下来的传输尝试重用相同的连接,因此达到加快速度、降低CPU使用、使用更少的网络资源等目的。不过在多次curl_easy_perform调用之间,可能需要使用curl_easy_setopt重新设置CURL的选项。
相同handle不能并行地调用,并行时必须使用不同的curl handle。
- res = curl_easy_perform(curl);
- if(!curl_slist_append(easycode, "ret = curl_easy_perform(hnd);")) {
- res = CURLE_OUT_OF_MEMORY;
- break;
- }
复制代码 调用curl_easy_perform- if(config->content_disposition && outs.stream && !config->mute &&
- outs.filename)
- printf("curl: Saved to filename '%s'\n", outs.filename);
复制代码 打印保存到的文件名
然后根据retry重试次数去处理网络传输,要么continue继续curl_easy_perform,要么break结束当前URL的处理- /* retry_numretries是剩余的重试次数 */
- /* 如果retry-max-time非0,确保我们没有超过这个时间 */
- if(retry_numretries &&
- (!config->retry_maxtime ||
- (cutil_tvdiff(cutil_tvnow(), retrystart)<
- config->retry_maxtime*1000)) ) {
- enum {
- RETRY_NO,
- RETRY_TIMEOUT,
- RETRY_HTTP,
- RETRY_FTP,
- RETRY_LAST /* not used */
- } retry = RETRY_NO; // 用来记录重试的原因
-
- long response;
- // curl_easy_perform返回超时
- if(CURLE_OPERATION_TIMEDOUT == res)
- /* retry timeout always */
- retry = RETRY_TIMEOUT;
- else if((CURLE_OK == res) ||
- (config->failonerror &&
- (CURLE_HTTP_RETURNED_ERROR == res))) {
- /* 如果curl_easy_perform返回OK,或者启用了failonerror并且
- 由于出现HTTP错误而失败,都需要检查HTTP暂时性(transient)错误并重试 */
- char *this_url=NULL;
- curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &this_url);
- if(this_url &&
- checkprefix("http", this_url)) {
- /* HTTP(S) */
- curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);
- // 检查HTTP的错误
- switch(response) {
- case 500: /* Internal Server Error */
- case 502: /* Bad Gateway */
- case 503: /* Service Unavailable */
- case 504: /* Gateway Timeout */
- retry = RETRY_HTTP;
- /*
- * At this point, we have already written data to the output
- * file (or terminal). If we write to a file, we must rewind
- * or close/re-open the file so that the next attempt starts
- * over from the beginning.
- *
- * TODO: similar action for the upload case. We might need
- * to start over reading from a previous point if we have
- * uploaded something when this was returned.
- */
- break;
- }
- }
- } /* if CURLE_OK */
- else if(res) {
- curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);
- if(response/100 == 4)
- /*
- * This is typically when the FTP server only allows a certain
- * amount of users and we are not one of them. All 4xx codes
- * are transient.
- */
- retry = RETRY_FTP;
- }
- // 重试
- if(retry) {
- static const char * const m[]={
- NULL, "timeout", "HTTP error", "FTP error"
- };
- warnf(config, "Transient problem: %s "
- "Will retry in %ld seconds. "
- "%ld retries left.\n",
- m[retry], retry_sleep/1000, retry_numretries);
-
- // 先睡一下
- go_sleep(retry_sleep);
- retry_numretries--;
- if(!config->retry_delay) {
- retry_sleep *= 2;
- if(retry_sleep > RETRY_SLEEP_MAX)
- retry_sleep = RETRY_SLEEP_MAX;
- }
- if(outs.bytes && outs.filename) {
- /* We have written data to a output file, we truncate file
- */
- if(!config->mute)
- fprintf(config->errors, "Throwing away %"
- CURL_FORMAT_CURL_OFF_T " bytes\n",
- outs.bytes);
- fflush(outs.stream);
- /* truncate file at the position where we started appending */
- #ifdef HAVE_FTRUNCATE
- if(ftruncate( fileno(outs.stream), outs.init)) {
- /* when truncate fails, we can't just append as then we'll
- create something strange, bail out */
- if(!config->mute)
- fprintf(config->errors,
- "failed to truncate, exiting\n");
- break;
- }
- /* now seek to the end of the file, the position where we
- just truncated the file in a large file-safe way */
- fseek(outs.stream, 0, SEEK_END);
- #else
- /* ftruncate is not available, so just reposition the file
- to the location we would have truncated it. This won't
- work properly with large files on 32-bit systems, but
- most of those will have ftruncate. */
- fseek(outs.stream, (long)outs.init, SEEK_SET);
- #endif
- outs.bytes = 0; /* clear for next round */
- }
- continue;
- }
- } /* if retry_numretries */
复制代码 5. 从循环里面出来之后,就是当前URL处理的收尾工作了,如下:
处理一下进度条
打印一下错误输出
关闭一下相关的文件
释放一下相关的资源
curl的大致流程就是这样了,咱们今天先到这里,回头有时间再分析curl的其它部分,以及最核心的libcurl |
|