- 论坛徽章:
- 0
|
在网上看了一段代码,有些地方没看明白,希望好心的 CU 网友帮我指点一下思路。源码来自于:http://www.monkey.org/~provos/crawl/
1.这里是调用部分
/*
 * Register image_saver as the callback for three type strings.
 * http_register_dispatch() only stores the string and the function
 * pointer in a dispatch entry and appends it to dispatchqueue; it does
 * not call image_saver itself.  The string is therefore a lookup key
 * (presumably matched against a response's Content-Type by code that is
 * not shown here -- confirm), not the URL: image_saver builds its URL
 * from the struct uri it receives when it is eventually invoked.
 */
http_register_dispatch("image/", image_saver);
http_register_dispatch("audio/", image_saver);
http_register_dispatch("video/", image_saver);
2.http_register_dispatch
/*
 * Append a (type, callback) pair to the global dispatch queue.
 *
 * type: key string for the entry (call sites pass prefixes such as
 *       "image/"; presumably a MIME type prefix -- confirm).  The pointer
 *       is stored as-is, not copied, so the string must outlive the entry.
 * cb:   handler to associate with the key; it takes the struct uri being
 *       processed.
 *
 * Returns 0 on success, or -1 if the entry could not be allocated (a
 * warning is printed via warn()).
 */
int
http_register_dispatch(char *type, int (*cb)(struct uri *))
{
	struct dispatch *dispatch;

	/* Size the allocation from the pointer so it tracks the type. */
	dispatch = malloc(sizeof(*dispatch));
	if (dispatch == NULL) {
		warn("%s: malloc", __func__);
		return (-1);
	}

	dispatch->type = type;
	dispatch->cb = cb;
	TAILQ_INSERT_TAIL(&dispatchqueue, dispatch, next);

	return (0);
}
/*
 * Dispatch callback for media responses.
 *
 * For a HEAD request it checks the reported length against the configured
 * minimum/maximum and, if acceptable, queues the same URL again as a GET.
 * For any other request it writes the currently buffered body bytes to a
 * file derived from the URL.
 *
 * Returns 0 on success and -1 when the object is rejected or an operation
 * fails.
 */
int image_saver(struct uri *uri)
{
	struct stat st;
	char progress[128];
	const char *suffix = "";
	char *target;
	char *path;
	int is_get;

	/*
	 * Poster's note (translated): "I don't quite understand this part --
	 * the call http_register_dispatch("image/", image_saver) clearly
	 * passes "image/"."
	 * The "image/" string is only the key stored with this callback at
	 * registration time.  The URL is built here from uri->url, i.e. from
	 * the struct uri handed to the callback by its caller.
	 */
	target = http_make_url(&uri->url);

	/*
	 * A download that was expected to be HTML can turn out to be
	 * media; only accept it if the URL passes the image filter.
	 */
	if (uri->save_fd == -1 && !img_permitted(target))
		return (-1);

	/* Build a progress suffix when only part of the body has arrived. */
	if (uri->length != -1 && uri->bdlen != uri->length) {
		snprintf(progress, sizeof(progress), " (%4.1f%%/%d)",
		    (float)uri->bdlen/uri->length*100, uri->length);
		suffix = progress;
	}

	is_get = (uri->flags & HTTP_REQUEST_GET) != 0;
	fprintf(stdout, "%s %s%s\n",
	    is_get ? "GET" : "HEAD",
	    target, is_get ? suffix : "");

	if (uri->flags & HTTP_REQUEST_HEAD) {
		int minlen, maxlen;
		int too_small, too_large;

		if (uri->format != NULL) {
			/* Per-format limits, falling back to the defaults */
			minlen = conf_get_num(uri->format, "Min-Length",
			    media_minlen);
			maxlen = conf_get_num(uri->format, "Max-Length",
			    media_maxlen);
		} else {
			minlen = media_minlen;
			maxlen = media_maxlen;
		}

		/* A limit of -1 means that bound is not enforced. */
		too_small = (minlen != -1 && uri->length < minlen);
		too_large = (maxlen != -1 && uri->length > maxlen);
		if (too_small || too_large)
			return (-1);

		/* Size is acceptable: queue the same URL again as a GET. */
		http_add(HTTP_REQUEST_GET, target, uri->depth);
		return (0);
	}

	path = construct_path(target, 1);
	if (path == NULL)
		return (-1);

	/* Skip if a file at least as large as the object already exists. */
	if (stat(path, &st) != -1 && sb_size_at_least(st, uri))
		return (-1);

	/* Open the output file on the first write for this uri. */
	if (uri->save_fd == -1) {
		uri->save_fd = open(path, O_WRONLY|O_CREAT|O_TRUNC, 0644);
		if (uri->save_fd == -1)
			return (-1);
	}

	if (atomicio(write, uri->save_fd, uri->body, uri->bdread) == -1)
		return (-1);

	/* Buffered bytes have been written out. */
	uri->bdread = 0;
	return (0);
}
/*
 * Format a struct url as an "http://host[:port]file" string.
 *
 * The port is included only when it differs from HTTP_DEFAULTPORT.
 *
 * Returns a pointer to a static buffer: the result is overwritten by the
 * next call, and output longer than the buffer is truncated by snprintf().
 */
char *
http_make_url(struct url *url)
{
	/*
	 * Poster's note (translated): "I can no longer tell where the data
	 * at this step comes from."
	 * Everything printed here comes from the fields of the struct url
	 * passed in by the caller (host, port, file); image_saver passes
	 * &uri->url.
	 */
	static char output[1024];

	if (url->port != HTTP_DEFAULTPORT)
		snprintf(output, sizeof(output), "http://%s:%d%s",
		    url->host, url->port, url->file);
	else
		snprintf(output, sizeof(output), "http://%s%s",
		    url->host, url->file);

	return (output);
}
最终我想知道:注册时传递的参数只是 "image/" 这样的类型字符串,那么 image_saver 里用到的 url 是怎么生成的? |
|