- 论坛徽章:
- 0
|
代码如下
#include <stdio.h>
#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>
#include <string.h>
#define SUBSLEN 10
#define EBUFLEN 128 /* error buffer length */
#define BUFLEN 1024 /* matched buffer length */
int main (int argc, char **argv)
{
size_t len; /* store error message length */
regex_t re; /* store compilned regular expression */
regmatch_t subs[SUBSLEN]; /* store matched string position */
char matched[BUFLEN]; /* store matched strings */
char errbuf[EBUFLEN]; /* store error message */
int err, i;
char string[] = "http+++http://club.cat898.com/newbbs/dispbbs.asp?boardid=1&id=2098717<;";
char pattern[] = "(ftp|https?)://([[:alnum:]*_.-]*:[[:alnum:]*_.-]*@)?[[:alnum:]*_.-]{,250}(:[0-9]+)?(/([[:alnum:]*_.-/]{,250}([?][[:alnum:]*_.-=]{,250}(&[[:alnum:]*_.-=]{,250})*)?)?)?";
printf ("String : %s\n", string);
printf ("Pattern: %s\n", pattern);
/* compile regular expression */
err = regcomp (&re, pattern, REG_EXTENDED);
if (err)
{
len = regerror (err, &re, errbuf, sizeof (errbuf));
fprintf (stderr, "error: regcomp: %s\n", errbuf);
exit (1);
}
printf ("Total has subexpression: %d\n", re.re_nsub);
/* execute pattern match */
err = regexec (&re, string, (size_t)SUBSLEN, subs, 0);
if (err == REG_NOMATCH)
{
fprintf (stderr, "Sorry, no match ...\n");
regfree (&re);
exit (0);
}
else if (err)
{
len = regerror (err, &re, errbuf, sizeof (errbuf));
fprintf (stderr, "error: regexec: %s\n", errbuf);
exit (1);
}
/* if no REG_NOMATCH and no error, then pattern matched */
printf ("\nOK, has matched ...\n\n");
for (i = 0; i <= re.re_nsub; i++)
{
if (i == 0)
{
printf ("begin: %d, end: %d, ",
subs[i].rm_so, subs[i].rm_eo);
}
else
{
printf ("subexpression %d begin: %d, end: %d, ",
i, subs[i].rm_so, subs[i].rm_eo);
}
len = subs[i].rm_eo - subs[i].rm_so;
memcpy (matched, string + subs[i].rm_so, len);
matched[len] = '\0';
printf ("match: %s\n", matched);
}
regfree(&re);
exit(0);
}
|
但是执行结果却不是我想要的:
String : http+++http://club.cat898.com/newbbs/dispbbs.asp?boardid=1&;id=2098717<
Pattern: (ftp|https?)://([[:alnum:]*_.-]*:[[:alnum:]*_.-]*@)?[[:alnum:]*_.-]{,250}(:[0-9]+)?(/([[:alnum:]*_.-/]{,250}([?][[:alnum:]*_.-=]{,250}(&[[:alnum:]*_.-=]{,250})*)?)?)?
Total has subexpression: 7
OK, has matched ...
begin: 7, end: 70, match: http://club.cat898.com/newbbs/dispbbs.asp?boardid=1&;id=2098717<
subexpression 1 begin: 7, end: 11, match: http
subexpression 2 begin: -1, end: -1, match:
subexpression 3 begin: -1, end: -1, match:
subexpression 4 begin: 29, end: 70, match: /newbbs/dispbbs.asp?boardid=1&id=2098717<
subexpression 5 begin: 30, end: 70, match: newbbs/dispbbs.asp?boardid=1&id=2098717<
subexpression 6 begin: 48, end: 70, match: ?boardid=1&id=2098717<
subexpression 7 begin: 58, end: 70, match: &id=2098717<
主要就是最后的那个<也匹配进去了,在正则表达式中最后是(&[[:alnum:]*_.-=]{,250})*,
并没有包括字符<
请问是为什么?
谢谢!!!
[ 本帖最后由 13706808 于 2008-3-2 10:10 编辑 ] |
|