- 论坛徽章:
- 0
|
[Python]代码- #!/usr/bin/env python
- #-*- coding:utf-8-*-
-
- import urllib2
- import re
- import hashlib
- import json
-
- #--------------------------------------------------- 工具 start
def md5(str):
    '''
    Return the hexadecimal MD5 digest of ``str``.

    Byte strings are hashed exactly as given. Unicode text is encoded as
    UTF-8 before hashing, because the hash object works on bytes: without
    this step non-ASCII unicode text fails on Python 2 and any text value
    fails on Python 3.

    The parameter name shadows the ``str`` builtin; it is kept unchanged so
    that existing keyword callers keep working.
    '''
    data = str
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3. The
    # builtin name cannot be used here because the parameter shadows it.
    if isinstance(data, type(u'')):
        data = data.encode('utf-8')
    return hashlib.md5(data).hexdigest()
-
def search(regex, content, group=1):
    '''
    Return the text of ``group`` from the first match of ``regex``.

    The pattern is applied with ``re.DOTALL`` so ``.`` also matches
    newlines.

    regex   -- regular expression to look for.
    content -- text to scan.
    group   -- group number (or name) to return; defaults to 1.

    Returns '' when the pattern does not match, and also when the pattern
    matches but the requested group took no part in the match, so callers
    always get a string back.
    '''
    match = re.search(regex, content, re.DOTALL)
    if match is None:
        return ''
    captured = match.group(group)
    # An optional group that did not participate yields None; normalise it
    # to the same empty-string result used for "no match".
    if captured is None:
        return ''
    return captured
-
def findall(regex, content):
    '''
    Return every non-overlapping match of ``regex`` found in ``content``.

    The pattern is compiled with ``re.DOTALL`` so ``.`` also matches
    newlines. The result is the list produced by the pattern's own
    ``findall`` method.
    '''
    compiled = re.compile(regex, re.DOTALL)
    matches = compiled.findall(content)
    return matches
-
def cleanHtmlTag(content):
    '''
    Remove HTML tags from ``content`` and trim surrounding whitespace.

    Everything from a ``<`` up to the next ``>`` is deleted. Empty or None
    input yields ''.
    '''
    # Guard first: there is nothing to strip from an empty value, and
    # re.sub would raise on None.
    if not content:
        return ''
    return re.sub(r'<[^>]*?>', '', content).strip()
-
def cleanedSearch(regex, content, group = 1):
    '''
    Look up ``regex`` in ``content`` and return the selected group with
    HTML tags removed.

    The lookup is delegated to ``search`` and the clean-up to
    ``cleanHtmlTag``; the arguments are passed to ``search`` unchanged.
    '''
    matched_text = search(regex, content, group)
    cleaned_text = cleanHtmlTag(matched_text)
    return cleaned_text
-
def httpGet(url, encoding='gbk'):
    '''
    Send an HTTP GET request and return the response body.

    url      -- address to fetch, passed to ``urllib2.urlopen``.
    encoding -- charset used to decode the response body; defaults to
                'gbk'. Bytes that cannot be decoded are dropped.

    Returns the body re-encoded as a UTF-8 byte string. Errors raised by
    ``urllib2.urlopen`` or ``read`` propagate to the caller.
    '''
    response = urllib2.urlopen(url)
    try:
        raw = response.read()
    finally:
        # Always release the connection, even when read() fails.
        response.close()
    return raw.decode(encoding, 'ignore').encode('utf-8')
-
def toJson(dict):
    '''
    Serialise ``dict`` to a JSON string indented by four spaces.

    Non-ASCII characters are written as-is rather than as ``\\uXXXX``
    escapes. The parameter name shadows the ``dict`` builtin; it is kept
    for compatibility with existing callers.
    '''
    options = {'ensure_ascii': False, 'indent': 4}
    return json.dumps(dict, **options)
-
- #--------------------------------------------------- 工具 end
复制代码 |
|