- 论坛徽章:
- 0
|
以下是 python-sidebar 中 update.py 的源代码：
- # -*- coding: iso8859-1 -*-
- """
- @Author: Daniel Lundin <daniel@edgewall.com>
- @Contact: daniel@edgewall.com
- @Organization: Edgewall Software
- @Copyright: Copyright (C) 2004 Edgewall Software. All Rights Reserved.
- The Python Sidebar is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
- The Python Sidebar is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- Changelog
- ---------
- - 1.0.2 (Oct 11, 2004)
- - Scraping the modules index missed modules with comments, like
- windows/mac-only modules.
- - 1.0.1 (Jul 29, 2004)
- - Added target="_content" to all links to make the sidebar work better in
- other browsers. Thanks to Jarkko Laiho for the suggestion and patch.
- Todo
- ----
- * Wrap sidebar in XUL to make it appear in the panel menu (thanks to Chris
- Green for the idea).
- """
- __version__ = '1.0.2'
- import re
- import os.path
- import time
- from urllib2 import urlopen
# Page skeleton shared by every generated sidebar file.  It is filled in with
# the %-operator and a mapping providing the keys: ver, menu, title, content
# and timestamp.  The document declares XHTML 1.0 Strict, so literal "&", "<"
# and ">" characters in text and attribute values are written as entities.
html_template = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/2000/REC-xhtml1-20000126/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>Python Sidebar %(ver)s</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15" />
<meta name="author" content="Daniel Lundin &lt;daniel@edgewall.com&gt;" />
<meta name="description" content="A nifty Python documentation sidebar for Mozilla" />
<meta name="keywords" content="python sidebar,python,documentation,reference,mozilla,firefox,edgewall,edgewall software" />
<script type="text/javascript" src="cm.js"></script>
<style type="text/css">
@import url(sidebar.css);
</style>
</head>
<body onload="cm()">
<div id="top">
<div id="menu">%(menu)s</div>
<h1>%(title)s</h1>
</div>
<div id="toc">
%(content)s
<div id="lastmod">
Last modified:<br />
%(timestamp)s
</div>
</div>
<div id="bottom">
<div id="footer">
<address>
<a href="http://www.edgewall.com/">Edgewall Software</a> -
Expert Linux &amp; Software Development Services.
<a href="mailto:info@edgewall.com">info@edgewall.com</a>
</address>
</div>
<a href="http://www.python.org/"><img src="python.gif" /></a>
</div>
</body></html>
"""
class BuildPage:
    """Base class that builds one sidebar page.

    Instantiating the class performs the whole build: the constructor
    fetches ``url`` through ``get_rawdata``, passes the result through
    ``scrape``, renders it with ``to_html`` and writes the page with
    ``save_html``.  Subclasses override the class attributes below and,
    where needed, ``scrape`` or ``get_rawdata``.
    """

    url = ''        # address handed to get_rawdata()
    title = ''      # used for the <h1> heading and the menu link tooltip
    menutxt = ''    # text (or markup) shown for this page in the menu
    outfile = ''    # output file name; __init__ joins it onto out_dir

    def __init__(self, out_dir):
        """Build the page and write it below *out_dir*.

        Raises ValueError when loading yields None, or when scraping or
        rendering yields an empty result.
        """
        # Shadows the class attribute on the instance only; the class-level
        # value stays the bare file name, which make_menu() relies on.
        self.outfile = os.path.join(out_dir, self.outfile)
        data = self.get_rawdata(self.url)
        if data is None:
            raise ValueError('Error loading data')
        content = self.scrape(data)
        if not content:
            raise ValueError('Error scraping data')
        html = self.to_html(content)
        if not html:
            raise ValueError('Error creating HTML')
        self.save_html(self.outfile, html)

    def save_html(self, outfile, html):
        """Write *html* to the file *outfile*, replacing any existing file."""
        f = open(outfile, 'w')
        try:
            f.write(html)
        finally:
            # Close the handle even when the write fails.
            f.close()

    def make_menu(self):
        """Return the ``<ul>`` menu linking to every class in ``all_pages``.

        The entry for this instance's own class gets ``class="current"``.
        """
        parts = ['<ul>']
        for p in all_pages:
            # p is a page class, so p.outfile is the bare file name.
            parts.append('<li><a href="javascript:goSidebar(\'%s\')"' % p.outfile)
            parts.append(' title="%s"' % p.title)
            if self.__class__ is p:
                parts.append(' class="current"')
            # Fall back to the linked page's own title, not the current one's.
            parts.append(' target="_self">%s</a></li>'
                         % getattr(p, 'menutxt', p.title))
        parts.append('</ul>')
        return ''.join(parts)

    def to_html(self, content):
        """Render *content* into ``html_template`` and return the page."""
        tmpl = {
            'timestamp': time.strftime('%c', time.localtime()),
            'title': self.title,
            'content': content,
            'menu': self.make_menu(),
            'ver': __version__,
        }
        return html_template % tmpl

    def scrape(self, data):
        """Return the page content extracted from *data*.

        The base implementation returns *data* unchanged.
        """
        return data

    def get_rawdata(self, url):
        """Fetch *url* with ``urlopen`` and return the response body."""
        f = urlopen(url)
        try:
            return f.read()
        finally:
            # Release the connection even when reading fails.
            f.close()
class BuildTutorial(BuildPage):
    """Sidebar page holding the table of contents of the Python Tutorial.

    The other table-of-contents pages subclass this one and change only
    the class attributes.
    """

    url = 'http://www.python.org/doc/current/tut/tut.html'
    # Prefix inserted in front of every href found in the scraped fragment.
    baseref = 'http://www.python.org/doc/current/tut/'
    outfile = 'toc-tutorial.html'
    title = 'Python Tutorial'
    menutxt = 'tut'

    def scrape(self, data):
        """Return the child-links list cut out of *data*.

        The fragment runs from the ``<UL CLASS="ChildLinks">`` tag up to
        the end-of-child-links comment.  Each ``href="`` in it is
        rewritten to start with ``baseref`` and gains a
        ``target="_content"`` attribute.

        Raises ValueError when either marker is missing.
        """
        beg = data.find('<UL CLASS="ChildLinks">')
        end = data.find('<!--End of Table of Child-Links-->')
        if beg == -1 or end == -1:
            raise ValueError('Unable to scrape data.')
        fragment = data[beg:end]
        return fragment.replace('href="',
                                'target="_content" href="%s' % self.baseref)
class BuildLibRef(BuildTutorial):
    """Sidebar page for the Python Library Reference.

    Only the class attributes differ from BuildTutorial; scraping is
    inherited.
    """

    title = 'Python Library Reference'
    menutxt = 'lib'
    url = 'http://www.python.org/doc/current/lib/lib.html'
    baseref = 'http://www.python.org/doc/current/lib/'
    outfile = 'toc-libref.html'
class BuildLangRef(BuildTutorial):
    """Sidebar page for the Python Language Reference.

    Only the class attributes differ from BuildTutorial; scraping is
    inherited.
    """

    title = 'Python Language Reference'
    menutxt = 'ref'
    url = 'http://www.python.org/doc/current/ref/ref.html'
    baseref = 'http://www.python.org/doc/current/ref/'
    outfile = 'toc-ref.html'
class BuildAPIRef(BuildTutorial):
    """Sidebar page for the Python/C API Reference.

    Only the class attributes differ from BuildTutorial; scraping is
    inherited.
    """

    title = 'Python/C API Reference'
    menutxt = 'api'
    url = 'http://www.python.org/doc/current/api/api.html'
    baseref = 'http://www.python.org/doc/current/api/'
    outfile = 'toc-api.html'
class BuildExtEmb(BuildTutorial):
    """Sidebar page for "Extending and Embedding Python".

    Only the class attributes differ from BuildTutorial; scraping is
    inherited.
    """

    title = 'Extending and Embedding Python'
    menutxt = 'ext'
    url = 'http://www.python.org/doc/current/ext/ext.html'
    baseref = 'http://www.python.org/doc/current/ext/'
    outfile = 'toc-ext.html'
class BuildMods(BuildPage):
    """Sidebar page listing the modules found in the global module index."""

    # Prefix put in front of every module link taken from the index.
    baseref = 'http://www.python.org/doc/current/'
    url = 'http://www.python.org/doc/current/modindex.html'
    outfile = 'modindex.html'
    title = 'Module Index'
    menutxt = 'mod'

    def scrape(self, data):
        """Return the module links found in *data* as two columns.

        Every index entry matched by the pattern becomes an ``<li>`` link
        whose href is prefixed with ``baseref``.  The first half of the
        entries goes into ``<div class="left">`` and the rest into
        ``<div class="right">``.
        """
        # The trailing ".*</a>" lets entries carry extra text after the
        # module name, before the closing anchor tag.
        pattern = re.compile(
            r'<dt><a href="(.*)"><tt class="module">(.*)</tt>.*</a>')
        items = ['<li><a href="%s%s" target="_content">%s</a></li>'
                 % (self.baseref, m.group(1), m.group(2))
                 for m in pattern.finditer(data)]
        # Floor division keeps the split point an integer under both
        # classic and true division.
        half = len(items) // 2
        return '<div class="left">%s</div><div class="right">%s</div>' % (
            '\n'.join(items[:half]), '\n'.join(items[half:]))
class BuildSearch(BuildPage):
    """Sidebar page carrying the static search forms.

    Nothing is downloaded for this page: ``get_rawdata`` returns an empty
    string and ``scrape`` returns the fixed markup in ``search_html``.
    """

    outfile = 'search.html'
    title = 'Search'
    menutxt = '<img src="search.gif"/>'

    # Fixed form markup used as the page content.
    search_html = """
<h2>Python Website</h2>
<form method="get" action="http://www.google.com/search" target="_content">
<input type="hidden" name="sourceid" value="python-sidebar">
<input name="q" size="30" value="">
<input type="submit" name="submit" value="Search"><br>
<input type="radio" name="q" value="site:python.org" checked="checked">All of python.org
<br />
<input type="radio" name="q" value="site:www.python.org">www.python.org
<br />
<input type="radio" name="q" value="site:mail.python.org">python.org mailling lists
<br />
<input type="radio" name="q" value="site:ftp.python.org">ftp.python.org
</form>
<h2>Python Newsgroups</h2>
<form action="http://groups.google.com/groups" method="get" target="_content">
<input type="hidden" name="group" value="comp.lang.python.*">
<input type="text" name="q" value="" size="30">
<input type="submit" value="Search">
<a href="http://groups.google.com/advanced_group_search">(advanced
search)</a>
</form>
"""

    def scrape(self, data):
        """Ignore *data* and return the static search markup."""
        return self.search_html

    def get_rawdata(self, url):
        """Return an empty string instead of fetching *url*."""
        return ''
# Page classes in the order they are built by build_all() and listed in the
# menu by BuildPage.make_menu().
all_pages = [
    BuildTutorial,
    BuildLangRef,
    BuildLibRef,
    BuildAPIRef,
    BuildExtEmb,
    BuildMods,
    BuildSearch,
]
def build_all(out_dir):
    """Build every page class listed in ``all_pages`` into *out_dir*.

    The directory is created first when it does not exist, because each
    page opens its output file for writing inside it.
    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    for page_class in all_pages:
        # Instantiating a page class runs its whole build.
        page_class(out_dir)
if __name__ == '__main__':
    # Run as a script: write all pages into ./html, relative to the current
    # working directory.
    build_all('./html')
复制代码
我应该如何修改才能浏览本地文档呢？我试过生成的 modindex.html 和 search.html，查看其中的链接，发现全部都指向网上的在线文档。
|
|