- 论坛徽章:
- 0
|
- #/usr/bin/python
- # -*- coding: utf-8 -*-
- """
- Fulltext Server 网络应用支持插件,用于构造 问答系统的增量索引。
- 版权属于(C) 2008 coreseek.com,保留所有权利
- 请仔细阅读代码中的注释,如果对Python语言不熟悉,请不要修改本文件
- """
- import MySQLdb
- import unicodedata
- import re
- import time
- import sys
- import signal
- import os
- from os.path import dirname, join, pardir
- # unicode convert helper function
- def u(s):
- if isinstance(s, unicode):
- return s;
- else:
- return unicode(s, g_datasource_encoding , errors='ignore')
- #初始化,用于检测当前文件是否被Python解释器直接执行
- bStandAlone = False
- if not 'DataSource' in dir():
- # make the define
- class DataSource:
- pass
- bStandAlone = True
- #包含db_conn.py所在的目录
- if '__file__' in globals():
- #include parent dir
- sys.path.insert(0, join(dirname(__file__), pardir))
- #检测系统参数,当存在该参数时,系统将读取该参数所指的文件作为 配置文件。
- if 'argv' in dir(sys) and len(sys.argv) > 1:
- #0 self, 1 argv
- try:
- execfile (sys.argv[1])
- except:
- #normal route.
- from t_db_conn import *
- else:
- from t_db_conn import *
- class t_Private:
- def __init__(self):
- self.m_cursor = None
- self.m_dbconn = None
- self.m_docId = 0 # the 1st field if wanna to use autoid, return -1. means no id defined
- #range query.
- self.m_minid = 0
- self.m_maxid = 0
- self.m_start_id = 0;
- #base str
- self.m_basesql_str = ''
-
- def Connected(self):
- try:
- self.m_dbconn = MySQLdb.connect (host = g_database_host,
- port = g_database_port,
- user = g_database_username,
- passwd = g_database_password,
- db = g_database_dbname)
- except MySQLdb.Error, e:
- print "Error %d: %s" % (e.args[0], e.args[1])
- return False
- return True
-
- def BeforeIndex(self):
- #select max & min doc_id
- if self.m_dbconn == None:
- return False
-
- sql = """SELECT MIN(id),MAX(id) FROM {$prefix}contentlist"""
- sql = sql.replace("{$prefix}",g_table_prefix)
- try:
- self.m_cursor = self.m_dbconn.cursor ()
- rowCount = self.m_cursor.execute (sql)
- except MySQLdb.Error, e:
- print "Error %d: %s" % (e.args[0], e.args[1])
- return False
- if rowCount == 0:
- return False
- tm_row = self.m_cursor.fetchone()
- #print tm_row[0],tm_row[1]
- if tm_row[0]:
- self.m_minid = tm_row[0]
- if tm_row[1]:
- self.m_maxid = tm_row[1]
- #self.m_minid = 4667671
- self.m_start_id = self.m_minid
-
- self.m_cursor.close ()
- self.m_cursor = None
-
- sql = """SELECT c.id AS id, c.CateID AS cid,
- c.AuthorID AS AuthorID, c.Title, c.Description,
- c.Author AS author, c.State AS State,
- c.PublicTime AS dateline, c.Top as top,
- c.pink as pink, cc.content as content,
- ccc.HitNum as views,ccc.CommentNum as comments
- FROM {$prefix}contentlist AS c
- LEFT JOIN {$prefix}content AS cc ON cc.ContentID = c.id
- LEFT JOIN {$prefix}counter AS ccc ON c.id=ccc.ContentID"""
-
- self.m_basesql_str = sql.replace("{$prefix}",g_table_prefix)
- return True
-
- def NextDocument(self, item, bDofetch):
- if self.m_cursor == None or bDofetch:
- #do fetch
- try:
- self.m_cursor = self.m_dbconn.cursor ()
- sql_condition = " AND c.id>="+str(self.m_start_id) + " AND c.id<="+str(self.m_start_id+g_each_max_record_count)
- sql_condition = sql_condition.replace("{$prefix}",g_table_prefix)
- rowCount = self.m_cursor.execute (self.m_basesql_str+sql_condition)
- self.m_start_id = self.m_start_id + g_each_max_record_count+1; #append 1 to avoid doc_id duplicate.
- #print 'select ', self.m_start_id, self.m_maxid
- except MySQLdb.Error, e:
- print "Error %d: %s" % (e.args[0], e.args[1])
- return False
- if rowCount == 0 and self.m_start_id > self.m_maxid:
- return False;
- self.m_row = self.m_cursor.fetchone()
- else:
- self.m_row = self.m_cursor.fetchone()
- if self.m_row == None:
- return self.NextDocument(item,True)
- #do data assign
- item.post_id = self.m_row[0]
- item.cid = self.m_row[1]
- item.authorid = self.m_row[2]
- item.title = self.m_row[3]
- item.description = self.m_row[4]
- item.author = self.m_row[5]
- item.state = self.m_row[6]
- item.dateline = self.m_row[7]
- item.top = self.m_row[8]
- item.pink = self.m_row[9]
- item.content = self.m_row[10]
- item.views = self.m_row[11]
- item.comments = self.m_row[12]
- #item.post_id = 0
- return True
- def OnIndexFinished(self):
- sql = """UPDATE {$prefix}settings SET value='{$id}' WHERE variable='idx_postid'
- """
- sql = sql.replace("{$prefix}", g_table_prefix)
- sql = sql.replace("{$id}", str(self.m_maxid))
- print sql
- print self.m_maxid
- #self.Connected()
- #self.m_cursor = self.m_dbconn.cursor ()
- #Change DataBase Enocding here, is to support gbk or others.
- self.m_cursor.execute(sql)
- #self.m_dbconn.cursor().execute(sql)
- return True
- # your must declare a class drive from DataSource, if mutil-class defined in py file, only the 1st will be use.
- class t_DataSource(DataSource):
- def __init__(self):
- """
- in this function ,you should define data schema, string2ord is not supported.
- supported types:
- string(use to launch full text search)
- number(int)
- timestamp(time)
- list(mva)
- if a attribue started with '__', it will not be used as a data attribute
- """
-
- self.post_id = 0
- self.cid = 0
- self.author = 0
- self.title = ''
- self.description = ''
- self.author = ''
- self.state = 0
- self.views = 0
- self.state = 0
- self.top = 0
- self.content = ''
- self.pink = 0
- self.comments = 0
- self.dateline = 0
- #self.digest = True
- #self.istop = True
- #inner attribute define
- self.__docid__ = 'post_id'; #use this to declare which attr is document unique id.
- self.__p__ = t_Private();
- pass
-
- def Dump_Object(self):
- ret = ''
- instancemethod = type(self.NextDocument)
- for s in dir(self):
- if s[0] == '_':
- continue
- if type(getattr(self,s)) == instancemethod:
- continue
- #print type(getattr(self,s)),s
- ret = ret + s+': '+ str(getattr(self,s)) + ', '
- return ret;
-
- def __del__(self):
- #print 'delete source';
- pass
-
- def Connected(self):
- return self.__p__.Connected();
-
- def OnBeforeIndex(self):
- #print 'OnBeforeIndex called';
- return self.__p__.BeforeIndex();
-
- def NextDocument(self):
- global NEEDEXIT
- if NEEDEXIT:
- return False
- bRet = self.__p__.NextDocument(self,False);
- #you can deal the context ,eg.
- """
- 1 skip too small documents
- 2 strip htmls
- 3 strip emotions.
- """
- #do encoding convert also, sphinx can only accept utf-8 string.
- if bRet:
- pass
- #self.title = u(self.title).encode('gbk')
- #self.content = u(self.content).encode('gbk')
- else:
- self.post_id = 0
- return bRet
-
- def OnAfterIndex(self):
- #print 'OnAfterIndex';
- pass
- def OnIndexFinished(self):
- print 'OnIndexFinished'
- return self.__p__.OnIndexFinished();
-
- #deal ctrl+c in indexer
- NEEDEXIT=False
- def SignalHandler(sig, id):
- global NEEDEXIT
- NEEDEXIT = True
- pass
-
- signal.signal(signal.SIGINT, SignalHandler)
- # test bench
- if bStandAlone:
- ds = Ask_DataSource();
- ds.Connected();
- if not ds.OnBeforeIndex():
- exit(0)
- while(1):
- if not ds.NextDocument():
- break;
- if ds.post_id <= 0:
- break
- print ds.Dump_Object();
- ds.OnAfterIndex();
- ds.OnIndexFinished();
- pass
- #data source define end
复制代码
这个程序在执行的时候会报错。
错误提示代码如下:
- Exception exceptions.AttributeError: "'Cursor' object has no attribute 'connecti
- on'" in <bound method Cursor.__del__ of <MySQLdb.cursors.Cursor object at 0x00E2
- 5F50>> ignored
- Exception exceptions.AttributeError: "'Cursor' object has no attribute 'connecti
- on'" in <bound method Cursor.__del__ of <MySQLdb.cursors.Cursor object at 0x00E2
- 5F50>> ignored
- Exception exceptions.AttributeError: "'Cursor' object has no attribute 'connecti
- on'" in <bound method Cursor.__del__ of <MySQLdb.cursors.Cursor object at 0x00E2
- 5F50>> ignored
- Exception exceptions.AttributeError: "'Cursor' object has no attribute 'connecti
- on'" in <bound method Cursor.__del__ of <MySQLdb.cursors.Cursor object at 0x00E2
- 5F50>> ignored
- Exception exceptions.AttributeError: "'Cursor' object has no attribute 'connecti
- on'" in <bound method Cursor.__del__ of <MySQLdb.cursors.Cursor object at 0x00E2
- 5F50>> ignored
- Exception exceptions.AttributeError: "'Cursor' object has no attribute 'connecti
- on'" in <bound method Cursor.__del__ of <MySQLdb.cursors.Cursor object at 0x00E2
- 5F50>> ignored
- Exception exceptions.AttributeError: "'Cursor' object has no attribute 'connecti
- on'" in <bound method Cursor.__del__ of <MySQLdb.cursors.Cursor object at 0x00E2
- 5F50>> ignored
- Exception exceptions.AttributeError: "'Cursor' object has no attribute 'connecti
- on'" in <bound method Cursor.__del__ of <MySQLdb.cursors.Cursor object at 0x00E2
- 5F50>> ignored
- Traceback (most recent call last):
- File "<string>", line 1, in <module>
- File "E:\searchd\share\hxkang\plugins\hxkang.main.py", line 246, in OnIndexFin
- ished
- return self.__p__.OnIndexFinished();
- File "E:\searchd\share\hxkang\plugins\hxkang.main.py", line 160, in OnIndexFin
- ished
- self.m_cursor.execute(sql)
- File "C:\Python25\Lib\site-packages\MySQLdb\cursors.py", line 143, in execute
- from types import ListType, TupleType
- TypeError: an integer is required
- Exception exceptions.TypeError: 'an integer is required' in 'garbage collection'
- ignored
- Fatal Python error: unexpected exception during garbage collection
复制代码 前面的错误提示是出现非常多行,在强行ctrl+C退出后出现Traceback后面这段错误; |
|