- 论坛徽章:
- 0
|
顺便发布刚刚作好的 PHP 访问 cscwsd 的操作类, 最后附了一个小例子.
这样搭配用起来的话,效率相当可观, 粗估了一下1秒可以处理100万字符以上, 这样作甚至可能比作成PHP扩展还要好.
关于分词准确度方面还请大家共同继续改进, 不希望太复杂的算法, 马马虎虎过得去就可以了
- <?php
- /**
- cscwsd-0.0.1 (PHP版查询交互程序示例版)
- 由于客户端与服务器之间无法明确判断一次数据读取何时结束, 故目前 socket 设为非阻塞模式
-
- 尝试读取一定次数后仍没数据就假设已经读完, 在 close() 前调用 recv() 会比较浪费资源.
- 这一点请注意, 务必一个 send() 对应一次 recv()
- send 时程序自动在 string 的最后追加结束标记 _SPECIAL_TAG_ 和换行符 "\n" (而不是 '\x01')
- PHP 版本要求 >= 4.3.0
- 类用法:
- 建立操作句柄: server,port 开启
- $cws = new CSCWS('localhost', 4700);
- $cws->open('localhost', 4700); // 若已打开且port/host不对就先关闭旧的
- $cws = CSCWS::open('localhost', 4700);
- 查询分词(自动侦测连接?):
- $cws->send("我是中国人");
- 获取结果
- echo $cws->recv();
- 设定参数: key=>value
- binary => (on)
- autodis => (on|off)
- ignore_mark => (yes|no)
- delim => '_|/'...
- $cws->set("autodis", "yes")
- 关闭
- $cws->close();
- 变量: dirty (多余的 read次数), rbuf (close之后续读的数据)
- 对于大量的输入输出, 处理上可能会有问题, 请务必在 ->close() 之后读取 $cws->rbuf 来查看
- $Id: CWS_query.class.php,v 1.2 2006/05/28 09:19:42 hightman Exp $
- */
// End-of-reply sentinel. send() appends it to every request, and recv() treats
// a reply as finished once this tag shows up exactly two bytes before the end
// of the data it has read.
// The tag should consist purely of English letters or digits (presumably so
// that the segmenter hands it back untouched -- TODO confirm against cscwsd).
define('_SPECIAL_TAG_', 'abcdefgHgfedcba');
// Byte length of the sentinel.
define('_SPECIAL_LEN_', strlen(_SPECIAL_TAG_));
// Negative substr() offset of the sentinel: its own length plus two trailing bytes.
define('_SPECIAL_OFF_', -(_SPECIAL_LEN_ + 2));
/**
 * Minimal client for a cscwsd word-segmentation server.
 *
 * Usage: open() a connection, send() one text, recv() its segmented form,
 * and finally close(). Every send() should be paired with exactly one recv().
 *
 * The socket is put into non-blocking mode. The end of a reply is detected by
 * the _SPECIAL_TAG_ sentinel that send() appends to each request; recv()
 * expects it back followed by two trailing bytes (_SPECIAL_OFF_).
 *
 * The class deliberately sticks to PHP 4/5 compatible syntax (`var`, no
 * visibility keywords) so that existing callers keep working.
 */
class CSCWS
{
    /** Server host name used by open() and by lazy reconnects. */
    var $host = 'localhost';
    /** Server TCP port. */
    var $port = 4700;
    /** Stream resource of the open connection, or false when disconnected. */
    var $sock = false;
    /** Data that was still pending when close() was called. */
    var $rbuf = '';
    /** Number of read attempts that returned no data. */
    var $dirty = 0;
    /** Number of read attempts that returned data. */
    var $clean = 0;

    /**
     * Constructor. Connects immediately when a host is given.
     *
     * Declared before the legacy CSCWS() method on purpose: PHP 5 raises an
     * E_STRICT notice when the old-style constructor comes first.
     *
     * @param string $host server host; '' keeps the default and does not connect
     * @param int    $port server port; 0 keeps the default
     */
    function __construct($host = '', $port = 0)
    {
        if ($host !== '')
            $this->host = $host;
        if ($port !== 0)
            $this->port = $port;
        if ($host !== '')
            $this->open($this->host, $this->port);
    }

    /**
     * PHP 4 style constructor, kept so that PHP 4 and any code calling
     * CSCWS() explicitly still work. It simply delegates to __construct().
     */
    function CSCWS($host = '', $port = 0)
    {
        $this->__construct($host, $port);
    }

    /**
     * Open (or re-use) the connection to the server.
     *
     * When a connection to a different host/port is already open it is closed
     * first. On failure two errors are triggered; the second one is an
     * E_USER_ERROR, so false is only returned if an error handler lets the
     * script continue.
     *
     * Legacy note: on PHP versions that still allow calling a non-static
     * method statically, CSCWS::open($host) creates and returns a new object.
     *
     * @param string $host
     * @param int    $port
     * @return bool|CSCWS true on success, false on failure (new object for
     *                    the legacy static call)
     */
    function &open($host, $port = 4700)
    {
        if (!isset($this))
        {
            $cws = new CSCWS;
            $cws->open($host, $port);
            return $cws;
        }

        // A function returning by reference must return a variable, not a
        // literal, otherwise PHP raises a notice on every call.
        $result = true;

        /** sock opened? */
        if ($this->sock !== false)
        {
            if ($host === $this->host && $port == $this->port)
                return $result;
            $this->close();
        }

        /** connect to the server */
        $this->host = $host;
        $this->port = $port;
        $this->sock = fsockopen($host, $port, $errno, $errstr, 10);
        if (!$this->sock)
        {
            $this->sock = false;
            trigger_error("fsockopen(): $errstr ($errno)", E_USER_WARNING);
            trigger_error("Failed to open cscwsd server by '$host:$port'.", E_USER_ERROR);
            $result = false;
            return $result;
        }

        /** set to nonblock mode */
        stream_set_blocking($this->sock, 0);
        return $result;
    }

    /**
     * Send a "/set key=value" command to the server.
     *
     * Only the keys autodis, binary, delim and ignore_mark are accepted.
     * Connects lazily with the stored host/port when no connection is open.
     *
     * @param string $key
     * @param string $value
     * @return bool true when the command was written completely, false for an
     *              unknown key or when the connection is unavailable
     */
    function set($key, $value = 'on')
    {
        // Strict comparison: a loose in_array() lets e.g. int 0 match any key
        // on PHP < 8.
        if (!in_array($key, array('autodis', 'binary', 'delim', 'ignore_mark'), true))
            return false;
        if (!$this->sock)
            $this->open($this->host, $this->port);
        if (!$this->sock)
            return false;
        return $this->_write("/set $key=$value\n");
    }

    /**
     * Send one text to be segmented.
     *
     * The text is trimmed, then _SPECIAL_TAG_ and a newline are appended so
     * that recv() can recognise the end of the reply.
     *
     * @param string $str
     * @return bool true when the request was written completely, false when
     *              the text is blank or the connection is unavailable
     */
    function send($str)
    {
        $str = trim($str);
        // Not empty(): that would also reject the perfectly valid text "0".
        if ($str === '')
            return false;
        if (!$this->sock)
            $this->open($this->host, $this->port);
        if (!$this->sock)
            return false;
        return $this->_write($str . _SPECIAL_TAG_ . "\n");
    }

    /**
     * Receive the reply to the previous send().
     *
     * Polls the non-blocking socket until the end tag is seen, the peer closes
     * the connection, or more than $maxtry consecutive reads yield nothing.
     * NOTE: polling does not sleep, so $maxtry bounds attempts, not time.
     *
     * @param int $maxtry maximum number of consecutive empty reads
     * @return string the reply without the end tag; whatever was read so far
     *                if no tag arrived; $this->rbuf when not connected
     */
    function recv($maxtry = 1000)
    {
        if (!$this->sock)
            return $this->rbuf;

        $str = '';
        $try = 0;
        $tail = _SPECIAL_LEN_ + 2;
        while (true)
        {
            $frag = fread($this->sock, 2048);
            if ($frag === false || $frag === '')
            {
                // A failed read counts as an empty one so the loop stays bounded.
                $try++;
            }
            else
            {
                $this->dirty += $try;
                $this->clean++;
                $try = 0;
                $str .= $frag;
                // Test the accumulated data, not just this fragment: the tag
                // may straddle two reads. The length guard keeps substr()'s
                // negative-offset clamping from matching a too-short buffer.
                if (strlen($str) >= $tail
                    && substr($str, _SPECIAL_OFF_, _SPECIAL_LEN_) === _SPECIAL_TAG_)
                {
                    $str = (string) substr($str, 0, _SPECIAL_OFF_);
                    break;
                }
            }
            if ($try > $maxtry)
                break;
            if (feof($this->sock))
                break;
        }
        $this->dirty += $try;
        return $str;
    }

    /**
     * Say "/bye", collect whatever the server still sends into $this->rbuf
     * and close the socket. Does nothing when no connection is open.
     */
    function close()
    {
        // Without this guard send() would open a new connection just to close
        // it again, and feof() would be called on a non-resource.
        if (!$this->sock)
            return;

        $this->send('/bye');
        $this->rbuf = '';
        while (!feof($this->sock))
        {
            $chunk = $this->recv();
            if ($chunk !== '')
            {
                $this->rbuf .= $chunk;
                continue;
            }
            // Nothing arrived: wait up to one second for more data or for the
            // peer to close, and give up on silence instead of looping forever.
            $r = array($this->sock);
            $w = null;
            $e = null;
            if (!stream_select($r, $w, $e, 1))
                break;
        }
        fclose($this->sock);
        $this->sock = false;
    }

    /**
     * Internal helper: write all of $data to the socket.
     *
     * The socket is non-blocking, so a single fwrite() may accept only part of
     * the data; keep writing, waiting for writability when nothing was taken.
     *
     * @param string $data
     * @return bool true when everything was written, false on error or timeout
     */
    function _write($data)
    {
        $stalls = 0;
        while ($data !== '')
        {
            $n = fwrite($this->sock, $data);
            if ($n === false)
                return false;
            if ($n > 0)
            {
                $data = (string) substr($data, $n);
                $stalls = 0;
                continue;
            }
            // Zero bytes accepted. Bail out after a few rounds so a dead peer
            // that reports "writable" cannot cause an endless loop.
            if (++$stalls > 3)
                return false;
            $r = null;
            $w = array($this->sock);
            $e = null;
            if (!stream_select($r, $w, $e, 5))
                return false;
        }
        return true;
    }
}
- // Sample01:
- /*
- $cws = &CSCWS::open('localhost');
- $cws->set("ignore_mark", "yes");
- $cws->set("delim", "_");
- $cws->send("我是中国人,你应该也是吧?\n哈哈by hightman2006");
- echo $cws->recv();
- $cws->close();
- echo $cws->recv();
- */
- ?>
复制代码
[ 本帖最后由 hightman 于 2006-5-28 22:23 编辑 ] |
|