网站导航免费论文 原创论文 论文搜索 原创论文 网学软件 学术大家 资料中心 会员中心 问题解答 原创论文 论文素材 设计下载 最新论文 下载排行 论文上传 在线投稿 联系我们
返回网学首页
网学联系
最新论文 推荐专题 热门论文 素材专题
当前位置: 网学 > 编程文档 > PHP > 正文
对MYSQL进行全文检索的PHP类库
来源:Http://myeducs.cn 联系QQ:点击这里给我发消息 作者: 用户投稿 来源: 网络 发布时间: 12/12/07
下载《对MYSQL进行全文检索的PHP类库》原创论文样式

1 2 3 下一页

  这个类库很不错,不过我还没有完全研究透。如果想查看该程序的详细说明和演示,请访问: http://steven.haryan.to/PHP/KwIndex.html

  注意,只能在Linux,Unix下用。

<?php
  
# Debug switch read by _debug(); any truthy value turns debug output on.
$debug = 0;

# Dumper.lib is only pulled in when debugging is enabled. It is a separate
# library file that must be available for the require to succeed.
if ($debug) {
    require "Dumper.lib";
}
  
# Print a debug message when the global $debug flag is truthy.
#
# Accepts any number of arguments; they are concatenated without a separator,
# HTML-escaped with htmlentities() and echoed inside <pre>...</pre><br>,
# followed by a newline. Prints nothing when $debug is falsy.
function _debug() {
    global $debug;

    # Nothing to do when debugging is switched off.
    if (!$debug) return;

    $args = func_get_args();
    # The trailing "\n" is a real newline; the listing had lost its backslash
    # and printed a literal "n" after <br>.
    echo "<pre>debug: ", htmlentities(join("", $args)), "</pre><br>\n";
}
  
class KwIndex {
  
# CONSTRUCTOR
#############
  
# Constructor (PHP 4 style: method named after the class).
#
# $args is an associative array of options:
#   db_name                    (required) database to select
#   linkid                     an already-open MySQL link; if absent,
#                              hostname, username and password are all required
#   hostname/username/password connection parameters used when no linkid is given
#   use_persistent_connection  default 1: connect with mysql_pconnect(),
#                              otherwise with mysql_connect()
#   index_name                 default "kwindex"; prefix of the index tables
#   wordlist_cardinality       default 100000
#   stoplist_cardinality       default 10000
#   vectorlist_cardinality     default 100000000
#   doclist_cardinality        default 1000000
#   max_word_length            default 32
#
# Any validation or database failure terminates the script via die().
# On success the object holds the link in $this->linkid and the stoplist
# (lower-cased words as array keys) in $this->stoplist.
#
# NOTE(review): this code depends on the legacy mysql_* extension and on a
# PHP 4 style constructor, so it targets old PHP versions; porting to
# mysqli/PDO would change what callers pass as 'linkid' and is not done here.
function KwIndex($args) {
    # check for argument type
    # (no call-time pass-by-reference: is_array(&$args) is a fatal error since
    # PHP 5.4; the "$" in the message is escaped so the literal text "$args"
    # is printed instead of interpolating the invalid value)
    if (!is_array($args))
        die("KwIndex: constructor: syntax: KwIndex(array \$args)");

    # check for unknown arguments
    $known_arguments = array_flip(array(
        "linkid", "db_name", "hostname", "username", "password",
        "index_name", "wordlist_cardinality", "doclist_cardinality",
        "stoplist_cardinality", "vectorlist_cardinality",
        "max_word_length", "use_persistent_connection"));
    foreach (array_keys($args) as $k)
        if (!isset($known_arguments[$k]))
            die("KwIndex: constructor: unknown argument `$k'");

    # check for required arguments
    if (!isset($args["db_name"]))
        die("KwIndex: constructor: You must specify 'db_name'");
    if (!isset($args["linkid"]) &&
        (!isset($args["hostname"]) || !isset($args["username"]) ||
         !isset($args["password"])))
        die("KwIndex: constructor: You must either specify 'linkid' or ".
            "arguments to MySQL_connect ('hostname', 'username', and ".
            "'password')");

    # supply default values for optional arguments
    if (!isset($args["index_name"]))
        $args["index_name"] = "kwindex";
    if (!isset($args["wordlist_cardinality"]))
        $args["wordlist_cardinality"] = 100000;
    if (!isset($args["stoplist_cardinality"]))
        $args["stoplist_cardinality"] = 10000;
    if (!isset($args["vectorlist_cardinality"]))
        $args["vectorlist_cardinality"] = 100000000;
    if (!isset($args["doclist_cardinality"]))
        $args["doclist_cardinality"] = 1000000;
    if (!isset($args["max_word_length"]))
        $args["max_word_length"] = 32;
    if (!isset($args["use_persistent_connection"]))
        $args["use_persistent_connection"] = 1;

    # set object attributes
    $this->db_name = $args["db_name"];
    $this->index_name = $args["index_name"];
    $this->wordlist_cardinality = $args["wordlist_cardinality"];
    $this->stoplist_cardinality = $args["stoplist_cardinality"];
    $this->vectorlist_cardinality = $args["vectorlist_cardinality"];
    $this->doclist_cardinality = $args["doclist_cardinality"];
    $this->max_word_length = $args["max_word_length"];

    # open a connection unless the caller handed us one
    if (!isset($args["linkid"])) {
        if ($args["use_persistent_connection"]) {
            $linkid = mysql_pconnect($args["hostname"], $args["username"],
                                     $args["password"]);
        } else {
            $linkid = mysql_connect($args["hostname"], $args["username"],
                                    $args["password"]);
        }
        if (!$linkid)
            die("KwIndex: constructor: Can't connect to database: ".
                mysql_error());
    } else {
        $linkid = $args["linkid"];
    }

    $this->linkid = $linkid;
    $idx = $this->index_name;

    if (!mysql_select_db($this->db_name, $linkid))
        die("KwIndex: constructor: Can't select DB: ".
            mysql_error($linkid));

    # create the index on first use
    if (!$this->_index_exists()) $this->_create_index();

    # load stoplist as keys of array
    # ({$idx} instead of ${idx}: same resulting string, but the "${...}"
    # interpolation form is deprecated in current PHP)
    $this->stoplist = array();
    if (!($res = mysql_query("SELECT word FROM {$idx}_stoplist",
                             $linkid)))
        die("KwIndex: constructor: Can't load stoplist: ".
            mysql_error($linkid));
    while ($row = mysql_fetch_row($res)) {
        $this->stoplist[ strtolower($row[0]) ] = 1;
    }
} // constructor
  
# PUBLIC METHODS
################
  
# Hook for fetching document contents; subclasses must replace it.
#
# $doc_ids is the array of document ids handed over by add_document().
# add_document() requires the override to return an array with exactly as
# many entries as $doc_ids, and iterates it as document id => document text.
# This base implementation only aborts the script with an error message.
function &document_sub($doc_ids) {
    exit("KwIndex: document_sub: this method must be overriden");
}
  
function add_document($doc_ids) {
if (!is_array(&$doc_ids))
die("KwIndex: syntax: add_document(array $doc_ids)");
  
if (!sizeof(&$doc_ids)) return 1;
  
$wordlist = array();
# structure: ( 'word1' => [ [doc_id,freq], ... ], ... )
$doclist = array();
# format: ( doc_id => n, ... ); # n = number of words in document
  
# retrieve documents
####################
$docs = $this->document_sub(&$doc_ids);
  
if(!is_array(&$docs))
die("KwIndex: add_document: ".
"'document_sub' does not return an array");
if (sizeof(&$doc_ids) < sizeof(&$docs))
die("KwIndex: add_document: ".
"'document_sub' does not return enough documents");
if (sizeof(&$doc_ids) > sizeof(&$docs))
die("KwIndex: add_document: ".
"'document_sub' returns too many documents");
  
# split documents into words
############################
while(list($id, $doc) = each($docs)) {
if (!isset($doc) || !strlen($doc)) continue;
  
$words = $this->_split_to_words($doc);
$num_of_words = sizeof(&$words);
# note: this means that numbers, etc are counted
$doclist[$id] = $num_of_words;
  
# filter non-qualifying words: 1-char length, numbers, words
# that are too long
  
$w2 = array();
while(list($k, $v) = each($words)) {
$len = strlen($v);
$lower_v = strtolower($v);
if ($len > 1 &&
$len <= $this->max_word_length &&
preg_match("/[a-z]/", $lower_v) &&
!isset($this->stoplist[$lower_v])) ++$w2[ $lower_v ];
}
  
while(list($k, $v) = each($w2)) {
$lower_k = strtolower($k);
if (!isset($wordlist[$lower_k]))
$wordlist[$lower_k] = array();
array_push($wordlist[$lower_k], array($id, $v/$num_of_words));
}
}
  
#_debug("wordlist: ", Dumper($wordlist));
  
# submit to database
####################
$linkid = $this->linkid;
$idx = $this->index_name;
  
# lock the tables in case some other process remove a certain word
# between step 0 and 1 and 2 and 3
if(!mysql_query("LOCK TABLES ${idx}_doclist WRITE, ".
"${idx}_vectorlist WRITE, ".
"${idx}_wordlist WRITE",
$linkid)) {
$this->ERROR = "Can't lock tables when adding documents: ".
mysql_error($linkid);
return;
}
  
# 0
# add the docs first
#_debug( "doclist = ", Dumper($doclist));
while(list($k,$v) = each($doclist)) {
if(!mysql_query("REPLACE INTO ${idx}_doclist (id,n) VALUES (".
"'".(addslashes($k))."'".
",".
"'".(addslashes($v))."'".
")",
$linkid)) {
$this->ERROR = "Can't add doc id=`$k' to doclist: ".
mysql_error($linkid);
mysql_query("UNLOCK TABLES", $linkid);
return;
}
}
  
# 1
# and then add the words
while(list($k,$v) = each($wordlist)) {
if(!mysql_query("INSERT IGNORE INTO ${idx}_wordlist (word) ".
"VALUES (".
"'".(addslashes($k))."'".
")",
$linkid)) {
$this->ERROR = "Can't add word `$k' to wordlist: ".
mysql_error($linkid);
mysql_query("UNLOCK TABLES", $linkid);
return

(责任编辑:admin)

  • 下一篇资讯: PHP脚本数据库功能详解
  • 网学推荐

    免费论文

    原创论文

    浏览:
    设为首页 | 加入收藏 | 论文首页 | 论文专题 | 设计下载 | 网学软件 | 论文模板 | 论文资源 | 程序设计 | 关于网学 | 站内搜索 | 网学留言 | 友情链接 | 资料中心
    版权所有 QQ:3710167 邮箱:3710167@qq.com 网学网 [Myeducs.cn] 您电脑的分辨率是 像素
    Copyright 2008-2015 myeducs.Cn www.myeducs.Cn All Rights Reserved
    湘ICP备09003080号