# check for required arguments

# 'db_name' is always mandatory.
if (!isset($args["db_name"])) {
    die("KwIndex: constructor: You must specify 'db_name'");
}

# The caller must pass either 'linkid' or the complete set of connection
# arguments ('hostname', 'username' and 'password'); a partial set of
# connection arguments without 'linkid' is rejected.
if (!isset($args["linkid"]) &&
    (!isset($args["hostname"]) ||
     !isset($args["username"]) ||
     !isset($args["password"]))) {
    die("KwIndex: constructor: You must either specify 'linkid' or ".
        "arguments to MySQL_connect ('hostname', 'username', and ".
        "'password')");
}
# supply default values for optional arguments
# A value is only filled in when isset() reports the key as missing, so an
# explicit null passed by the caller is replaced by the default as well.
if (!isset($args["index_name"])) {
    $args["index_name"] = "kwindex";
}
if (!isset($args["wordlist_cardinality"])) {
    $args["wordlist_cardinality"] = 100000;
}
if (!isset($args["stoplist_cardinality"])) {
    $args["stoplist_cardinality"] = 10000;
}
if (!isset($args["vectorlist_cardinality"])) {
    $args["vectorlist_cardinality"] = 100000000;
}
if (!isset($args["doclist_cardinality"])) {
    $args["doclist_cardinality"] = 1000000;
}
if (!isset($args["max_word_length"])) {
    $args["max_word_length"] = 32;
}
if (!isset($args["use_persistent_connection"])) {
    $args["use_persistent_connection"] = 1;
}
# Validate the fetched documents against the requested ids: $docs must be an
# array with exactly as many entries as $doc_ids.
# NOTE(review): assumes $docs is what 'document_sub' returned for $doc_ids,
# as the error messages state -- confirm against the code that fills $docs.
#
# Arguments are passed by value: call-time pass-by-reference (f(&$x)) is a
# fatal error since PHP 5.4 and was never needed for these read-only calls.
if (!is_array($docs)) {
    die("KwIndex: add_document: ".
        "'document_sub' does not return an array");
}

# Fewer documents than ids were returned.
if (sizeof($docs) < sizeof($doc_ids)) {
    die("KwIndex: add_document: ".
        "'document_sub' does not return enough documents");
}

# More documents than ids were returned.
if (sizeof($docs) > sizeof($doc_ids)) {
    die("KwIndex: add_document: ".
        "'document_sub' returns too many documents");
}
# split documents into words ############################ while(list($id, $doc) = each($docs)) { if (!isset($doc) || !strlen($doc)) continue;
# Split the current document into words and record how many there are.
$words = $this->_split_to_words($doc);

# sizeof() takes its argument by value; call-time pass-by-reference
# (sizeof(&$words)) is a fatal error since PHP 5.4.
$num_of_words = sizeof($words);

# note: this means that numbers, etc are counted
$doclist[$id] = $num_of_words;
# filter out non-qualifying words: single-character words, numbers, and
# words that are too long
# lock the tables in case some other process removes a certain word
# between step 0 and 1 and 2 and 3
#
# On failure the MySQL error text is stored in $this->ERROR and the method
# returns without a value.
# NOTE(review): mysql_query()/mysql_error() come from ext/mysql, which was
# removed in PHP 7; migrating needs a change where $linkid is created.
if (!mysql_query("LOCK TABLES {$idx}_doclist WRITE, ".
                 "{$idx}_vectorlist WRITE, ".
                 "{$idx}_wordlist WRITE", $linkid)) {
    $this->ERROR = "Can't lock tables when adding documents: ".
                   mysql_error($linkid);
    return;
}