<?php
require_once "mylist.config.php";
require_once "backend/shared/emailMessage.php";
/**
 * Builds the word index for one e-mail message.
 *
 * The message parts are read from the properties below (presumably filled in
 * by the caller before indexMessage() is invoked -- verify against callers).
 * Every part is split into normalized words and each word is stored in, or
 * has its weight raised in, the mylist_search_index table.
 *
 * Database access goes through the global $dbh connection; any database
 * error terminates the script with die().
 */
class CIndexer
{
    // subject text of the message
    var $subject;
    // header values; array, may contain nested arrays
    var $header;
    // body text of the message
    var $body;
    // attachment texts; array
    var $attachements;

    /**
     * Indexes all parts (header, subject, body, attachments) of a message.
     *
     * @param mixed $_archive_id  id stored as message_id in mylist_search_index
     */
    function indexMessage($_archive_id)
    {
        $this->indexPartOfMessage($_archive_id, "header");
        $this->indexPartOfMessage($_archive_id, "subject");
        $this->indexPartOfMessage($_archive_id, "body");
        // The existing tables are not sufficient for indexing attachments:
        // with more than one attachment we cannot tell the user which of
        // them contains the searched expression.
        $this->indexPartOfMessage($_archive_id, "attachement");
    }

    /**
     * Indexes the words of one message part.
     *
     * The first occurrence of a word is inserted with the default weight of
     * the search type; every further occurrence raises the stored weight by
     * 0.3 * default weight.
     *
     * @param mixed  $id    id stored as message_id in mylist_search_index
     * @param string $type  part name, looked up in mylist_search_types.name
     */
    function indexPartOfMessage($id, $type)
    {
        global $dbh;

        $words = $this->parseTextIntoWords($this->getTextToIndex($type));
        if (count($words) == 0)
            return; // nothing to index, no need to touch the database

        // Values are handed over as placeholder parameters instead of being
        // concatenated into the statement.
        // NOTE(review): assumes $dbh is a PEAR DB connection -- confirm.
        $sql = "SELECT id, default_weight FROM mylist_search_types WHERE name LIKE ?";
        $res = $dbh->query($sql, array($type));
        if (PEAR::isError($res))
            die($res->getMessage());

        $row = null;
        $fetched = $res->fetchInto($row);
        if (PEAR::isError($fetched))
            die($fetched->getMessage());
        if (!is_array($row) || !isset($row["id"]))
            die("Unknown search type: ".$type);

        $typeId = $row["id"];
        $defaultWeight = $row["default_weight"];

        // The same exact-match condition is used for the lookup and for the
        // update, so a word containing "_" cannot hit rows of other words.
        $where = " WHERE message_id = ? AND word = ? AND search_type_id = ?";

        foreach ($words as $word)
        {
            $weight = $dbh->getOne("SELECT weight FROM mylist_search_index".$where, array($id, $word, $typeId));
            if (PEAR::isError($weight))
                die($weight->getMessage());

            // Strict test: a stored weight of 0 is an existing row, not a missing one.
            if ($weight === null || $weight === "")
            {
                $sql = "INSERT INTO mylist_search_index ";
                $sql .= "(message_id, word, search_type_id, weight) ";
                $sql .= "VALUES (?, ?, ?, ?)";
                $params = array($id, $word, $typeId, $defaultWeight);
            }
            else
            {
                $sql = "UPDATE mylist_search_index SET weight = ?".$where;
                $params = array((0.3 * $defaultWeight) + $weight, $id, $word, $typeId);
            }

            $res = $dbh->query($sql, $params);
            if (PEAR::isError($res))
                die($res->getMessage());
        }
    }

    /**
     * Returns the text of the requested message part.
     *
     * Array-valued parts (header, attachments) are flattened into a single
     * newline separated string.
     *
     * @param string $type  "header", "subject", "body" or "attachement"
     * @return string  text of the part; empty string for an unknown type
     */
    function getTextToIndex($type)
    {
        if ($type === "header")
            return $this->getAllHeaderValues($this->header);
        if ($type === "subject")
            $source = $this->subject;
        else if ($type === "body")
            $source = $this->body;
        else if ($type === "attachement")
            $source = $this->attachements;
        else
            return "";

        if (is_array($source))
            return $this->getAllHeaderValues($source);
        return (string) $source;
    }

    /**
     * Recursively concatenates all values of a (possibly nested) array.
     * Every value is followed by a newline; array keys are ignored.
     *
     * @param mixed $array  array of values; null gives an empty string and
     *                      any other non-array value is treated as one value
     * @return string
     */
    function getAllHeaderValues($array)
    {
        if (!is_array($array))
            return ($array === null) ? "" : $array."\n";

        $text = "";
        foreach ($array as $value)
        {
            if (is_array($value))
                $text .= $this->getAllHeaderValues($value);
            else
                $text .= $value."\n";
        }
        return $text;
    }

    /**
     * Splits a text into normalized index words.
     *
     * Whitespace and the punctuation characters listed in the pattern act
     * as separators; "@", "_" and "\" are not separators and stay part of
     * a word. Words rejected by validateIndexWord() are dropped.
     *
     * @param mixed $_text  text to split; an array is flattened first
     * @return array  list of words in order of appearance, duplicates kept
     */
    function parseTextIntoWords($_text)
    {
        if (is_array($_text))
            $_text = $this->getAllHeaderValues($_text);

        // Whitespace is listed explicitly (not \s) so that the set of
        // separator bytes does not depend on the locale.
        $pattern = '/[ \t\r\n\f\x0B.,;:\-+*\/|=`\'"~()<>\[\]{}!?#$%^&]+/';
        $pieces = preg_split($pattern, (string) $_text, -1, PREG_SPLIT_NO_EMPTY);
        if (!is_array($pieces))
            return array();

        $output = array();
        foreach ($pieces as $piece)
        {
            $w = $this->validateIndexWord($piece);
            if ($w !== "")
                $output[] = $w;
        }
        return $output;
    }

    /**
     * Normalizes a word (trim + lower case) and checks its length.
     *
     * NOTE(review): strtolower() and strlen() work on bytes; behaviour for
     * multi-byte encoded text should be confirmed.
     *
     * @param string $w  raw word
     * @return string  normalized word, or empty string when it is rejected
     */
    function validateIndexWord($w)
    {
        // all modifications, replacements
        $w = strtolower(trim((string) $w));
        $length = strlen($w);
        // Strings of length 2 must be accepted because of domains such as
        // .sk, .cz etc.; 48 is the limit in the db.
        if ($length > 1 && $length <= 48)
            return $w;
        return "";
    }
}
?>
Platon Group <platon@platon.sk> http://platon.sk/
|