// search_results(): heading text shown at the top of the results table.
// Returns the plain (untranslated) title string.
function search_results()
{
  $heading = "Search Results";
  return $heading;
}
// matches_text(): build the summary sentence shown above the result list.
//   $num - number of documents that matched the query
// Returns an HTML fragment: an apology when nothing matched, a singular
// sentence for exactly one hit, and a plural sentence with the count in bold
// for anything else.
function matches_text($num)
{
  // Normalise to an integer so the value interpolated into the markup below
  // can only ever consist of digits.
  $num = (int)$num;
  if ($num == 0)
  {
    return "Sorry, no documents matching your query.";
  }
  if ($num == 1)
  {
    return "Found <b>1</b> document matching your query.";
  }
  return "Found <b>$num</b> documents matching your query. Showing best matches first.";
}
// report_matches(): helper used by report_results(), which concatenates its
// result into the "tiny" table cell printed under each document link.
// NOTE(review): the statements that make up this function's body cannot be
// identified with certainty here — confirm what it returns before relying on it.
24 function report_matches()
// Pick up the search expression from the request, if one was submitted.
if (array_key_exists("query", $_GET))
{
  $query=$_GET["query"];
}
// The query is user-controlled: escape it before placing it inside the
// attribute, otherwise a double quote in the query terminates value="..." and
// lets the request inject markup (reflected XSS). $query itself is left
// untouched so later code still sees the raw search expression.
$queryAttr = (isset($query) && is_string($query)) ? htmlspecialchars($query, ENT_QUOTES) : "";
echo "<input type=\"text\" name=\"query\" value=\"$queryAttr\" size=\"20\" accesskey=\"s\"/>";
// readInt(): read one 4-byte integer from the open stream $file.
// The bytes are combined most-significant first (big-endian). A byte that
// cannot be read contributes 0, because ord() of a failed fgetc() is 0.
function readInt($file)
{
  $value = 0;
  for ($n = 0; $n < 4; $n++)
  {
    // shift what has been read so far up by one byte and append the next one
    $value = ($value << 8) | ord(fgetc($file));
  }
  return $value;
}
// readString(): read a zero-terminated string from the open stream $file.
// Characters are consumed up to and including the first NUL byte; the NUL is
// not part of the result. Reaching the end of the stream also ends the string.
// Returns the collected characters ("" when the terminator comes first).
function readString($file)
{
  // Start from an empty string so the append below never touches an
  // undefined variable and an empty string is returned as "".
  $result = "";
  // fgetc() yields false at end of stream; test for it explicitly instead of
  // relying on ord(false) happening to be 0.
  while (($c = fgetc($file)) !== false && ord($c) != 0)
  {
    $result .= $c;
  }
  return $result;
}
// readHeader(): read the 4-character signature at the current position of the
// open stream $file and return it as a string (the caller compares it with
// "DOXS"). Fewer than four characters are returned when the stream ends early.
function readHeader($file)
{
  $header = "";
  for ($i = 0; $i < 4; $i++)
  {
    $c = fgetc($file);
    if ($c === false)
    {
      break; // end of stream: return what was read so the caller's check fails
    }
    $header .= $c;
  }
  return $header;
}
// computeIndex(): derive from $word the slot number that do_search() uses to
// seek into the index lookup table.
// Returns -1 when no slot can be derived: the word is shorter than two
// characters, or either of the two index characters evaluates to 0.
// NOTE(review): the statements that assign $hi and $lo and the final return
// are not visible here — confirm them against the complete file.
59 function computeIndex($word)
// The block below is a disabled alternative (a 16-bit rolling hash); it is
// kept for reference only and does not run.
61 // Fast string hashing
62 //$lword = strtolower($word);
63 //$l = strlen($lword);
64 //for ($i=0;$i<$l;$i++)
66 // $c = ord($lword{$i});
67 // $v = (($v & 0xfc00) ^ ($v << 6) ^ $c) & 0xffff;
71 // Simple hashing that allows for substring search
// words of fewer than two characters cannot be looked up
72 if (strlen($word)<2) return -1;
73 // high char of the index
75 if ($hi==0) return -1;
76 // low char of the index
78 if ($lo==0) return -1;
// do_search(): look up $word in the already-open index stream $file and append
// one entry per matching indexed word to $statsList (passed by reference).
//   $path      - prefix prepended to every document URL read from the index
//   $file      - open index stream, read through readInt()/readString()
//   $word      - search term; matched against the start of each indexed word
//   $statsList - result list, extended in place
// NOTE(review): several statements of this function (loop headers, braces,
// some assignments) are not visible here — confirm details against the
// complete file before changing any logic.
83 function do_search($path, $file,$word,&$statsList)
85 $index = computeIndex($word);
86 if ($index!=-1) // found a valid index
// jump to this word's slot in the lookup table and read the value stored there
88 fseek($file,$index*4+4); // 4 bytes per entry, skip header
89 $index = readInt($file);
90 if ($index) // found words matching the hash key
// remember where this call's entries begin so only they are post-processed
92 $start=sizeof($statsList);
95 $w = readString($file);
98 $statIdx = readInt($file);
// prefix comparison: $word must equal the first strlen($word) characters of $w
99 if ($word==substr($w,0,strlen($word)))
100 { // found word that matches (as substring)
101 $statsList[$count++]=array(
// "full" is true only when the indexed word is exactly as long as the search term
105 "full"=>strlen($w)==strlen($word),
109 $w = readString($file);
// second pass: for every entry added above, load its per-document statistics
114 for ($count=$start;$count<sizeof($statsList);$count++)
116 $statInfo = &$statsList[$count];
118 // whole word matches have a double weight
119 if ($statInfo["full"]) $multiplier=2;
120 fseek($file,$statInfo["index"]);
121 $numDocs = readInt($file);
123 // read docs info + occurrence frequency of the word
124 for ($i=0;$i<$numDocs;$i++)
127 $freq=readInt($file);
128 $docInfo[$i]=array("idx" => $idx,
// the lowest bit of $freq selects the high-priority branch
133 if ($freq&1) // word occurs in high priority doc
136 $totalFreqHi+=$freq*$multiplier;
138 else // word occurs in low priority doc
140 $totalFreqLo+=$freq*$multiplier;
143 // read name and url info for the doc
144 for ($i=0;$i<$numDocs;$i++)
146 fseek($file,$docInfo[$i]["idx"]);
147 $docInfo[$i]["name"]=readString($file);
148 $docInfo[$i]["url"]=$path.readString($file);
150 $statInfo["docs"]=$docInfo;
// normalisation constant for the ranks computed below
// NOTE(review): $totalHi is not assigned on any visible line — confirm it is
// initialised and updated in the high-priority branch.
152 $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
// third pass: turn the raw frequencies into a rank per document
153 for ($count=$start;$count<sizeof($statsList);$count++)
155 $statInfo = &$statsList[$count];
157 // whole word matches have a double weight
158 if ($statInfo["full"]) $multiplier=2;
159 for ($i=0;$i<sizeof($statInfo["docs"]);$i++)
161 $docInfo = &$statInfo["docs"];
162 // compute frequency rank of the word in each doc
163 $freq=$docInfo[$i]["freq"];
// documents flagged "hi" get $totalFreqLo added to the numerator, the others do not
164 if ($docInfo[$i]["hi"])
166 $statInfo["docs"][$i]["rank"]=
167 (float)($freq*$multiplier+$totalFreqLo)/$totalFreq;
171 $statInfo["docs"][$i]["rank"]=
172 (float)($freq*$multiplier)/$totalFreq;
// combine_results(): merge the per-word results in $results into $docs
// (passed by reference), which is keyed by $key and holds one entry per
// document.
// A document that is already present has its "rank" increased; otherwise a new
// entry is created. In both cases the word is appended to the entry's "words".
// NOTE(review): the statements that set $key and $rank and the tails of both
// array literals are not visible here — confirm against the complete file.
181 function combine_results($results,&$docs)
183 foreach ($results as $wordInfo)
185 $docsList = &$wordInfo["docs"];
186 foreach ($docsList as $di)
// NOTE(review): this builds the full key list and scans it on every iteration
// and compares loosely; array_key_exists($key, $docs) looks like the intended
// test — confirm before changing.
190 if (in_array($key, array_keys($docs)))
// document seen before: accumulate the rank contributed by this word
192 $docs[$key]["rank"]+=$rank;
// first time this document is seen: create its entry
196 $docs[$key] = array("url"=>$key,
// record which search word produced the hit and the indexed word it matched
201 $docs[$key]["words"][] = array(
202 "word"=>$wordInfo["word"],
203 "match"=>$wordInfo["match"],
// filter_results(): apply the +word / -word constraints to the combined hits.
//   $docs           - documents keyed as produced by combine_results(); each
//                     entry carries a "words" list of arrays with a "word" key
//   $requiredWords  - words every returned document must have matched
//   $forbiddenWords - words no returned document may have matched
// Returns a new array with the surviving entries under their original keys;
// $docs and both word lists are left unmodified.
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs = array();
  // foreach instead of while(list()=each()): each() no longer exists in PHP 8.
  foreach ($docs as $key => $doc)
  {
    // collect the search words that produced a hit in this document
    $matched = array();
    if (isset($doc["words"]))
    {
      foreach ($doc["words"] as $wordInfo)
      {
        $matched[] = $wordInfo["word"];
      }
    }

    $copy = true; // copy entry by default

    // every required word has to be among the matched words; testing
    // membership avoids a later word overwriting an earlier positive result
    foreach ($requiredWords as $reqWord)
    {
      if (!in_array($reqWord, $matched))
      {
        $copy = false; // document lacks a required word
        break;
      }
    }

    // a single forbidden word is enough to drop the document
    if ($copy)
    {
      foreach ($matched as $word)
      {
        if (in_array($word, $forbiddenWords))
        {
          $copy = false; // document contains a forbidden word
          break;
        }
      }
    }

    if ($copy)
    {
      $filteredDocs[$key] = $doc;
    }
  }
  return $filteredDocs;
}
// compare_rank(): usort() callback that orders result entries by their "rank"
// value, highest first.
//   $a, $b - arrays that each carry a "rank" key
// Returns 0 for equal ranks, -1 when $a ranks higher (sorts first), 1 otherwise.
function compare_rank($a,$b)
{
  $left  = $a["rank"];
  $right = $b["rank"];
  if ($left == $right)
  {
    return 0; // ties must compare equal so the comparator stays consistent
  }
  if ($left > $right)
  {
    return -1;
  }
  return 1;
}
// sort_results(): order the documents by rank, best match first.
//   $docs   - documents to sort (left untouched)
//   $sorted - receives the sorted list (passed by reference); usort() renumbers
//             it from 0, so the original keys are not preserved
// The sorted list is also returned so the call can be used in an expression.
function sort_results($docs,&$sorted)
{
  // work on a copy: without this assignment usort() would be handed whatever
  // the caller's variable happened to contain instead of the documents
  $sorted = $docs;
  usort($sorted,"compare_rank");
  return $sorted;
}
// report_results(): print the result list in $docs as an HTML table.
// Output consists of a heading from search_results(), a summary line from
// matches_text(), and per document a numbered link followed by the words that
// matched with their frequencies.
// NOTE(review): row markup, the branch on $numDocs and the handling of $num
// are not visible here — confirm against the complete file.
// NOTE(review): $doc["url"], $doc["name"], $word and $matchRight are written
// to the page without HTML escaping — confirm these values can be trusted.
267 function report_results(&$docs)
269 echo "<table cellspacing=\"2\">\n";
271 echo " <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
273 $numDocs = sizeof($docs);
// summary for the empty result set
277 echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
// summary when there is something to list
283 echo " <td colspan=\"2\">".matches_text($numDocs);
288 foreach ($docs as $doc)
// position of the document in the list, then the link to it
291 echo " <td align=\"right\">$num.</td>";
292 echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
294 echo " <td></td><td class=\"tiny\">".report_matches()." ";
295 foreach ($doc["words"] as $wordInfo)
297 $word = $wordInfo["word"];
// the part of the matched index word that follows the search word; it is
// printed after the bold search word so only the typed part is emphasised
298 $matchRight = substr($wordInfo["match"],strlen($word));
299 echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
// Refuse to run on interpreters older than PHP 4.1.0.
// version_compare() compares the numeric components; a plain strcmp() orders
// the strings character by character and would therefore treat "10.0.0" as
// older than "4.1.0".
if (version_compare(phpversion(), '4.1.0', '<'))
{
  die("Error: PHP version 4.1.0 or above required!");
}
// Documentation directories to search, given relative to this script; each
// entry is concatenated with "search.idx" below, so the trailing slash matters.
318 "../Socket/doc/html/",
319 "../Examples/doc/html/",
320 "../Packets/doc/html/",
321 "../Utils/doc/html/",
322 "../Scheduler/doc/html/",
// Open the index of every directory and verify its signature; any failure
// aborts the whole request.
326 for ($i=0; $i<sizeof($paths); $i++) {
// '@' suppresses PHP's own warning; the die() below reports the failure instead
327 if (!($f=@fopen($paths[$i]."search.idx","rb")))
329 die("Error: Search index file could NOT be opened!");
333 if (readHeader($f)!="DOXS")
335 die("Error: Header of index file is invalid!");
// take the search expression from the request, if present
340 if (array_key_exists("query", $_GET))
342 $query=$_GET["query"];
345 echo " \n<div class=\"searchresults\">\n";
// words prefixed with '+' / '-' in the query, and the words already searched
347 $requiredWords = array();
348 $forbiddenWords = array();
349 $foundWords = array();
// split the query on single spaces
350 $word=strtok($query," ");
// NOTE(review): the loop condition is a truthiness test, so a token that PHP
// treats as false (for example "0") ends the loop early — confirm whether
// `$word !== false` is what is meant.
351 while ($word) // for each word in the search query
// A leading '+' marks the word as required, a leading '-' as forbidden; the
// marker is stripped before the word is searched for.
// substr() is used to look at the first character because the $word{0} offset
// syntax is rejected by PHP 8 and, unlike an offset, substr() is safe when
// stripping a marker has left the word empty (a query token of just "+").
// NOTE(review): the word is stored with its original letter case while the
// search itself is done on strtolower($word) — confirm that is intended.
if (substr($word,0,1)=='+') { $word=substr($word,1); $requiredWords[]=$word; }
if (substr($word,0,1)=='-') { $word=substr($word,1); $forbiddenWords[]=$word; }
// search each distinct query word only once
355 if (!in_array($word,$foundWords))
// look the word up in every opened index; $paths[$i] and $files[$i] belong
// together, and the word is lower-cased for the lookup
358 for ($i=0; $i<sizeof($files); $i++) {
359 do_search($paths[$i], $files[$i], strtolower($word), $results);
// merge the per-word hits into one entry per document
365 combine_results($results,$docs);
366 // filter out documents with forbidden word or that do not contain
368 $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
369 // sort the results based on rank
371 sort_results($filteredDocs,$sorted);
372 // report results to the user
373 report_results($sorted);
// NOTE(review): the body of this loop lies beyond the visible lines; it
// iterates over the opened index handles.
375 foreach ($files as $file) {