基本信息
源码名称:Zend/Search/Lucene.php 相关示例
源码大小:0.01M
文件格式:.zip
开发语言:PHP
更新时间:2013-01-02
友情提示:(无需注册或充值,赞助后即可获取资源下载链接)
嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):813200300
本次赞助数额为:2 元
微信扫码支付:2 元
×
请留下您的邮箱,我们将在2小时内将文件发到您的邮箱
源码介绍
Zend/Search/Lucene.php:构建文件索引;Zend/Search/Lucene/Analysis/Analyzer.php:文本分析(分词)。
代码一:
<?php
// Example 1: create a brand-new index and store a single document in it.
//
// The statements are laid out one per line on purpose: a "//" comment runs
// to the end of the physical line, so keeping the whole script on one line
// turns everything after the first comment into dead text.
require_once "Zend/Search/Lucene.php";
require_once "Zend/Search/Lucene/Document.php";

// $docUrl and $docContent are expected to be provided by the including
// script. Default them to empty strings so the fields below never read an
// undefined variable.
$docUrl     = isset($docUrl) ? $docUrl : '';
$docContent = isset($docContent) ? $docContent : '';

// Passing true as the second constructor argument creates a new index.
$index = new Zend_Search_Lucene('index', true);

// Create a new document object.
$doc = new Zend_Search_Lucene_Document();

// Keep the document path in a Text field named "url".
$doc->addField(Zend_Search_Lucene_Field::Text('url', $docUrl));

// Index the document body through an UnStored field named "contents".
$doc->addField(Zend_Search_Lucene_Field::UnStored('contents', $docContent));

// Add the document object to the index.
$index->addDocument($doc);

// Call commit() on the index object to update the index.
$index->commit();
?>
代码二:
<?php
// Example 2: create an index and print the directory object that backs it.
//
// One statement per line: a "//" comment extends to the end of the physical
// line, so a single-line layout would comment out the rest of the script.
require_once "Zend/Search/Lucene.php"; // load Zend_Search_Lucene

// First constructor argument: use the "index" directory under the current
// path as the index location.
// Second constructor argument: true creates a new index.
$index = new Zend_Search_Lucene('index', true);

// Fetch the directory the index lives in.
$dir = $index->getDirectory();

echo "索引指定的路径信息为:<p>";
print_r($dir); // dump the result
?>
代码三:
<?php
// Example 3: open an existing index, add a document, then list every field
// name known to the index.
//
// One statement per line: a "//" comment extends to the end of the physical
// line, so a single-line layout would comment out the rest of the script.
require_once "Zend/Search/Lucene.php"; // load Zend_Search_Lucene

// $docUrl and $docContent are expected to be provided by the including
// script. Default them to empty strings so the fields below never read an
// undefined variable.
$docUrl     = isset($docUrl) ? $docUrl : '';
$docContent = isset($docContent) ? $docContent : '';

// Open an index that already exists.
$index = new Zend_Search_Lucene('index');

// Create a new document object.
$doc = new Zend_Search_Lucene_Document();

// Keep the document path in a Text field named "url".
$doc->addField(Zend_Search_Lucene_Field::Text('url', $docUrl));

// Index the document body through an UnStored field named "contents".
$doc->addField(Zend_Search_Lucene_Field::UnStored('contents', $docContent));

// Add the document object to the index.
$index->addDocument($doc);

// Call commit() on the index object to update the index.
$index->commit();

// Fetch the list of field names.
$result = $index->getFieldNames();

echo "\$index索引的全部字段为:<p>";
foreach ($result as $value) { // walk the returned array
    echo $value;              // print one field name
    echo "<p>";
}
?>
代码四:
<?php
// Example 4: open the "test" index, report how many documents it holds and
// print every hit for the query "beijing".
//
// Uses the full "<?php" open tag (the short "<?" form only works when
// short_open_tag is enabled) and one statement per line so that "//"
// comments cannot swallow the code that follows them.
require_once 'Zend/Search/Lucene.php'; // load Zend_Search_Lucene

// Open an existing index (the one created by 23-7.php).
$index = new Zend_Search_Lucene('test');

echo "索引包括{$index->count()}个文档<p>\n"; // number of documents

$search = "beijing";          // the query
$hits = $index->find($search); // run find() and collect the results

foreach ($hits as $hit) { // iterate over the result set
    echo str_repeat('-', 80)."--<p>";                      // separator line
    echo '结果ID为:'.$hit->id."<p>";                       // hit id
    echo '分值为:'.sprintf('%.2f', $hit->score)."<p>";     // hit score
    $document = $hit->getDocument();                       // matching document
    echo "标题为:".$hit->title;                            // hit title
    echo "<p>";
    echo $document->getFieldValue('contents');             // document contents
}
?>
代码五:
<?php
// Example 5: create the "test" index and fill it with three sample
// documents taken from an in-memory array.
//
// Uses the full "<?php" open tag (the short "<?" form only works when
// short_open_tag is enabled) and one statement per line so that "//"
// comments cannot swallow the code that follows them.
require_once 'Zend/Search/Lucene.php'; // load Zend_Search_Lucene

// Create a new index named "test".
$index = new Zend_Search_Lucene('test', true);

// Sample records to index.
$data = array(
    array(
        'title'    => 'hello',
        'auth'     => 'h',
        'contents' => 'hello world!'
    ),
    array(
        'title'    => 'test',
        'auth'     => 't',
        'contents' => 'this is a test'
    ),
    array(
        'title'    => 'hello',
        'auth'     => 'h',
        'contents' => 'hello sky!'
    )
);

// Insert every record into the index.
foreach ($data as $temp) {
    $doc = new Zend_Search_Lucene_Document();                                      // new document object
    $doc->addField(Zend_Search_Lucene_Field::Text('title', $temp['title']));       // title
    $doc->addField(Zend_Search_Lucene_Field::Text('auth', $temp['auth']));         // author
    $doc->addField(Zend_Search_Lucene_Field::Text('contents', $temp['contents'])); // contents
    $index->addDocument($doc);                                                     // add the document to the index
}

$index->commit(); // commit the index

echo $index->count()."个文档已经被索引"; // number of documents
?>
代码六:
<?php
// Example 6: search the "test" index for documents matching "world or sky"
// and print id, score, title, author and contents of every hit.
//
// Uses the full "<?php" open tag (the short "<?" form only works when
// short_open_tag is enabled) and one statement per line so that "//"
// comments cannot swallow the code that follows them.
require_once 'Zend/Search/Lucene.php'; // load Zend_Search_Lucene

// Open an existing index.
$index = new Zend_Search_Lucene('test');

$search = "world or sky";      // the query
$hits = $index->find($search); // run find() and collect the results

echo "符合包含world或者sky的结果数为:";
echo count($hits); // number of hits
echo "<p>";
echo str_repeat('-', 40)."<p>"; // separator line

foreach ($hits as $hit) { // iterate over the result set
    echo '结果ID为:'.$hit->id."<p>";                        // hit id
    echo '分值为:'.sprintf('%.2f', $hit->score)."<p>";      // hit score
    $document = $hit->getDocument();                        // matching document
    echo "标题为:".$hit->title;                             // hit title
    echo "<p>";
    echo "作者为:".$hit->auth;                              // hit author
    echo "<p>";
    echo "内容为:".$document->getFieldValue('contents');    // document contents
    echo "<p>";
}
?>
代码七:
<?php
// Example 7: search the "test" index with the query
// "title:hello AND NOT sky" and print id, score, title, author and contents
// of every hit.
//
// Uses the full "<?php" open tag (the short "<?" form only works when
// short_open_tag is enabled) and one statement per line so that "//"
// comments cannot swallow the code that follows them.
require_once 'Zend/Search/Lucene.php'; // load Zend_Search_Lucene

// Open an existing index.
$index = new Zend_Search_Lucene('test');

$search = "title:hello AND NOT sky"; // the query
$hits = $index->find($search);       // run find() and collect the results

echo "符合标题包含hello同时内容不含sky的结果数为:";
echo count($hits); // number of hits
echo "<p>";
echo str_repeat('-', 40)."<p>"; // separator line

foreach ($hits as $hit) { // iterate over the result set
    echo '结果ID为:'.$hit->id."<p>";                        // hit id
    echo '分值为:'.sprintf('%.2f', $hit->score)."<p>";      // hit score
    $document = $hit->getDocument();                        // matching document
    echo "标题为:".$hit->title;                             // hit title
    echo "<p>";
    echo "作者为:".$hit->auth;                              // hit author
    echo "<p>";
    echo "内容为:".$document->getFieldValue('contents');    // document contents
    echo "<p>";
}
?>
代码八:
<?php
// Example 8: a custom text analyzer that emits ASCII alphanumeric words and
// overlapping two-character tokens for multi-byte (Chinese) text, followed
// by a small driver that prints the tokens produced for a sample string.

// Set the page encoding. header() takes the complete "Name: value" line as
// its first argument; its second argument is the boolean $replace flag, so
// the charset has to be part of the header string itself.
header('Content-Type: text/html; charset=utf-8');

// Load the Zend_Search_Lucene_Analysis_Analyzer classes.
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';

/**
 * Custom text analyzer.
 *
 * reset() blanks out punctuation and the configured Chinese stop words;
 * nextToken() then walks the input byte by byte and returns one token per
 * call.
 */
class Phpbean_Lucene_Analyzer extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /** Current byte offset into $this->_input. */
    private $_position;

    /** Chinese stop words that reset() replaces with a blank. */
    private $_cnStopWords = array();

    /**
     * Set the Chinese stop words.
     *
     * @param array $cnStopWords strings to blank out of the input
     */
    public function setCnStopWords($cnStopWords)
    {
        $this->_cnStopWords = $cnStopWords;
    }

    /**
     * Reset the token stream: rewind to offset 0 and replace every separator
     * and stop word in the input with a single blank.
     */
    public function reset()
    {
        $this->_position = 0; // start at the beginning

        // Characters treated as separators.
        // NOTE(review): several entries occur twice (":", ")", "(", ".", ",",
        // "!", ";", "[", "]", "-", " "); the repeats may once have been
        // full-width variants, or "+", lost in extraction - confirm against
        // the original archive. Duplicates are harmless for str_replace().
        $search = array(",", "/", "\\", ".", ";", ":", "\"", "!", "~", "`", "^", "(", ")", "?", "-", "'", "<", ">", "$", "&", "%", "#", "@", " ", "=", "{", "}", "[", "]", ":", ")", "(", ".", "。", ",", "!", ";", "“", "”", "‘", "’", "[", "]", "、", "—", " ", "《", "》", "-", "…", "【", "】","的");

        // Replace separators with a blank.
        $this->_input = str_replace($search, ' ', $this->_input);
        // Replace the stop words with a blank.
        $this->_input = str_replace($this->_cnStopWords, ' ', $this->_input);
    }

    /**
     * Tokenization stream API
     * Get next token
     * Returns null at the end of stream
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function nextToken()
    {
        if ($this->_input === null) {
            return null;
        }

        $len = strlen($this->_input);
        while ($this->_position < $len) {
            // Skip blanks in front of the next term.
            while ($this->_position < $len && $this->_input[$this->_position] == ' ') {
                $this->_position++;
            }
            if ($this->_position >= $len) {
                // Only trailing blanks were left; do not index past the end.
                break;
            }

            $termStartPosition = $this->_position;
            $temp_char = $this->_input[$this->_position];
            $isCnWord = false;

            if (ord($temp_char) > 127) {
                // Multi-byte text: advance 3 bytes per character (assumes
                // every such character is a 3-byte UTF-8 sequence) and stop
                // after two characters.
                $i = 0;
                while ($this->_position < $len && ord($this->_input[$this->_position]) > 127) {
                    $this->_position = $this->_position + 3;
                    $i++;
                    if ($i == 2) {
                        $isCnWord = true;
                        break;
                    }
                }
                if ($i == 1) {
                    // A lone multi-byte character is not emitted as a token.
                    continue;
                }
            } else {
                // ASCII text: consume a run of alphanumeric bytes.
                while ($this->_position < $len && ctype_alnum($this->_input[$this->_position])) {
                    $this->_position++;
                }
            }

            if ($this->_position == $termStartPosition) {
                // Nothing consumed (unrecognised byte): step over it.
                $this->_position++;
                continue;
            }

            $token = new Zend_Search_Lucene_Analysis_Token(
                substr($this->_input, $termStartPosition, $this->_position - $termStartPosition),
                $termStartPosition,
                $this->_position
            );
            $token = $this->normalize($token);

            if ($isCnWord) {
                // Step back one character so consecutive two-character
                // tokens overlap.
                $this->_position = $this->_position - 3;
            }

            if ($token !== null) {
                return $token;
            }
        }

        return null;
    }
}

// --- Driver: tokenize a sample string and dump the tokens. ---

$stopWords = array('a', 'an', 'at', 'the', 'and', 'or', 'is', 'am');
$stopWordsFilter = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords);

$analyzer = new Phpbean_Lucene_Analyzer();
$cnStopWords = array('的');
$analyzer->setCnStopWords($cnStopWords);
$analyzer->addFilter($stopWordsFilter);

$value = 'this is a 中文的内容的测试';
$analyzer->setInput($value, 'utf-8');

$tokenCounter = 0;
$tokens = array(); // initialised so print_r() is safe when nothing is tokenized
while (($token = $analyzer->nextToken()) !== null) {
    $tokenCounter++;
    $tokens[] = $token;
}

print_r($tokens);
?>
更多实例代码详见压缩包