基本信息
源码名称:Zend/Search/Lucene.php 相关示例
源码大小:0.01M
文件格式:.zip
开发语言:PHP
更新时间:2013-01-02
友情提示:(无需注册或充值,赞助后即可获取资源下载链接)
嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):813200300
本次赞助数额为: 2 元×
微信扫码支付:2 元
×
请留下您的邮箱,我们将在2小时内将文件发到您的邮箱
源码介绍
Zend/Search/Lucene.php:用于构建文件索引;Zend/Search/Lucene/Analysis/Analyzer.php:用于文本分析(自定义分析器)。
代码一:
<?php
// Example 1: create a fresh index and add a single document to it.
require_once 'Zend/Search/Lucene.php';
require_once 'Zend/Search/Lucene/Document.php';

// Passing true as the second constructor argument creates a new index
// in the "index" directory.
$index = new Zend_Search_Lucene('index', true);

// Start with an empty document object.
$doc = new Zend_Search_Lucene_Document();

// NOTE(review): $docUrl and $docContent are not assigned anywhere in this
// snippet; presumably the surrounding script defines them — confirm.

// Keep the document path in a Text field named "url".
$urlField = Zend_Search_Lucene_Field::Text('url', $docUrl);
$doc->addField($urlField);

// Index the document body through an UnStored field named "contents".
$contentsField = Zend_Search_Lucene_Field::UnStored('contents', $docContent);
$doc->addField($contentsField);

// Hand the document to the index, then commit() to update the index.
$index->addDocument($doc);
$index->commit();
?>
代码二:
<?php
// Example 2: create an index and dump the directory object behind it.
require_once 'Zend/Search/Lucene.php';

// First constructor argument: the "index" directory under the current path.
// Second constructor argument (true): create a new index there.
$index = new Zend_Search_Lucene('index', true);

// Ask the index for its directory object.
$dir = $index->getDirectory();

// Print the caption followed by the print_r() rendering of the directory.
echo '索引指定的路径信息为:<p>', print_r($dir, true);
?>
代码三:
<?php
// Example 3: open an existing index, add a document, list the index's fields.
require_once 'Zend/Search/Lucene.php';

// With no second argument the constructor opens an existing index.
$index = new Zend_Search_Lucene('index');

$doc = new Zend_Search_Lucene_Document();

// NOTE(review): $docUrl and $docContent are not assigned anywhere in this
// snippet; presumably the surrounding script defines them — confirm.

// Document path goes into a Text field named "url".
$urlField = Zend_Search_Lucene_Field::Text('url', $docUrl);
$doc->addField($urlField);

// Document body is indexed through an UnStored field named "contents".
$contentsField = Zend_Search_Lucene_Field::UnStored('contents', $docContent);
$doc->addField($contentsField);

// Add the document and commit() so the index is updated.
$index->addDocument($doc);
$index->commit();

// Fetch the list of field names and print each one followed by a <p>.
$fieldNames = $index->getFieldNames();
echo "\$index索引的全部字段为:<p>";
foreach ($fieldNames as $fieldName) {
    echo $fieldName, '<p>';
}
?>
代码四:
<?php
// Example 4: run a single-term query against the "test" index and print
// every hit.
//
// The full "<?php" opening tag is used so the script does not depend on the
// short_open_tag ini setting.
require_once 'Zend/Search/Lucene.php';

// Open the existing "test" index (per the original note, created by 23-7.php).
$index = new Zend_Search_Lucene('test');

// Report how many documents the index holds.
echo "索引包括{$index->count()}个文档<p>\n";

// Query string handed to find().
$search = "beijing";

// find() returns the collection of hits that is iterated below.
$hits = $index->find($search);

foreach ($hits as $hit) {
    echo str_repeat('-', 80)."--<p>";                      // separator line
    echo '结果ID为:'.$hit->id."<p>";                      // hit id
    echo '分值为:'.sprintf('%.2f', $hit->score)."<p>";    // score, two decimals
    $document = $hit->getDocument();                       // document behind the hit
    echo "标题为:".$hit->title;                            // "title" field of the hit
    echo "<p>";
    echo $document->getFieldValue('contents');             // "contents" field value
}
?>
代码五:
<?php
// Example 5: create the "test" index and fill it from an in-memory array.
//
// The full "<?php" opening tag is used so the script does not depend on the
// short_open_tag ini setting.
require_once 'Zend/Search/Lucene.php';

// Passing true as the second argument creates a new index named "test".
$index = new Zend_Search_Lucene('test', true);

// Sample records; each one becomes a document with title/auth/contents fields.
$data = array(
    array(
        'title'    => 'hello',
        'auth'     => 'h',
        'contents' => 'hello world!'
    ),
    array(
        'title'    => 'test',
        'auth'     => 't',
        'contents' => 'this is a test'
    ),
    array(
        'title'    => 'hello',
        'auth'     => 'h',
        'contents' => 'hello sky!'
    )
);

// Insert every record into the index.
foreach ($data as $temp) {
    $doc = new Zend_Search_Lucene_Document();
    $doc->addField(Zend_Search_Lucene_Field::Text('title', $temp['title']));       // title
    $doc->addField(Zend_Search_Lucene_Field::Text('auth', $temp['auth']));         // author
    $doc->addField(Zend_Search_Lucene_Field::Text('contents', $temp['contents'])); // body
    $index->addDocument($doc);
}

// Commit the index, then print the document count.
$index->commit();
echo $index->count()."个文档已经被索引";
?>
代码六:
<?php
// Example 6: query the "test" index for "world or sky" and print each hit's
// id, score, title, author and contents.
//
// The full "<?php" opening tag is used so the script does not depend on the
// short_open_tag ini setting.
require_once 'Zend/Search/Lucene.php';

// Open the existing "test" index.
$index = new Zend_Search_Lucene('test');

// Query string handed to find().
$search = "world or sky";
$hits = $index->find($search);

// Number of hits, followed by a separator line.
echo "符合包含world或者sky的结果数为:";
echo count($hits);
echo "<p>";
echo str_repeat('-',40)."<p>";

foreach ($hits as $hit) {
    echo '结果ID为:'.$hit->id."<p>";                     // hit id
    echo '分值为:'.sprintf('%.2f',$hit->score)."<p>";    // score, two decimals
    $document = $hit->getDocument();                      // document behind the hit
    echo "标题为:".$hit->title;                           // "title" field
    echo "<p>";
    echo "作者为:".$hit->auth;                            // "auth" field
    echo "<p>";
    echo "内容为:".$document->getFieldValue('contents');  // "contents" field
    echo "<p>";
}
?>
代码七:
<?php
// Example 7: query the "test" index with "title:hello AND NOT sky" and print
// each hit's id, score, title, author and contents.
//
// The full "<?php" opening tag is used so the script does not depend on the
// short_open_tag ini setting.
require_once 'Zend/Search/Lucene.php';

// Open the existing "test" index.
$index = new Zend_Search_Lucene('test');

// Query string handed to find().
$search = "title:hello AND NOT sky";
$hits = $index->find($search);

// Number of hits, followed by a separator line.
echo "符合标题包含hello同时内容不含sky的结果数为:";
echo count($hits);
echo "<p>";
echo str_repeat('-',40)."<p>";

foreach ($hits as $hit) {
    echo '结果ID为:'.$hit->id."<p>";                     // hit id
    echo '分值为:'.sprintf('%.2f',$hit->score)."<p>";    // score, two decimals
    $document = $hit->getDocument();                      // document behind the hit
    echo "标题为:".$hit->title;                           // "title" field
    echo "<p>";
    echo "作者为:".$hit->auth;                            // "auth" field
    echo "<p>";
    echo "内容为:".$document->getFieldValue('contents');  // "contents" field
    echo "<p>";
}
?>
代码八:
<?php
// Example 8 preamble: declare the response encoding and load the analyzer
// base classes.
//
// The full "<?php" opening tag is used so the script does not depend on the
// short_open_tag ini setting.

// Declare the page encoding. header() expects the whole header line as its
// first argument; its second argument is the boolean "replace" flag, so the
// charset has to be part of the header string itself.
header('Content-Type: text/html; charset=utf-8');

// Load the analyzer base class and the Common analyzer extended by the
// custom analyzer in this example.
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
/**
 * Custom text analyzer for mixed ASCII / Chinese input.
 *
 * ASCII alphanumeric runs become one token each. Runs of non-ASCII bytes are
 * emitted as overlapping two-character tokens; the scanner advances 3 bytes
 * per non-ASCII character, i.e. it assumes every such character is a 3-byte
 * UTF-8 sequence.
 */
class Phpbean_Lucene_Analyzer extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /** Current byte offset into $this->_input. */
    private $_position;

    /** Chinese stop words that reset() replaces with a space. */
    private $_cnStopWords = array();

    /**
     * Set the Chinese stop words.
     *
     * @param array $cnStopWords strings to blank out of the input in reset()
     */
    public function setCnStopWords($cnStopWords)
    {
        $this->_cnStopWords = $cnStopWords;
    }

    /**
     * Reset the token stream: rewind to offset 0 and replace punctuation and
     * the configured stop words in the input with spaces.
     */
    public function reset()
    {
        $this->_position = 0; // start scanning from the beginning

        // Characters/strings that are treated as separators.
        $search=array(",", "/", "\\", ".", ";", ":", "\"", "!", "~", "`", "^", "(", ")", "?", "-", "'", "<", ">", "$", "&", "%", "#", "@", " ", "=", "{", "}", "[", "]", ":", ")", "(", ".", "。", ",", "!", ";", "“", "”", "‘", "’", "[", "]", "、", "—", " ", "《", "》", "-", "…", "【", "】","的");

        // Turn every separator into a single space.
        $this->_input = str_replace($search, ' ', $this->_input);

        // Turn every configured stop word into a single space.
        $this->_input = str_replace($this->_cnStopWords, ' ', $this->_input);
    }

    /**
     * Tokenization stream API
     * Get next token
     * Returns null at the end of stream
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function nextToken()
    {
        if ($this->_input === null) {
            return null;
        }

        $len = strlen($this->_input);

        while ($this->_position < $len) {
            // Skip leading spaces.
            while ($this->_position < $len && $this->_input[$this->_position] == ' ') {
                $this->_position++;
            }

            // Only trailing spaces were left: do not index past the end.
            if ($this->_position >= $len) {
                break;
            }

            $termStartPosition = $this->_position;
            $temp_char = $this->_input[$this->_position];
            $isCnWord = false;

            if (ord($temp_char) > 127) {
                // Non-ASCII run: consume up to two characters, 3 bytes each.
                $i = 0;
                while ($this->_position < $len && ord($this->_input[$this->_position]) > 127) {
                    $this->_position = $this->_position + 3;
                    $i++;
                    if ($i == 2) {
                        $isCnWord = true;
                        break;
                    }
                }
                // A lone non-ASCII character does not form a token.
                if ($i == 1) {
                    continue;
                }
            } else {
                // ASCII run: consume consecutive alphanumeric bytes.
                while ($this->_position < $len && ctype_alnum($this->_input[$this->_position])) {
                    $this->_position++;
                }
            }

            // Nothing was consumed (a byte that is neither space, alphanumeric
            // nor non-ASCII): step over it and try again.
            if ($this->_position == $termStartPosition) {
                $this->_position++;
                continue;
            }

            $token = new Zend_Search_Lucene_Analysis_Token(
                substr($this->_input, $termStartPosition, $this->_position - $termStartPosition),
                $termStartPosition,
                $this->_position
            );
            $token = $this->normalize($token);

            // Step back one character so the next two-character token
            // overlaps this one.
            if ($isCnWord) {
                $this->_position = $this->_position - 3;
            }

            // normalize() may yield null (checked here); then keep scanning.
            if ($token !== null) {
                return $token;
            }
        }

        return null;
    }
}
// Driver: run the custom analyzer over a sample string and dump the tokens.

// Load the stop-word token filter used below (require_once is a no-op if it
// has already been loaded).
require_once 'Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php';

// English stop words, applied through a token filter.
$stopWords = array('a', 'an', 'at', 'the', 'and', 'or', 'is', 'am');
$stopWordsFilter = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords);

$analyzer = new Phpbean_Lucene_Analyzer();

// Chinese stop words, blanked out of the input by the analyzer's reset().
$cnStopWords = array('的');
$analyzer->setCnStopWords($cnStopWords);
$analyzer->addFilter($stopWordsFilter);

// Sample input mixing English and Chinese text.
$value = 'this is a 中文的内容的测试';
$analyzer->setInput($value, 'utf-8');

$tokenCounter = 0;
$tokens = array(); // initialised so print_r() is safe when no token is produced

// Pull tokens until the stream is exhausted.
while (($token = $analyzer->nextToken()) !== null) {
    $tokenCounter++;
    $tokens[] = $token;
}

print_r($tokens);
?>
更多实例代码详见压缩包