Zend/Search/Lucene.php 相关示例

基本信息

源码名称：Zend/Search/Lucene.php 相关示例

源码大小：0.01M

文件格式：.zip

开发语言：PHP

更新时间：2013-01-02

友情提示：（无需注册或充值，赞助后即可获取资源下载链接）

嘿，亲！知识可是无价之宝呢，但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下，绝对物超所值哦！如有下载和支付问题，请联系我们QQ(微信同号)：78630559

本次赞助数额为： 2 元　

源码介绍

Zend/Search/Lucene.php 用于构建文件索引；Zend/Search/Lucene/Analysis/Analyzer.php 用于文本分析（分词）。

代码一：

<?php
require_once 'Zend/Search/Lucene.php';
require_once 'Zend/Search/Lucene/Document.php';

// Passing true as the second constructor argument creates a new index
// in the "index" directory.
$luceneIndex = new Zend_Search_Lucene('index', true);

// Start an empty document.
$document = new Zend_Search_Lucene_Document();

// NOTE: $docUrl and $docContent are not defined in this snippet; the
// surrounding code is expected to supply them.

// Keep the document path in a Text field named "url".
$urlField = Zend_Search_Lucene_Field::Text('url', $docUrl);
$document->addField($urlField);

// Index the document body through an UnStored field named "contents".
$contentsField = Zend_Search_Lucene_Field::UnStored('contents', $docContent);
$document->addField($contentsField);

// Add the document to the index, then commit() to update the index.
$luceneIndex->addDocument($document);
$luceneIndex->commit();
?>

代码二：

<?php
// Load Zend_Search_Lucene.
require_once 'Zend/Search/Lucene.php';

// First constructor argument: the "index" directory under the current path.
// Second constructor argument: true creates a new index.
$luceneIndex = new Zend_Search_Lucene('index', true);

// Fetch the directory object backing the index.
$directory = $luceneIndex->getDirectory();

echo "索引指定的路径信息为：<p>";
// Dump the directory object.
print_r($directory);
?>

代码三：

<?php
// Load Zend_Search_Lucene.
require_once 'Zend/Search/Lucene.php';

// Open an index that already exists in the "index" directory.
$luceneIndex = new Zend_Search_Lucene('index');

// Start an empty document.
$document = new Zend_Search_Lucene_Document();

// NOTE: $docUrl and $docContent are not defined in this snippet; the
// surrounding code is expected to supply them.

// Keep the document path in a Text field named "url".
$urlField = Zend_Search_Lucene_Field::Text('url', $docUrl);
$document->addField($urlField);

// Index the document body through an UnStored field named "contents".
$contentsField = Zend_Search_Lucene_Field::UnStored('contents', $docContent);
$document->addField($contentsField);

// Add the document to the index, then commit() to update the index.
$luceneIndex->addDocument($document);
$luceneIndex->commit();

// Fetch the list of field names and print one per paragraph.
$fieldNames = $luceneIndex->getFieldNames();
echo "\$index索引的全部字段为：<p>";
foreach ($fieldNames as $fieldName) {
	echo $fieldName, "<p>";
}
?>

代码四：

<?php
// Uses the full "<?php" open tag: the short "<?" form is only parsed when
// the short_open_tag ini setting is enabled.

// Load Zend_Search_Lucene.
require_once 'Zend/Search/Lucene.php';

// Open the existing "test" index (the one created by 23-7.php).
$index=new Zend_Search_Lucene('test');

// Print the number of documents in the index.
echo "索引包括{$index->count()}个文档<p>\n";

// Query string to search for.
$search="beijing";

// Run the search; find() returns the list of hits.
$hits=$index->find($search);

// Walk the result set and print each hit.
foreach($hits as $hit)
{
	// Separator line.
	echo str_repeat('-', 80)."--<p>";
	// Hit id.
	echo '结果ID为：'.$hit->id."<p>";
	// Hit score, formatted to two decimals.
	echo '分值为：'.sprintf('%.2f', $hit->score)."<p>";
	// Document behind this hit.
	$document=$hit->getDocument();
	// Title of the hit.
	echo "标题为：".$hit->title;
	echo "<p>";
	// Contents field of the document.
	echo $document->getFieldValue('contents');
}
?>

代码五：

<?php
// Uses the full "<?php" open tag: the short "<?" form is only parsed when
// the short_open_tag ini setting is enabled.

// Load Zend_Search_Lucene.
require_once 'Zend/Search/Lucene.php';

// Create a new index named "test" (second argument true = create).
$index=new Zend_Search_Lucene('test',true);

// Sample records to index: each has a title, an author and a body.
$data=array(
	array(
		'title'=>'hello',
		'auth'=>'h',
		'contents'=>'hello world!'
	),
	array(
		'title'=>'test',
		'auth'=>'t',
		'contents'=>'this is a test'
	),
	array(
		'title'=>'hello',
		'auth'=>'h',
		'contents'=>'hello sky!'
	)
);

// Turn every record into a document and add it to the index.
foreach($data as $temp)
{
	$doc=new Zend_Search_Lucene_Document();
	// Title, author and contents are all added as Text fields.
	$doc->addField(Zend_Search_Lucene_Field::Text('title',$temp['title']));
	$doc->addField(Zend_Search_Lucene_Field::Text('auth',$temp['auth']));
	$doc->addField(Zend_Search_Lucene_Field::Text('contents',$temp['contents']));
	$index->addDocument($doc);
}

// Commit the index, then report how many documents it holds.
$index->commit();
echo $index->count()."个文档已经被索引";
?>

代码六：

<?php
// Uses the full "<?php" open tag: the short "<?" form is only parsed when
// the short_open_tag ini setting is enabled.

// Load Zend_Search_Lucene.
require_once 'Zend/Search/Lucene.php';

// Open the existing "test" index.
$index=new Zend_Search_Lucene('test');

// Query string: documents containing "world" or "sky".
$search="world or sky";

// Run the search; find() returns the list of hits.
$hits=$index->find($search);

// Print the number of hits followed by a separator line.
echo "符合包含world或者sky的结果数为：";
echo count($hits);
echo "<p>";
echo str_repeat('-',40)."<p>";

// Walk the result set and print each hit.
foreach($hits as $hit)
{
	// Hit id.
	echo '结果ID为：'.$hit->id."<p>";
	// Hit score, formatted to two decimals.
	echo '分值为：'.sprintf('%.2f',$hit->score)."<p>";
	// Document behind this hit.
	$document=$hit->getDocument();
	// Title of the hit.
	echo "标题为：".$hit->title;
	echo "<p>";
	// Author of the hit.
	echo "作者为：".$hit->auth;
	echo "<p>";
	// Contents field of the document.
	echo "内容为：".$document->getFieldValue('contents');
	echo "<p>";
}
?>

代码七：

<?php
// Uses the full "<?php" open tag: the short "<?" form is only parsed when
// the short_open_tag ini setting is enabled.

// Load Zend_Search_Lucene.
require_once 'Zend/Search/Lucene.php';

// Open the existing "test" index.
$index=new Zend_Search_Lucene('test');

// Query string: title contains "hello" and the document does not match "sky".
$search= "title:hello AND NOT sky";

// Run the search; find() returns the list of hits.
$hits=$index->find($search);

// Print the number of hits followed by a separator line.
echo "符合标题包含hello同时内容不含sky的结果数为：";
echo count($hits);
echo "<p>";
echo str_repeat('-',40)."<p>";

// Walk the result set and print each hit.
foreach($hits as $hit)
{
	// Hit id.
	echo '结果ID为：'.$hit->id."<p>";
	// Hit score, formatted to two decimals.
	echo '分值为：'.sprintf('%.2f',$hit->score)."<p>";
	// Document behind this hit.
	$document=$hit->getDocument();
	// Title of the hit.
	echo "标题为：".$hit->title;
	echo "<p>";
	// Author of the hit.
	echo "作者为：".$hit->auth;
	echo "<p>";
	// Contents field of the document.
	echo "内容为：".$document->getFieldValue('contents');
	echo "<p>";
}
?>

代码八：

<?php
// Uses the full "<?php" open tag: the short "<?" form is only parsed when
// the short_open_tag ini setting is enabled.

// Set the page encoding. header() expects the complete "Name: value" line
// as its first argument; its second argument is the boolean $replace flag,
// not the header value.
header('Content-Type: text/html; charset=utf-8');
// Load the Zend_Search_Lucene analyzer classes.
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
/**
 * Custom text analyzer for mixed ASCII / Chinese input.
 *
 * Runs of ASCII letters and digits become one token each. Runs of non-ASCII
 * bytes are emitted as overlapping two-character tokens; a lone non-ASCII
 * character is skipped. Every non-ASCII character is assumed to occupy
 * exactly three bytes (as CJK characters do in UTF-8).
 */
class Phpbean_Lucene_Analyzer extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
	/** @var int Byte offset of the read cursor inside $this->_input. */
	private $_position;
	/** @var array Chinese stop words that reset() replaces with blanks. */
	private $_cnStopWords=array();

	/**
	 * Set the Chinese stop words.
	 *
	 * @param array $cnStopWords Strings to blank out of the input in reset().
	 */
	public function setCnStopWords($cnStopWords)
	{
		$this->_cnStopWords=$cnStopWords;
	}

	/**
	 * Reset the token stream: rewind the cursor and replace punctuation and
	 * stop words in the input with blanks.
	 */
	public function reset()
	{
		// Start reading from the first byte.
		$this->_position=0;
		// ASCII and full-width punctuation (plus the particle "的") treated as separators.
		$search=array(",", "/", "\\", ".", ";", ":", "\"", "!", "~", "`", "^", "(", ")", "?", "-", "'", "<", ">", "$", "&", "%", "#", "@", " ", "=", "{", "}", "[", "]", "：", "）", "（", "．", "。", "，", "！", "；", "“", "”", "‘", "’", "［", "］", "、", "—", "　", "《", "》", "－", "…", "【", "】","的");
		// Replace every separator with a single blank.
		$this->_input=str_replace($search,' ',$this->_input);
		// Replace the configured Chinese stop words with blanks as well.
		$this->_input=str_replace($this->_cnStopWords,' ',$this->_input);
	}

	/**
	 * Tokenization stream API.
	 * Get next token.
	 * Returns null at the end of stream.
	 *
	 * @return Zend_Search_Lucene_Analysis_Token|null
	 */
	public function nextToken()
	{
		if ($this->_input === null)
		{
			return null;
		}
		$len=strlen($this->_input);
		while($this->_position < $len)
		{
			// Skip separator blanks in front of the next term.
			while ($this->_position < $len && $this->_input[$this->_position]==' ')
			{
				$this->_position++;
			}
			// Only blanks were left: stop before reading past the last byte.
			if ($this->_position >= $len)
			{
				break;
			}
			$termStartPosition = $this->_position;
			$isCnWord=false;
			if(ord($this->_input[$this->_position])>127)
			{
				// Non-ASCII run: consume up to two characters of three bytes each.
				$i=0;
				while($this->_position < $len && ord($this->_input[$this->_position])>127)
				{
					$this->_position = $this->_position + 3;
					$i++;
					if($i==2)
					{
						$isCnWord = true;
						break;
					}
				}
				// A single isolated character does not form a token.
				if($i==1)continue;
			}
			else
			{
				// ASCII run: consume consecutive letters and digits.
				while ($this->_position < $len && ctype_alnum($this->_input[$this->_position]))
				{
					$this->_position++;
				}
			}
			// Nothing was consumed (a byte that is neither blank, alphanumeric
			// nor non-ASCII): step over it and try again.
			if($this->_position == $termStartPosition)
			{
				$this->_position++;
				continue;
			}
			$token=new Zend_Search_Lucene_Analysis_Token(substr($this->_input,$termStartPosition, $this->_position - $termStartPosition),$termStartPosition,$this->_position);
			$token = $this->normalize($token);
			// Step back one character so the next two-character token overlaps this one.
			if($isCnWord)$this->_position = $this->_position - 3;
			// normalize() yields null for a token removed by a filter; keep scanning then.
			if ($token !== null)
			{
				return $token;
			}
		}
		return null;
	}
}
// English stop words handed to the stop-word token filter.
$stopWords=array('a', 'an', 'at', 'the', 'and', 'or', 'is', 'am');
$stopWordsFilter=new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords);
$analyzer=new Phpbean_Lucene_Analyzer();
// Chinese stop words handed to the analyzer.
$cnStopWords=array('的');
$analyzer->setCnStopWords($cnStopWords);
$analyzer->addFilter($stopWordsFilter);
// Sample text mixing English and Chinese.
$value='this is a 中文的内容的测试';
$analyzer->setInput($value, 'utf-8');
// Start from an empty list so print_r() has a defined value even when no
// token is produced.
$tokens=array();
$tokenCounter=0;
// Pull tokens until the analyzer reports the end of the stream.
while(($token=$analyzer->nextToken())!==null)
{
	$tokenCounter++;
	$tokens[]=$token;
}
print_r($tokens);
?>

更多实例代码详见压缩包