赞
踩
我将问题理解为:如何判断一段内容中是否包含敏感词。题主可以先把所有敏感词构建成一棵字典树(Trie),然后再用它查找内容中是否包含这些敏感词。
下面是一个简单的PHP字典树的示例,供参考
/**
 * Keyword dictionary stored as a trie (prefix tree) of nested arrays.
 *
 * Each node is an associative array keyed by one UTF-8 character (as produced
 * by UTF8Util::getChars()). A node that contains the END_OF_WORD key marks the
 * end of a stored keyword.
 */
class TrieTree
{
    /**
     * Key marking "a keyword ends here" inside a node.
     *
     * PHP casts a null array key to the empty string, so trees built with a
     * null terminator use exactly this key and stay compatible (including
     * data previously produced by export()).
     */
    const END_OF_WORD = '';

    /**
     * Root node of the trie.
     *
     * @var array
     */
    public $tree = array();

    /**
     * Adds a keyword to the trie.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return $this
     */
    public function add($utf8_str)
    {
        $node = &$this->tree;
        foreach ($this->pathOf($utf8_str) as $key) {
            if (! array_key_exists($key, $node)) {
                $node[$key] = array();
            }
            // Rebind the reference one level deeper.
            $node = &$node[$key];
        }
        return $this;
    }

    /**
     * Removes a keyword from the trie.
     *
     * Only the part of the path used exclusively by this keyword is deleted,
     * so keywords sharing a prefix with it (or having it as a prefix) are
     * left intact. Removing a keyword that is not stored is a no-op.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return $this
     */
    public function remove($utf8_str)
    {
        $path = $this->pathOf($utf8_str);
        if (! $this->_find($path)) {
            return $this;
        }

        // Find the deepest node on the path that has more than one child.
        // Everything below the edge leaving that node belongs to this keyword
        // alone, so cutting that single edge removes exactly this keyword.
        // If no such node exists, the cut happens at the root.
        $node = &$this->tree;
        $cutNode = &$this->tree;
        $cutKey = $path[0];
        foreach ($path as $key) {
            if (count($node) > 1) {
                $cutNode = &$node;
                $cutKey = $key;
            }
            $node = &$node[$key];
        }
        unset($cutNode[$cutKey]);

        return $this;
    }

    /**
     * Tells whether exactly this keyword is stored in the trie.
     *
     * @param string $utf8_str keyword, UTF-8 encoded
     * @return boolean
     */
    public function exists($utf8_str)
    {
        return $this->_find($this->pathOf($utf8_str));
    }

    /**
     * Walks the given key path from the root.
     *
     * Traverses by value (arrays are copy-on-write) so a lookup can never
     * create nodes as a side effect.
     *
     * @param array $chars keys to follow, normally ending with END_OF_WORD
     * @return boolean true when every key on the path exists
     */
    private function _find($chars)
    {
        $node = $this->tree;
        foreach ($chars as $key) {
            if (! array_key_exists($key, $node)) {
                return false;
            }
            $node = $node[$key];
        }
        return true;
    }

    /**
     * Splits a keyword into its trie path: its characters plus the terminator.
     *
     * @param string $utf8_str
     * @return array
     */
    private function pathOf($utf8_str)
    {
        $path = UTF8Util::getChars($utf8_str);
        $path[] = self::END_OF_WORD;
        return $path;
    }

    /**
     * Scans a text for stored keywords.
     *
     * Every position of the text is tried as a possible keyword start. When
     * counting, each keyword occurrence is counted, including overlapping
     * occurrences and keywords that are prefixes of longer keywords.
     *
     * @param string $utf8_str text to scan, UTF-8 encoded
     * @param boolean $do_count when true, return the number of matches
     *                          instead of stopping at the first one
     * @return boolean|number true/false when $do_count is false, otherwise
     *                        the match count
     */
    public function contain($utf8_str, $do_count = false)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $len = count($chars);
        $matches = 0;

        for ($start = 0; $start < $len; $start ++) {
            $node = $this->tree;
            for ($pos = $start; $pos < $len; $pos ++) {
                $char = $chars[$pos];
                if (! array_key_exists($char, $node)) {
                    break;
                }
                $node = $node[$char];
                // A terminator in the node just reached means the characters
                // from $start to $pos spell a stored keyword.
                if (array_key_exists(self::END_OF_WORD, $node)) {
                    if (! $do_count) {
                        return true;
                    }
                    $matches ++;
                }
            }
        }

        return $do_count ? $matches : false;
    }

    /**
     * Checks several texts and reports whether any of them contains a keyword.
     *
     * @param array $str_array texts to scan; a non-array argument yields false
     * @return boolean
     */
    public function containMulti($str_array)
    {
        if (! \is_array($str_array)) {
            return false;
        }
        foreach ($str_array as $str) {
            if ($this->contain($str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Exports the trie as a serialized string.
     *
     * @return string
     */
    public function export()
    {
        return serialize($this->tree);
    }

    /**
     * Replaces the trie with one previously produced by export().
     *
     * SECURITY NOTE: unserialize() must not be fed untrusted input, because
     * it can instantiate arbitrary objects. Only import data this application
     * exported itself, or switch to a data-only format such as JSON.
     *
     * @param string $str serialized trie
     * @throws \InvalidArgumentException when the payload does not decode to
     *                                   an array; the current trie is kept
     */
    public function import($str)
    {
        $decoded = unserialize($str);
        // unserialize() returns false on malformed input; storing that (or
        // any other non-array) would break every later lookup.
        if (! \is_array($decoded)) {
            throw new \InvalidArgumentException('Serialized trie data must decode to an array.');
        }
        $this->tree = $decoded;
    }
}
/**
 * Helpers for working with UTF-8 encoded byte strings.
 */
class UTF8Util
{
    /**
     * Splits a UTF-8 string into an array of characters (one string each).
     *
     * The lead byte decides the sequence length (1 to 4 bytes). A multi-byte
     * sequence is accepted only when all of its trailing bytes are present
     * and are continuation bytes (10xxxxxx). Bytes that do not start a
     * well-formed sequence (stray continuation bytes, invalid lead bytes,
     * truncated sequences) are dropped one at a time, so a malformed byte
     * can never swallow the valid characters that follow it.
     *
     * Only the byte structure is checked; overlong encodings and surrogate
     * code points are not rejected.
     *
     * @param string $utf8_str
     * @return array list of characters, empty for an empty string
     */
    public static function getChars($utf8_str)
    {
        $bytes = (string) $utf8_str;
        $len = strlen($bytes);
        $chars = array();

        $i = 0;
        while ($i < $len) {
            $lead = ord($bytes[$i]);

            if ($lead < 0x80) {
                // 0xxx xxxx: ASCII, single byte
                $width = 1;
            } elseif (($lead & 0xE0) === 0xC0) {
                // 110x xxxx: first of two bytes
                $width = 2;
            } elseif (($lead & 0xF0) === 0xE0) {
                // 1110 xxxx: first of three bytes
                $width = 3;
            } elseif (($lead & 0xF8) === 0xF0) {
                // 1111 0xxx: first of four bytes
                $width = 4;
            } else {
                // Stray continuation byte or invalid lead byte: skip it.
                $i ++;
                continue;
            }

            if ($width > 1 && ! self::isContinuationRun($bytes, $i + 1, $width - 1, $len)) {
                // Malformed or truncated sequence: drop only the lead byte
                // and resume at the next byte.
                $i ++;
                continue;
            }

            $chars[] = substr($bytes, $i, $width);
            $i += $width;
        }

        return $chars;
    }

    /**
     * Tells whether $count continuation bytes (10xxxxxx) start at $start.
     *
     * @param string $bytes
     * @param int $start offset of the first expected continuation byte
     * @param int $count number of continuation bytes required
     * @param int $len byte length of $bytes
     * @return boolean
     */
    private static function isContinuationRun($bytes, $start, $count, $len)
    {
        $end = $start + $count;
        if ($end > $len) {
            return false;
        }
        for ($k = $start; $k < $end; $k ++) {
            if ((ord($bytes[$k]) & 0xC0) !== 0x80) {
                return false;
            }
        }
        return true;
    }
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。