php权重计算方法代码分享

2015-01-24信息快讯网

权重计算,稍加修改亦可用于分词,词频统计,全文检索和垃圾信息(spam)检测等

<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +------------------------------------------------------------------------
//  Name       :   权重计算                                         
//  Description:   稍加修改,亦可用于分词,词频统计,全文检索和垃圾检测
//  Date       :   2013/12/16 08:51

/**
 * Weight calculator built on a byte-level trie.
 *
 * A lookup table (one comma/pipe separated list of words per row) is compiled
 * into a trie; every item is then scanned for dictionary words and, for each
 * item, the table rows whose words occurred most often are reported.
 */
class weight
{
    /**
     * Trie nodes. Node 0 is the root; each node maps a single byte to the
     * index of its child node, and the special 'acc' entry lists the table
     * keys of every word that ends at that node.
     */
    protected $aDict = array(array());

    /** Items (strings) to be scored, keyed as supplied by the caller. */
    protected $aItems = array();

    /** Rule used to build the cached result in $aShow, or null if none. */
    protected $sLastRule;

    /** Per-item map of table key => number of matched words. */
    protected $aMatchs = array();

    /** Cached result of the last genShow() call. */
    protected $aShow = array();

    /**
     * Drops the recorded match table and the cached output so the next
     * getShow() call recomputes them.
     */
    private function init()
    {
        $this->aShow = array();
        $this->aMatchs = array();
        $this->sLastRule = null;
    }

    /**
     * Replaces the items to be scored.
     *
     * @param mixed $mItems A single item or an array of items.
     */
    public function newItems($mItems)
    {
        $this->aItems = is_array($mItems) ? $mItems : array($mItems);
        $this->init();
    }

    /**
     * Adds the words of a lookup table to the dictionary.
     *
     * Each row is a string of words separated by ',' or '|'; every word is
     * registered under the key of the row it came from.
     *
     * @param array $aTable Rows of the lookup table.
     */
    public function newTable(array $aTable)
    {
        foreach ($aTable as $iTableKey => $sTableLine) {
            // '|' and ',' are equivalent separators.
            $aWords = explode(',', str_replace('|', ',', $sTableLine));
            foreach ($aWords as $sWord) {
                $this->genDict($sWord, $iTableKey);
            }
        }
        $this->init();
    }

    /**
     * Returns the result for the given rule, reusing the cached result when
     * neither the inputs nor the rule changed since it was computed.
     *
     * @param string $sRule Selection rule; only 'max' is implemented.
     * @return array Item key => list of selected table keys. Empty when there
     *               are no items or the dictionary holds no words.
     */
    public function getShow($sRule = 'max')
    {
        // The root node always exists, so an empty dictionary is an empty root.
        if (empty($this->aItems) || empty($this->aDict[0])) {
            return array();
        }
        if (empty($this->aShow) || $sRule !== $this->sLastRule) {
            return $this->genShow($sRule);
        }
        return $this->aShow;
    }

    /**
     * Scans every item and builds the result for the given rule.
     *
     * With rule 'max' each item maps to the table keys that collected the
     * highest number of word matches (an empty list when nothing matched).
     * Any other rule yields an empty result.
     *
     * @param string $sRule Selection rule.
     * @return array Item key => list of selected table keys.
     */
    public function genShow($sRule)
    {
        $aShow = array();
        $aMatchs = array();

        foreach ($this->aItems as $mItemKey => $sItem) {
            $aCounts = array_count_values($this->matchWord($sItem));
            $aMatchs[] = $aCounts;

            switch ($sRule) {
                case 'max':
                    // max() must not be called on an empty array.
                    $aShow[$mItemKey] = empty($aCounts)
                        ? array()
                        : array_keys($aCounts, max($aCounts), true);
                    break;
            }
        }

        $this->aShow = $aShow;
        $this->aMatchs = $aMatchs;
        // Remember the rule so getShow() can serve the cached result.
        $this->sLastRule = $sRule;

        return $aShow;
    }

    /**
     * Inserts one word into the trie and tags its final node with $iKey.
     *
     * @param mixed $mWord Word to insert; empty words are ignored.
     * @param mixed $iKey  Table key recorded for the word.
     */
    private function genDict($mWord, $iKey = '')
    {
        $sWord = (string) $mWord;
        if ($sWord === '') {
            return;
        }

        $iCur = 0; // current node index, starting at the root
        foreach (str_split($sWord) as $sChar) {
            if (!isset($this->aDict[$iCur][$sChar])) {
                // Nodes are appended sequentially, so the next free index
                // equals the current node count.
                $iNew = count($this->aDict);
                $this->aDict[$iNew] = array();
                $this->aDict[$iCur][$sChar] = $iNew;
            }
            $iCur = $this->aDict[$iCur][$sChar];
        }
        $this->aDict[$iCur]['acc'][] = $iKey;
    }

    /**
     * Scans a string for dictionary words.
     *
     * Matching is byte-wise and takes the first (shortest) word found at a
     * position, then continues right after it. When a partial match fails,
     * scanning restarts one byte after the position where it began.
     *
     * @param string $sLine Text to scan.
     * @return array Table keys of every matched word, in order of occurrence
     *               (a key appears once per matching word).
     */
    public function matchWord($sLine)
    {
        $iCur = $iOffset = $iPosition = 0;
        // Sentinel byte: forces a mismatch at the end so a trailing partial
        // match is backtracked instead of silently dropped.
        $sLine .= "\0";
        $iLen = strlen($sLine);
        $aReturn = array();

        while ($iOffset < $iLen) {
            $sChar = $sLine[$iOffset];
            if (isset($this->aDict[$iCur][$sChar])) {
                $iCur = $this->aDict[$iCur][$sChar];
                if (isset($this->aDict[$iCur]['acc'])) {
                    $aReturn = array_merge($aReturn, $this->aDict[$iCur]['acc']);
                    // Word accepted: the next attempt starts after it.
                    $iPosition = $iOffset + 1;
                    $iCur = 0;
                }
            } else {
                // Mismatch: rewind to the start of this attempt and move the
                // start position one byte forward.
                $iCur = 0;
                $iOffset = $iPosition;
                $iPosition = $iOffset + 1;
            }
            ++$iOffset;
        }

        return $aReturn;
    }
}

?>

外部调用示例

// Strings to be scored against the lookup table.
$aItems = array(
    'chinaisbig',
    'whichisnot',
    'totalyrightforme',
);
// Lookup table: each row lists its words, separated by ',' or '|'.
$aTable = array(
    'china,is|small',
    'china,big|me',
    'china,is|big,which|not,me',
    'totaly|right,for,me',
);

// The class defined above is named "weight".
$oWeight = new weight;
$oWeight->newItems($aItems);
$oWeight->newTable($aTable);
// newTable() returns nothing; the computed result comes from getShow().
$aResult = $oWeight->getShow('max');

©2014-2024 dbsqp.com