php获取中文拼音首字母类和函数分享

2015-01-24信息快讯网

这篇文章主要介绍了php获取中文拼音首字母类和函数,需要的朋友可以参考下

一、公司同事整理的类,挺实用的.相信拿出来分享下他不会介意的O(∩_∩)O.不过如果首字母是数字或英文会有些问题.

/**
 * Helper_Spell: utility for extracting pinyin initials from Chinese text.
 *
 * Each Chinese character is mapped to the first letter (A-Z) of its pinyin
 * by looking up its two-byte GB2312/GBK code in a table of range boundaries.
 *
 * @category Helper
 * @package  Helper_Spell
 * @author   Lancer <[email protected]>
 * @version  1.0
 * @see      Translation_Big2gb
 */

class Helper_Spell
{
    /**
     * Range table: "lead byte * 1000 + trail byte" => pinyin initial.
     *
     * Each key is the lowest code that maps to the given letter; a code maps
     * to the letter of the greatest key that is <= the code. The last key is
     * the upper bound of the supported range (anything above it is rejected).
     *
     * @var array
     * @access private
     */
    private $_pinyins = array(
        176161 => 'A',
        176197 => 'B',
        178193 => 'C',
        180238 => 'D',
        182234 => 'E',
        183162 => 'F',
        184193 => 'G',
        185254 => 'H',
        187247 => 'J',
        191166 => 'K',
        192172 => 'L',
        194232 => 'M',
        196195 => 'N',
        197182 => 'O',
        197190 => 'P',
        198218 => 'Q',
        200187 => 'R',
        200246 => 'S',
        203250 => 'T',
        205218 => 'W',
        206244 => 'X',
        209185 => 'Y',
        212209 => 'Z',
        215249 => 'Z',
    );

    /**
     * Encoding of the strings handed to this instance.
     *
     * @var string
     * @access private
     */
    private $_charset = null;

    /**
     * Constructor; selects the input encoding.
     *
     * Input declared as utf-8 is converted to GBK before lookup; any other
     * value means the input is used as-is (expected to be GB2312/GBK bytes).
     *
     * @param string $charset 'utf-8' (default) or 'gb2312'
     */
    public function __construct($charset = 'utf-8')
    {
        $this->_charset = $charset;
    }

    /**
     * Return the first A-Z letter found in the initials of $str.
     *
     * @access public
     * @static
     * @param string $str
     * @param string $charset encoding of $str, forwarded to getInitials()
     * @return string a single upper-case letter, or '*' when none is found
     * @example Helper_Spell::getInitialsFirst('我的爱'); => W
     */
    public static function getInitialsFirst($str, $charset = 'utf-8')
    {
        // Pure-ASCII input comes back from getInitials() unchanged (possibly
        // lower-case), so normalise the case before scanning for a letter.
        $initials = strtoupper(self::getInitials($str, $charset));
        $length = strlen($initials);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($initials[$i]);
            if ($byte >= 65 && $byte <= 90) { // 'A'..'Z'
                return $initials[$i];
            }
        }

        return '*';
    }

    /**
     * Return the pinyin initials of every character in $str.
     *
     * @access public
     * @static
     * @param string $str
     * @param string $charset encoding of $str
     * @return string
     * @example Helper_Spell::getInitials('我的爱'); => WDA
     */
    public static function getInitials($str, $charset = 'utf-8')
    {
        $instance = new self($charset);

        return $instance->_getInitials($str);
    }

    /**
     * Build the initials string for $str.
     *
     * - Pure ASCII input is returned unchanged, e.g. abc123 => abc123
     * - Chinese input yields the pinyin initials, e.g. 王小明 => WXM
     * - Mixed input keeps the ASCII characters, e.g. 我i我j => WIWJ
     *
     * Characters whose code is outside the lookup table are dropped.
     *
     * @access private
     * @param string $str
     * @param bool   $translation when true the string is first passed through
     *                            Translation_Big2gb::big2gb() (defined
     *                            elsewhere; presumably traditional => simplified)
     * @return string
     */
    private function _getInitials($str, $translation = TRUE)
    {
        // Compare against '' rather than using empty(): the string "0" is
        // valid input and must be returned as "0".
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        // Only a string that is ASCII from start to end can skip translation.
        if ($this->_isAsciis($str)) {
            return $str;
        }

        if ($translation) {
            $str = Translation_Big2gb::big2gb($str);
        }

        if (strtolower((string) $this->_charset) === 'utf-8') {
            // //IGNORE makes iconv() skip characters that have no GBK
            // equivalent instead of aborting the whole conversion.
            $str = iconv('utf-8', 'gbk//IGNORE', $str);
            if ($str === false) {
                return '';
            }
        }

        $result = array();
        foreach ($this->_cutWord($str) as $word) {
            if ($this->_isAscii($word)) {
                // Non-Chinese character: keep it as it is.
                $result[] = $word;
                continue;
            }
            if (strlen($word) < 2) {
                // Truncated double-byte character at the end of the input.
                continue;
            }

            // Fold the two bytes into the key format used by $_pinyins.
            $code = ord($word[0]) * 1000 + ord($word[1]);
            $key = $this->_search($code);
            if ($key !== -1) {
                $result[] = $this->_pinyins[$key];
            }
        }

        return strtoupper(implode('', $result));
    }

    /**
     * Byte-oriented substring for double-byte text.
     *
     * $start and $len are counted in characters and doubled to obtain a byte
     * window. Not called by the other methods of this class.
     *
     * @access private
     * @param string $str
     * @param int    $start
     * @param int    $len
     * @return string
     */
    private function _msubstr($str, $start, $len)
    {
        $start = $start * 2;
        $len = $len * 2;
        $strlen = strlen($str);
        $result = '';

        for ($i = 0; $i < $strlen; $i++) {
            $isLead = ord(substr($str, $i, 1)) > 129;
            if ($i >= $start && $i < ($start + $len)) {
                $result .= substr($str, $i, $isLead ? 2 : 1);
            }
            if ($isLead) {
                $i++; // skip the trail byte
            }
        }

        return $result;
    }

    /**
     * Split a GBK/GB2312 byte string into characters.
     *
     * ASCII bytes become one-byte elements; every other byte is taken together
     * with the byte that follows it.
     *
     * @access private
     * @param string $str
     * @return array
     */
    private function _cutWord($str)
    {
        $words = array();
        $length = strlen($str);
        $i = 0;

        while ($i < $length) {
            if ($this->_isAscii($str[$i])) {
                $words[] = $str[$i];
                $i += 1;
            } else {
                $words[] = substr($str, $i, 2);
                $i += 2;
            }
        }

        return $words;
    }

    /**
     * Tell whether the first byte of $char is a 7-bit ASCII byte.
     *
     * Bytes 0x80 and above are lead/continuation bytes in both GBK and UTF-8
     * and therefore never count as ASCII.
     *
     * @access private
     * @param string $char
     * @return bool
     */
    private function _isAscii($char)
    {
        return ord(substr($char, 0, 1)) < 0x80;
    }

    /**
     * Tell whether every byte of $str is ASCII.
     *
     * @access private
     * @param string $str
     * @return bool
     */
    private function _isAsciis($str)
    {
        $length = strlen($str);
        for ($i = 0; $i < $length; $i++) {
            if (!$this->_isAscii($str[$i])) {
                return false;
            }
        }

        return true;
    }

    /**
     * Map an ASCII code to a digit or an upper-case letter.
     *
     * Not called by the other methods of this class.
     *
     * @access private
     * @param int $ascii
     * @return string the character, or '~' for anything else
     */
    private function _getChar($ascii)
    {
        if ($ascii >= 48 && $ascii <= 57) {
            return chr($ascii); // digit
        } elseif ($ascii >= 65 && $ascii <= 90) {
            return chr($ascii); // A-Z
        } elseif ($ascii >= 97 && $ascii <= 122) {
            return chr($ascii - 32); // a-z, folded to upper case
        } else {
            return '~'; // anything else
        }
    }

    /**
     * Binary-search $_pinyins for the range that contains $code.
     *
     * @access private
     * @param int $code lead byte * 1000 + trail byte
     * @return int the matching key of $_pinyins, or -1 when $code lies outside
     *             the table
     */
    private function _search($code)
    {
        $data = array_keys($this->_pinyins);
        $lower = 0;
        $upper = count($data) - 1;

        // Reject codes outside the table (not a first-level Chinese character).
        if ($code < $data[0] || $code > $data[$upper]) {
            return -1;
        }

        while ($lower <= $upper) {
            $middle = ($lower + $upper) >> 1;
            if ($data[$middle] < $code) {
                $lower = $middle + 1;
            } elseif ($data[$middle] === $code) {
                return $data[$middle];
            } else {
                $upper = $middle - 1;
            }
        }

        // No exact hit: the range is the greatest key below $code. $lower is
        // at least 1 here because $code >= $data[0] was checked above.
        return $data[$lower - 1];
    }
}

二、用来得到中文的首字母

这个是将中文转换为拼音的类:charset

<?php
/**
 * Converts GBK/GB2312-encoded Chinese text to pinyin initials.
 *
 * The lookup table holds the full pinyin syllable for every code range, but
 * the public methods only return the first letter of the syllable.
 */

class charset
{
    /**
     * Pinyin range table: array(syllable, lowest code), ascending by code.
     *
     * A code is "lead byte * 256 + trail byte - 65536" of the GBK character.
     */
    private $_code = array(
        array('a', -20319), array('ai', -20317), array('an', -20304), array('ang', -20295),
        array('ao', -20292), array('ba', -20283), array('bai', -20265), array('ban', -20257),
        array('bang', -20242), array('bao', -20230), array('bei', -20051), array('ben', -20036),
        array('beng', -20032), array('bi', -20026), array('bian', -20002), array('biao', -19990),
        array('bie', -19986), array('bin', -19982), array('bing', -19976), array('bo', -19805),
        array('bu', -19784), array('ca', -19775), array('cai', -19774), array('can', -19763),
        array('cang', -19756), array('cao', -19751), array('ce', -19746), array('ceng', -19741),
        array('cha', -19739), array('chai', -19728), array('chan', -19725), array('chang', -19715),
        array('chao', -19540), array('che', -19531), array('chen', -19525), array('cheng', -19515),
        array('chi', -19500), array('chong', -19484), array('chou', -19479), array('chu', -19467),
        array('chuai', -19289), array('chuan', -19288), array('chuang', -19281), array('chui', -19275),
        array('chun', -19270), array('chuo', -19263), array('ci', -19261), array('cong', -19249),
        array('cou', -19243), array('cu', -19242), array('cuan', -19238), array('cui', -19235),
        array('cun', -19227), array('cuo', -19224), array('da', -19218), array('dai', -19212),
        array('dan', -19038), array('dang', -19023), array('dao', -19018), array('de', -19006),
        array('deng', -19003), array('di', -18996), array('dian', -18977), array('diao', -18961),
        array('die', -18952), array('ding', -18783), array('diu', -18774), array('dong', -18773),
        array('dou', -18763), array('du', -18756), array('duan', -18741), array('dui', -18735),
        array('dun', -18731), array('duo', -18722), array('e', -18710), array('en', -18697),
        array('er', -18696), array('fa', -18526), array('fan', -18518), array('fang', -18501),
        array('fei', -18490), array('fen', -18478), array('feng', -18463), array('fo', -18448),
        array('fou', -18447), array('fu', -18446), array('ga', -18239), array('gai', -18237),
        array('gan', -18231), array('gang', -18220), array('gao', -18211), array('ge', -18201),
        array('gei', -18184), array('gen', -18183), array('geng', -18181), array('gong', -18012),
        array('gou', -17997), array('gu', -17988), array('gua', -17970), array('guai', -17964),
        array('guan', -17961), array('guang', -17950), array('gui', -17947), array('gun', -17931),
        array('guo', -17928), array('ha', -17922), array('hai', -17759), array('han', -17752),
        array('hang', -17733), array('hao', -17730), array('he', -17721), array('hei', -17703),
        array('hen', -17701), array('heng', -17697), array('hong', -17692), array('hou', -17683),
        array('hu', -17676), array('hua', -17496), array('huai', -17487), array('huan', -17482),
        array('huang', -17468), array('hui', -17454), array('hun', -17433), array('huo', -17427),
        array('ji', -17417), array('jia', -17202), array('jian', -17185), array('jiang', -16983),
        array('jiao', -16970), array('jie', -16942), array('jin', -16915), array('jing', -16733),
        array('jiong', -16708), array('jiu', -16706), array('ju', -16689), array('juan', -16664),
        array('jue', -16657), array('jun', -16647), array('ka', -16474), array('kai', -16470),
        array('kan', -16465), array('kang', -16459), array('kao', -16452), array('ke', -16448),
        array('ken', -16433), array('keng', -16429), array('kong', -16427), array('kou', -16423),
        array('ku', -16419), array('kua', -16412), array('kuai', -16407), array('kuan', -16403),
        array('kuang', -16401), array('kui', -16393), array('kun', -16220), array('kuo', -16216),
        array('la', -16212), array('lai', -16205), array('lan', -16202), array('lang', -16187),
        array('lao', -16180), array('le', -16171), array('lei', -16169), array('leng', -16158),
        array('li', -16155), array('lia', -15959), array('lian', -15958), array('liang', -15944),
        array('liao', -15933), array('lie', -15920), array('lin', -15915), array('ling', -15903),
        array('liu', -15889), array('long', -15878), array('lou', -15707), array('lu', -15701),
        array('lv', -15681), array('luan', -15667), array('lue', -15661), array('lun', -15659),
        array('luo', -15652), array('ma', -15640), array('mai', -15631), array('man', -15625),
        array('mang', -15454), array('mao', -15448), array('me', -15436), array('mei', -15435),
        array('men', -15419), array('meng', -15416), array('mi', -15408), array('mian', -15394),
        array('miao', -15385), array('mie', -15377), array('min', -15375), array('ming', -15369),
        array('miu', -15363), array('mo', -15362), array('mou', -15183), array('mu', -15180),
        array('na', -15165), array('nai', -15158), array('nan', -15153), array('nang', -15150),
        array('nao', -15149), array('ne', -15144), array('nei', -15143), array('nen', -15141),
        array('neng', -15140), array('ni', -15139), array('nian', -15128), array('niang', -15121),
        array('niao', -15119), array('nie', -15117), array('nin', -15110), array('ning', -15109),
        array('niu', -14941), array('nong', -14937), array('nu', -14933), array('nv', -14930),
        array('nuan', -14929), array('nue', -14928), array('nuo', -14926), array('o', -14922),
        array('ou', -14921), array('pa', -14914), array('pai', -14908), array('pan', -14902),
        array('pang', -14894), array('pao', -14889), array('pei', -14882), array('pen', -14873),
        array('peng', -14871), array('pi', -14857), array('pian', -14678), array('piao', -14674),
        array('pie', -14670), array('pin', -14668), array('ping', -14663), array('po', -14654),
        array('pu', -14645), array('qi', -14630), array('qia', -14594), array('qian', -14429),
        array('qiang', -14407), array('qiao', -14399), array('qie', -14384), array('qin', -14379),
        array('qing', -14368), array('qiong', -14355), array('qiu', -14353), array('qu', -14345),
        array('quan', -14170), array('que', -14159), array('qun', -14151), array('ran', -14149),
        array('rang', -14145), array('rao', -14140), array('re', -14137), array('ren', -14135),
        array('reng', -14125), array('ri', -14123), array('rong', -14122), array('rou', -14112),
        array('ru', -14109), array('ruan', -14099), array('rui', -14097), array('run', -14094),
        array('ruo', -14092), array('sa', -14090), array('sai', -14087), array('san', -14083),
        array('sang', -13917), array('sao', -13914), array('se', -13910), array('sen', -13907),
        array('seng', -13906), array('sha', -13905), array('shai', -13896), array('shan', -13894),
        array('shang', -13878), array('shao', -13870), array('she', -13859), array('shen', -13847),
        array('sheng', -13831), array('shi', -13658), array('shou', -13611), array('shu', -13601),
        array('shua', -13406), array('shuai', -13404), array('shuan', -13400), array('shuang', -13398),
        array('shui', -13395), array('shun', -13391), array('shuo', -13387), array('si', -13383),
        array('song', -13367), array('sou', -13359), array('su', -13356), array('suan', -13343),
        array('sui', -13340), array('sun', -13329), array('suo', -13326), array('ta', -13318),
        array('tai', -13147), array('tan', -13138), array('tang', -13120), array('tao', -13107),
        array('te', -13096), array('teng', -13095), array('ti', -13091), array('tian', -13076),
        array('tiao', -13068), array('tie', -13063), array('ting', -13060), array('tong', -12888),
        array('tou', -12875), array('tu', -12871), array('tuan', -12860), array('tui', -12858),
        array('tun', -12852), array('tuo', -12849), array('wa', -12838), array('wai', -12831),
        array('wan', -12829), array('wang', -12812), array('wei', -12802), array('wen', -12607),
        array('weng', -12597), array('wo', -12594), array('wu', -12585), array('xi', -12556),
        array('xia', -12359), array('xian', -12346), array('xiang', -12320), array('xiao', -12300),
        array('xie', -12120), array('xin', -12099), array('xing', -12089), array('xiong', -12074),
        array('xiu', -12067), array('xu', -12058), array('xuan', -12039), array('xue', -11867),
        array('xun', -11861), array('ya', -11847), array('yan', -11831), array('yang', -11798),
        array('yao', -11781), array('ye', -11604), array('yi', -11589), array('yin', -11536),
        array('ying', -11358), array('yo', -11340), array('yong', -11339), array('you', -11324),
        array('yu', -11303), array('yuan', -11097), array('yue', -11077), array('yun', -11067),
        array('za', -11055), array('zai', -11052), array('zan', -11045), array('zang', -11041),
        array('zao', -11038), array('ze', -11024), array('zei', -11020), array('zen', -11019),
        array('zeng', -11018), array('zha', -11014), array('zhai', -10838), array('zhan', -10832),
        array('zhang', -10815), array('zhao', -10800), array('zhe', -10790), array('zhen', -10780),
        array('zheng', -10764), array('zhi', -10587), array('zhong', -10544), array('zhou', -10533),
        array('zhu', -10519), array('zhua', -10331), array('zhuai', -10329), array('zhuan', -10328),
        array('zhuang', -10322), array('zhui', -10315), array('zhun', -10309), array('zhuo', -10307),
        array('zi', -10296), array('zong', -10281), array('zou', -10274), array('zu', -10270),
        array('zuan', -10262), array('zui', -10260), array('zun', -10256), array('zuo', -10254),
    );

    /**
     * Convert a GBK/GB2312 byte string to pinyin initials.
     *
     * Single-byte characters are passed through convert() one by one; every
     * byte from 0x81 upwards is read together with the following byte as one
     * double-byte character. Characters without a table entry yield nothing.
     *
     * @param string $str GBK/GB2312-encoded input
     * @return string lower-case initials mixed with the untouched ASCII bytes
     */
    public function PinYin($str)
    {
        $ret = '';
        $length = strlen($str);

        for ($i = 0; $i < $length; $i++) {
            $p = ord($str[$i]);
            if ($p >= 0x81) {
                // Lead byte of a double-byte character: pull in the trail byte.
                $i++;
                if ($i >= $length) {
                    break; // truncated character at the end of the input
                }
                $p = $p * 256 + ord($str[$i]) - 65536;
            }
            $ret .= $this->convert($p);
        }

        return $ret;
    }

    /**
     * Convert one character code to its output.
     *
     * @param int $num a single-byte value (1..159) or a double-byte code in the
     *                 form "lead * 256 + trail - 65536"
     * @return string the character itself for single-byte values, the first
     *                letter of the pinyin syllable for codes inside the table,
     *                '' for everything else
     */
    public function convert($num)
    {
        if ($num > 0 && $num < 160) {
            // Not a Chinese character: hand the byte back unchanged.
            return chr($num);
        }
        if ($num < -20319 || $num > -10247) {
            return '';
        }

        // Walk the table backwards to the last entry whose code is <= $num.
        // The range check above guarantees that entry 0 matches at the latest.
        for ($i = count($this->_code) - 1; $i >= 0; $i--) {
            if ($this->_code[$i][1] <= $num) {
                break;
            }
        }

        return substr($this->_code[$i][0], 0, 1);
    }
}

 

 

下面这个是用来测试代码的:

<?php

include_once 'charset.class.php';
header('Content-type: text/html;charset=utf-8');

// Print the upper-case pinyin initials of a few sample words, one per line.
$charset = new charset();
foreach (array('武汉', '中国', '上海') as $val) {
    // The class works on GBK bytes, so convert the UTF-8 literal first.
    $gbkText = mb_convert_encoding($val, 'gbk', 'utf-8');
    $initials = strtoupper($charset->PinYin($gbkText));

    // Convert back to UTF-8 to match the Content-type header sent above.
    echo iconv('gbk', 'utf-8//IGNORE', $initials);
    echo '<br/>';
}

三、这是在网上找到的一个方法,经过测试可以正常使用,但对一些生僻字或者特殊字符会有问题.

/**
 * Return the upper-case pinyin initial of the first character of a string.
 *
 * A leading ASCII letter is returned upper-cased. Otherwise the first two
 * bytes are read as one GB2312 character and matched against the pinyin
 * range table. The input may be UTF-8 or GB2312: it is treated as UTF-8 when
 * it survives a UTF-8 => GB2312 => UTF-8 round trip, and as GB2312 otherwise
 * (in which case iconv() reports the failed conversion as a notice).
 *
 * @param string $str
 * @return string|null the letter; '' for empty input; null when the first
 *                     character has no entry in the table
 */
function getFirstCharter($str){
    if (empty($str)) {
        return '';
    }
    $str = (string) $str;

    $fchar = ord($str[0]);
    $isUpper = $fchar >= ord('A') && $fchar <= ord('Z');
    $isLower = $fchar >= ord('a') && $fchar <= ord('z');
    if ($isUpper || $isLower) {
        return strtoupper($str[0]);
    }
    if ($fchar < 0x80) {
        // Any other ASCII character (digit, punctuation, ...) has no initial.
        return null;
    }

    // Round-trip through GB2312 to find out whether the input is UTF-8.
    $s = $str;
    $s1 = iconv('UTF-8', 'gb2312', $str);
    if ($s1 !== false && iconv('gb2312', 'UTF-8', $s1) === $str) {
        $s = $s1;
    }
    if (strlen($s) < 2) {
        // A lone high byte cannot form a double-byte character.
        return null;
    }

    $asc = ord($s[0]) * 256 + ord($s[1]) - 65536;
    if ($asc < -20319 || $asc > -10247) {
        return null;
    }

    // Lowest code of each letter's range; the ranges are contiguous, so the
    // letter is the last one whose lower bound does not exceed $asc.
    $lowerBounds = array(
        'A' => -20319, 'B' => -20283, 'C' => -19775, 'D' => -19218,
        'E' => -18710, 'F' => -18526, 'G' => -18239, 'H' => -17922,
        'J' => -17417, 'K' => -16474, 'L' => -16212, 'M' => -15640,
        'N' => -15165, 'O' => -14922, 'P' => -14914, 'Q' => -14630,
        'R' => -14149, 'S' => -14090, 'T' => -13318, 'W' => -12838,
        'X' => -12556, 'Y' => -11847, 'Z' => -11055,
    );

    $letter = null;
    foreach ($lowerBounds as $candidate => $lower) {
        if ($asc < $lower) {
            break;
        }
        $letter = $candidate;
    }

    return $letter;
}

分享一段PHP制作的中文拼音首字母工具类
php中strstr、strrchr、substr、stristr四个函数的区别总结
PHP中常用的输出函数总结
PHP正则替换函数preg_replace和preg_replace_callback使用总结
php分页函数完整实例代码
PHP return语句另类用法不止是在函数中
访问编码后的中文URL返回404错误的解决方法
phpword插件导出word文件时中文乱码问题处理方案
phpExcel中文帮助手册之常用功能指南
PHP中使用asort进行中文排序失效的问题处理
PHP解码unicode编码的中文字符代码分享
ThinkPHP单字母函数(快捷方法)使用总结
PHP实现的汉字拼音转换和公历农历转换类及使用示例
支持生僻字且自动识别utf-8编码的php汉字转拼音类
PHP英文字母大小写转换函数小结
php冒泡排序、快速排序、快速查找、二维数组去重实例分享
PHP获取浏览器信息类和客户端地理位置的2个方法
php中有关字符串的4个函数substr、strrchr、strstr、ereg介绍和使用例子
phpmailer中文乱码问题的解决方法
PHP函数addslashes和mysql_real_escape_string的区别
自己写了一个php检测文件编码的函数
php setcookie函数的参数说明及其用法
php生成随机字符串可指定纯数字、纯字母或者混合的
不使用php api函数实现数组的交换排序示例
开源php中文分词系统SCWS安装和使用实例
php5.2 Json不能正确处理中文、GB编码的解决方法
php ctype函数中文翻译和示例
支持中文和其他编码的php截取字符串函数分享(截取中文字符串)
根据ip调用新浪api获取城市名并转成拼音
php汉字转拼音的示例
简单的php中文转拼音的实现代码
PHP随机生成随机个数的字母组合示例
php获取汉字首字母的函数
php 中文字符串首字母的获取函数分享
PHP实现把数字ID转字母ID
php 获得汉字拼音首字母的函数
©2014-2025 dbsqp.com