php读取大文件示例分享(文件操作类)
本文介绍一个用 PHP 读取大文件的示例。示例以文件操作类的形式给出,也可以借此学习 PHP 的文件操作方法,供有需要的朋友参考。
Lib_File2.php
<?php
/**
 * Chunked reader for large, line-oriented log files, built on SplFileObject.
 *
 * Files are looked up at {root}/{date}/{appid}.{op}{suffix}. Each record is one
 * line of the form "timestamp|mid|json". Every public reader returns its
 * payload together with 'diffTime' (seconds since the last timer start) and
 * 'memory' (MiB allocated since instance() was last called, suffixed ' M').
 *
 * Failures (missing file, bad limit, ...) terminate the script through die().
 */
class Lib_File2
{
    // Directory that contains the per-date sub-directories.
    private $root = '/data/wwwroot/kkpromo/data/';
    // File name suffix.
    private $suffix = '.log';
    // Open SplFileObject handles, keyed by "{date}_{appid}_{op}".
    private $handle = array();
    // Maximum number of lines read per chunk.
    private $limit = 40000;
    // Maximum number of bytes read per line.
    private $length = 1024;
    // Timestamp (microtime) at which the current operation started.
    private $startTime = 0;
    // Memory baseline captured by instance().
    private static $startMemory = 0;
    // "Is open" flags, keyed like $handle.
    private $conn = array();
    // Singleton instance.
    private static $init = null;

    /**
     * Returns the shared instance and resets the memory baseline.
     *
     * @return Lib_File2
     */
    public static function instance()
    {
        self::$startMemory = memory_get_usage(true);
        if (!(self::$init instanceof self)) {
            self::$init = new self();
        }
        return self::$init;
    }

    private function __construct()
    {
    }

    /**
     * Sets the root directory; dies when it is not a directory.
     *
     * @param string $root
     */
    public function setRoot($root)
    {
        if (!is_dir($root)) {
            die($root . ' ROOT DOES NOT EXIST');
        }
        $this->root = $root;
    }

    /**
     * Sets the file suffix (for example ".log").
     *
     * @param string $suffix
     */
    public function setSuffix($suffix)
    {
        $this->suffix = $suffix;
    }

    /**
     * Sets the chunk size in lines; must be numeric and within 1..1000000.
     *
     * @param int|string $limit
     */
    public function setLimit($limit)
    {
        if (!is_numeric($limit)) {
            die($limit . ' SHOULD BE NUMBERIC');
        }
        if (intval($limit) > 1000000) {
            die($limit . ' SHOULD BE LOWER THAN 1000000');
        }
        // A limit below 1 would make the chunk arithmetic divide by zero.
        if (intval($limit) < 1) {
            die($limit . ' SHOULD BE GREATER THAN 0');
        }
        $this->limit = intval($limit);
    }

    /**
     * Builds the log file path and verifies it is an existing, readable file.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return string Full path of the file.
     */
    public function _getFile($date, $appid, $op)
    {
        $filename = rtrim($this->root, '/') . DIRECTORY_SEPARATOR . $date
            . DIRECTORY_SEPARATOR . $appid . '.' . $op . $this->suffix;
        if (!file_exists($filename)) {
            die($filename . ' FILE DOES NOT EXISTS!');
        }
        if (!is_file($filename)) {
            die($filename . ' FILE DOES NOT EXIST!');
        }
        if (!is_readable($filename)) {
            die($filename . ' FILE ACCESS DENY!');
        }
        return $filename;
    }

    /**
     * Releases one cached handle, or every cached handle when any of the
     * three arguments is empty.
     *
     * @param string|null $date
     * @param int|null    $appid
     * @param string|null $op
     * @return bool Always true.
     */
    public function closeFile($date = null, $appid = null, $op = null)
    {
        if ($op && $date && $appid) {
            $key = $date . '_' . $appid . '_' . $op;
            // Dropping the last reference lets SplFileObject close the file.
            unset($this->handle[$key], $this->conn[$key]);
        } else {
            $this->handle = array();
            $this->conn = array();
        }
        return true;
    }

    /**
     * Starts the timer and returns the (cached) SplFileObject for a file.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return SplFileObject
     */
    private function _openFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);
        $key = $date . '_' . $appid . '_' . $op;
        if (!empty($this->conn[$key]) && isset($this->handle[$key])) {
            return $this->handle[$key];
        }
        $filename = $this->_getFile($date, $appid, $op);
        try {
            // The constructor throws instead of returning null on failure.
            $this->handle[$key] = new SplFileObject($filename, 'r');
        } catch (RuntimeException $e) {
            die('FILE OPEN FAILED!');
        }
        $this->conn[$key] = true;
        return $this->handle[$key];
    }

    /**
     * Parses one "timestamp|mid|json" line.
     *
     * @param string $data     Raw line.
     * @param bool   $jsonFlag When true the third field is JSON-decoded into
     *                         'data'; otherwise 'data' stays an empty array.
     * @return bool|array false for malformed lines, otherwise
     *                    array('timestamp' => ..., 'mid' => ..., 'data' => ...).
     */
    private function _parseData($data, $jsonFlag = true)
    {
        if (empty($data) || !is_string($data)) {
            return false;
        }
        // Limit of 3 keeps a '|' inside the JSON payload intact.
        $fields = explode('|', rtrim($data, "\r\n"), 3);
        if (count($fields) < 3) {
            return false;
        }
        $result = array(
            'timestamp' => $fields[0],
            'mid'       => $fields[1],
            'data'      => array(),
        );
        if (empty($result['timestamp']) || empty($result['mid'])) {
            return false;
        }
        if ($jsonFlag) {
            $result['data'] = json_decode($fields[2], true);
        }
        return $result;
    }

    /**
     * Tests a parsed row against a filter. Only the first key of $condition
     * is evaluated.
     *
     * @param mixed $row               Result of _parseData().
     * @param array $condition         array(field => array(allowed values)).
     * @param bool  $emptyListMatches  Whether an empty allowed-list accepts
     *                                 any row that has the field set.
     * @return bool
     */
    private function _matchesCondition($row, $condition, $emptyListMatches = false)
    {
        if (!$condition || !is_array($condition)) {
            return true;
        }
        $keys = array_keys($condition);
        $field = $keys[0];
        if (!is_array($row) || !isset($row['data']) || !is_array($row['data'])
            || !isset($row['data'][$field])) {
            return false;
        }
        $allowed = $condition[$field];
        if (empty($allowed)) {
            return $emptyListMatches;
        }
        return in_array($row['data'][$field], (array) $allowed);
    }

    /**
     * Appends the 'diffTime' and 'memory' statistics to a result array.
     *
     * @param array $data
     * @return array
     */
    private function _withStats($data)
    {
        $data['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
        $data['memory'] = doubleval((doubleval(memory_get_usage(true)) - doubleval(self::$startMemory)) / 1024 / 1024) . ' M';
        return $data;
    }

    /**
     * Tells whether a non-empty file ends without a trailing newline.
     *
     * @param string $filename
     * @return bool
     */
    private function _lastLineUnterminated($filename)
    {
        if (!filesize($filename)) {
            return false;
        }
        $fp = fopen($filename, 'rb');
        if ($fp === false) {
            return false;
        }
        fseek($fp, -1, SEEK_END);
        $last = fread($fp, 1);
        fclose($fp);
        return $last !== "\n";
    }

    /**
     * Counts the lines of a file with `wc -l`, falling back to a PHP loop
     * when the shell is unavailable.
     *
     * @param string $filename
     * @return int
     */
    private function _countLines($filename)
    {
        $output = function_exists('shell_exec')
            ? shell_exec('wc -l ' . escapeshellarg($filename))
            : null;
        if (is_string($output) && preg_match('/^\s*(\d+)/', $output, $match)) {
            // wc -l counts newline characters, so add an unterminated last line.
            return intval($match[1]) + ($this->_lastLineUnterminated($filename) ? 1 : 0);
        }
        $count = 0;
        $fp = fopen($filename, 'rb');
        if ($fp !== false) {
            while (fgets($fp) !== false) {
                $count++;
            }
            fclose($fp);
        }
        return $count;
    }

    /**
     * Reads up to $length lines of one file, starting at line $startNum.
     *
     * @param string $date      e.g. 20140327
     * @param int    $appid     e.g. 1000
     * @param string $op        e.g. show, login, index
     * @param int    $startNum  Zero-based first line (default 0).
     * @param int    $length    Number of lines to scan (default: the limit).
     * @param bool   $jsonFlag  Decode the JSON field into 'data' when true.
     * @param array  $condition array(field => array(values)); only applied
     *                          when $jsonFlag is true.
     * @return array array('data' => rows keyed by line number, 'count',
     *               'diffTime', 'memory'). Malformed lines are skipped.
     */
    public function readFile($date, $appid, $op, $startNum = 0, $length = 0, $jsonFlag = false, $condition = array())
    {
        $data = array('data' => array(), 'count' => 0);
        $startNum = empty($startNum) ? 0 : max(0, intval($startNum));
        $length = empty($length) ? $this->limit : intval($length);
        if (!$jsonFlag) {
            // Filters look at the decoded payload, which is absent here.
            $condition = array();
        }
        $handle = $this->_openFile($date, $appid, $op);
        if ($handle) {
            $handle->setMaxLineLen($this->length);
            $handle->seek($startNum);
            $lastLine = $startNum + $length - 1;
            while (true) {
                $lineNumber = $handle->key();
                // key() < $startNum means the seek ran past the end of file.
                if ($lineNumber < $startNum || $lineNumber > $lastLine) {
                    break;
                }
                $raw = $handle->current();
                if ($raw !== false && $raw !== '') {
                    $row = $this->_parseData($raw, $jsonFlag);
                    if ($row !== false && $this->_matchesCondition($row, $condition)) {
                        $data['data'][$lineNumber] = $row;
                        $data['count']++;
                    }
                }
                // Check EOF only after consuming the current line.
                if ($handle->eof()) {
                    break;
                }
                $handle->next();
            }
        }
        return $this->_withStats($data);
    }

    /**
     * Returns the total number of lines of a file (uses `wc -l` if possible).
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return array array('count', 'diffTime', 'memory')
     */
    public function total_lineFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);
        $data = array('count' => $this->_countLines($this->_getFile($date, $appid, $op)));
        return $this->_withStats($data);
    }

    /**
     * Counts the records of a file and closes it afterwards.
     *
     * Without a condition every non-empty line is counted (this is the line
     * total used for chunking). With a condition only well-formed rows whose
     * decoded payload matches are counted.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @param array  $condition array(field => array(values))
     * @return array array('count', 'diffTime', 'memory')
     */
    public function countFile($date, $appid, $op, $condition = array())
    {
        $data = array('count' => 0);
        $filtered = $condition && is_array($condition);
        $handle = $this->_openFile($date, $appid, $op);
        if ($handle) {
            $handle->setMaxLineLen($this->length);
            $handle->rewind();
            while (true) {
                $raw = $handle->current();
                if ($raw !== false && $raw !== '') {
                    if (!$filtered) {
                        $data['count']++;
                    } else {
                        $row = $this->_parseData($raw, true);
                        if ($row !== false && $this->_matchesCondition($row, $condition)) {
                            $data['count']++;
                        }
                    }
                }
                if ($handle->eof()) {
                    break;
                }
                $handle->next();
            }
        }
        $this->closeFile($date, $appid, $op);
        return $this->_withStats($data);
    }

    /**
     * Counts the distinct mids found in a file, reading it chunk by chunk.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @param bool   $midflag   When true the result also carries the list of
     *                          mids under 'mid'; otherwise the key is removed.
     * @param bool   $jsonFlag  Decode the JSON field. Forced on when a
     *                          condition is supplied, since filters need it.
     * @param array  $condition array(field => array(values)); an empty value
     *                          list accepts every row that has the field.
     * @return array array(['mid' => list], 'count', 'diffTime', 'memory')
     */
    public function countFileMID($date, $appid, $op, $midflag = false, $jsonFlag = false, $condition = array())
    {
        $startedAt = microtime(true);
        $filtered = $condition && is_array($condition);
        $total = $this->countFile($date, $appid, $op);
        $chunks = (int) ceil($total['count'] / $this->limit);
        $result = array('mid' => array(), 'count' => 0, 'diffTime' => 0, 'memory' => 0);

        for ($i = 0; $i < $chunks; $i++) {
            $chunk = $this->readFile($date, $appid, $op, $this->limit * $i, $this->limit, $jsonFlag || $filtered);
            if (empty($chunk['data']) || !is_array($chunk['data'])) {
                continue;
            }
            foreach ($chunk['data'] as $row) {
                if (!is_array($row) || !isset($row['mid'])) {
                    continue;
                }
                if ($filtered && !$this->_matchesCondition($row, $condition, true)) {
                    continue;
                }
                if (!isset($result['mid'][$row['mid']])) {
                    $result['mid'][$row['mid']] = 1;
                    $result['count']++;
                }
            }
            unset($chunk);
        }

        $this->closeFile($date, $appid, $op);
        $result['mid'] = array_keys($result['mid']);
        if (empty($midflag)) {
            unset($result['mid']);
        }
        // Measure the whole operation, not just the last chunk.
        $this->startTime = $startedAt;
        $result['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
        $result['memory'] = (memory_get_usage(true) - self::$startMemory) / 1024 / 1024 . ' M';
        return $result;
    }

    /**
     * Runs countFileMID() for $date and the $day - 1 days before it.
     *
     * @param string $date    Most recent day, e.g. 20140327.
     * @param int    $appid
     * @param string $op
     * @param int    $day     Number of days to cover (default 1).
     * @param bool   $midflag Passed through to countFileMID().
     * @return array Results keyed by 'Ymd' date; empty when $day < 1.
     */
    public function getReturnUser($date, $appid, $op, $day = 1, $midflag = false)
    {
        $result = array();
        $base = strtotime($date);
        if ($base === false) {
            die($date . ' INVALID DATE');
        }
        for ($i = 0; $i < $day; $i++) {
            // Calendar arithmetic stays correct across DST changes.
            $dateI = date('Ymd', strtotime('-' . $i . ' day', $base));
            $result[$dateI] = $this->countFileMID($dateI, $appid, $op, $midflag);
        }
        return $result;
    }
}
?>
Lib_File1.php
<?php
/**
 * Chunked reader for large, line-oriented log files, built on fopen()/fgets().
 *
 * Files are looked up at {root}/{date}/{appid}.{op}{suffix}. Each record is one
 * line of the form "timestamp|mid|json". Every public reader returns its
 * payload together with 'diffTime' (seconds since the last timer start) and
 * 'memory' (MiB allocated since instance() was last called, suffixed ' M').
 *
 * Failures (missing file, bad limit, ...) terminate the script through die().
 */
class Lib_File1
{
    // Directory that contains the per-date sub-directories.
    private $root = '/data/wwwroot/kkpromo/data/';
    // File name suffix.
    private $suffix = '.log';
    // Open stream resources, keyed by "{date}_{appid}_{op}".
    private $hander = array();
    // Maximum number of lines read per chunk.
    private $limit = 40000;
    // Maximum number of bytes read per fgets() call.
    private $length = 1024;
    // Timestamp (microtime) at which the current operation started.
    private $startTime = 0;
    // Memory baseline captured by instance().
    private static $startMemory = 0;
    // "Is open" flags, keyed like $hander.
    private $conn = array();
    // Singleton instance.
    private static $init = null;

    /**
     * Returns the shared instance and resets the memory baseline.
     *
     * @return Lib_File1
     */
    public static function instance()
    {
        self::$startMemory = memory_get_usage(true);
        if (!(self::$init instanceof self)) {
            self::$init = new self();
        }
        return self::$init;
    }

    private function __construct()
    {
    }

    /**
     * Sets the root directory; dies when it is not a directory.
     *
     * @param string $root
     */
    public function setRoot($root)
    {
        if (!is_dir($root)) {
            die($root . ' ROOT DOES NOT EXIST');
        }
        $this->root = $root;
    }

    /**
     * Sets the file suffix (for example ".log").
     *
     * @param string $suffix
     */
    public function setSuffix($suffix)
    {
        $this->suffix = $suffix;
    }

    /**
     * Sets the chunk size in lines; must be numeric and within 1..1000000.
     *
     * @param int|string $limit
     */
    public function setLimit($limit)
    {
        if (!is_numeric($limit)) {
            die($limit . ' SHOULD BE NUMBERIC');
        }
        if (intval($limit) > 1000000) {
            die($limit . ' SHOULD BE LOWER THAN 1000000');
        }
        // A limit below 1 would make the chunk arithmetic divide by zero.
        if (intval($limit) < 1) {
            die($limit . ' SHOULD BE GREATER THAN 0');
        }
        $this->limit = intval($limit);
    }

    /**
     * Builds the log file path and verifies it is an existing, readable file.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return string Full path of the file.
     */
    private function _getFile($date, $appid, $op)
    {
        $filename = rtrim($this->root, '/') . DIRECTORY_SEPARATOR . $date
            . DIRECTORY_SEPARATOR . $appid . '.' . $op . $this->suffix;
        if (!file_exists($filename)) {
            die($filename . ' FILE DOES NOT EXISTS!');
        }
        if (!is_file($filename)) {
            die($filename . ' FILE DOES NOT EXIST!');
        }
        if (!is_readable($filename)) {
            die($filename . ' FILE ACCESS DENY!');
        }
        return $filename;
    }

    /**
     * Closes one cached stream, or every cached stream when any of the three
     * arguments is empty. Safe to call repeatedly.
     *
     * @param string|null $date
     * @param int|null    $appid
     * @param string|null $op
     * @return bool Always true.
     */
    public function closeFile($date = null, $appid = null, $op = null)
    {
        if ($op && $date && $appid) {
            $keys = array($date . '_' . $appid . '_' . $op);
        } else {
            $keys = array_keys($this->hander);
        }
        foreach ($keys as $key) {
            // Streams are resources, not objects; only close live ones.
            if (isset($this->hander[$key]) && is_resource($this->hander[$key])) {
                fclose($this->hander[$key]);
            }
            unset($this->hander[$key], $this->conn[$key]);
        }
        return true;
    }

    /**
     * Starts the timer and returns the (cached) stream for a file.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return resource
     */
    private function _openFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);
        $key = $date . '_' . $appid . '_' . $op;
        if (!empty($this->conn[$key]) && isset($this->hander[$key]) && is_resource($this->hander[$key])) {
            return $this->hander[$key];
        }
        $filename = $this->_getFile($date, $appid, $op);
        $stream = fopen($filename, 'r');
        if ($stream === false) {
            die('FILE OPEN FAILED!');
        }
        $this->hander[$key] = $stream;
        $this->conn[$key] = true;
        return $stream;
    }

    /**
     * Parses one "timestamp|mid|json" line.
     *
     * @param string $data Raw line.
     * @return bool|array false for malformed lines, otherwise
     *                    array('timestamp' => ..., 'mid' => ..., 'data' => decoded JSON).
     */
    private function _parseData($data)
    {
        if (empty($data) || !is_string($data)) {
            return false;
        }
        // Limit of 3 keeps a '|' inside the JSON payload intact.
        $fields = explode('|', rtrim($data, "\r\n"), 3);
        if (count($fields) < 3) {
            return false;
        }
        $result = array(
            'timestamp' => $fields[0],
            'mid'       => $fields[1],
            'data'      => json_decode($fields[2], true),
        );
        if (empty($result['timestamp']) || empty($result['mid'])) {
            return false;
        }
        return $result;
    }

    /**
     * Tests a parsed row against a filter; the row matches when at least one
     * condition field is present with an allowed value.
     *
     * @param mixed $row               Result of _parseData().
     * @param array $condition         array(field => array(allowed values)).
     * @param bool  $emptyListMatches  Whether an empty allowed-list accepts
     *                                 any row that has the field set.
     * @return bool
     */
    private function _matchesCondition($row, $condition, $emptyListMatches = false)
    {
        if (!$condition || !is_array($condition)) {
            return true;
        }
        if (!is_array($row) || !isset($row['data']) || !is_array($row['data'])) {
            return false;
        }
        foreach ($condition as $field => $allowed) {
            if (!isset($row['data'][$field])) {
                continue;
            }
            if (empty($allowed)) {
                if ($emptyListMatches) {
                    return true;
                }
                continue;
            }
            if (in_array($row['data'][$field], (array) $allowed)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Appends the 'diffTime' and 'memory' statistics to a result array.
     *
     * @param array $data
     * @return array
     */
    private function _withStats($data)
    {
        $data['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
        $data['memory'] = doubleval((doubleval(memory_get_usage(true)) - doubleval(self::$startMemory)) / 1024 / 1024) . ' M';
        return $data;
    }

    /**
     * Tells whether a non-empty file ends without a trailing newline.
     *
     * @param string $filename
     * @return bool
     */
    private function _lastLineUnterminated($filename)
    {
        if (!filesize($filename)) {
            return false;
        }
        $fp = fopen($filename, 'rb');
        if ($fp === false) {
            return false;
        }
        fseek($fp, -1, SEEK_END);
        $last = fread($fp, 1);
        fclose($fp);
        return $last !== "\n";
    }

    /**
     * Counts the lines of a file with `wc -l`, falling back to a PHP loop
     * when the shell is unavailable.
     *
     * @param string $filename
     * @return int
     */
    private function _countLines($filename)
    {
        $output = function_exists('shell_exec')
            ? shell_exec('wc -l ' . escapeshellarg($filename))
            : null;
        if (is_string($output) && preg_match('/^\s*(\d+)/', $output, $match)) {
            // wc -l counts newline characters, so add an unterminated last line.
            return intval($match[1]) + ($this->_lastLineUnterminated($filename) ? 1 : 0);
        }
        $count = 0;
        $fp = fopen($filename, 'rb');
        if ($fp !== false) {
            while (fgets($fp) !== false) {
                $count++;
            }
            fclose($fp);
        }
        return $count;
    }

    /**
     * Reads the lines [$startNum, $endNum) of one file.
     *
     * @param string $date      e.g. 20140327
     * @param int    $appid     e.g. 1000
     * @param string $op        e.g. show, login, index
     * @param int    $startNum  Zero-based first line (default 0).
     * @param int    $endNum    Exclusive end line (default: the limit).
     * @param bool   $jsonFlag  Filters in $condition are only applied when
     *                          this is true; 'data' is present either way.
     * @param array  $condition array(field => array(values)).
     * @return array array('data' => rows keyed sequentially from $startNum,
     *               'count', 'diffTime', 'memory'). Malformed lines are skipped.
     */
    public function readFile($date, $appid, $op, $startNum = 0, $endNum = 0, $jsonFlag = false, $condition = array())
    {
        $data = array('data' => array(), 'count' => 0);
        $startNum = empty($startNum) ? 0 : max(0, intval($startNum));
        $endNum = empty($endNum) ? $this->limit : intval($endNum);
        $index = $startNum;
        if (!$jsonFlag) {
            $condition = array();
        }
        $hander = $this->_openFile($date, $appid, $op);
        if ($hander) {
            // The stream is cached between calls, so always start from the top.
            rewind($hander);
            for ($lineNo = 0; $lineNo < $endNum; $lineNo++) {
                $raw = fgets($hander, $this->length);
                if ($raw === false) {
                    break; // end of file
                }
                if ($lineNo < $startNum) {
                    continue;
                }
                $row = $this->_parseData($raw);
                if ($row === false || !$this->_matchesCondition($row, $condition)) {
                    continue;
                }
                $data['data'][$index] = $row;
                $index++;
                $data['count']++;
            }
        }
        return $this->_withStats($data);
    }

    /**
     * Returns the total number of lines of a file (uses `wc -l` if possible).
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @return array array('count', 'diffTime', 'memory')
     */
    public function total_lineFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);
        $data = array('count' => $this->_countLines($this->_getFile($date, $appid, $op)));
        return $this->_withStats($data);
    }

    /**
     * Counts the well-formed records of a file, optionally filtered.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @param array  $condition array(field => array(values)); a row is
     *                          counted once when any field matches.
     * @return array array('count', 'diffTime', 'memory')
     */
    public function countFile($date, $appid, $op, $condition = array())
    {
        $data = array('count' => 0);
        $hander = $this->_openFile($date, $appid, $op);
        if ($hander) {
            rewind($hander);
            while (($raw = fgets($hander, $this->length)) !== false) {
                $row = $this->_parseData($raw);
                if ($row !== false && $this->_matchesCondition($row, $condition)) {
                    $data['count']++;
                }
            }
        }
        return $this->_withStats($data);
    }

    /**
     * Counts the distinct mids found in a file, reading it chunk by chunk.
     *
     * @param string $date
     * @param int    $appid
     * @param string $op
     * @param bool   $midflag   When true the result also carries the list of
     *                          mids under 'mid'; otherwise the key is removed.
     * @param bool   $jsonFlag  Passed through to readFile().
     * @param array  $condition array(field => array(values)); an empty value
     *                          list accepts every row that has the field.
     * @return array array(['mid' => list], 'count', 'diffTime', 'memory')
     */
    public function countFileMID($date, $appid, $op, $midflag = false, $jsonFlag = false, $condition = array())
    {
        $startedAt = microtime(true);
        $filtered = $condition && is_array($condition);
        $total = $this->total_lineFile($date, $appid, $op);
        $chunks = (int) ceil($total['count'] / $this->limit);
        $result = array('mid' => array(), 'count' => 0, 'diffTime' => 0, 'memory' => 0);

        for ($i = 0; $i < $chunks; $i++) {
            $chunk = $this->readFile($date, $appid, $op, $this->limit * $i, $this->limit * ($i + 1), $jsonFlag);
            if (empty($chunk['data']) || !is_array($chunk['data'])) {
                continue;
            }
            foreach ($chunk['data'] as $row) {
                if ($filtered && !$this->_matchesCondition($row, $condition, true)) {
                    continue;
                }
                if (!isset($result['mid'][$row['mid']])) {
                    $result['mid'][$row['mid']] = 1;
                    $result['count']++;
                }
            }
            unset($chunk);
        }

        $this->closeFile($date, $appid, $op);
        $result['mid'] = array_keys($result['mid']);
        if (empty($midflag)) {
            unset($result['mid']);
        }
        // Measure the whole operation, not just the last chunk.
        $this->startTime = $startedAt;
        $result['diffTime'] = doubleval(microtime(true)) - doubleval($this->startTime);
        $result['memory'] = (memory_get_usage(true) - self::$startMemory) / 1024 / 1024 . ' M';
        return $result;
    }

    /**
     * Runs countFileMID() for $date and the $day - 1 days before it.
     *
     * @param string $date    Most recent day, e.g. 20140327.
     * @param int    $appid
     * @param string $op
     * @param int    $day     Number of days to cover (default 1).
     * @param bool   $midflag Passed through to countFileMID().
     * @return array Results keyed by 'Ymd' date; empty when $day < 1.
     */
    public function getReturnUser($date, $appid, $op, $day = 1, $midflag = false)
    {
        $result = array();
        $base = strtotime($date);
        if ($base === false) {
            die($date . ' INVALID DATE');
        }
        for ($i = 0; $i < $day; $i++) {
            // Calendar arithmetic stays correct across DST changes.
            $dateI = date('Ymd', strtotime('-' . $i . ' day', $base));
            $result[$dateI] = $this->countFileMID($dateI, $appid, $op, $midflag);
        }
        return $result;
    }
}
?>