php读取大文件示例分享(文件操作类)

2015-01-24信息快讯网

本文介绍用 PHP 读取大文件的示例,包含两个文件操作类:Lib_File2 基于 SplFileObject 实现,Lib_File1 基于 fopen/fgets 实现。读者可以借此学习 PHP 的文件操作方法,需要的朋友可以参考。

Lib_File2.php

<?php
/**
 * Reads large line-oriented log files through cached SplFileObject handles.
 *
 * A log file lives at {root}/{date}/{appid}.{op}{suffix}. Every record is one
 * line of the form "timestamp|mid|json-payload".
 *
 * Error handling follows the original convention of this class: configuration
 * and file-access problems terminate the script with die().
 */
class Lib_File2
{
    // Base directory that contains one sub-directory per date.
    private $root = '/data/wwwroot/kkpromo/data/';

    // File-name suffix appended after "{appid}.{op}".
    private $suffix = '.log';

    // Open SplFileObject handles, keyed by "{date}_{appid}_{op}".
    private $handle = array();

    // Default number of lines returned by one readFile() call.
    private $limit = 40000;

    // Maximum line length passed to SplFileObject::setMaxLineLen().
    private $length = 1024;

    // microtime(true) taken when the current operation started.
    private $startTime = 0;

    // memory_get_usage(true) taken by instance(); baseline of the "memory" statistic.
    private static $startMemory = 0;

    // Marks which keys of $handle are currently open.
    private $conn = array();

    // The singleton instance.
    private static $init = null;

    /**
     * Returns the shared instance and resets the memory baseline used by the
     * "memory" statistic.
     *
     * @return Lib_File2
     */
    public static function instance()
    {
        self::$startMemory = memory_get_usage(true);

        if (!(self::$init instanceof self)) {
            self::$init = new self();
        }

        return self::$init;
    }

    private function __construct()
    {
    }

    /**
     * Sets the base directory; terminates the script if it is not a directory.
     *
     * @param string $root
     */
    public function setRoot($root)
    {
        if (!is_dir($root)) {
            die($root.' ROOT DOES NOT EXIST');
        }
        $this->root = $root;
    }

    /**
     * Sets the file-name suffix (including the dot, e.g. ".log").
     *
     * @param string $suffix
     */
    public function setSuffix($suffix)
    {
        $this->suffix = $suffix;
    }

    /**
     * Sets the default number of lines read by readFile().
     *
     * @param int|string $limit numeric value between 1 and 1000000
     */
    public function setLimit($limit)
    {
        if (!is_numeric($limit)) {
            die($limit.' SHOULD BE NUMBERIC');
        }
        if (intval($limit) > 1000000) {
            die($limit.' SHOULD BE LOWER THAN 1000000');
        }
        // A limit of zero or less would make every default-sized read empty.
        if (intval($limit) < 1) {
            die($limit.' SHOULD BE GREATER THAN 0');
        }
        $this->limit = intval($limit);
    }

    /**
     * Builds the path of a log file and checks that it is a readable regular
     * file; terminates the script otherwise.
     *
     * @param string $date  e.g. "20140327"
     * @param int    $appid e.g. 1000
     * @param string $op    e.g. "show", "login"
     * @return string absolute file name
     */
    public function _getFile($date, $appid, $op)
    {
        $filename = rtrim($this->root, '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;

        if (!file_exists($filename)) {
            die($filename.' FILE DOES NOT EXISTS!');
        }

        if (!is_file($filename)) {
            die($filename.' FILE DOES NOT EXIST!');
        }

        if (!is_readable($filename)) {
            die($filename.'  FILE ACCESS DENY!');
        }

        return $filename;
    }

    /**
     * Releases one cached handle, or all of them when any argument is missing.
     *
     * Dropping the last reference to an SplFileObject closes the file.
     *
     * @return bool always true
     */
    public function closeFile($date = null, $appid = null, $op = null)
    {
        if ($op && $date && $appid) {
            $key = $this->_key($date, $appid, $op);
            unset($this->handle[$key], $this->conn[$key]);
        } else {
            $this->handle = array();
            $this->conn = array();
        }

        return true;
    }

    /**
     * Cache key of a log file.
     */
    private function _key($date, $appid, $op)
    {
        return $date.'_'.$appid.'_'.$op;
    }

    /**
     * Starts the timer of the current operation and returns the (cached)
     * SplFileObject of a log file.
     *
     * @return SplFileObject
     */
    private function _openFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);

        $key = $this->_key($date, $appid, $op);
        if (!empty($this->conn[$key]) && isset($this->handle[$key])) {
            return $this->handle[$key];
        }

        $filename = $this->_getFile($date, $appid, $op);
        try {
            // SplFileObject throws instead of returning null, so the failure
            // has to be caught to reach the intended error message.
            $file = new SplFileObject($filename, 'r');
        } catch (RuntimeException $e) {
            die('FILE OPEN FAILED!');
        }
        $file->setMaxLineLen($this->length);

        $this->handle[$key] = $file;
        $this->conn[$key] = true;

        return $file;
    }

    /**
     * Returns the current line and advances the handle, or false at end of file.
     *
     * SplFileObject reports the end of a file either as false or as one final
     * empty string; both are treated as "no more lines". The handle is always
     * advanced after a successful read, so callers cannot loop forever.
     *
     * @param SplFileObject $handle
     * @return string|false
     */
    private function _nextLine($handle)
    {
        $line = $handle->current();
        if ($line === false || ($line === '' && $handle->eof())) {
            return false;
        }
        $handle->next();

        return $line;
    }

    /**
     * Parses one "timestamp|mid|json" line.
     *
     * @param string $data     raw line
     * @param bool   $jsonFlag decode the JSON payload into 'data' when true;
     *                         leave 'data' as an empty array when false
     * @return bool|array false for an unparsable line, otherwise
     *                    array('timestamp' => ..., 'mid' => ..., 'data' => array)
     */
    private function _parseData($data, $jsonFlag = true)
    {
        if (empty($data) || !is_string($data)) {
            return false;
        }

        // Limit of 3: a "|" inside the JSON payload must not split the payload.
        $parts = explode('|', $data, 3);
        if (count($parts) < 3 || empty($parts[0]) || empty($parts[1])) {
            return false;
        }

        $payload = array();
        if ($jsonFlag) {
            $decoded = json_decode($parts[2], true);
            // Invalid or scalar JSON is normalised to an empty payload.
            if (is_array($decoded)) {
                $payload = $decoded;
            }
        }

        return array(
            'timestamp' => $parts[0],
            'mid'       => $parts[1],
            'data'      => $payload,
        );
    }

    /**
     * Tests a parsed row against a filter.
     *
     * Every field named in $condition must be present in the row's JSON
     * payload and its value must be one of the allowed values; an empty list
     * of allowed values accepts any value. Values are compared loosely because
     * the JSON types are not known to the caller.
     *
     * @param array $row       result of _parseData()
     * @param array $condition array('field' => array(allowed values), ...)
     * @return bool
     */
    private function _matchCondition($row, $condition)
    {
        if (!$condition || !is_array($condition)) {
            return true;
        }

        foreach ($condition as $field => $allowed) {
            if (!isset($row['data'][$field])) {
                return false;
            }
            if (!empty($allowed) && !in_array($row['data'][$field], (array) $allowed)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Adds the elapsed time and memory statistics to a result array.
     */
    private function _withStats($data)
    {
        $data['diffTime'] = microtime(true) - $this->startTime;
        $data['memory'] = ((memory_get_usage(true) - self::$startMemory) / 1024 / 1024) . ' M';

        return $data;
    }

    /**
     * Reads a window of lines from one log file.
     *
     * @param string $date      e.g. "20140327"
     * @param int    $appid     e.g. 1000
     * @param string $op        e.g. "show", "login"
     * @param int    $startNum  zero-based line to start at
     * @param int    $length    number of lines to read; 0 means the configured limit
     * @param bool   $jsonFlag  decode the JSON payload of every row
     * @param array  $condition array('field' => array(allowed values), ...);
     *                          ignored unless $jsonFlag is true
     * @return array array('data' => rows keyed by line number, 'count' => int,
     *               'diffTime' => float, 'memory' => string)
     */
    public function readFile($date, $appid, $op, $startNum = 0, $length = 0, $jsonFlag = false, $condition = array())
    {
        $data = array('data' => array(), 'count' => 0);

        $startNum = max(0, intval($startNum));
        $length = intval($length) > 0 ? intval($length) : $this->limit;
        if (!$jsonFlag) {
            // Without a decoded payload there is nothing to filter on.
            $condition = array();
        }

        $handle = $this->_openFile($date, $appid, $op);
        $handle->seek($startNum);

        // When the file is shorter than $startNum the handle stops early and
        // may still hold an earlier line; that line must not be reported.
        if ($handle->key() >= $startNum) {
            $lineNumber = $startNum;
            $stop = $startNum + $length;
            while ($lineNumber < $stop && ($line = $this->_nextLine($handle)) !== false) {
                $row = $this->_parseData($line, $jsonFlag);
                if ($row !== false && $this->_matchCondition($row, $condition)) {
                    $data['data'][$lineNumber] = $row;
                    $data['count']++;
                }
                $lineNumber++;
            }
        }

        return $this->_withStats($data);
    }

    /**
     * Counts the newline characters of a log file (the number "wc -l" prints),
     * without depending on a shell.
     *
     * @return array array('count' => int, 'diffTime' => float, 'memory' => string)
     */
    public function total_lineFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);
        $filename = $this->_getFile($date, $appid, $op);

        $fp = fopen($filename, 'rb');
        if ($fp === false) {
            die('FILE OPEN FAILED!');
        }

        $count = 0;
        while (!feof($fp)) {
            $chunk = fread($fp, 1048576);
            if ($chunk === false) {
                break;
            }
            $count += substr_count($chunk, "\n");
        }
        fclose($fp);

        return $this->_withStats(array('count' => $count));
    }

    /**
     * Counts the parsable records of a log file, optionally only those that
     * match $condition. The cached handle is released afterwards.
     *
     * @param array $condition array('field' => array(allowed values), ...)
     * @return array array('count' => int, 'diffTime' => float, 'memory' => string)
     */
    public function countFile($date, $appid, $op, $condition = array())
    {
        $data = array('count' => 0);
        $filtered = $condition && is_array($condition);

        $handle = $this->_openFile($date, $appid, $op);
        // A cached handle may be positioned anywhere; always count from the top.
        $handle->rewind();

        while (($line = $this->_nextLine($handle)) !== false) {
            // The payload is only decoded when it is needed for filtering.
            $row = $this->_parseData($line, $filtered);
            if ($row === false) {
                continue;
            }
            if ($filtered && !$this->_matchCondition($row, $condition)) {
                continue;
            }
            $data['count']++;
        }

        $this->closeFile($date, $appid, $op);

        return $this->_withStats($data);
    }

    /**
     * Counts the distinct users (mid values) of a log file in one streaming pass.
     *
     * @param bool  $midflag   include the list of distinct mids under 'mid';
     *                         when false the 'mid' key is omitted
     * @param bool  $jsonFlag  decode the JSON payload; decoding is forced when
     *                         a condition is given
     * @param array $condition array('field' => array(allowed values), ...)
     * @return array array(['mid' => array,] 'count' => int,
     *               'diffTime' => float, 'memory' => string)
     */
    public function countFileMID($date, $appid, $op, $midflag = false, $jsonFlag = false, $condition = array())
    {
        $filtered = $condition && is_array($condition);
        $decode = $jsonFlag || $filtered;
        $mids = array();

        $handle = $this->_openFile($date, $appid, $op);
        $handle->rewind();

        while (($line = $this->_nextLine($handle)) !== false) {
            $row = $this->_parseData($line, $decode);
            if ($row === false) {
                continue;
            }
            if ($filtered && !$this->_matchCondition($row, $condition)) {
                continue;
            }
            $mids[$row['mid']] = 1;
        }

        $this->closeFile($date, $appid, $op);

        $result = array(
            'mid'      => array_keys($mids),
            'count'    => count($mids),
            'diffTime' => 0,
            'memory'   => 0,
        );
        if (empty($midflag)) {
            unset($result['mid']);
        }
        $result['diffTime'] = microtime(true) - $this->startTime;
        $result['memory'] = (memory_get_usage(true) - self::$startMemory) / 1024 / 1024 . ' M';

        return $result;
    }

    /**
     * Runs countFileMID() for $date and the days before it.
     *
     * @param string $date    most recent day, e.g. "20140327"
     * @param int    $day     number of days to cover, counting backwards from $date
     * @param bool   $midflag passed through to countFileMID()
     * @return array countFileMID() results keyed by "Ymd" date; empty when $day < 1
     */
    public function getReturnUser($date, $appid, $op, $day = 1, $midflag = false)
    {
        $result = array();

        $base = strtotime($date);
        if ($base === false) {
            die($date.' INVALID DATE');
        }

        for ($i = 0; $i < $day; $i++) {
            // Calendar arithmetic instead of "- 86400 seconds": days are not
            // always 24 hours long in time zones with daylight saving time.
            $dateKey = date('Ymd', strtotime('-'.$i.' day', $base));
            $result[$dateKey] = $this->countFileMID($dateKey, $appid, $op, $midflag);
        }

        return $result;
    }
}
?>

Lib_File1.php

<?php
/**
 * Reads large line-oriented log files through cached fopen()/fgets() handles.
 *
 * A log file lives at {root}/{date}/{appid}.{op}{suffix}. Every record is one
 * line of the form "timestamp|mid|json-payload".
 *
 * Error handling follows the original convention of this class: configuration
 * and file-access problems terminate the script with die().
 */
class Lib_File1
{
    // Base directory that contains one sub-directory per date.
    private $root = '/data/wwwroot/kkpromo/data/';

    // File-name suffix appended after "{appid}.{op}".
    private $suffix = '.log';

    // Open stream resources, keyed by "{date}_{appid}_{op}".
    private $handle = array();

    // Default end line of one readFile() call.
    private $limit = 40000;

    // Length argument passed to fgets(); longer lines are returned in pieces.
    private $length = 1024;

    // microtime(true) taken when the current operation started.
    private $startTime = 0;

    // memory_get_usage(true) taken by instance(); baseline of the "memory" statistic.
    private static $startMemory = 0;

    // Marks which keys of $handle are currently open.
    private $conn = array();

    // The singleton instance.
    private static $init = null;

    /**
     * Returns the shared instance and resets the memory baseline used by the
     * "memory" statistic.
     *
     * @return Lib_File1
     */
    public static function instance()
    {
        self::$startMemory = memory_get_usage(true);

        if (!(self::$init instanceof self)) {
            self::$init = new self();
        }

        return self::$init;
    }

    private function __construct()
    {
    }

    /**
     * Sets the base directory; terminates the script if it is not a directory.
     *
     * @param string $root
     */
    public function setRoot($root)
    {
        if (!is_dir($root)) {
            die($root.' ROOT DOES NOT EXIST');
        }
        $this->root = $root;
    }

    /**
     * Sets the file-name suffix (including the dot, e.g. ".log").
     *
     * @param string $suffix
     */
    public function setSuffix($suffix)
    {
        $this->suffix = $suffix;
    }

    /**
     * Sets the default end line used by readFile().
     *
     * @param int|string $limit numeric value between 1 and 1000000
     */
    public function setLimit($limit)
    {
        if (!is_numeric($limit)) {
            die($limit.' SHOULD BE NUMBERIC');
        }
        if (intval($limit) > 1000000) {
            die($limit.' SHOULD BE LOWER THAN 1000000');
        }
        // A limit of zero or less would make every default-sized read empty.
        if (intval($limit) < 1) {
            die($limit.' SHOULD BE GREATER THAN 0');
        }
        $this->limit = intval($limit);
    }

    /**
     * Builds the path of a log file and checks that it is a readable regular
     * file; terminates the script otherwise.
     *
     * @return string absolute file name
     */
    private function _getFile($date, $appid, $op)
    {
        $filename = rtrim($this->root, '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;

        if (!file_exists($filename)) {
            die($filename.' FILE DOES NOT EXISTS!');
        }

        if (!is_file($filename)) {
            die($filename.' FILE DOES NOT EXIST!');
        }

        if (!is_readable($filename)) {
            die($filename.'  FILE ACCESS DENY!');
        }

        return $filename;
    }

    /**
     * Closes one cached handle, or all of them when any argument is missing.
     *
     * @return bool always true
     */
    public function closeFile($date = null, $appid = null, $op = null)
    {
        if ($op && $date && $appid) {
            $keys = array($this->_key($date, $appid, $op));
        } else {
            $keys = array_keys($this->handle);
        }

        foreach ($keys as $key) {
            // Only real, still-open streams may be handed to fclose().
            if (isset($this->handle[$key]) && is_resource($this->handle[$key])) {
                fclose($this->handle[$key]);
            }
            unset($this->handle[$key], $this->conn[$key]);
        }

        return true;
    }

    /**
     * Cache key of a log file.
     */
    private function _key($date, $appid, $op)
    {
        return $date.'_'.$appid.'_'.$op;
    }

    /**
     * Starts the timer of the current operation and returns the (cached)
     * stream of a log file. The stream position is whatever the previous
     * reader left behind; readers rewind it themselves.
     *
     * @return resource
     */
    private function _openFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);

        $key = $this->_key($date, $appid, $op);
        if (!empty($this->conn[$key]) && isset($this->handle[$key]) && is_resource($this->handle[$key])) {
            return $this->handle[$key];
        }

        $filename = $this->_getFile($date, $appid, $op);
        $stream = fopen($filename, 'r');
        if ($stream === false) {
            die('FILE OPEN FAILED!');
        }

        $this->handle[$key] = $stream;
        $this->conn[$key] = true;

        return $stream;
    }

    /**
     * Parses one "timestamp|mid|json" line.
     *
     * @param string $data     raw line
     * @param bool   $jsonFlag decode the JSON payload into 'data' when true;
     *                         leave 'data' as an empty array when false
     * @return bool|array false for an unparsable line, otherwise
     *                    array('timestamp' => ..., 'mid' => ..., 'data' => array)
     */
    private function _parseData($data, $jsonFlag = true)
    {
        if (empty($data) || !is_string($data)) {
            return false;
        }

        // Limit of 3: a "|" inside the JSON payload must not split the payload.
        $parts = explode('|', $data, 3);
        if (count($parts) < 3 || empty($parts[0]) || empty($parts[1])) {
            return false;
        }

        $payload = array();
        if ($jsonFlag) {
            $decoded = json_decode($parts[2], true);
            // Invalid or scalar JSON is normalised to an empty payload.
            if (is_array($decoded)) {
                $payload = $decoded;
            }
        }

        return array(
            'timestamp' => $parts[0],
            'mid'       => $parts[1],
            'data'      => $payload,
        );
    }

    /**
     * Tests a parsed row against a filter.
     *
     * Every field named in $condition must be present in the row's JSON
     * payload and its value must be one of the allowed values; an empty list
     * of allowed values accepts any value. Values are compared loosely because
     * the JSON types are not known to the caller.
     *
     * @param array $row       result of _parseData()
     * @param array $condition array('field' => array(allowed values), ...)
     * @return bool
     */
    private function _matchCondition($row, $condition)
    {
        if (!$condition || !is_array($condition)) {
            return true;
        }

        foreach ($condition as $field => $allowed) {
            if (!isset($row['data'][$field])) {
                return false;
            }
            if (!empty($allowed) && !in_array($row['data'][$field], (array) $allowed)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Adds the elapsed time and memory statistics to a result array.
     */
    private function _withStats($data)
    {
        $data['diffTime'] = microtime(true) - $this->startTime;
        $data['memory'] = ((memory_get_usage(true) - self::$startMemory) / 1024 / 1024) . ' M';

        return $data;
    }

    /**
     * Reads the lines [$startNum, $endNum) of one log file.
     *
     * The file is always scanned from its first line, so line numbers are
     * absolute regardless of earlier calls on the same cached handle.
     *
     * @param string $date      e.g. "20140327"
     * @param int    $appid     e.g. 1000
     * @param string $op        e.g. "show", "login"
     * @param int    $startNum  zero-based first line of the window
     * @param int    $endNum    zero-based line at which to stop (exclusive);
     *                          0 means the configured limit
     * @param bool   $jsonFlag  decode the JSON payload of every row
     * @param array  $condition array('field' => array(allowed values), ...);
     *                          ignored unless $jsonFlag is true
     * @return array array('data' => rows indexed consecutively from $startNum,
     *               'count' => int, 'diffTime' => float, 'memory' => string)
     */
    public function readFile($date, $appid, $op, $startNum = 0, $endNum = 0, $jsonFlag = false, $condition = array())
    {
        $data = array('data' => array(), 'count' => 0);

        $startNum = max(0, intval($startNum));
        $endNum = intval($endNum) > 0 ? intval($endNum) : $this->limit;
        if (!$jsonFlag) {
            // Without a decoded payload there is nothing to filter on.
            $condition = array();
        }

        $stream = $this->_openFile($date, $appid, $op);
        rewind($stream);

        $index = $startNum;
        $lineNumber = 0;
        // fgets() returning false (end of file) ends the loop.
        while ($lineNumber < $endNum && ($line = fgets($stream, $this->length)) !== false) {
            $current = $lineNumber++;
            if ($current < $startNum) {
                continue;
            }

            $row = $this->_parseData($line, $jsonFlag);
            if ($row === false || !$this->_matchCondition($row, $condition)) {
                continue;
            }

            $data['data'][$index++] = $row;
            $data['count']++;
        }

        return $this->_withStats($data);
    }

    /**
     * Counts the newline characters of a log file (the number "wc -l" prints),
     * without depending on a shell.
     *
     * @return array array('count' => int, 'diffTime' => float, 'memory' => string)
     */
    public function total_lineFile($date, $appid, $op)
    {
        $this->startTime = microtime(true);
        $filename = $this->_getFile($date, $appid, $op);

        $fp = fopen($filename, 'rb');
        if ($fp === false) {
            die('FILE OPEN FAILED!');
        }

        $count = 0;
        while (!feof($fp)) {
            $chunk = fread($fp, 1048576);
            if ($chunk === false) {
                break;
            }
            $count += substr_count($chunk, "\n");
        }
        fclose($fp);

        return $this->_withStats(array('count' => $count));
    }

    /**
     * Counts the parsable records of a log file, optionally only those that
     * match $condition. The cached handle stays open.
     *
     * @param array $condition array('field' => array(allowed values), ...)
     * @return array array('count' => int, 'diffTime' => float, 'memory' => string)
     */
    public function countFile($date, $appid, $op, $condition = array())
    {
        $data = array('count' => 0);
        $filtered = $condition && is_array($condition);

        $stream = $this->_openFile($date, $appid, $op);
        // A cached handle may be positioned anywhere; always count from the top.
        rewind($stream);

        while (($line = fgets($stream, $this->length)) !== false) {
            // The payload is only decoded when it is needed for filtering.
            $row = $this->_parseData($line, $filtered);
            if ($row === false) {
                continue;
            }
            if ($filtered && !$this->_matchCondition($row, $condition)) {
                continue;
            }
            $data['count']++;
        }

        return $this->_withStats($data);
    }

    /**
     * Counts the distinct users (mid values) of a log file in one streaming
     * pass. The cached handle is closed afterwards.
     *
     * @param bool  $midflag   include the list of distinct mids under 'mid';
     *                         when false the 'mid' key is omitted
     * @param bool  $jsonFlag  decode the JSON payload; decoding is forced when
     *                         a condition is given
     * @param array $condition array('field' => array(allowed values), ...)
     * @return array array(['mid' => array,] 'count' => int,
     *               'diffTime' => float, 'memory' => string)
     */
    public function countFileMID($date, $appid, $op, $midflag = false, $jsonFlag = false, $condition = array())
    {
        $filtered = $condition && is_array($condition);
        $decode = $jsonFlag || $filtered;
        $mids = array();

        $stream = $this->_openFile($date, $appid, $op);
        rewind($stream);

        while (($line = fgets($stream, $this->length)) !== false) {
            $row = $this->_parseData($line, $decode);
            if ($row === false) {
                continue;
            }
            if ($filtered && !$this->_matchCondition($row, $condition)) {
                continue;
            }
            $mids[$row['mid']] = 1;
        }

        $this->closeFile($date, $appid, $op);

        $result = array(
            'mid'      => array_keys($mids),
            'count'    => count($mids),
            'diffTime' => 0,
            'memory'   => 0,
        );
        if (empty($midflag)) {
            unset($result['mid']);
        }
        $result['diffTime'] = microtime(true) - $this->startTime;
        $result['memory'] = (memory_get_usage(true) - self::$startMemory) / 1024 / 1024 . ' M';

        return $result;
    }

    /**
     * Runs countFileMID() for $date and the days before it.
     *
     * @param string $date    most recent day, e.g. "20140327"
     * @param int    $day     number of days to cover, counting backwards from $date
     * @param bool   $midflag passed through to countFileMID()
     * @return array countFileMID() results keyed by "Ymd" date; empty when $day < 1
     */
    public function getReturnUser($date, $appid, $op, $day = 1, $midflag = false)
    {
        $result = array();

        $base = strtotime($date);
        if ($base === false) {
            die($date.' INVALID DATE');
        }

        for ($i = 0; $i < $day; $i++) {
            // Calendar arithmetic instead of "- 86400 seconds": days are not
            // always 24 hours long in time zones with daylight saving time.
            $dateKey = date('Ymd', strtotime('-'.$i.' day', $base));
            $result[$dateKey] = $this->countFileMID($dateKey, $appid, $op, $midflag);
        }

        return $result;
    }
}
?>

php常用文件操作函数汇总
PHP打开和关闭文件操作函数总结
PHP常用技术文之文件操作和目录操作总结
PHP+iFrame实现页面无需刷新的异步文件上传
PHP使用静态方法的几个注意事项
PHP处理JSON字符串key缺少双引号的解决方法
php中使用PHPExcel读写excel(xls)文件的方法
PHP常量使用的几个需要注意的地方(谨慎使用PHP中的常量)
php修改指定文件后缀的方法
叫你如何修改Nginx与PHP的文件上传大小限制
使用PHP和HTML5 FormData实现无刷新文件上传教程
PHP中使用sleep造成mysql读取失败的案例和解决方法
PHP中读取照片exif信息的方法
php操作XML、读取数据和写入数据的实现代码
Php连接及读取和写入mysql数据库的常用代码
ThinkPHP自动转义存储富文本编辑器内容导致读取出错的解决方法
不使用php api函数实现数组的交换排序示例
php使用smtp发送支持附件的邮件示例
php实现上传图片生成缩略图示例
php使用curl和正则表达式抓取网页数据示例
PHP header()函数常用方法总结
开源php中文分词系统SCWS安装和使用实例
PHP动态生成javascript文件的2个例子
PHP快速按行读取CSV大文件的封装类分享(也适用于其它超大文本文件)
php实现批量下载百度云盘文件例子分享
PHP按行读取、处理较大CSV文件的代码实例
PHP读取大文件的类SplFileObject使用介绍
PHP使用imagick读取PDF生成png缩略图的两种方法
php读取目录所有文件信息dir示例
解析web文件操作常见安全漏洞(目录、文件名检测漏洞)
基于php常用函数总结(数组,字符串,时间,文件操作)
基于PHP文件操作的详细诠释
基于PHP文件操作的详解
PHP常用的文件操作函数经典收藏
©2014-2024 dbsqp.com