I'm using phpExcel to read a rather large XML file. I'm trying to do this in chunks, as is indicated in the examples. But no matter what I try, the "$objPHPExcel = $objReader->load($inputFileName)" always fails and results in an error saying that memory is exhausted.
I have seen http://phpexcel.codeplex.com/discussions/242712?ProjectName=phpexcel and How to read large worksheets from large Excel files (27MB+) with PHPExcel? but every code I tried failed at the same load method.
This is a part of the code I'm currently using:
/** Append the PHPExcel class directory to the include path so the bare include below resolves */
set_include_path(implode(PATH_SEPARATOR, array(get_include_path(), '../../../Classes/')));

/** PHPExcel_IOFactory */
include 'PHPExcel/IOFactory.php';

/** Reader type to use; the alternative reader names are left here, disabled */
// $inputFileType = 'Excel2007';
// $inputFileType = 'Excel2003XML';
// $inputFileType = 'OOCalc';
// $inputFileType = 'Gnumeric';
$inputFileType = 'Excel5';

/** Workbook that will be read */
$inputFileName = 'testfile.xls';
/**
 * Read filter (PHPExcel_Reader_IReadFilter) that accepts row 1 plus one
 * window of rows fixed at construction time.
 */
class chunkReadFilter implements PHPExcel_Reader_IReadFilter
{
    /** First row of the window (inclusive) */
    private $_startRow = 0;

    /** Row just past the window (exclusive) */
    private $_endRow = 0;

    /**
     * @param int $startRow  First row of the window
     * @param int $chunkSize Number of rows in the window
     */
    public function __construct($startRow, $chunkSize)
    {
        $this->_startRow = $startRow;
        $this->_endRow = $startRow + $chunkSize;
    }

    /**
     * Decide whether a cell should be read.
     *
     * @param string $column        Column of the cell (not used by this filter)
     * @param int    $row           Row of the cell
     * @param string $worksheetName Worksheet name (not used by this filter)
     * @return bool true for row 1 and for rows inside [startRow, endRow)
     */
    public function readCell($column, $row, $worksheetName = '')
    {
        // Row 1 is always accepted, whatever the window is
        if ($row == 1) {
            return true;
        }

        $insideWindow = ($row >= $this->_startRow && $row < $this->_endRow);

        return $insideWindow;
    }
}
/** One reader instance is created up front and reused for every chunk */
$objReader = PHPExcel_IOFactory::createReader($inputFileType);
$objReader->setReadDataOnly(true);
echo '<hr />';

/** Define how many rows we want for each "chunk" **/
$chunkSize = 5;

/** Loop to read our worksheet in "chunk size" blocks **/
for ($startRow = 2; $startRow <= 240; $startRow += $chunkSize) {
    echo 'Loading WorkSheet using configurable filter for headings row 1 and for rows ',$startRow,' to ',($startRow+$chunkSize-1),'<br />';

    /** Create a new Instance of our Read Filter, passing in the limits on which rows we want to read **/
    $chunkFilter = new chunkReadFilter($startRow, $chunkSize);

    /** Tell the Reader that we want to use the new Read Filter that we've just Instantiated **/
    $objReader->setReadFilter($chunkFilter);

    /** Load only the rows that match our filter from $inputFileName to a PHPExcel Object **/
    echo "before load <br />";
    $objPHPExcel = $objReader->load($inputFileName);
    echo "after load";

    // Do some processing here
    $sheetData = $objPHPExcel->getActiveSheet()->toArray(null, true, true, true);
    var_dump($sheetData);
    echo '<br /><br />';

    // Workbook, worksheets and cells reference each other, so a plain unset()
    // does not free them. Break the cycles first, otherwise every iteration
    // leaves the previous chunk's objects in memory.
    $objPHPExcel->disconnectWorksheets();
    unset($objPHPExcel, $sheetData);
}
So I wonder if anyone has an idea why none of the solutions in the other topics work?
Thanks,
Grtz
Related
I have a problem when calling the PHPExcel library's Autoloader.php file.
I try to upload the Excel file, then after clicking on the upload button, it saves data into the database. I get an error, as shown in the photo after uploading the file. How can I fix this?
<?php
require('library/Classes/PHPExcel/IOFactory.php');
/** Include path **/
set_include_path(get_include_path() . PATH_SEPARATOR . '../Classes/');

if (isset($_POST['submit'])) {
    // An <input type="file" name="file"> is delivered in $_FILES, not in $_POST,
    // so the upload is validated here instead of looking at $_POST['file'].
    if (!isset($_FILES['file']) || $_FILES['file']['error'] !== UPLOAD_ERR_OK) {
        echo("<br/>Sorry, no file was uploaded or the upload failed.");
    } else {
        // Name supplied by the browser: only used for display and the extension check
        $fileName = $_FILES['file']['name'];
        echo('fileName +' . htmlspecialchars($fileName, ENT_QUOTES, 'UTF-8'));

        // Path of the uploaded data on the server: field "file", key "tmp_name"
        $fileTempName = $_FILES['file']['tmp_name'];

        $fileExtension = strtolower(pathinfo($fileName, PATHINFO_EXTENSION));
        $allowedtype = array('xlsx', 'xls', 'xlsm');

        if (!in_array($fileExtension, $allowedtype, true)) {
            echo("<br/>Sorry, File type is not allowed. Only Excel file.");
        } else {
            echo("Correct File Extension");
            try {
                // Read from the temporary upload; the client file name does not
                // exist as a path on the server.
                $inputfiletype = PHPExcel_IOFactory::identify($fileTempName);
                $objReader = PHPExcel_IOFactory::createReader($inputfiletype);
                $objPHPExcel = $objReader->load($fileTempName);

                echo 'Reading the number of Worksheets in the WorkBook<br />';
                /** Use the PHPExcel object's getSheetCount() method to get a count of the number of WorkSheets in the WorkBook */
                $sheetCount = $objPHPExcel->getSheetCount();
                echo 'There ',(($sheetCount == 1) ? 'is' : 'are'),' ',$sheetCount,' WorkSheet',(($sheetCount == 1) ? '' : 's'),' in the WorkBook<br /><br />';

                echo 'Reading the names of Worksheets in the WorkBook<br />';
                /** Use the PHPExcel object's getSheetNames() method to get an array listing the names/titles of the WorkSheets in the WorkBook */
                $sheetNames = $objPHPExcel->getSheetNames();
                foreach ($sheetNames as $sheetIndex => $sheetName) {
                    // Sheet names come from the uploaded file, so escape them for HTML
                    echo 'WorkSheet #',$sheetIndex,' is named "',htmlspecialchars($sheetName, ENT_QUOTES, 'UTF-8'),'"<br />';
                }
            } catch (Exception $e) {
                die('Error loading file "'.htmlspecialchars(pathinfo($fileName, PATHINFO_BASENAME), ENT_QUOTES, 'UTF-8').'": '.htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8'));
            }
        }
    }
}
?>
I am using the excelToArray function found here: https://gist.github.com/calvinchoy/5821235
/**
 * Load the active worksheet of a spreadsheet file into a PHP array.
 *
 * @param string $filePath   Path of the spreadsheet to read
 * @param bool   $header     true: row 1 supplies the keys and every later row whose
 *                           column A is non-empty becomes one associative entry;
 *                           false: the sheet is returned as-is via toArray()
 * @param bool   $formatData true (default, the previous behaviour): values are passed
 *                           through each cell's number format; false: raw cell values.
 *                           NOTE(review): number formats with conditional sections
 *                           (e.g. "[<=9999999]###-####") appear to leak into the
 *                           formatted output; pass false to avoid that — confirm
 *                           against the PHPExcel version in use.
 * @return array Rows keyed by heading ($header = true) or by column letter
 */
function excelToArray($filePath, $header = true, $formatData = true) {
    // The original line ended in "));" which is a parse error
    require_once("./PHPExcel/Classes/PHPExcel.php");

    /** Identify the type of the input file and create the matching reader **/
    $inputFileType = PHPExcel_IOFactory::identify($filePath);
    $objReader = PHPExcel_IOFactory::createReader($inputFileType);

    /** Set read type to read cell data only **/
    // $objReader->setReadDataOnly(true); removed because: https://stackoverflow.com/questions/46142706/

    /** Load the file to a PHPExcel Object **/
    $objPHPExcel = $objReader->load($filePath);
    $objWorksheet = $objPHPExcel->getActiveSheet();

    if (!$header) {
        // Sheet without a header row: return everything keyed by row / column reference
        return $objWorksheet->toArray(null, true, $formatData, true);
    }

    // Sheet with a header row: row 1 provides the keys
    $highestRow = $objWorksheet->getHighestRow();
    $highestColumn = $objWorksheet->getHighestColumn();
    $headingsArray = $objWorksheet->rangeToArray('A1:'.$highestColumn.'1', null, true, $formatData, true);
    $headingsArray = $headingsArray[1];

    $r = -1;
    $namedDataArray = array();
    for ($row = 2; $row <= $highestRow; ++$row) {
        $dataRow = $objWorksheet->rangeToArray('A'.$row.':'.$highestColumn.$row, null, true, $formatData, true);
        // Rows with an empty column A are skipped
        if ((isset($dataRow[$row]['A'])) && ($dataRow[$row]['A'] > '')) {
            ++$r;
            foreach ($headingsArray as $columnKey => $columnHeading) {
                $namedDataArray[$r][$columnHeading] = $dataRow[$row][$columnKey];
            }
        }
    }

    return $namedDataArray;
}
I have a sample spreadsheet in Excel. Here it is:
Preview: https://drive.google.com/open?id=0B2GilRTNrTzKRUVsQWhnQkRLUmM
Direct Download: https://drive.google.com/uc?authuser=0&id=0B2GilRTNrTzKRUVsQWhnQkRLUmM&export=download
The problem is that when I upload the spreadsheet I'm seeing that phone numbers (which are simply large integers (ex: 2345556789) in the spreadsheet I am provided) are coming in with [<=9999999] in front of them. Ex: [<=9999999]1393153-7665
What is causing this, and how do I get it to simply read the value in as a string (or even an int)?
I'm aware of How to read large worksheets from large Excel files (27MB+) with PHPExcel? and I've tried to implement the chunked reading that is discussed in that question; however, I'm still suffering from an OOM error. The file itself is just under 5Mb, 9000+ rows (yes, it's over 9000!) ranging from A to V.
I'd rather not have the user do any editing on this file before uploading and processing it, as currently it's all a manual process and I'd like to completely replace it with a automated one. The file is of xls format, identified as Excel5 via PHPExcel.
my PHP memory limit is currently set to 128M, running on Ubuntu Server.
No matter what chunk size I set, I end up OOM'ing eventually. With larger chunk sizes it actually runs better (as in I can manage up to around row 7000) if I set the chunk size to 200, when set to 1 it OOM's around row 370. So I believe that 'something' is being stored, or loaded into memory in each iteration of the chunk reading and then not dropped again eventually causing the OOM but I can't see where this is happening.
I'm very much an amateur programmer, this is just something I'm doing on the side in my managed service role at work to try and make our lives easier.
The whole point of this code is to read the Excel file, filter out the 'crap' and then save it out as CSV (right now I'm just dumping it to the screen instead of a CSV). At the rate things are going I'm getting tempted to call excel2csv via the PHP script and then try to clean up the CSV instead... But that feels like giving up when I may be rather close to a solution.
<?php
// Report every PHP notice, warning and error
error_reporting(E_ALL);

// Default time zone used by the date/time functions
date_default_timezone_set('Europe/London');

// 0 removes the script execution time limit
set_time_limit(0);

// PHPExcel entry point
require_once 'Classes/PHPExcel/IOFactory.php';
/**
 * Read filter (PHPExcel_Reader_IReadFilter) that accepts only cells lying in a
 * configurable row window AND in a configurable list of columns.
 */
class chunkReadFilter implements PHPExcel_Reader_IReadFilter
{
    /** First row of the window (inclusive) */
    private $_startRow = 0;

    /** Row just past the window (exclusive) */
    private $_endRow = 0;

    /** Column letters that may be read */
    private $_columns = array();

    /**
     * Set the window of rows and the columns to read.
     *
     * @param int   $startRow  First row of the window
     * @param int   $chunkSize Number of rows in the window
     * @param array $columns   Column letters to accept
     */
    public function setRows($startRow, $chunkSize, $columns)
    {
        $this->_columns = $columns;
        $this->_startRow = $startRow;
        $this->_endRow = $startRow + $chunkSize;
    }

    /**
     * Decide whether a cell should be read.
     *
     * @param string $column        Column letter of the cell
     * @param int    $row           Row of the cell
     * @param string $worksheetName Worksheet name (not used by this filter)
     * @return bool true only when the row is in [startRow, endRow) and the column is listed
     */
    public function readCell($column, $row, $worksheetName = '')
    {
        $rowInWindow = ($row >= $this->_startRow && $row < $this->_endRow);
        if (!$rowInWindow) {
            return false;
        }

        return in_array($column, $this->_columns);
    }
}
$target_dir = "uploads/";
// file_name comes from the request: basename() drops any directory part so the
// resulting path cannot point outside $target_dir.
$file_name = basename($_POST["file_name"]);
$full_path = $target_dir . $file_name;
echo "Processing ". htmlspecialchars($file_name, ENT_QUOTES, 'UTF-8') . '; <br>';
ob_flush();
flush();

// Disabled: cell caching to php://temp for workbooks that are large in memory
// $cacheMethod = PHPExcel_CachedObjectStorageFactory::cache_to_phpTemp;
// $cacheSettings = array( 'memoryCacheSize' => '8MB');
// PHPExcel_Settings::setCacheStorageMethod($cacheMethod, $cacheSettings);

$inputFileName = $full_path;
echo 'Excel reader started<br/>';

/** First we should get the type of file **/
$filetype = PHPExcel_IOFactory::identify($inputFileName);
echo 'File of type: ' . $filetype . ' found<br/>';

/**
 * Define how many rows we want to read for each "chunk".
 * Every chunk calls load() on the same file again, so a chunk size of 1 means
 * one full load() call per row; a few hundred rows keeps the number of loads small.
 */
$chunkSize = 200;

/** Columns to read; built once instead of on every iteration **/
$columns = range('A', 'T');

/** Create a new Instance of our Read Filter **/
$chunkFilter = new chunkReadFilter();
$objReader = PHPExcel_IOFactory::createReader($filetype);

/** Tell the Reader that we want to use the Read Filter that we've Instantiated **/
$objReader->setReadFilter($chunkFilter);

/** Ask the reader how many rows the first worksheet has instead of assuming 65000 **/
$worksheetInfo = $objReader->listWorksheetInfo($inputFileName);
$totalRows = $worksheetInfo[0]['totalRows'];

/** Loop to read our worksheet in "chunk size" blocks **/
for ($startRow = 2; $startRow <= $totalRows; $startRow += $chunkSize) {
    // The last chunk may be shorter than $chunkSize
    $endRow = min($startRow + $chunkSize - 1, $totalRows);
    echo 'Loading WorkSheet using configurable filter for headings row 1 and for rows ',$startRow,' to ',$endRow,'<br />';

    /** Tell the Read Filter, the limits on which rows we want to read this iteration **/
    $chunkFilter->setRows($startRow, $chunkSize, $columns);

    /** Load only the rows that match our filter from $inputFileName to a PHPExcel Object **/
    $objPHPExcel = $objReader->load($inputFileName);

    // Do some processing here
    $sheetData = $objPHPExcel->getActiveSheet()->rangeToArray("A$startRow:T$endRow");
    var_dump($sheetData);

    // Break the workbook/worksheet/cell reference cycles before dropping the
    // variables, then let the cycle collector reclaim whatever is left.
    $objPHPExcel->disconnectWorksheets();
    unset($sheetData, $objPHPExcel);
    gc_collect_cycles();

    ob_flush();
    flush();
    echo '<br /><br />';
}
// Disabled alternative kept for reference: load the whole workbook in one call
// and write sheet 0 out as CSV. Everything up to the closing "**/" is a comment.
/** This loads the entire file, crashing with OOM
try {
$objPHPExcel = PHPExcel_IOFactory::load($inputFileName);
echo 'loaded sheet into memory<br>';
} catch(PHPExcel_Reader_Exception $e) {
die('Error loading file: '.$e->getMessage());
}
$objWriter = PHPExcel_IOFactory::createWriter($objPHPExcel, 'CSV');
echo 'Saving sheet as CSV<br>';
$objWriter->setSheetIndex(0);
$objWriter->save('./uploads/'.$file_name.'.csv');
echo 'Processed 1 sheet';
ob_flush();
flush();
**/
// Opens the <body>/<table> wrapper; with the block below disabled no rows are emitted
echo "<body><table>\n\n";
// Disabled: read the CSV back line by line and print each field as an escaped table cell
/**
$f = fopen($file_name, "r");
while (($line = fgetcsv($f)) !== false) {
echo "<tr>";
foreach ($line as $cell) {
echo "<td>" . htmlspecialchars($cell) . "</td>";
}
echo "</tr>\n";
}
fclose($f);
**/
// Closes the table and the document
// NOTE(review): no opening <html> tag is emitted anywhere in the visible script — confirm
echo "\n</table></body></html>";
?>
The error as indicated in the apache logs is:
[Fri Mar 31 15:35:27.982697 2017] [:error] [pid 1059] [client 10.0.2.2:53866] PHP Fatal error: Allowed memory size of 134217728 bytes exhausted (tried to allocate 45056 bytes) in /var/www/html/Classes/PHPExcel/Shared/OLERead.php on line 93, referer: http://localhost:8080/upload.php
unset ($objPHPExcel);
If you check the PHPExcel documentation, this won't cleanly unset $objPHPExcel because of cyclic references between the spreadsheet, worksheet and cells, and will result in memory leaks. The recommendation is to disconnect these cyclic references first.
$objPHPExcel->disconnectWorksheets();
unset($objPHPExcel);
There will still be some memory leakage, but it should allow more memory to be freed up between chunks
I am having problem while reading 3Mb data .xlsx file and same for 7Mb data .xls file. Is there any size limitations while reading file?
In my Excel file, I have 30,000 rows and 36 columns. Is there any solution so that I can read up to 100K records or more than that?
In my project I have to import 1 million records, but my code is not working for more than 29000 records. Up until 29000 records my code works on my local.
Also, reading 29000 records takes too much time, maybe 25 min.
Can anyone please explain why this happens, and what should I do to resolve this?
Here is my code:
<?php
// Report everything, and lift the time and memory limits for the import
error_reporting(E_ALL);
ini_set('memory_limit', '-1');
set_time_limit(0);
date_default_timezone_set('Europe/London');

// Line terminator: a real newline on the command line, an HTML break otherwise
if (PHP_SAPI == 'cli') {
    define('EOL', PHP_EOL);
} else {
    define('EOL', '<br />');
}

/** Set Include path to point at the PHPExcel Classes folder **/
set_include_path(implode(PATH_SEPARATOR, array(get_include_path(), 'Classes/')));

/** Include PHPExcel_IOFactory **/
include 'Classes/PHPExcel/IOFactory.php';

// Workbook to read, and the reader type PHPExcel identifies for it
$inputFileName = 'files/30000rows.xls';
$inputFileType = PHPExcel_IOFactory::identify($inputFileName);
/**
 * Read filter (PHPExcel_Reader_IReadFilter) that accepts row 1 plus a window
 * of rows which can be moved between loads with setRows().
 */
class chunkReadFilter implements PHPExcel_Reader_IReadFilter
{
    /** First row of the window (inclusive) */
    private $_startRow = 0;

    /** Row just past the window (exclusive) */
    private $_endRow = 0;

    /**
     * Set the window of rows that we want to read.
     *
     * @param int $startRow  First row of the window
     * @param int $chunkSize Number of rows in the window
     */
    public function setRows($startRow, $chunkSize)
    {
        $this->_endRow = $startRow + $chunkSize;
        $this->_startRow = $startRow;
    }

    /**
     * Decide whether a cell should be read.
     *
     * @param string $column        Column of the cell (not used by this filter)
     * @param int    $row           Row of the cell
     * @param string $worksheetName Worksheet name (not used by this filter)
     * @return bool true for row 1 and for rows inside [startRow, endRow)
     */
    public function readCell($column, $row, $worksheetName = '')
    {
        $isHeadingRow = ($row == 1);
        $isInWindow = ($row >= $this->_startRow && $row < $this->_endRow);

        return ($isHeadingRow || $isInWindow) ? true : false;
    }
}
echo 'Loading file ',pathinfo($inputFileName,PATHINFO_BASENAME),' using IOFactory with a defined reader type of ',$inputFileType,'<br />';

/**
 * Configure cell caching ONCE, before any workbook is loaded.
 * The original set three different methods on every iteration (each call
 * replacing the previous one) and passed the size under the key
 * ' memoryCacheSize ' with stray spaces, which does not match the documented
 * 'memoryCacheSize' setting.
 */
$cacheMethod = PHPExcel_CachedObjectStorageFactory::cache_to_phpTemp;
$cacheSettings = array('memoryCacheSize' => '32MB');
if (!PHPExcel_Settings::setCacheStorageMethod($cacheMethod, $cacheSettings)) {
    die($cacheMethod . " caching method is not available" . EOL);
}
echo date('H:i:s') , " Enable Cell Caching using " , $cacheMethod , " method" , EOL;

/** Create a new Reader of the type defined in $inputFileType **/
$objReader = PHPExcel_IOFactory::createReader($inputFileType);
$objReader->setReadDataOnly(true);
echo '<hr />';

/** Define how many rows we want to read for each "chunk" **/
$chunkSize = 1000;

/** Total rows in the first worksheet, as reported by the reader **/
$spreadsheetInfo = $objReader->listWorksheetInfo($inputFileName);
$totalRows = $spreadsheetInfo[0]['totalRows'];

/** Create a new Instance of our Read Filter **/
$chunkFilter = new chunkReadFilter();

/** Tell the Reader that we want to use the Read Filter that we've Instantiated **/
$objReader->setReadFilter($chunkFilter);

/** Loop to read our worksheet in "chunk size" blocks **/
for ($startRow = 2; $startRow <= $totalRows; $startRow += $chunkSize) {
    // Clamp the last chunk so the range never runs past the final row
    $endRow = min($startRow + $chunkSize - 1, $totalRows);

    echo "in for loop<br>";
    echo 'Loading WorkSheet using configurable filter for headings row 1 and for rows ',$startRow,' to ',$endRow,'<br />';

    /** Tell the Read Filter, the limits on which rows we want to read this iteration **/
    $chunkFilter->setRows($startRow, $chunkSize);

    /** Load only the rows that match our filter from $inputFileName to a PHPExcel Object **/
    $objPHPExcel = $objReader->load($inputFileName);
    $objWorksheet = $objPHPExcel->getActiveSheet();
    $highestColumn = $objWorksheet->getHighestColumn();

    // "- >" in the original was a parse error; the operator is "->"
    $sheetData = $objWorksheet->rangeToArray('A'.$startRow.':'.$highestColumn.$endRow, null, false, false, true);

    // Cell contents come from the file: escape them, and close the <pre> again
    echo '<pre>';
    echo htmlspecialchars(print_r($sheetData, true), ENT_QUOTES, 'UTF-8');
    echo '</pre>';

    // Break the reference cycles, then drop everything that points at the workbook
    $objPHPExcel->disconnectWorksheets();
    unset($objWorksheet, $sheetData, $objPHPExcel);
    echo '<br /><br />';
}
?>
To read XLSX files, I can recommend you to use Spout. It makes it super simple to deal with large files. Here is how you would do it:
// Create an XLSX reader and open the file
$reader = ReaderFactory::create(Type::XLSX);
$reader->open($filePath);

try {
    // Walk every sheet, and every row of each sheet, one row at a time
    while ($reader->hasNextSheet()) {
        $reader->nextSheet();
        while ($reader->hasNextRow()) {
            $row = $reader->nextRow();
            // do stuff
        }
    }
} catch (Exception $e) {
    // Close the reader even when row processing fails, then let the error propagate
    $reader->close();
    throw $e;
}

$reader->close();
This works for any file, regardless of the file size. No need to worry about caching, filtering, memory consumption. It will require less than 10MB of memory and should take less than a minute to process the entire file.
I am using PHPExcel to read excel and store it in database.
It is working fine when the number of records is small (let's say less than 500).
If I have up to 2000 records in the Excel file, it stops in the middle (after 400 - 500 rows are loaded) without any error.
Note: I am using sqlite to reduce memory usage but still i am facing issue...
Here is my code. Please suggest to make it workable.
// Report every PHP notice, warning and error
error_reporting(E_ALL);

// No execution time limit (set through both the ini value and set_time_limit)
ini_set('max_execution_time', 0);
set_time_limit(0);

//Set memory limit to maximum...
ini_set('memory_limit', '-1');

set_include_path(implode(PATH_SEPARATOR, array(get_include_path(), 'Classes/')));

/** PHPExcel_IOFactory */
// NOTE(review): the include path is built from $unsecured_param['home_dir'], which is
// defined outside this snippet — confirm it can never carry request-supplied data
include sprintf('%sAPIs/PHPExcelReader/Classes/PHPExcel/IOFactory.php', $unsecured_param['home_dir']);

//echo 'Loading file ',pathinfo($inputFileName,PATHINFO_BASENAME),' using IOFactory to identify the format<br />';
//if you only want to read certain cells within worksheets, you can add a filter:
$inputFileType = 'Excel2007';
/**
 * Read filter (PHPExcel_Reader_IReadFilter): accepts the heading row (row 1)
 * and the rows of the window most recently configured through setRows().
 */
class chunkReadFilter implements PHPExcel_Reader_IReadFilter
{
    /** First row of the window (inclusive) */
    private $_startRow = 0;

    /** Row just past the window (exclusive) */
    private $_endRow = 0;

    /**
     * Set the list of rows that we want to read.
     *
     * @param int $startRow  First row of the window
     * @param int $chunkSize Number of rows in the window
     */
    public function setRows($startRow, $chunkSize)
    {
        $this->_startRow = $startRow;
        $this->_endRow = $startRow + $chunkSize;
    }

    /**
     * Decide whether a cell should be read.
     *
     * @param string $column        Column of the cell (not used by this filter)
     * @param int    $row           Row of the cell
     * @param string $worksheetName Worksheet name (not used by this filter)
     * @return bool true for row 1 and for rows inside [startRow, endRow)
     */
    public function readCell($column, $row, $worksheetName = '')
    {
        // The heading row always passes
        if ($row == 1) {
            return true;
        }

        // Otherwise the row must fall inside the configured window
        if ($row >= $this->_startRow && $row < $this->_endRow) {
            return true;
        }

        return false;
    }
}
echo 'Loading file ',pathinfo($inputFileName,PATHINFO_BASENAME),' using IOFactory with a defined reader type of ',$inputFileType,'<br />';
/** Create a new Reader of the type defined in $inputFileType **/
$objReader = PHPExcel_IOFactory::createReader($inputFileType);
echo '<hr />';
/** Define how many rows we want to read for each "chunk" **/
$chunkSize = 20;
/** Create a new Instance of our Read Filter **/
$chunkFilter = new chunkReadFilter();
/** Tell the Reader that we want to use the Read Filter that we've Instantiated **/
$objReader->setReadFilter($chunkFilter);
/** Loop to read our worksheet in "chunk size" blocks **/
for ($startRow = 1; $startRow <= 2000; $startRow += $chunkSize) {
echo 'Loading WorkSheet using configurable filter for headings row 1 and for rows ',$startRow,' to ',($startRow+$chunkSize-1),'<br />';
/** Tell the Read Filter, the limits on which rows we want to read this iteration **/
$chunkFilter->setRows($startRow,$chunkSize);
$cacheMethod = PHPExcel_CachedObjectStorageFactory::cache_to_sqlite3;
$cacheSettings = array(
'cacheTime' => 600
);
PHPExcel_Settings::setCacheStorageMethod($cacheMethod, $cacheSettings);
/** Load only the rows that match our filter from $inputFileName to a PHPExcel Object **/
$objPHPExcel = $objReader->load($inputFileName);
// Do some processing here
$range_xl="A".($startRow).":X".($startRow + $chunkSize);
echo "$range_xl.<br>";
$sheetData = $objPHPExcel->getActiveSheet()->rangeToArray($range_xl,null,true,true,true);
//var_dump($sheetData);
unset($objPHPExcel);
//store data into array..
$i=0;$h=1;
foreach($sheetData as $rec)
{
foreach($rec as $part)
{
$items[$i]=$part; //echo "items $i]=" ; echo $items[$i];echo "<br>";
$i=$i+1;
}
$i=0;//1 row completed
$h=$h+1;//echo $items[0]."<br>";
//echo "<br>---------------------<br>";
//start loading a row into database..
if($h>4) {
//store excel values
$sno=trim($items[0]);echo $sno."<br>";
$cat=trim($items[1]);
$id_xl=trim($items[2]);
$name=trim($items[3]);
....