Wednesday, June 11, 2014

Super Fast, Memory Optimized Excel Generation Using Apache POI


Problem Context 

Need to generate Huge Excels (Millions of Rows). When using a normal approach (in my previous example) when we tried to dump huge data sets into excels we faced many issues related to performance esp. OutOfMemory and Long time to generate excel (even after increased heap to 4GB).

Note that in previous mocker example I have used only 3 columns and Data size is relatively small than real world stage. So situation becomes more worse in real world than in mockers. And believe me no one understand Developer at that time.

Resolve the problem

Apache poi comes with memory friendly version of workbook class called org.apache.poi.xssf.streaming.SXSSFWorkbook. It has some mechanism called Row flushing. Meaning that flush rows from memory at defined WindowSize. And it has some cons. See more at.
http://poi.apache.org/apidocs/org/apache/poi/xssf/streaming/SXSSFWorkbook.html


So here it is...

(tweaked version from previous example but you can write your own separate component for it)

1. PersonDO - as it is.

2. Write to Excel


import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.ArrayList;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;

import org.apache.poi.xssf.usermodel.XSSFWorkbook;

//import statements
public class ReadExcelDemo
{
public static void main( String[] args )
{
System.out.println( java.lang.Runtime.getRuntime().totalMemory() );
try
{
FileInputStream file = new FileInputStream( new File( "new_example2.xlsx" ) );

// Create Workbook instance holding reference to .xlsx file

XSSFWorkbook workbook = new XSSFWorkbook( file );

// TWEAKED HERE                                    
SXSSFWorkbook wb = new SXSSFWorkbook( workbook );  
SXSSFSheet sheet = (SXSSFSheet) wb.getSheetAt( 0 );   

ArrayList<PersonDO> persons = PersonDO.getPersons( 1000 );
int startIndex = 1;
System.out.println( "Populating data..." );
for ( PersonDO p : persons )
{
Row row = sheet.createRow( startIndex ); // getRow( startIndex );
row.createCell( 0 ).setCellValue( p.getName() );
row.createCell( 1 ).setCellValue( p.getPhone() );
row.createCell( 2 ).setCellValue( p.getCity() );
if ( startIndex % 500 == 0 )
{
System.out.println( "At " + startIndex );
System.out.println( java.lang.Runtime.getRuntime().freeMemory() + " bytes" );
}
startIndex++;
}
System.out.println( "Writing to excel..." );
FileOutputStream fileOut = new FileOutputStream( "workbook.xlsx" );
wb.write( fileOut );
System.out.println( "Completed..." );
fileOut.close();
file.close();
wb.dispose();
}
catch ( Exception e )
{
e.printStackTrace();
}
}
}

Monday, June 2, 2014

POI XSSF (Example)

1. PersonDO


import java.util.ArrayList;
/**
 *
 */
/**
 * @author abhinav
 */
public class PersonDO
{
    private String name;
    private String city;
    private String phone;
    public static ArrayList<PersonDO> getPersons( Number n )
    {
        ArrayList<PersonDO> list = new ArrayList<PersonDO>();
        for ( int i = 0; i <= n.intValue(); i++ )
        {
            PersonDO p = new PersonDO();
            p.setName( "abhinav " + i );
            p.setCity( "pune " + i );
            p.setPhone( "phone " + i );
            list.add( p );
        }
        return list;
    }
    /**
     * @return the name
     */
    public String getName()
    {
        return name;
    }
    /**
     * @param name the name to set
     */
    public void setName( String name )
    {
        this.name = name;
    }
    /**
     * @return the city
     */
    public String getCity()
    {
        return city;
    }
    /**
     * @param city the city to set
     */
    public void setCity( String city )
    {
        this.city = city;
    }
    /**
     * @return the phone
     */
    public String getPhone()
    {
        return phone;
    }
    /**
     * @param phone the phone to set
     */
    public void setPhone( String phone )
    {
        this.phone = phone;
    }
}


==============================================


2.  Write to Excel


import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.ArrayList;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
//import statements
public class ReadExcelDemo
{
    public static void main( String[] args )
    {
        System.out.println( java.lang.Runtime.getRuntime().maxMemory() );
        try
        {
            FileInputStream file = new FileInputStream( new File( "path2template.xlsx" ) );
            // Create Workbook instance holding reference to .xlsx file
            XSSFWorkbook workbook = new XSSFWorkbook( file );
            // Get first/desired sheet from the workbook
            XSSFSheet sheet = workbook.getSheetAt( 0 );
            ArrayList<PersonDO> persons = PersonDO.getPersons( 1000000 );
            int startIndex = 1;
            System.out.println( "Populating data..." );
            for ( PersonDO p : persons )
            {
                // Row row = sheet.getRow( startIndex );
                Row row = sheet.createRow( startIndex ); // getRow( startIndex );
                row.createCell( 0 ).setCellValue( p.getName() );
                row.createCell( 1 ).setCellValue( p.getPhone() );
                row.createCell( 2 ).setCellValue( p.getCity() );
                if ( startIndex % 500 == 0 )
                {
                    System.out.println( "At " + startIndex );
                    System.out.println( java.lang.Runtime.getRuntime().freeMemory() );
                }
                startIndex++;
            }
            System.out.println( "Writing to excel..." );
            FileOutputStream fileOut = new FileOutputStream( "output_file.xlsx" );
            workbook.write( fileOut );
            System.out.println( "Completed..." );
            fileOut.close();
            file.close();
        }
        catch ( Exception e )
        {
            e.printStackTrace();
        }
    }
}


Used JARS

G:\WORKSPACE\ALLPROJECTS\EXCELPOI\LIBS

  1.     dom4j-1.6.1.jar
  2.     poi-3.10-FINAL-20140208.jar
  3.     poi-examples-3.10-FINAL-20140208.jar
  4.     poi-ooxml-3.10-FINAL-20140208.jar
  5.     poi-ooxml-schemas-3.10-FINAL-20140208.jar
  6.     xmlbeans-2.3.0.jar

ExcelTemplate
https://drive.google.com/file/d/0B3prgPRdV5ZbOEhVSEpqZEV6SjQ/edit?usp=sharing