Quote

"Between stimulus and response there is a space. In that space is our power to choose our response.
In our response lies our growth and freedom"


“The only way to discover the limits of the possible is to go beyond them into the impossible.”


Friday, 10 April 2015

Writing, Reading & Manipulating PDF Files



One of the big shots at my current organization came to me with a problem: we were stuck with managing PDFs. It made me realize that what I thought was a routine affair may be a problem for many, hence this piece.

One of the simplest ways of managing (reading, writing) PDFs is to use the iTextPDF library. The library is available for use with Java, a freely available technology. It is also available for other technologies such as .NET and Android. For use with Java, the library is available in the form of a Jar which can be downloaded from here.

What can we do with iText?

We can do the following using iText PDF library:
Create PDF
Inspect PDF
Split PDF
Merge PDF
Fill PDF forms
So let us write some code to create and read PDFs. We will first create a PDF and then read the created PDF.


Writing PDF Files

//Import the required classes
import com.itextpdf.text.*;
import com.itextpdf.text.pdf.PdfWriter;


//Import Java classes
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Date;

public class LetsCreatePDF {

       //Define the features of the file, such as the path, name and font size and color
       static String fileNamePath = "E:/Createdpdfs/MYPDF.pdf";
       static Font boldBigFont = new Font(Font.FontFamily.COURIER, 18, Font.BOLD);
       static Font smlRedFont = new Font(Font.FontFamily.COURIER, 12, Font.NORMAL, BaseColor.RED);
       static Font medFont = new Font(Font.FontFamily.COURIER, 16, Font.BOLD);
       static Font smallBoldFont = new Font(Font.FontFamily.COURIER, 12, Font.BOLD);
       public static final String RESOURCE = "E:/Createdpdfs/img/Dollar.jpg";
      
      
       // If you want to add meta-data to the created PDF you can do by operating on com.itextpdf.text.Document object
               
                private static void addMetaData(Document document) {
               
                     // Find these details under File -> Properties 
                     document.addTitle("Programmatically Created PDF");
                  document.addSubject("PDF Creation is Simple");
                  document.addKeywords("Creat, PDF, iText, Java");
                  document.addAuthor("Satyam Singh");
                  document.addCreator("iTextPDF Library");
                 
               
                }

               
                //This method will be called to create the Cover page of the PDF document we are creating
               
                private static void addCoverPage(Document document) throws DocumentException {
                       //Define a new paragraph to be used only in cover page 
                       Paragraph coverPara = new Paragraph();
                      
                       //This adds a blank line
                       coverPara.add(new Paragraph(" ")); 
                      
                       // Lets write a big header
                       coverPara.add(new Paragraph("ABCE SOFTWARE CORPORATION QUARTER4 2015 RESULTS", boldBigFont));
                      
                       //Add Image
                       try {
                                  Image img = Image.getInstance("E:/Createdpdfs/img/GrowMoney.jpg");
                                   if (img.getScaledWidth() > 300 || img.getScaledHeight() > 300) {
                                     img.scaleToFit(300, 300);
                                 }
                                   img.setAlignment(Image.MIDDLE);
                                   coverPara.add(img);
                           } catch (MalformedURLException e) {
                                  // TODO Auto-generated catch block
                                  e.printStackTrace();
                           } catch (IOException e) {
                                  // TODO Auto-generated catch block
                                  e.printStackTrace();
                           }

                       //This adds a blank line
                       coverPara.add(new Paragraph(" ")); 
                      
                       // Will create: Report generated by: _name, _date
                       coverPara.add(new Paragraph("Report generated by: " + System.getProperty("user.name") + ", " + new Date(), //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
                             smallBoldFont));
                       
                         for (int i = 0; i < 3; i++) {
                            coverPara.add(new Paragraph(" "));
                           }
                        
                         coverPara.add(new Paragraph("This document describes unaudited Quater four numbers of various functions ",
                             smallBoldFont));

                         for (int i = 0; i < 8; i++) {
                            coverPara.add(new Paragraph(" "));
                           }

                         coverPara.add(new Paragraph("The contents of this document are sctrictly private and confidential, disclosure is strongly encouraged ;-)",
                             smlRedFont));

                         document.add(coverPara);
                       
                       }
               
               
               
       public static void main(String[] args) {
              // TODO Auto-generated method stub
             
              try {
                    Document document = new Document();
                    PdfWriter.getInstance(document, new FileOutputStream(fileNamePath));
                    document.open();
                    addMetaData(document);
                    addCoverPage(document);
                    document.close();
                  } catch (Exception e) {
                    e.printStackTrace();
                  }

       }
      
}


Generated PDF Document


Reading PDF Files

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;

public class PDFContentReader {

    /** The original PDF that will be parsed. */
    public static final String PREFACE = "E:/Createdpdfs/MYPDF.pdf";
    /** The resulting text file. */
    public static final String RESULT = "E:/Createdpdfs/MYPDFContent.txt";

    /**
     * Parses a PDF to a plain text file.
     * @param pdf the original PDF
     * @param txt the resulting text
     * @throws IOException
     */
    public void parsePdf(String pdf, String txt) throws IOException {

        //Create a PdfReader object instance
       PdfReader readerContent = new PdfReader(pdf);

        //Pass the PdfReader object to the PdfReaderContentParser instance
       PdfReaderContentParser parser = new PdfReaderContentParser(readerContent);

       //Create a content holder
        PrintWriter contentHolder = new PrintWriter(new FileOutputStream(txt));
        TextExtractionStrategy strategy;

        for (int i = 1; i <= readerContent.getNumberOfPages(); i++) {
            strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
            contentHolder.println(strategy.getResultantText());
        }

        contentHolder.flush();
        contentHolder.close();
        readerContent.close();
    }

    /**
     * Main method.
     * @param    args    no arguments needed
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        new PDFContentReader().parsePdf(PREFACE, RESULT);
    }
}
 

No comments:

Post a Comment