One of the senior people in my current organization came to me with
a problem: we were stuck on managing PDFs. It made me realize that what I
thought was a routine affair may be a problem for many. Hence this piece.
One of the simplest ways of managing (reading, writing) PDFs
is to use the iTextPDF library. The library is available for use with Java, a freely
available technology. It is also available for other technologies such as .NET
and Android. For use with Java, the library is available in the form of a JAR, which can
be downloaded from here.
What can we do with iText?
We can do the following using iText PDF library:
Create PDF
Inspect PDF
Split PDF
Merge PDF
Fill PDF forms
So let us write some code to create and read PDFs. We will
first create a PDF and then read the created PDF.
Writing PDF Files
//Import the required classes
import com.itextpdf.text.*;
import
com.itextpdf.text.pdf.PdfWriter;
//Import Java classes
import
java.io.FileOutputStream;
import java.io.IOException;
import
java.net.MalformedURLException;
import java.util.Date;
public class LetsCreatePDF {
//Define the
features of the file, such as the path, name and font size and color
static String fileNamePath = "E:/Createdpdfs/MYPDF.pdf";
static Font boldBigFont = new
Font(Font.FontFamily.COURIER, 18, Font.BOLD);
static Font smlRedFont = new
Font(Font.FontFamily.COURIER, 12, Font.NORMAL, BaseColor.RED);
static Font medFont = new
Font(Font.FontFamily.COURIER, 16, Font.BOLD);
static Font smallBoldFont = new
Font(Font.FontFamily.COURIER, 12, Font.BOLD);
public static final String RESOURCE = "E:/Createdpdfs/img/Dollar.jpg";
// If you want to
add meta-data to the created PDF you can do by operating on
com.itextpdf.text.Document object
private static void addMetaData(Document
document) {
// Find these
details under File -> Properties
document.addTitle("Programmatically
Created PDF");
document.addSubject("PDF Creation is Simple");
document.addKeywords("Creat, PDF, iText, Java");
document.addAuthor("Satyam Singh");
document.addCreator("iTextPDF Library");
}
//This method will be called to create the
Cover page of the PDF document we are creating
private static void
addCoverPage(Document document) throws DocumentException {
//Define a new paragraph to be used only in
cover page
Paragraph coverPara = new Paragraph();
//This adds a blank line
coverPara.add(new Paragraph(" "));
// Lets write a big header
coverPara.add(new Paragraph("ABCE SOFTWARE
CORPORATION QUARTER4 2015 RESULTS", boldBigFont));
//Add Image
try {
Image
img = Image.getInstance("E:/Createdpdfs/img/GrowMoney.jpg");
if (img.getScaledWidth() > 300 || img.getScaledHeight()
> 300) {
img.scaleToFit(300, 300);
}
img.setAlignment(Image.MIDDLE);
coverPara.add(img);
}
catch
(MalformedURLException e) {
// TODO Auto-generated
catch block
e.printStackTrace();
}
catch (IOException e) {
// TODO Auto-generated
catch block
e.printStackTrace();
}
//This adds a blank line
coverPara.add(new Paragraph(" "));
// Will create: Report generated by: _name,
_date
coverPara.add(new Paragraph("Report
generated by: " + System.getProperty("user.name") + ", " + new Date(), //$NON-NLS-1$
//$NON-NLS-2$ //$NON-NLS-3$
smallBoldFont));
for (int i = 0; i < 3; i++) {
coverPara.add(new Paragraph(" "));
}
coverPara.add(new Paragraph("This document
describes unaudited Quater four numbers of various functions ",
smallBoldFont));
for (int i = 0; i < 8; i++) {
coverPara.add(new Paragraph(" "));
}
coverPara.add(new Paragraph("The contents
of this document are sctrictly private and confidential, disclosure is strongly
encouraged ;-)",
smlRedFont));
document.add(coverPara);
}
public static void main(String[] args) {
// TODO Auto-generated
method stub
try {
Document document = new Document();
PdfWriter.getInstance(document, new FileOutputStream(fileNamePath));
document.open();
addMetaData(document);
addCoverPage(document);
document.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
Generated PDF Document |
Reading PDF Files
import
java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import
com.itextpdf.text.pdf.PdfReader;
import
com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import
com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import
com.itextpdf.text.pdf.parser.TextExtractionStrategy;
public class PDFContentReader {
/**
The original PDF that will be parsed. */
public static final String PREFACE = "E:/Createdpdfs/MYPDF.pdf";
/**
The resulting text file. */
public static final String RESULT = "E:/Createdpdfs/MYPDFContent.txt";
/**
* Parses a PDF to a plain text file.
* @param pdf the original PDF
* @param txt the resulting text
* @throws IOException
*/
public void parsePdf(String pdf, String txt) throws IOException {
//Create
a PdfReader object instance
PdfReader readerContent = new PdfReader(pdf);
//Pass
the PdfReader object to the PdfReaderContentParser instance
PdfReaderContentParser parser = new
PdfReaderContentParser(readerContent);
//Create a content holder
PrintWriter contentHolder = new PrintWriter(new FileOutputStream(txt));
TextExtractionStrategy strategy;
for (int i = 1; i <= readerContent.getNumberOfPages(); i++) {
strategy = parser.processContent(i, new
SimpleTextExtractionStrategy());
contentHolder.println(strategy.getResultantText());
}
contentHolder.flush();
contentHolder.close();
readerContent.close();
}
/**
* Main method.
* @param args no arguments needed
* @throws IOException
*/
public static void main(String[] args) throws IOException {
new PDFContentReader().parsePdf(PREFACE, RESULT);
}
}
No comments:
Post a Comment