Tuesday, 14 July 2015

Convert PDF to text file (Core JAVA)

If you are using Maven, add the itextpdf dependency from the link below; otherwise, add the itextpdf JARs to your project's classpath:
Maven Dependency itextpdf

Download the sample PDF from the link below and save it on your D: drive as examplePDF.pdf:
Download Sample PDF (examplePDF.pdf)

The extracted text is saved as exampleText.txt on your D: drive (the program creates the file if it does not already exist).

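If you want the output under another file type, such as a Word document, just change the OUTPUT file name, e.g. "exampleText.doc". Note that the content written is still plain text; only the file extension changes.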


package automation.prac;


import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
//iText imports
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
 
/**
 * Example program that extracts the text of every page of a PDF with iText
 * and writes it to a text file.
 */
public class pdfToText {

    /** Path of the PDF that {@link #main(String[])} reads. */
    private static final String INPUTFILE = "d:\\examplePDF.pdf";

    /** Path of the text file that {@link #main(String[])} writes. */
    private static final String OUTPUTFILE = "d:\\exampleText.txt";

    /**
     * Extracts the text of each page of a PDF and writes it to a file, one
     * {@code println} per page, in page order.
     *
     * @param pdf location of the PDF, passed unchanged to {@code PdfReader}
     * @param txt path of the output file; it is created or overwritten and
     *            written as UTF-8
     * @throws IOException if the PDF cannot be read, the output file cannot be
     *                     opened, or writing the extracted text fails
     */
    public void partPdf(String pdf, String txt) throws IOException {
        PdfReader reader = new PdfReader(pdf);
        try {
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            // Explicit charset: the platform default may not be able to
            // represent every character extracted from the PDF.
            PrintWriter out = new PrintWriter(txt, "UTF-8");
            try {
                int pageCount = reader.getNumberOfPages();
                // The loop starts at 1 because iText page numbers are 1-based.
                for (int page = 1; page <= pageCount; page++) {
                    TextExtractionStrategy strategy =
                            parser.processContent(page, new SimpleTextExtractionStrategy());
                    out.println(strategy.getResultantText());
                }
                // PrintWriter never throws on write failures; checkError()
                // flushes the stream and reports whether any write failed.
                if (out.checkError()) {
                    throw new IOException("Failed to write extracted text to " + txt);
                }
            } finally {
                out.close();
            }
        } finally {
            // Always release the PDF, even when extraction or writing fails.
            reader.close();
        }
    }

    /**
     * Converts {@code INPUTFILE} to {@code OUTPUTFILE}, printing a message
     * before and after the conversion.
     *
     * @param args command-line arguments (unused)
     * @throws DocumentException declared for compatibility with existing callers
     * @throws IOException if reading the PDF or writing the text file fails
     */
    public static void main(String[] args) throws DocumentException, IOException {
        System.out.println("Program Starts");
        new pdfToText().partPdf(INPUTFILE, OUTPUTFILE);
        System.out.println("Program Ends");
    }

}

No comments:

Post a Comment