Pages

Pages

Tuesday, 14 July 2015

Convert PDF to text file (Core JAVA)

If you are using Maven, add the itextpdf dependency from the link below; otherwise, add the itextpdf jars to your project:
Maven Dependency itextpdf

Download the sample PDF from the link below and save it on your D drive as examplePDF.pdf:
Download Sample PDF (examplePDF.pdf)

Save exampleText.txt in your d drive.

If you want the output in another format, such as a Word document, just change the file name in OUTPUTFILE, e.g. "exampleText.doc".


package automation.prac;


import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
//iText imports
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
 
/**
 * Extracts the text of every page of a PDF with iText and writes it to a file.
 *
 * <p>Running {@link #main(String[])} converts {@code d:\examplePDF.pdf} into
 * {@code d:\exampleText.txt}.
 */
public class pdfToText {

    /** Path of the PDF read by {@link #main(String[])}. */
    private static final String INPUTFILE = "d:\\examplePDF.pdf";

    /** Path of the text file written by {@link #main(String[])}. */
    private static final String OUTPUTFILE = "d:\\exampleText.txt";

    /**
     * Writes the text of each page of {@code pdf} to {@code txt}, one page after
     * another, each followed by a line separator.
     *
     * <p>The output is encoded with the platform default charset, because the
     * writer is built directly on a {@link FileOutputStream}.
     *
     * @param pdf path of the PDF file to read
     * @param txt path of the text file to create or overwrite
     * @throws IOException if the PDF cannot be read, the output file cannot be
     *         opened, or writing the extracted text fails
     */
    public void partPdf(String pdf, String txt) throws IOException {
        // Open the PDF first so a missing or unreadable input does not leave an
        // empty output file behind.
        PdfReader reader = new PdfReader(pdf);
        try {
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            PrintWriter out = new PrintWriter(new FileOutputStream(txt));
            try {
                int pageCount = reader.getNumberOfPages();
                // The loop addresses pages starting from 1, as the parser expects.
                for (int page = 1; page <= pageCount; page++) {
                    TextExtractionStrategy strategy =
                            parser.processContent(page, new SimpleTextExtractionStrategy());
                    out.println(strategy.getResultantText());
                }
                // PrintWriter never throws on write failures; checkError() flushes
                // and reports them so a truncated file is not mistaken for success.
                if (out.checkError()) {
                    throw new IOException("Failed to write extracted text to " + txt);
                }
            } finally {
                out.close();
            }
        } finally {
            // Release the PDF even when extraction or writing fails.
            reader.close();
        }
    }

    /**
     * Converts the sample PDF at {@code INPUTFILE} to text at {@code OUTPUTFILE},
     * printing a message before and after the conversion.
     *
     * @param args command-line arguments (unused)
     * @throws DocumentException declared for iText; not thrown by the visible code
     * @throws IOException if reading the PDF or writing the text file fails
     */
    public static void main(String[] args) throws DocumentException, IOException {
        System.out.println("Program Starts");
        new pdfToText().partPdf(INPUTFILE, OUTPUTFILE);
        System.out.println("Program Ends");
    }

}

No comments:

Post a Comment