If you are using Maven, add the itextpdf dependency from the link below; otherwise, add the itextpdf JARs to your project manually:
Maven Dependency itextpdf
Download the sample PDF from the link below and save it on your D: drive as examplePDF.pdf:
Download Sample PDF (examplePDF.pdf)
Save exampleText.txt on your D: drive (the program writes the extracted text to this file).
If you want the output in another format, such as a Word document, just change the output file name (OUTPUTFILE), e.g. "exampleText.doc".
Maven Dependency itextpdf
Download the sample PDF from the link below and save it on your D: drive as examplePDF.pdf:
Download Sample PDF (examplePDF.pdf)
Save exampleText.txt on your D: drive (the program writes the extracted text to this file).
If you want the output in another format, such as a Word document, just change the output file name (OUTPUTFILE), e.g. "exampleText.doc".
package automation.prac;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;

//iText imports
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;

/**
 * Extracts the text of every page of a PDF with iText and writes it to a text file.
 *
 * <p>Running {@link #main(String[])} converts {@code d:\examplePDF.pdf} into
 * {@code d:\exampleText.txt}.
 */
public class pdfToText {

    /** PDF that {@link #main(String[])} reads. */
    private static final String INPUTFILE = "d:\\examplePDF.pdf";

    /** Text file that {@link #main(String[])} writes. */
    private static final String OUTPUTFILE = "d:\\exampleText.txt";

    /**
     * Writes the text of each page of {@code pdf} to {@code txt}, one page after another,
     * each followed by a line separator. The output is encoded as UTF-8 so that characters
     * outside the platform default charset are not replaced with {@code ?}.
     *
     * @param pdf path of the PDF file to read
     * @param txt path of the text file to create or overwrite
     * @throws IOException if the PDF cannot be read or the text file cannot be written
     */
    public void partPdf(String pdf, String txt) throws IOException {
        // Open the PDF first so a missing/unreadable input does not create or truncate the output.
        PdfReader reader = new PdfReader(pdf);
        try {
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            try (PrintWriter out = new PrintWriter(
                    new OutputStreamWriter(new FileOutputStream(txt), StandardCharsets.UTF_8))) {
                int pageCount = reader.getNumberOfPages();
                // The loop visits pages 1..pageCount inclusive.
                for (int page = 1; page <= pageCount; page++) {
                    TextExtractionStrategy strategy =
                            parser.processContent(page, new SimpleTextExtractionStrategy());
                    out.println(strategy.getResultantText());
                }
                // PrintWriter never throws on write failures; checkError() flushes and
                // reports them, so surface the problem instead of silently losing text.
                if (out.checkError()) {
                    throw new IOException("Failed to write extracted text to " + txt);
                }
            }
        } finally {
            // Release the reader even when parsing or writing fails.
            reader.close();
        }
    }

    /**
     * Converts the sample PDF to text, printing a marker line before and after the conversion.
     *
     * @param args ignored
     * @throws DocumentException declared for compatibility with existing callers
     * @throws IOException if the PDF cannot be read or the text file cannot be written
     */
    public static void main(String[] args) throws DocumentException, IOException {
        System.out.println("Program Starts");
        new pdfToText().partPdf(INPUTFILE, OUTPUTFILE);
        System.out.println("Program Ends");
    }
}
No comments:
Post a Comment