/// <summary>
/// Extracts the text of every page of a PDF using iTextSharp's
/// <c>SimpleTextExtractionStrategy</c>.
/// </summary>
/// <param name="Filename">Path of the PDF file; passed straight to the <c>PdfReader</c> constructor.</param>
/// <returns>
/// The concatenated text of all pages when every page yielded text. If at least one page
/// yielded no text, the extracted text is discarded and the result is instead the list of
/// empty page numbers (each followed by "&lt;br&gt;") plus " This page contains no text".
/// </returns>
public string ReadFile(string Filename)
{
    PdfReader reader = new PdfReader(Filename);
    try
    {
        StringBuilder pdfText = new StringBuilder();
        StringBuilder emptyPages = new StringBuilder();

        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            // A fresh strategy per page so no state carries over between pages.
            iTextSharp.text.pdf.parser.ITextExtractionStrategy its =
                new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();
            string extractText =
                iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, i, its);

            // NOTE(review): this round-trips the text through Encoding.Default. Characters the
            // default code page cannot represent may be replaced; kept as-is to preserve the
            // existing output — confirm whether the conversion is still wanted.
            byte[] defaultBytes = Encoding.Default.GetBytes(extractText);
            byte[] utf8Bytes = Encoding.Convert(Encoding.Default, Encoding.UTF8, defaultBytes);
            extractText = Encoding.UTF8.GetString(utf8Bytes);

            if (extractText != "")
            {
                pdfText.Append(extractText);
            }
            else
            {
                // Record the 1-based number of the page that produced no text.
                emptyPages.Append(i).Append("<br>");
            }
        }

        if (emptyPages.Length > 0)
        {
            return emptyPages + " This page contains no text";
        }

        return pdfText.ToString();
    }
    finally
    {
        // Close the reader even when extraction throws, so the file is not left open.
        reader.Close();
    }
}

/// <summary>
/// Extracts text from every page of a PDF by passing each page's raw content bytes to
/// <c>ExtractTextFromPDFBytes</c>, writing a '#' progress bar to the console as it goes.
/// </summary>
/// <param name="inFileName">Path of the PDF file; passed straight to the <c>PdfReader</c> constructor.</param>
/// <returns>The per-page results concatenated in page order, each followed by a single space.</returns>
public string ExtractText(string inFileName)
{
    // Total number of '#' characters written once extraction completes.
    const int totalLen = 68;

    // Create a reader for the given PDF file
    PdfReader reader = new PdfReader(inFileName);
    try
    {
        StringBuilder text = new StringBuilder();

        // Number of progress characters each page is worth (may be fractional).
        float charUnit = ((float)totalLen) / (float)reader.NumberOfPages;
        int totalWritten = 0;
        float curUnit = 0;

        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            text.Append(ExtractTextFromPDFBytes(reader.GetPageContent(page))).Append(" ");

            // Write the progress.
            if (charUnit >= 1.0f)
            {
                for (int i = 0; i < (int)charUnit; i++)
                {
                    Console.Write("#");
                    totalWritten++;
                }
            }
            else
            {
                // Accumulate fractional progress until it is worth at least one character.
                curUnit += charUnit;
                if (curUnit >= 1.0f)
                {
                    for (int i = 0; i < (int)curUnit; i++)
                    {
                        Console.Write("#");
                        totalWritten++;
                    }
                    curUnit = 0;
                }
            }
        }

        // Pad the bar to its full width; truncation above can leave it short.
        for (int i = totalWritten; i < totalLen; i++)
        {
            Console.Write("#");
        }

        return text.ToString();
    }
    finally
    {
        // Release the underlying file even when a page fails to parse.
        reader.Close();
    }
}
PdfContentByte.TEXT_RENDER_MODE_INVISIBLE
var
This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)