Introduction
This program will convert office, text and image files to PDFs. To use this program, drag your file(s) or folders onto the script file. Files in sub-folders will be converted too.
Using the Code
The VBS script uses MS Office to convert Excel, Word, Text and Power Point documents.
The VBS script uses free Tesseract library (by Google) to convert images to PDF.
Const sTesseractPath = "C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"
Const sFileSuffix = "_out"
Set fso = CreateObject("Scripting.FileSystemObject")
Set oShell = WScript.CreateObject ("WSCript.shell")
Dim iCount: iCount = 0
Dim oLog
Dim bLogUsed: bLogUsed = False
Dim sFolderPath: sFolderPath = GetFolderPath()
Dim excel, word, powerPoint
Set excel = Nothing
Set word = Nothing
Set powerPoint = Nothing
if WScript.Arguments.Count = 0 then
MsgBox "Please drop office and image files or folders to convert them to searchable PDFs"
Else
Set oLog = fso.CreateTextFile(WScript.ScriptFullName & ".log", True)
For i = 0 to WScript.Arguments.Count -1
sFile = WScript.Arguments(i)
If fso.FileExists(sFile) Then
ProcessFile sFile
ElseIf fso.FolderExists(sFile) Then
ProcessFolder sFile
End If
Next
CloseOfficeApps
oLog.Close
If bLogUsed = False Then
fso.DeleteFile WScript.ScriptFullName & ".log"
End If
MsgBox "Created " & iCount & " PDFs"
End if
Sub ProcessFolder(sFolder)
Set oFolder = fso.GetFolder(sFolder)
For Each oFile in oFolder.Files
ProcessFile oFile.Path
Next
For Each oSubfolder in oFolder.SubFolders
ProcessFolder oSubfolder.Path
Next
End Sub
Sub ProcessFile(sFile)
Dim iPos, sFileBase, sOutPdf, sOutPdfNoExt
iPos = InStrRev(sFile,".")
sFileBase = Mid(sFile,1,iPos - 1)
sOutPdfNoExt = sFileBase & sFileSuffix
sOutPdf = sOutPdfNoExt & ".pdf"
If fso.FileExists(sOutPdf) Then
Msg sOutPdf & " already exists"
Exit Sub
End If
sFileExt = LCASE(fso.GetExtensionName(sFile))
Select Case sFileExt
Case "xlsx", "xls", "csv"
ExcelToPdf sFile, sOutPdf
Case "docx", "doc", "txt", "rtf", "sql"
WordToPdf sFile, sOutPdf
Case "pptx", "ppt"
PowerPointToPdf sFile, sOutPdf
Case "bmp","pnm","png","jfif","jpeg","jpg","tiff","gif"
ImgToPdf sFile, sOutPdfNoExt
Case Else
Msg "File type: " & sFileExt & " is not supported"
End Select
If fso.FileExists(sOutPdf) Then
iCount = iCount + 1
Else
Msg sOutPdf & ".pdf could not be created"
End If
End Sub
Sub ImgToPdf(sInFile, sOutPdf)
If fso.FileExists(sTesseractPath) = False Then
MsgBox "Tesseract is not installed. Download Here: _
https://github.com/UB-Mannheim/tesseract/wiki. _
If is installed, modify the first line of this script file to point it to tesseract.exe"
oShell.Run "chrome -url https://github.com/UB-Mannheim/tesseract/wiki"
WScript.Quit
End If
oShell.run """" & sTesseractPath & """ """ & sInFile & """ """ & sOutPdf & """ pdf", 1 , True
End Sub
Sub ExcelToPdf(sFrom, sTo)
If excel is Nothing Then
Set excel = CreateObject("Excel.Application")
End If
excel.ScreenUpdating = false
excel.DisplayAlerts = false
Set workbook = excel.Workbooks.Open(sFrom)
workbook.ExportAsFixedFormat 0, sTo
workbook.Close()
Set workbook = Nothing
End Sub
Sub WordToPdf(sFrom, sTo)
If word is Nothing Then
Set word = CreateObject("Word.Application")
End If
Set doc = word.Documents.Open(sFrom)
doc.Activate()
doc.SaveAs2 sTo, 17
doc.Close()
Set doc = Nothing
End Sub
Sub PowerPointToPdf(sFrom, sTo)
If powerPoint is Nothing Then
Set powerPoint = CreateObject("PowerPoint.Application")
End If
Const msoFalse = 0
Set pres = powerPoint.Presentations.Open(sFrom, , , msoFalse)
pres.SaveAs sTo, 32
pres.Close
Set pres = Nothing
End Sub
Sub CloseOfficeApps()
If Not excel is Nothing Then
excel.Quit()
Set excel = Nothing
End If
If Not word is Nothing Then
word.Quit()
Set word = Nothing
End If
If Not powerPoint is Nothing Then
powerPoint.Quit()
Set powerPoint = Nothing
End If
End Sub
Function GetFolderPath()
Dim oFile
Set oFile = fso.GetFile(WScript.ScriptFullName)
GetFolderPath = oFile.ParentFolder
End Function
Sub Msg(s)
oLog.WriteLine Now & vbTab & s
bLogUsed = True
End Sub
The script will add (_out
) prefix to each PDF file. The prefix can be changed in Line2
. Here is a script that will move all PDF files with (_out
) prefix to a folder with (_out
) prefix.
Set fso = CreateObject("Scripting.FileSystemObject")
Dim sFileSuffix: sFileSuffix = "_out"
Dim sInFolder: sInFolder = ""
Dim sOutFolder: sOutFolder = ""
if WScript.Arguments.Count <> 1 then
MsgBox "Please drop folder to move OCR PDF files to " & sFileSuffix & " folder"
Else
If WScript.Arguments.Count = 1 Then
sFolder = WScript.Arguments(i)
If fso.FolderExists(sFolder) Then
sInFolder = sFolder
sOutFolder = sFolder & sFileSuffix
ProcessFolder sFolder
MsgBox "Done"
End If
End If
End if
Sub ProcessFolder(sFolder)
iPrefixLen = Len(sFileSuffix) + 4
sSuffix = Replace(sFolder,sInFolder, "")
sTargetFolder = sOutFolder & "" & sSuffix
If fso.FolderExists(sTargetFolder) = False Then
fso.CreateFolder sTargetFolder
End If
Set oFolder = fso.GetFolder(sFolder)
For Each oFile in oFolder.Files
If Right(oFile.Path, iPrefixLen) = sFileSuffix & ".pdf" Then
sOutFile = Mid(oFile.Name, 1, Len(oFile.Name) - iPrefixLen) & ".pdf"
fso.MoveFile oFile.Path, sTargetFolder & "\" & sOutFile
End If
Next
For Each oSubfolder in oFolder.SubFolders
ProcessFolder oSubfolder.Path
Next
End Sub
I have been using this script for some time and decided to share it. I hope someone else will find this useful. If you want to merge all of these PDFs, you can use the PDF Merge application I created earlier.