Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles / Languages / VBScript

Convert All Files to Searchable PDFs

5.00/5 (5 votes)
26 Mar 2019CPOL 13.3K   266  
This program will convert office, text and image files to PDFs.

Introduction

This program will convert office, text and image files to PDFs. To use this program, drag your file(s) or folders onto the script file. Files in sub-folders will be converted too.

Using the Code

The VBS script uses MS Office to convert Excel, Word, Text and Power Point documents.

The VBS script uses free Tesseract library (by Google) to convert images to PDF.

VBScript
Const sTesseractPath = "C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"  'Download Here: 
                                             'https://github.com/UB-Mannheim/tesseract/wiki
Const sFileSuffix = "_out" 'Appends at the end of output file name

Set fso = CreateObject("Scripting.FileSystemObject")
Set oShell = WScript.CreateObject ("WSCript.shell")
Dim iCount: iCount = 0
Dim oLog
Dim bLogUsed: bLogUsed = False
Dim sFolderPath: sFolderPath = GetFolderPath()
Dim excel, word, powerPoint
Set excel = Nothing
Set word = Nothing
Set powerPoint = Nothing

if WScript.Arguments.Count = 0 then
    MsgBox "Please drop office and image files or folders to convert them to searchable PDFs"

Else
    Set oLog = fso.CreateTextFile(WScript.ScriptFullName & ".log", True)

    For i = 0 to WScript.Arguments.Count -1
      sFile = WScript.Arguments(i)
      
      If fso.FileExists(sFile) Then        
        ProcessFile sFile
        
      ElseIf fso.FolderExists(sFile) Then  
        ProcessFolder sFile
      End If
      
    Next

    CloseOfficeApps
    oLog.Close    

    If bLogUsed = False Then
    'Delete unused Log
    fso.DeleteFile WScript.ScriptFullName & ".log"
    End If

    MsgBox "Created " & iCount & " PDFs" 
End if

Sub ProcessFolder(sFolder)
  Set oFolder = fso.GetFolder(sFolder)
  For Each oFile in oFolder.Files
    ProcessFile oFile.Path
  Next
  
   For Each oSubfolder in oFolder.SubFolders
    ProcessFolder oSubfolder.Path
   Next
End Sub

Sub ProcessFile(sFile)
  Dim iPos, sFileBase, sOutPdf, sOutPdfNoExt
  iPos = InStrRev(sFile,".")
  sFileBase = Mid(sFile,1,iPos - 1)
  sOutPdfNoExt = sFileBase & sFileSuffix
  sOutPdf = sOutPdfNoExt & ".pdf"

  If fso.FileExists(sOutPdf) Then    
    Msg sOutPdf & " already exists"
    Exit Sub
  End If
  
  sFileExt = LCASE(fso.GetExtensionName(sFile))

  Select Case sFileExt

    Case "xlsx", "xls", "csv"
      ExcelToPdf sFile, sOutPdf
     
    Case "docx", "doc", "txt", "rtf", "sql"
      WordToPdf sFile, sOutPdf
      
    Case "pptx", "ppt"
      PowerPointToPdf sFile, sOutPdf
      
    Case "bmp","pnm","png","jfif","jpeg","jpg","tiff","gif"
      ImgToPdf sFile, sOutPdfNoExt
      
    Case Else
      Msg "File type: " & sFileExt & " is not supported"
      
  End Select  

  If fso.FileExists(sOutPdf) Then    
    iCount = iCount + 1
  Else
    Msg sOutPdf & ".pdf could not be created"
  End If

End Sub

Sub ImgToPdf(sInFile, sOutPdf)
  If fso.FileExists(sTesseractPath) = False Then 
    MsgBox "Tesseract is not installed. Download Here: _
            https://github.com/UB-Mannheim/tesseract/wiki.  _
            If is installed, modify the first line of this script file to point it to tesseract.exe"
    oShell.Run "chrome -url https://github.com/UB-Mannheim/tesseract/wiki"
    WScript.Quit
  End If
  
  oShell.run """" & sTesseractPath & """ """ & sInFile & """ """ & sOutPdf & """ pdf", 1 , True   
End Sub

Sub ExcelToPdf(sFrom, sTo)

  If excel is Nothing Then
    Set excel = CreateObject("Excel.Application")
  End If
  
  excel.ScreenUpdating = false
  excel.DisplayAlerts = false

  Set workbook = excel.Workbooks.Open(sFrom)
  workbook.ExportAsFixedFormat 0, sTo

  workbook.Close()
  Set workbook = Nothing
End Sub

Sub WordToPdf(sFrom, sTo)

  If word is Nothing Then
    Set word = CreateObject("Word.Application")
  End If

  Set doc = word.Documents.Open(sFrom)
  doc.Activate()
  doc.SaveAs2 sTo, 17
  doc.Close()
  Set doc = Nothing
End Sub

Sub PowerPointToPdf(sFrom, sTo)

  If powerPoint is Nothing Then
    Set powerPoint = CreateObject("PowerPoint.Application")
  End If
  
  Const msoFalse = 0
  Set pres = powerPoint.Presentations.Open(sFrom, , , msoFalse)
  pres.SaveAs sTo, 32
  pres.Close
  Set pres = Nothing
End Sub

Sub CloseOfficeApps()
    If Not excel is Nothing Then
      excel.Quit()
      Set excel = Nothing
    End If

    If Not word is Nothing Then
      word.Quit()
      Set word = Nothing
    End If
    
    If Not powerPoint is Nothing Then
      powerPoint.Quit()
      Set powerPoint = Nothing
    End If
End Sub

Function GetFolderPath()
    Dim oFile 'As Scripting.File
    Set oFile = fso.GetFile(WScript.ScriptFullName)
    GetFolderPath = oFile.ParentFolder
End Function

Sub Msg(s)
  oLog.WriteLine Now & vbTab & s
  bLogUsed = True
End Sub

The script will add (_out) prefix to each PDF file. The prefix can be changed in Line2. Here is a script that will move all PDF files with (_out) prefix to a folder with (_out) prefix.

VBScript
Set fso = CreateObject("Scripting.FileSystemObject")
Dim sFileSuffix: sFileSuffix = "_out" 'Appends at the end of output file name
Dim sInFolder: sInFolder = ""
Dim sOutFolder: sOutFolder = ""

if WScript.Arguments.Count <> 1 then
    MsgBox "Please drop folder to move OCR PDF files to " & sFileSuffix & " folder"

Else
    If WScript.Arguments.Count = 1 Then
      sFolder = WScript.Arguments(i)      
      If fso.FolderExists(sFolder) Then  
        sInFolder = sFolder
        sOutFolder = sFolder & sFileSuffix 
        ProcessFolder sFolder        
        MsgBox "Done"
      End If
    End If
End if

Sub ProcessFolder(sFolder)

  iPrefixLen = Len(sFileSuffix) + 4
  sSuffix = Replace(sFolder,sInFolder, "")
  sTargetFolder = sOutFolder & "" & sSuffix

  If fso.FolderExists(sTargetFolder) = False Then
    fso.CreateFolder sTargetFolder
  End If

  Set oFolder = fso.GetFolder(sFolder)
  For Each oFile in oFolder.Files
    
    If Right(oFile.Path, iPrefixLen) = sFileSuffix & ".pdf" Then
      sOutFile = Mid(oFile.Name, 1, Len(oFile.Name) - iPrefixLen) & ".pdf"
      fso.MoveFile oFile.Path, sTargetFolder & "\" & sOutFile
    End If
  Next
  
  For Each oSubfolder in oFolder.SubFolders
    ProcessFolder oSubfolder.Path
  Next
End Sub

I have been using this script for some time and decided to share it. I hope someone else will find this useful. If you want to merge all of these PDFs, you can use the PDF Merge application I created earlier.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)