I tried to extract specific information from the first page of a pdf and open the folder. However, when i run the code nothing happens.
Sub OpenFolder()
Dim MyFolder As String
MyFolder = "C:\Users\mixna\Documents\autoFeedback Digital Platform\Group Assignment"
ActiveWorkbook.FollowHyperlink MyFolder
End Sub
Sub for_each_file_in_folder()
Dim oFSO As Object, oFolder As Object, oFile As Object
Set oFSO = CreateObject("Scripting.FileSystemObject")
Set oFolder = oFSO.GetFolder("C:\Users\mixna\Documents\autoFeedback Digital Platform\Group Assignment")
For Each oFile In oFolder.Files
If InStr(oFile.Name, ".pdf") > 0 Then
Debug.Print oFile.Path
Call read_pdf(oFile.Path)
End If
Next oFile
End Sub
Function read_pdf(in_file_path As String)
Dim word_app As Object, word_doc As Object
Dim all_text As String, lines As Variant, line As Variant
Dim groupNumber As String
Dim i As Integer, startIndex As Integer, endIndex As Integer
Set word_app = CreateObject("Word.Application")
word_app.Visible = False
Set word_doc = word_app.Documents.Open(in_file_path, False, Format:=wdOpenFormatAuto, ConfirmConversions:=False)
all_text = word_doc.Content.Text
word_doc.Close False
word_app.Quit
Set word_doc = Nothing
Set word_app = Nothing
lines = Split(all_text, vbCrLf)
' Loop through all lines of text to extract the group number
For i = LBound(lines) To UBound(lines)
If InStr(lines(i), "IAR Group No.") > 0 Then
groupNumber = Trim(Split(Split(lines(i), "IAR Group No.")(1), "_")(0))
Exit For
End If
Next i
' Loop through all lines of text to identify the start and end of the table
For i = LBound(lines) To UBound(lines)
If InStr(lines(i), "First Name") > 0 Then
startIndex = i
ElseIf startIndex > 0 And Trim(lines(i)) = "" Then
endIndex = i
Exit For
End If
Next i
' Write extracted information to the Excel sheet
Dim ws As Worksheet
Set ws = ThisWorkbook.Sheets("Group List") ' Adjust the sheet name to match your workbook
Dim lastRow As Long
If startIndex > 0 And endIndex > 0 Then
For j = startIndex + 1 To endIndex - 1 ' Loop through the table lines
line = lines(j)
Dim entryParts As Variant
entryParts = Split(line, vbTab) ' Adjust the delimiter based on the actual text format
If UBound(entryParts) >= 2 Then ' Ensure there are at least three columns
lastRow = ws.Cells(ws.Rows.Count, "B").End(xlUp).Row + 1 ' Find the next empty row based on the 'First Name' column
ws.Cells(lastRow, "A").Value = groupNumber
ws.Cells(lastRow, "B").Value = entryParts(0) ' First Name
ws.Cells(lastRow, "C").Value = entryParts(1) ' Surname
ws.Cells(lastRow, "D").Value = entryParts(2) ' ID Number
' Marks column not handled here since there's no mark data in the PDF example provided
End If
Next j
End If
End Function
This is my entire to open the folder and extract the pdf. The folder opens and word opens no information is saved into my excel sheet. There are no errors being shown.