Só é possível se na máquina do cliente estiver instalado o Acrobat (não o Reader, e sim o Acrobat editor).
No executável dele existe uma API que permite a leitura e a obtenção do texto (imagens não vêm).
Segue o código:
Option Explicit

' --- Late-bound objects used to talk to Acrobat (hence "As Object") ---
Private myPDF As Object
Private myPDFPageCount As Object
Private myPDFPage As Object
Private myPageHilite As Object
Private pageSelect As Object

' --- Boolean results returned by the Acrobat calls ---
Private openResult As Boolean
Private closeResult As Boolean
Private hiliteResult As Boolean

' --- Extraction state shared between the procedures of this form ---
Private filelocation As String      ' path of the PDF being read
Private pdfData As String           ' text accumulated so far
Private pageCount As Integer        ' number of pages in the document
Private pagenumber As Integer       ' zero-based index of the current page
' Fills the file-name text box with a fixed sample PDF path.
Private Sub cmdGetFile_Click()
    ' NOTE(review): the path contains no backslashes - possibly lost when the
    ' snippet was posted. Confirm the intended location before relying on it.
    Const SAMPLE_PDF_PATH As String = "C:ZARQUIVOACROBAT.PDF"

    txtFileName.Text = SAMPLE_PDF_PATH
End Sub
' Clears the previous result, runs the extraction and shows the extracted
' text in txtPDFText, then notifies the user that the run has finished.
Private Sub cmdGetText_Click()
    ' Reset the output box, the accumulator and the progress label.
    txtPDFText.Text = ""
    pdfData = ""
    Label2.Caption = ""

    ' getBodyTextPDF appends the text of every page to pdfData.
    Call getBodyTextPDF

    txtPDFText.Text = pdfData

    MsgBox Prompt:="Terminado ; )", Buttons:=vbInformation, Title:="PDF"
End Sub
' Opens the PDF named in txtFileName, appends the text of every page to
' pdfData (through getPDFTextFromPage) and closes the document again.
'
' The document is opened exactly once: the same PDDoc object is used to read
' the page count and to extract the text. The result of Open is checked
' before any page is touched, and the document is closed after the loop so
' the file is not left open by Acrobat.
'
' Shows "Erro ao abrir" and returns without extracting if the file cannot be
' opened; shows "Erro ao fechar" if Acrobat reports a failure on Close.
Private Sub getBodyTextPDF()
    filelocation = txtFileName.Text

    Set myPDF = CreateObject("acroexch.pddoc")
    openResult = myPDF.Open(filelocation)
    If openResult = False Then
        Set myPDF = Nothing
        MsgBox "Erro ao abrir"
        Exit Sub
    End If

    pageCount = myPDF.GetNumPages

    ' Pages are addressed with a zero-based index.
    For pagenumber = 0 To pageCount - 1
        DoEvents    ' keep the form responsive while extracting
        getPDFTextFromPage pagenumber
        Label2.Caption = "obtendo texto : " & pagenumber + 1 & " of " & pageCount
    Next

    ' Release the document; the reference is dropped even when Close fails.
    closeResult = myPDF.Close
    Set myPDF = Nothing
    If closeResult = False Then
        MsgBox "Erro ao fechar"
    End If
End Sub
' Appends the text of one page of the currently open document (myPDF) to
' pdfData.
'
' pagenumber - zero-based index of the page to read.
'
' The page object and the hilite list are created here before they are used;
' without that, the calls on myPageHilite and myPDFPage fail because the
' variables are still Nothing. Pages that cannot be acquired, or for which
' Acrobat returns no text selection, are skipped silently.
Private Sub getPDFTextFromPage(pagenumber As Integer)
    Dim i As Integer

    ' Nothing to do when no document is open.
    If myPDF Is Nothing Then Exit Sub

    Set myPDFPage = myPDF.AcquirePage(pagenumber)
    If Not myPDFPage Is Nothing Then
        Set myPageHilite = CreateObject("acroexch.hilitelist")

        ' Select a range starting at offset 0 with length 9000.
        ' NOTE(review): text beyond that length on a single page would be
        ' left out - confirm 9000 is large enough for the documents in use.
        hiliteResult = myPageHilite.Add(0, 9000)

        Set pageSelect = myPDFPage.CreatePageHilite(myPageHilite)
        If Not pageSelect Is Nothing Then
            For i = 0 To pageSelect.GetNumText - 1
                DoEvents    ' keep the form responsive on large pages
                pdfData = pdfData & pageSelect.GetText(i)
            Next
        End If
    End If

    ' Drop the per-page objects so nothing is carried over to the next page.
    Set myPDFPage = Nothing
    Set myPageHilite = Nothing
    Set pageSelect = Nothing
End Sub
' Starts the form with an empty progress label.
Private Sub Form_Load()
    With Label2
        .Caption = ""
    End With
End Sub