Introduction
This is a small application that can be used to convert Html Pages to a Database
format
Details
Generally the data we get from the Internet is not in a database storable format.
This application comes with with source code and a demo file with database, and will demonstrate how
you can extract your fields from a html page and submit it into an access database.
It can be run in a loop to convert the database into access. It will not hang your computer and can complete as many files
as required but the format of the html page should be same as they should have
the same fields, table etc.
Partial source code
Public con As OleDb.OleDbConnection
Private Sub enableParseButton()
btnParse.Enabled = (txtDocumentName.Text.Length > 0)
End Sub
Private Function ExtractEmailAddressesFromString(
ByVal source As String) As String()
On Error Resume Next
Dim mc As MatchCollection
Dim i As Integer
mc = Regex.Matches(source,
"([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})")
Dim results(mc.Count - 1) As String
For i = 0 To results.Length - 1
results(i) = mc(i).Value
Next
Return results
End Function
Private Function ExtractheadFromString(ByVal a As String) As String
On Error Resume Next
Dim i, mypos, mypos1, mypos2 As Integer
Dim b As String
mypos = InStr(a, "Agency head", CompareMethod.Text)
mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
mypos1 = InStr(mypos + 1, a, vbCr, CompareMethod.Text)
b = Mid(a, mypos + 2, (mypos1 - mypos - 2))
Return b
End Function
Private Function ExtractAgencyFromString(ByVal a As String) As String
On Error Resume Next
Dim i, mypos, mypos1, mypos2 As Integer
Dim b As String
mypos = InStr(a, "Agency listings -", CompareMethod.Text)
mypos = InStr(mypos, a, "-", CompareMethod.Text)
mypos1 = InStr(mypos, a, vbCr, CompareMethod.Text)
b = Mid(a, mypos + 1, (mypos1 - mypos - 1))
Return b
End Function
Private Function ExtractheadoffFromString(ByVal a As String) As String
On Error Resume Next
Dim i, mypos, mypos1, mypos2 As Integer
Dim b As String
mypos = InStr(a, "Head office add", CompareMethod.Text)
mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
mypos1 = InStr(mypos + 5, a, vbCr, CompareMethod.Text)
b = Mid(a, mypos + 2, (mypos1 - mypos - 2))
Return b
End Function
Private Function ExtracttfaxtelFromString(ByVal a As String) As String()
Dim i, mypos, mypos1, mypos2, mypos3 As Integer
Dim b(5) As String
On Error Resume Next
mypos = InStr(a, "Tel/Fax/email", CompareMethod.Text)
mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
mypos1 = InStr(mypos, a, ";", CompareMethod.Text)
b(0) = Mid(a, mypos + 2, (mypos1 - mypos - 1))
mypos = InStr(a, "Fax:", CompareMethod.Text)
mypos = InStr(mypos, a, " ", CompareMethod.Text)
mypos1 = InStr(mypos, a, ";", CompareMethod.Text)
b(1) = Mid(a, mypos, (mypos1 - mypos - 1))
mypos2 = InStr(mypos1 + 1, a, ";", CompareMethod.Text)
b(2) = Mid(a, mypos1 + 1, (mypos2 - mypos1 - 1))
mypos3 = InStr(mypos2 + 1, a, vbCr, CompareMethod.Text)
b(3) = Mid(a, mypos2 + 1, (mypos3 - mypos2 - 1))
mypos1 = InStr(mypos1, a, vbCr, CompareMethod.Text)
Return b
End Function
Private Sub Form1_Load(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles MyBase.Load
txtDocumentName.Text = ""
enableParseButton()
End Sub
Private Sub txtDocumentName_TextChanged(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles txtDocumentName.TextChanged
enableParseButton()
End Sub
Private Sub btnParse_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles btnParse.Click
Dim app As Object
Dim doc As Object
Dim docFileName As String
Dim docPath As String
Dim contents As String
Cursor.Current = Cursors.WaitCursor
Try
Lblfindcount.Text = ""
Txtresults.Text = ""
txtDocContents.Text = ""
docFileName = txtDocumentName.Text
If docFileName.Length = 0 Then
MsgBox("Please enter a file name")
txtDocumentName.Focus()
Return
End If
docPath = Path.GetDirectoryName(docFileName)
If docPath.Length = 0 Then
docFileName = Application.StartupPath & "\" & docFileName
End If
' ensure file exists
If Not File.Exists(docFileName) Then
MsgBox("File does not exist")
txtDocumentName.SelectAll()
txtDocumentName.Focus()
Return
End If
' extract contents of file
contents = ""
If Path.GetExtension(docFileName).ToLower = ".txt" Then
Dim fs As StreamReader
Try
fs = New StreamReader(docFileName)
contents = fs.ReadToEnd
Catch ex As Exception
MsgBox("Unable to read from text input file")
contents = ""
Finally
If Not fs Is Nothing Then fs.Close()
End Try
Else
Try
Try
'app = New Word.Application
app = CreateObject("Word.Application")
Catch ex As Exception
MsgBox("Unable to start Word")
Throw ex
End Try
Try
doc = app.Documents.Open(docFileName)
Catch ex As Exception
MsgBox("Unable to load document in Word")
Throw ex
End Try
contents = doc.Content.Text
Catch ex As Exception
contents = ""
Finally
If Not app Is Nothing Then app.Quit()
End Try
End If
If contents.Length = 0 Then Return
' search for email addresses
Dim emails, aglist As String()
Dim email As String
Dim results As New StringBuilder()
Dim results1 As New StringBuilder()
emails = ExtractEmailAddressesFromString(contents)
For Each email In emails
results.Append(email)
Next
Dim i As Integer
' display results
Lblfindcount.Text = String.Format("{0} match(es) found.", emails.Length)
Txtresults.Text = results.ToString
TextBox8.Text = ExtractAgencyFromString(contents)
TextBox9.Text = ExtractheadFromString(contents)
TextBox6.Text = ExtractheadoffFromString(contents)
TextBox5.Text = (ExtracttfaxtelFromString(contents))(0)
TextBox2.Text = (ExtracttfaxtelFromString(contents))(1)
TextBox3.Text = (ExtracttfaxtelFromString(contents))(2)
'TextBox1.Text = (ExtracttfaxtelFromString(contents))(3)
'TextBox1.Text = results1.ToString
txtDocContents.Text = contents
Finally
Cursor.Current = Cursors.Default
End Try
End Sub
Private Sub btnBrowse_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles btnBrowse.Click
Dim ofd As OpenFileDialog
Try
ofd = New OpenFileDialog()
ofd.CheckFileExists = True
ofd.CheckPathExists = True
'ofd.Filter = "*.*|*.doc|Rich Text Documents
ofd.Title = "Select Document"
If ofd.ShowDialog = DialogResult.OK Then
txtDocumentName.Text = ofd.FileName
End If
Finally
If Not ofd Is Nothing Then ofd.Dispose()
End Try
End Sub
Public Function CREATE_CON_MS(ByVal DATA_SOURCE As String) As Boolean
con = New OleDbConnection("Provider=Microsoft.jet.OLEDB.4.0;Data Source="
+ DATA_SOURCE + ";Persist Security Info=False")
con.Open()
If con.State = ConnectionState.Open Then
Dim ss As String = "pixel.gif"
Dim SqlIns As String = "Insert into FashionDesigner(FD_Name,FD_ID,
FD_Address,FD_Phone,FD_Fax,FD_Email,FD_URL,FD_Logo,FD_ContactPerson)
values ('" & TextBox8.Text & "','" & TextBox4.Text & "','"
& TextBox6.Text & "','" & TextBox5.Text & "','" &
TextBox2.Text & "','" & Txtresults.Text & "','"
& TextBox1.Text & "','" & ss & "','" & TextBox9.Text & "')"
Dim MyCmd1 As OleDb.OleDbCommand = New OleDb.OleDbCommand(SqlIns, con)
Dim MyDataR1 As OleDb.OleDbDataReader
MyDataR1 = MyCmd1.ExecuteReader
con.Close()
con.Dispose()
Return True
Else
Return False
End If
End Function
Private Sub btnExit_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles btnExit.Click
Me.Close()
End Sub
Private Sub Button1_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles Button1.Click
CREATE_CON_MS("c:\DisplayDatsap.mdb")
End Sub
Private Sub OleDbDataAdapter1_RowUpdated(ByVal sender As System.Object,
ByVal e As System.Data.OleDb.OleDbRowUpdatedEventArgs)
End Sub
Private Sub TabPage3_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles TabPage3.Click
End Sub
Private Sub Label12_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles Label12.Click
End Sub
End Class