Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles
(untagged)

HTML to Database converter Application

0.00/5 (No votes)
2 Sep 2004 1  
HTML to Database converter Application in VB.NET

Introduction

This is a small application that can be used to convert Html Pages to a Database format

Details

Generally the data we get from the Internet is not in a database storable format. This application comes with with source code and a demo file with database, and will demonstrate how you can extract your fields from a html page and submit it into an access database. It can be run in a loop to convert the database into access. It will not hang your computer and can complete as many files as required but the format of the html page should be same as they should have the same fields, table etc.

Partial source code

 Public con As OleDb.OleDbConnection
    Private Sub enableParseButton()
        btnParse.Enabled = (txtDocumentName.Text.Length > 0)
    End Sub
    Private Function ExtractEmailAddressesFromString(
     ByVal source As String) As String()
        On Error Resume Next
        Dim mc As MatchCollection
        Dim i As Integer
        mc = Regex.Matches(source, 
         "([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})")
        Dim results(mc.Count - 1) As String
        For i = 0 To results.Length - 1
            results(i) = mc(i).Value
        Next
        Return results
    End Function
    Private Function ExtractheadFromString(ByVal a As String) As String
        On Error Resume Next
        Dim i, mypos, mypos1, mypos2 As Integer
        Dim b As String
        mypos = InStr(a, "Agency head", CompareMethod.Text)
        mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
        mypos1 = InStr(mypos + 1, a, vbCr, CompareMethod.Text)
        b = Mid(a, mypos + 2, (mypos1 - mypos - 2))
        

        Return b
    End Function
    Private Function ExtractAgencyFromString(ByVal a As String) As String
        On Error Resume Next
        Dim i, mypos, mypos1, mypos2 As Integer
        Dim b As String
        mypos = InStr(a, "Agency listings -", CompareMethod.Text)
        mypos = InStr(mypos, a, "-", CompareMethod.Text)
        mypos1 = InStr(mypos, a, vbCr, CompareMethod.Text)
        b = Mid(a, mypos + 1, (mypos1 - mypos - 1))
       
        Return b
    End Function
    Private Function ExtractheadoffFromString(ByVal a As String) As String
        On Error Resume Next
        Dim i, mypos, mypos1, mypos2 As Integer
        Dim b As String
        mypos = InStr(a, "Head office add", CompareMethod.Text)
        mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
        mypos1 = InStr(mypos + 5, a, vbCr, CompareMethod.Text)
        b = Mid(a, mypos + 2, (mypos1 - mypos - 2))
        Return b
    End Function
    Private Function ExtracttfaxtelFromString(ByVal a As String) As String()
        Dim i, mypos, mypos1, mypos2, mypos3 As Integer
        Dim b(5) As String
        On Error Resume Next
        mypos = InStr(a, "Tel/Fax/email", CompareMethod.Text)
        mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
        mypos1 = InStr(mypos, a, ";", CompareMethod.Text)
        b(0) = Mid(a, mypos + 2, (mypos1 - mypos - 1)) 'tel

        'b(0) = "jkjh"

        mypos = InStr(a, "Fax:", CompareMethod.Text)
        mypos = InStr(mypos, a, " ", CompareMethod.Text)
        mypos1 = InStr(mypos, a, ";", CompareMethod.Text)
        b(1) = Mid(a, mypos, (mypos1 - mypos - 1))  'fax

        mypos2 = InStr(mypos1 + 1, a, ";", CompareMethod.Text)
        b(2) = Mid(a, mypos1 + 1, (mypos2 - mypos1 - 1)) 'email

        mypos3 = InStr(mypos2 + 1, a, vbCr, CompareMethod.Text)
        b(3) = Mid(a, mypos2 + 1, (mypos3 - mypos2 - 1))
        mypos1 = InStr(mypos1, a, vbCr, CompareMethod.Text) 'url

        Return b
    End Function

    Private Sub Form1_Load(ByVal sender As System.Object, 
     ByVal e As System.EventArgs) Handles MyBase.Load
        txtDocumentName.Text = ""
        enableParseButton()
    End Sub
    Private Sub txtDocumentName_TextChanged(ByVal sender As System.Object, 
     ByVal e As System.EventArgs) Handles txtDocumentName.TextChanged
        enableParseButton()
    End Sub
    Private Sub btnParse_Click(ByVal sender As System.Object,
     ByVal e As System.EventArgs) Handles btnParse.Click
        ' Dim app As Word.Application

        ' Dim doc As Word.Document

        Dim app As Object
        Dim doc As Object
        Dim docFileName As String
        Dim docPath As String
        Dim contents As String
        Cursor.Current = Cursors.WaitCursor
        Try
            ' init UI controls

            Lblfindcount.Text = ""
            Txtresults.Text = ""
            txtDocContents.Text = ""
            ' validate file name

            docFileName = txtDocumentName.Text
            If docFileName.Length = 0 Then
                MsgBox("Please enter a file name")
                txtDocumentName.Focus()
                Return
            End If
            ' if no path use APP_BASE

            docPath = Path.GetDirectoryName(docFileName)
            If docPath.Length = 0 Then
                docFileName = Application.StartupPath & "\" & docFileName
            End If
            ' ensure file exists
            If Not File.Exists(docFileName) Then
                MsgBox("File does not exist")
                txtDocumentName.SelectAll()
                txtDocumentName.Focus()
                Return
            End If
            ' extract contents of file
            contents = ""
            If Path.GetExtension(docFileName).ToLower = ".txt" Then
                Dim fs As StreamReader
                Try
                    fs = New StreamReader(docFileName)
                    contents = fs.ReadToEnd
                Catch ex As Exception
                    MsgBox("Unable to read from text input file")
                    contents = ""
                Finally
                    If Not fs Is Nothing Then fs.Close()
                End Try
            Else
                Try
                    Try
                        'app = New Word.Application
                        app = CreateObject("Word.Application")
                    Catch ex As Exception
                        MsgBox("Unable to start Word")
                        Throw ex
                    End Try
                    Try
                        doc = app.Documents.Open(docFileName)
                    Catch ex As Exception
                        MsgBox("Unable to load document in Word")
                        Throw ex
                    End Try
                    contents = doc.Content.Text
                Catch ex As Exception
                    contents = ""
                Finally
                    If Not app Is Nothing Then app.Quit()
                End Try
            End If
            If contents.Length = 0 Then Return
            ' search for email addresses
            Dim emails, aglist As String()
            Dim email As String
            Dim results As New StringBuilder()
            Dim results1 As New StringBuilder()
            emails = ExtractEmailAddressesFromString(contents)

            For Each email In emails
                results.Append(email)
            Next
            
            Dim i As Integer
            ' display results
            Lblfindcount.Text = String.Format("{0} match(es) found.", emails.Length)
            Txtresults.Text = results.ToString
            TextBox8.Text = ExtractAgencyFromString(contents)
            TextBox9.Text = ExtractheadFromString(contents)
            TextBox6.Text = ExtractheadoffFromString(contents)
            TextBox5.Text = (ExtracttfaxtelFromString(contents))(0)
            TextBox2.Text = (ExtracttfaxtelFromString(contents))(1)
            TextBox3.Text = (ExtracttfaxtelFromString(contents))(2)
            'TextBox1.Text = (ExtracttfaxtelFromString(contents))(3)
            'TextBox1.Text = results1.ToString
            txtDocContents.Text = contents
        Finally
            Cursor.Current = Cursors.Default
        End Try
    End Sub
    Private Sub btnBrowse_Click(ByVal sender As System.Object,
     ByVal e As System.EventArgs) Handles btnBrowse.Click
        Dim ofd As OpenFileDialog
        Try
            ofd = New OpenFileDialog()
            ofd.CheckFileExists = True
            ofd.CheckPathExists = True
            'ofd.Filter = "*.*|*.doc|Rich Text Documents 
            '(*.rtf)|*.rtf|Text Documents (*.txt)|*.txt"

            ofd.Title = "Select Document"
            If ofd.ShowDialog = DialogResult.OK Then
                txtDocumentName.Text = ofd.FileName
            End If
        Finally
            If Not ofd Is Nothing Then ofd.Dispose()
        End Try
    End Sub
    Public Function CREATE_CON_MS(ByVal DATA_SOURCE As String) As Boolean
        con = New OleDbConnection("Provider=Microsoft.jet.OLEDB.4.0;Data Source=" 
         + DATA_SOURCE + ";Persist Security Info=False")
        con.Open()
        If con.State = ConnectionState.Open Then
            
            Dim ss As String = "pixel.gif"

            Dim SqlIns As String = "Insert into FashionDesigner(FD_Name,FD_ID,
             FD_Address,FD_Phone,FD_Fax,FD_Email,FD_URL,FD_Logo,FD_ContactPerson) 
             values ('" & TextBox8.Text & "','" & TextBox4.Text & "','" 
             & TextBox6.Text & "','" & TextBox5.Text & "','" & 
             TextBox2.Text & "','" & Txtresults.Text & "','" 
             & TextBox1.Text & "','" & ss & "','" & TextBox9.Text & "')"
        Dim MyCmd1 As OleDb.OleDbCommand = New OleDb.OleDbCommand(SqlIns, con)
        Dim MyDataR1 As OleDb.OleDbDataReader
        MyDataR1 = MyCmd1.ExecuteReader
            con.Close()
            con.Dispose()
            Return True
        Else
            Return False
        End If

    End Function



    Private Sub btnExit_Click(ByVal sender As System.Object, 
    ByVal e As System.EventArgs) Handles btnExit.Click
       
        Me.Close()
    End Sub
    Private Sub Button1_Click(ByVal sender As System.Object, 
     ByVal e As System.EventArgs) Handles Button1.Click
        CREATE_CON_MS("c:\DisplayDatsap.mdb")
    End Sub
    Private Sub OleDbDataAdapter1_RowUpdated(ByVal sender As System.Object,
     ByVal e As System.Data.OleDb.OleDbRowUpdatedEventArgs)
    End Sub
    Private Sub TabPage3_Click(ByVal sender As System.Object,
     ByVal e As System.EventArgs) Handles TabPage3.Click
    End Sub
    Private Sub Label12_Click(ByVal sender As System.Object,
     ByVal e As System.EventArgs) Handles Label12.Click
    End Sub
End Class

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here