Introduction
This article shows how to get DataTable
out of an Excel file. It has two functions:
GetDataTableFromExcel()
for Excel 97-2003 GetDataTableFromExcel2007()
for Excel 2007 and up
Background
GetDataTableFromExcel2007()
function uses ICSharpCode.SharpZipLib.dll. Please download it from http://icsharpcode.github.io/SharpZipLib.
Using the Code
GetDataTableFromExcel2007()
uses SharpZipLib
library to read Excel (xlsx) file. Second parameter (iColHeaderRow
) tells the function where the column header row is located. If column headers are located on the first row, pass 1. If the Excel file has no rows, pass 0.
Function GetDataTableFromExcel2007(ByVal sFilePath As String, _
ByVal iColHeaderRow As Integer) As DataTable
Dim oDataTable As New DataTable
Dim oFileStream As System.IO.FileStream = System.IO.File.OpenRead(sFilePath)
Dim oZipFile As New ICSharpCode.SharpZipLib.Zip.ZipFile(oFileStream)
Dim oFileSheet As System.IO.Stream = Nothing
Dim oFileSharedStrings As System.IO.Stream = Nothing
For Each oZipEntry As ICSharpCode.SharpZipLib.Zip.ZipEntry In oZipFile
If oZipEntry.Name = "xl/worksheets/sheet1.xml" Then
oFileSheet = oZipFile.GetInputStream(oZipEntry)
End If
If oZipEntry.Name = "xl/sharedStrings.xml" Then
oFileSharedStrings = oZipFile.GetInputStream(oZipEntry)
End If
Next
If oFileSheet Is Nothing Then
Throw New Exception("xl/worksheets/sheet1.xml is missing")
End If
If oFileSharedStrings Is Nothing Then
Throw New Exception("xl/sharedStrings.xml is missing")
End If
Dim oList As New System.Collections.Generic.List(Of String)()
Dim oXmlReader As XmlReader = XmlReader.Create(oFileSharedStrings)
oXmlReader.MoveToContent()
While oXmlReader.Read()
If oXmlReader.NodeType = XmlNodeType.Element AndAlso oXmlReader.Name = "t" Then
oList.Add(oXmlReader.ReadElementString())
End If
End While
oFileSharedStrings.Close()
Dim oDoc As New XmlDocument()
oDoc.Load(oFileSheet)
Dim nsMgr As XmlNamespaceManager = New XmlNamespaceManager(oDoc.NameTable)
nsMgr.AddNamespace("ns", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")
Dim oRows As System.Xml.XmlNodeList = _
oDoc.SelectNodes("//ns:worksheet/ns:sheetData/ns:row", nsMgr)
Dim oDataRow As DataRow = Nothing
For iRow1 As Integer = 0 To oRows.Count - 1
Dim oRow As XmlNode = oRows(iRow1)
Dim iRow As Integer = iRow1 + 1
Dim bRowBlank As Boolean = True
If iRow > iColHeaderRow Then
oDataRow = oDataTable.NewRow()
End If
Dim oCells As XmlNodeList = oRow.SelectNodes("ns:c", nsMgr)
For iCol As Integer = 0 To oCells.Count - 1
Dim oCell As XmlNode = oCells(iCol)
Dim oValue As XmlNode = oCell.SelectSingleNode("ns:v", nsMgr)
Dim sValue As String = ""
If Not oValue Is Nothing Then
sValue = oValue.InnerText
Dim sType As String = ""
If Not oCell.Attributes("t") Is Nothing Then
sType = oCell.Attributes("t").InnerText
End If
If sType = "s" Then
Dim iValue As Integer = Integer.Parse_
(sValue, System.Globalization.CultureInfo.InvariantCulture)
sValue = oList(iValue)
End If
End If
If bRowBlank AndAlso sValue <> "" Then
bRowBlank = False
End If
If iColHeaderRow = 0 AndAlso iRow = 1 Then
Dim oDataColumn As New DataColumn("Column" & iCol, _
System.Type.[GetType]("System.String"))
oDataTable.Columns.Add(oDataColumn)
End If
If iRow = iColHeaderRow Then
Dim oDataColumn As New DataColumn_
(sValue, System.Type.[GetType]("System.String"))
oDataTable.Columns.Add(oDataColumn)
ElseIf iRow > iColHeaderRow Then
oDataRow(iCol) = sValue
End If
Next
If iRow > iColHeaderRow Then
If bRowBlank Then
Exit For
Else
oDataTable.Rows.Add(oDataRow)
End If
End If
Next
If oZipFile IsNot Nothing Then
oZipFile.IsStreamOwner = True
oZipFile.Close()
End If
Return oDataTable
End Function
GetDataTableFromExcel()
uses JET OLEDB provider to read Excel (xls) file.
Private Function GetDataTableFromExcel(ByVal sFilePath As String) As System.Data.DataTable
Dim sConnectionString As String = "Provider=Microsoft.Jet.OLEDB.4.0; _
Data Source=" + sFilePath + ";" & _
"Extended Properties=""Excel 8.0;"""
Dim cn As New OleDb.OleDbConnection(sConnectionString)
cn.Open()
Dim oTables As DataTable = cn.GetOleDbSchemaTable_
(System.Data.OleDb.OleDbSchemaGuid.Tables, Nothing)
Dim i As Integer
For i = 0 To oTables.Rows.Count - 1
Dim sSheetName As String = oTables.Rows(i)("TABLE_NAME").ToString()
If sSheetName.IndexOf("$") <> -1 Then
Dim oDataSet As New DataSet
Dim oAdapter As New System.Data.OleDb.OleDbDataAdapter_
("SELECT * FROM [" + sSheetName + "]", cn)
oAdapter.TableMappings.Add("Table", sSheetName)
oAdapter.Fill(oDataSet)
Dim oDataTable As DataTable = oDataSet.Tables(0)
cn.Close()
If oDataTable.Rows.Count > 0 And oDataTable.Columns.Count > 0 Then
Return oDataTable
End If
End If
Next
Return Nothing
End Function
JET works only in 32-bit mode so you need set your IIS application pool or the EXE into 32-bit mode.
Points of Interest
You might also try to create a function that will return Dataset
with tables for every tab.
History
- 28th July, 2015: Initial version