Introduction
Been a BI consultant, I have had clients not following instructions about filename convention as well as tab naming convention. The instruction are usually simple, example would be name the file 'abc.xlsx', then save in the folder 'xxx', but the client would the file 'data.xlsx'. This prompted me to come up with a way to handle situations where the SSIS package would still able to pick up a file even when the proper naming convention has not been used.
Background
Importing Excel files into a database using the Import/Export wizard from SQL Server Management Studio can be tedious, especially when dealing with Excel files containing large amount of data.
This C# script in this tip illustrates a simple example about how to search for an Excel file in a directory, rename the first or active worksheet, and then import the renamed worksheet data in a database.
The steps below highlight what I did to put together this simple process:
- Open BIDS and create an Integration Services project – call it ‘
RenameWorksheetProject
’ - In the solution window, rename the package name to ‘
RenameWorksheet
’ - Grab a sequence container from the SSIS Tool box
- Grab a script task from the SSIS Tool box and drop it in the container
- Create the following variables:
DirectoryPath
: Folder where the file is located FileName
: File name New_SheetnName
: New sheet name Pattern
: Keyword to search for the file Run_Date
: System start date
Using the Code
Open the script task and add the following variables as read only:
DirectoryPath
: Folder where the file is located Pattern
: Key word to search for the file Run_Date
: System start date New_SheetnName
: New sheet name
Add the following variables as Read-Write:
Some key namespaces such as System Namespace have to be imported.
#region Namespaces
using System;
using System.IO;
using Microsoft.SqlServer;
using System.Data;
using System.Data.OleDb;
using Microsoft.SqlServer.Dts.Runtime;
using System.Windows.Forms;
using Excel=Microsoft.Office.Interop.Excel;
using Microsoft.Office.Interop.Excel;
using System.Web.Hosting;
using System.Text.RegularExpressions;
using System.Collections.Generic;
using System.Linq;
#endregion
namespace ST_00a77f8973254452b8eb3aa1ec680f86
{
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
publicpartialclassScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
public void Main()
{
String Pattern = (string)Dts.Variables["Pattern"].Value;
String New_SheetName = (string)Dts.Variables["New_SheetName"].Value;
String DirectoryPath = (string)Dts.Variables["DirectoryPath"].Value;
FileInfo[] FilesInfo = GetFiles(DirectoryPath,Pattern);
foreach (var File in FilesInfo)
{
String FilePath = File.FullName;
String Excel_Connection_String = "Provider=Microsoft.ACE.OLEDB.12.0;" +
"Data Source=" + FilePath +
@";Extended Properties=""Excel 12.0 XML;HDR=NO""";
OleDbConnection Excel_OLEDB = new OleDbConnection(Excel_Connection_String);
Excel_OLEDB.Open();
System.Data.DataTable Excel_WorkSheets = Excel_OLEDB.GetSchema("Tables");
Int16 i=0;
String[] All_Sheets = new string[100];
foreach (DataRow sheet in Excel_WorkSheets.Rows)
{
String TABLE_NAME = sheet["TABLE_NAME"].ToString();
OleDbCommand OleDb_Sql_Cmd = new System.Data.OleDb.OleDbCommand
("SELECT * FROM [" + sheet["TABLE_NAME"].ToString() + "]", Excel_OLEDB);
string This_Sheet = sheet["TABLE_NAME"].ToString();
All_Sheets[i] = This_Sheet.ToString();
i += 1;
}
Excel_OLEDB.Close();
String FileName_Run_Date = "_"+(string)Dts.Variables["Run_Date"].Value + ".xlsx";
String New_FilePath = FilePath.Replace(".xlsx", FileName_Run_Date);
Dts.Variables["FileName"].Value = New_FilePath;
object MissingValue = System.Reflection.Missing.Value;
Object Sheet;
Excel.Application Excel_App=new Excel.Application();
Excel.Workbook Excel_WorkBook;
Excel.Worksheet Excel_WorkSheet;
Excel_WorkBook = (Workbook)Excel_App.Workbooks.Open
(FilePath, MissingValue, MissingValue, MissingValue, MissingValue,
MissingValue, MissingValue, MissingValue, MissingValue, MissingValue,
MissingValue, MissingValue, MissingValue, MissingValue, MissingValue);
Sheet = Excel_WorkBook.Worksheets.get_Item(1);
Excel_WorkSheet = (Worksheet)Sheet;
Excel_WorkSheet.Name = New_SheetName;
Excel_WorkBook.SaveAs(New_FilePath);
Excel_App.Application.Workbooks.Close();
}
Dts.TaskResult = (int)ScriptResults.Success;
}
static private FileInfo[] GetFiles(string path, string pattern)
{
Regex regex = new Regex(pattern, RegexOptions.IgnoreCase);
var Files =Directory.GetFiles(path, "*.xlsx", SearchOption.TopDirectoryOnly)
.Select(x => new FileInfo(x))
.Where(x =>
{
return regex.IsMatch(x.Name);
}
);
return Files.ToArray();
}
#region ScriptResults declaration
enum ScriptResults
{
Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
};
#endregion
}
}
Points of Interest
Note that this script can be used for csv or text files as well.