Introduction
I wrote this application to process around 3500 Zip files, each of which contained some plain text files. Each plain text file holds an error description. My task was to parse the zipped plain text files and save the extracted data into a local database for later analysis. I used .NET to create this application.
Background
Here is the error info. Errors are evil!
Using the code
I created a Windows Forms application and attached a local MS SQL Server database file. (You can use MS SQL Express to create a database and a table to keep the records.) Generate the database context and you should get this:
Implement the possible interfaces and create the parsing class:
/// <summary>
/// Contract for a parser that turns one file into a <c>DAO.Uhee</c> record.
/// </summary>
public interface IParcer
{
/// <summary>
/// Parses the given file into a <c>DAO.Uhee</c> record.
/// </summary>
/// <param name="file">
/// Identifies the file to parse. NOTE(review): the UHEEParser implementation
/// treats this as a file-system path and returns null when no such file
/// exists - confirm whether other implementations must do the same.
/// </param>
/// <returns>The parsed record.</returns>
DAO.Uhee Parse(string file);
}
... and this one:
/// <summary>
/// Kind of value a caller expects a parsed line to hold; passed as the
/// <c>dataType</c> argument of <c>UHEEParser.GetValue</c>.
/// </summary>
public enum ExpectedDataType
{
/// <summary>The line is expected to hold a date/time value.</summary>
DateTime,
/// <summary>The line is expected to hold free text.</summary>
String,
/// <summary>The line is expected to hold an integer value.</summary>
Integer
}
/// <summary>
/// Parses one plain-text error report into a <c>DAO.Uhee</c> record.
/// Each field is read from a fixed line number and is expected to look like
/// <c>label:value</c>; the value is everything after the first colon.
/// </summary>
public class UHEEParser : IParcer
{
    // Parse reads line indices up to 16, so a report needs at least 17 lines.
    const int RequiredLineCount = 17;

    /// <summary>
    /// Reads the report at <paramref name="file"/> and maps its lines to a new record.
    /// </summary>
    /// <param name="file">Path of the extracted text file.</param>
    /// <returns>The populated record, or <c>null</c> when the file does not exist.</returns>
    /// <exception cref="FormatException">
    /// The file has fewer lines than the layout requires, or the date line
    /// cannot be parsed by <see cref="DateTime.Parse(string)"/>.
    /// </exception>
    public DAO.Uhee Parse(string file)
    {
        if (!File.Exists(file))
        {
            return null;
        }

        // ReadAllLines opens, reads and closes the file, so no reader is left
        // undisposed and an empty file yields zero lines (not a null entry).
        string[] lines = File.ReadAllLines(file);
        if (lines.Length < RequiredLineCount)
        {
            throw new FormatException(
                "File '" + file + "' has " + lines.Length.ToString() +
                " line(s); at least " + RequiredLineCount.ToString() + " are required.");
        }

        DAO.Uhee uhee = new DAO.Uhee();
        uhee.ID = Guid.NewGuid();
        uhee.User = GetValue(lines[2], ExpectedDataType.String);
        uhee.UserDevice = GetValue(lines[3], ExpectedDataType.String);
        // NOTE(review): parsed with the current culture, as before - confirm
        // the reports are written in the same culture the tool runs under.
        uhee.ErrorDateTime =
            DateTime.Parse(GetValue(lines[4], ExpectedDataType.DateTime));
        uhee.AppName = GetValue(lines[7], ExpectedDataType.String);
        uhee.Version = GetValue(lines[8], ExpectedDataType.String);
        uhee.Host = GetValue(lines[10], ExpectedDataType.String);
        uhee.Port = GetValue(lines[11], ExpectedDataType.String);
        uhee.WebApp = GetValue(lines[12], ExpectedDataType.String);
        uhee.CurrentForm = GetValue(lines[14], ExpectedDataType.String);
        uhee.Message = GetValue(lines[15], ExpectedDataType.String);
        uhee.StackTrace = GetValue(lines[16], ExpectedDataType.String);
        uhee.ParsingTime = DateTime.Now;
        return uhee;
    }

    /// <summary>
    /// Returns the value part of a <c>label:value</c> line.
    /// </summary>
    /// <param name="line">The raw line.</param>
    /// <param name="dataType">
    /// Kind of value the caller expects. Every kind is extracted the same way
    /// (text after the first colon); the parameter is kept so existing calls
    /// continue to compile.
    /// </param>
    /// <returns>
    /// Everything after the first ':' (not trimmed), or an empty string when
    /// the line is null or contains no colon.
    /// </returns>
    string GetValue(string line, ExpectedDataType dataType)
    {
        if (line == null)
        {
            return string.Empty;
        }

        // Only the first colon separates label from value. Splitting on every
        // colon truncated values that contain one themselves (times, URLs,
        // Windows paths in stack traces) and the old re-merge of the time
        // repeated the minutes in place of the seconds.
        int separatorIndex = line.IndexOf(':');
        if (separatorIndex < 0)
        {
            return string.Empty;
        }

        return line.Substring(separatorIndex + 1);
    }
}
Now it is time to download DotNetZip dll (zip and unzip in C#, VB, and any .NET language: http://dotnetzip.codeplex.com/) and add a reference to it in your project.
The last thing is to put some code under the main form class:
Here is the code:
/// <summary>
/// Main window: lets the user pick a folder of ZIP archives, extracts the
/// matching text files on a background worker, parses them into
/// <c>DAO.Uhee</c> records, stores them in the database and shows them in a grid.
/// </summary>
public partial class FormMain : Form
{
    #region Properties
    // Folder chosen by the user that contains the ZIP archives.
    string zipFolder { set; get; }
    const string searchMask = "*.zip";
    // Scratch folder the archives are extracted into; recreated on every run.
    const string unpackDirectory = @"C:\Windows\Temp\UnpackDirectory";
    // Lower-cased filter values captured from the text boxes when a run starts.
    string fileNameContainsCharacters { set; get; }
    string fileExtention { set; get; }
    // Paths of the extracted files waiting to be parsed (worker thread only).
    List<string> files = new List<string>();
    // Parsed records waiting to be written to the database (worker thread only).
    List<DAO.Uhee> UHEEs = new List<DAO.Uhee>();
    string connectionString { set; get; }
    BindingSource bindingSource1 = new BindingSource();
    #endregion

    #region Init the app
    /// <summary>
    /// Builds the form, reads the connection string from the application
    /// configuration and wires up the background worker.
    /// </summary>
    public FormMain()
    {
        InitializeComponent();
        connectionString = ConfigurationManager.ConnectionStrings[
            "UHEEParser.Properties.Settings.UHEEConnectionString"].ConnectionString;
        // Cancel only makes sense while a run is in progress.
        btCancel.Enabled = false;
        bw.WorkerReportsProgress = true;
        bw.WorkerSupportsCancellation = true;
        bw.DoWork += new DoWorkEventHandler(bw_DoWork);
        bw.ProgressChanged += new ProgressChangedEventHandler(bw_ProgressChanged);
        bw.RunWorkerCompleted +=
            new RunWorkerCompletedEventHandler(bw_RunWorkerCompleted);
    }
    #endregion

    #region Form Events
    /// <summary>Clears the log box.</summary>
    private void btCleanLog_Click(object sender, EventArgs e)
    {
        rtbLog.Clear();
    }

    /// <summary>
    /// Lets the user choose the ZIP folder. Dismissing the dialog leaves the
    /// current selection (and the Start button state) untouched.
    /// </summary>
    private void btnZIPFolder_Click(object sender, EventArgs e)
    {
        if (fbd.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        zipFolder = fbd.SelectedPath;
        tbZipFolder.Text = zipFolder;
        btStart.Enabled = true;
    }

    /// <summary>Requests cancellation of the running job.</summary>
    private void btCancel_Click(object sender, EventArgs e)
    {
        CancelParsing();
    }

    /// <summary>Asks the background worker to stop after the current archive.</summary>
    void CancelParsing()
    {
        if (bw.WorkerSupportsCancellation)
        {
            bw.CancelAsync();
        }
    }

    /// <summary>
    /// Captures the filter settings and starts the background job, unless one
    /// is already running or no valid folder has been selected.
    /// </summary>
    private void btStart_Click(object sender, EventArgs e)
    {
        if (bw.IsBusy)
        {
            // The worker reads the filter fields; do not change them mid-run.
            return;
        }

        if (string.IsNullOrEmpty(zipFolder) || !Directory.Exists(zipFolder))
        {
            WriteLog("Select an existing ZIP folder first.");
            return;
        }

        fileNameContainsCharacters = tbFileNameContains.Text.Trim().ToLower();
        fileExtention = tbFileExtention.Text.Trim().ToLower();
        // Without this the Cancel button stays disabled for the whole run.
        btCancel.Enabled = true;
        bw.RunWorkerAsync();
    }
    #endregion

    #region Background Worker Job
    /// <summary>
    /// Worker body: finds the archives, extracts matching entries, then parses
    /// and stores them. A failing archive is logged and skipped. When the run
    /// is cancelled nothing is parsed or written to the database.
    /// </summary>
    private void bw_DoWork(object sender, DoWorkEventArgs e)
    {
        BackgroundWorker worker = (BackgroundWorker)sender;
        WriteLog("--- Start parsing ---");
        WriteLog("Get ZIP files...");
        string[] fileEntries = Directory.GetFiles(zipFolder, searchMask);
        WriteLog("Get ZIP files... Found: " + fileEntries.Length.ToString());
        WriteLog("Get ZIP files... DONE");

        PrepareUnpackDirectory();
        // Start from a clean slate in case a previous run ended abnormally.
        files.Clear();
        UHEEs.Clear();

        SetupProgressBar(fileEntries.Length);
        int processed = 0;
        foreach (string fileName in fileEntries)
        {
            if (worker.CancellationPending)
            {
                WriteLog("Processing file... CANCELED OPERATION: " + fileName);
                e.Cancel = true;
                break;
            }

            WriteLog("Processing file... " + fileName);
            try
            {
                ExtractFiles(fileName);
            }
            catch (Exception ex)
            {
                // One unreadable archive must not abort the whole batch.
                WriteLog("Processing file... ERROR: " + fileName + " " + ex.Message);
            }

            processed++;
            // Report a real percentage (0-100) of archives handled so far.
            worker.ReportProgress(processed * 100 / fileEntries.Length);
        }

        if (e.Cancel)
        {
            files.Clear();
            UHEEs.Clear();
        }
        else
        {
            ParseFiles();
            TransferData();
        }

        DeleteUnpackDirectory();
        WriteLog("--- End parsing ---");
    }

    /// <summary>
    /// Ensures an empty unpack directory exists. Failure to replace an
    /// existing directory is logged and the run continues.
    /// </summary>
    private void PrepareUnpackDirectory()
    {
        if (!Directory.Exists(unpackDirectory))
        {
            Directory.CreateDirectory(unpackDirectory);
            WriteLog("Creating temp directory... DONE: " + unpackDirectory);
            return;
        }

        try
        {
            Directory.Delete(unpackDirectory, true);
            WriteLog("Deleting temp directory... DONE: " + unpackDirectory);
            Directory.CreateDirectory(unpackDirectory);
            WriteLog("Creating temp directory... DONE: " + unpackDirectory);
        }
        catch (Exception ex)
        {
            WriteLog("Deleting temp directory... " +
                unpackDirectory + " ERROR: " + ex.Message);
        }
    }

    /// <summary>Removes the unpack directory if present; failures are only logged.</summary>
    private void DeleteUnpackDirectory()
    {
        if (!Directory.Exists(unpackDirectory))
        {
            return;
        }

        try
        {
            Directory.Delete(unpackDirectory, true);
            WriteLog("Deleting temp directory... DONE: " + unpackDirectory);
        }
        catch (Exception ex)
        {
            WriteLog("Deleting temp directory... " +
                unpackDirectory + " ERROR: " + ex.Message);
        }
    }

    /// <summary>
    /// Writes all parsed records to the database in a single submit, then
    /// clears the pending record and file lists. Errors are logged; the
    /// success message is written only when the submit succeeded.
    /// </summary>
    private void TransferData()
    {
        if (UHEEs.Count > 0)
        {
            WriteLog("There are " + UHEEs.Count.ToString() + " records to transfer");
            try
            {
                using (DAO.DataClassesUHEEDataContext db =
                    new UHEEParser.DAO.DataClassesUHEEDataContext(connectionString))
                {
                    // One submit for the whole batch instead of a database
                    // round trip per record.
                    db.Uhees.InsertAllOnSubmit(UHEEs);
                    db.SubmitChanges();
                }
                WriteLog("There are " + UHEEs.Count.ToString() +
                    " records has been transfered");
            }
            catch (Exception ex)
            {
                WriteLog("Saving UHEE... ERROR: " + ex.Message);
            }
        }
        else
        {
            WriteLog("There are no records to transfer");
        }
        UHEEs.Clear();
        files.Clear();
    }

    /// <summary>
    /// Parses every extracted file and collects the resulting records.
    /// A file that cannot be parsed is logged and skipped.
    /// </summary>
    private void ParseFiles()
    {
        if (files.Count == 0)
        {
            WriteLog("There are no files to parce.");
            return;
        }

        WriteLog("There are " + files.Count.ToString() + " files to parce.");
        UHEEParser.ENTITIES.UHEEParser parser =
            new UHEEParser.ENTITIES.UHEEParser();
        foreach (string file in files)
        {
            WriteLog("Parsing file... " + file);
            try
            {
                DAO.Uhee uhee = parser.Parse(file);
                if (uhee != null)
                {
                    UHEEs.Add(uhee);
                }
                WriteLog("Parsing file... " + " DONE: " + file);
            }
            catch (Exception ex)
            {
                // A malformed report must not stop the remaining files.
                WriteLog("Parsing file... ERROR: " + file + " " + ex.Message);
            }
        }
    }

    /// <summary>
    /// Shows the outcome of the run and disables Cancel again, whatever the outcome.
    /// </summary>
    private void bw_RunWorkerCompleted(object sender,
        RunWorkerCompletedEventArgs e)
    {
        this.btCancel.Enabled = false;

        if (e.Cancelled)
        {
            this.tbProgress.Text = "Canceled!";
        }
        else if (e.Error != null)
        {
            this.tbProgress.Text = ("Error: " + e.Error.Message);
        }
        else
        {
            this.tbProgress.Text = "Done!";
        }
    }

    /// <summary>Shows the reported percentage and advances the bar by one archive.</summary>
    private void bw_ProgressChanged(object sender, ProgressChangedEventArgs e)
    {
        this.tbProgress.Text = (e.ProgressPercentage.ToString() + "%");
        pb1.PerformStep();
    }
    #endregion

    #region Extract files from ZIP
    /// <summary>
    /// Extracts the entries of one archive that match the active filters into
    /// the unpack directory and records their paths in <c>files</c>.
    /// </summary>
    /// <param name="zipToUnpack">Path of the ZIP archive.</param>
    /// <returns>The shared list of extracted file paths.</returns>
    private List<string> ExtractFiles(string zipToUnpack)
    {
        string unpackRoot = Path.GetFullPath(unpackDirectory) + @"\";
        using (ZipFile zip1 = ZipFile.Read(zipToUnpack))
        {
            foreach (ZipEntry e in zip1)
            {
                string completePathExtractedFile = Path.GetFullPath(
                    unpackDirectory + @"\" + e.FileName.Replace(@"/", @"\"));
                string fileName = Path.GetFileName(completePathExtractedFile);
                if (!MatchesFilters(fileName))
                {
                    continue;
                }

                // Archive contents are external data: refuse entries whose
                // name (e.g. containing "..") resolves outside the unpack
                // directory.
                if (!completePathExtractedFile.StartsWith(
                        unpackRoot, StringComparison.OrdinalIgnoreCase))
                {
                    WriteLog("Extracting from ZIP archive... SKIPPED " +
                        "(path outside temp directory): " + e.FileName);
                    continue;
                }

                WriteLog("Extracting from ZIP archive... ");
                e.Extract(unpackDirectory,
                    ExtractExistingFileAction.OverwriteSilently);
                files.Add(completePathExtractedFile);
                WriteLog("Extracting from ZIP archive... DONE: " +
                    completePathExtractedFile);
            }
        }
        return files;
    }

    /// <summary>
    /// Tells whether a file name satisfies every filter that is set. Both
    /// comparisons ignore case. When neither filter is set nothing matches.
    /// </summary>
    private bool MatchesFilters(string fileName)
    {
        bool hasNameFilter = !string.IsNullOrEmpty(fileNameContainsCharacters);
        bool hasExtensionFilter = !string.IsNullOrEmpty(fileExtention);

        if (!hasNameFilter && !hasExtensionFilter)
        {
            return false;
        }

        if (hasNameFilter &&
            fileName.IndexOf(fileNameContainsCharacters,
                StringComparison.OrdinalIgnoreCase) < 0)
        {
            return false;
        }

        // The filter text is lower-cased on capture, so the comparison has to
        // ignore case or "REPORT.TXT" would never match ".txt".
        if (hasExtensionFilter &&
            !fileName.EndsWith(fileExtention, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        return true;
    }
    #endregion

    #region Write Log
    private delegate void stringDelegate(string s);

    /// <summary>
    /// Appends a line to the log box, marshalling to the UI thread when
    /// called from the worker.
    /// </summary>
    private void WriteLog(string text)
    {
        if (rtbLog.InvokeRequired)
        {
            stringDelegate sd = new stringDelegate(WriteLog);
            this.Invoke(sd, new object[] { text });
        }
        else
        {
            // AppendText avoids re-assigning the whole text for every line.
            rtbLog.AppendText(text + Environment.NewLine);
        }
    }
    #endregion

    #region Setup Forms Controls
    private delegate void integerDelegate(int i);

    /// <summary>
    /// Resets the progress bar so that one step corresponds to one archive.
    /// </summary>
    /// <param name="i">Number of archives to process.</param>
    private void SetupProgressBar(int i)
    {
        if (pb1.InvokeRequired)
        {
            integerDelegate sd = new integerDelegate(SetupProgressBar);
            this.Invoke(sd, new object[] { i });
        }
        else
        {
            pb1.Maximum = i;
            // PerformStep is called once per archive, so the step must be 1.
            pb1.Step = 1;
            pb1.Value = 0;
        }
    }
    #endregion

    #region Statistics
    /// <summary>Reloads the grid from the database.</summary>
    private void btGetData_Click(object sender, EventArgs e)
    {
        GetData();
    }

    /// <summary>
    /// Binds the grid and navigator to all stored records, newest error first.
    /// </summary>
    void GetData()
    {
        // NOTE(review): the context is intentionally not disposed here - the
        // grid is bound to the query itself, which presumably still needs the
        // context after this method returns. Confirm before wrapping in using.
        DAO.DataClassesUHEEDataContext db =
            new UHEEParser.DAO.DataClassesUHEEDataContext(connectionString);
        var itemsUHEE = from c in db.Uhees
                        orderby c.ErrorDateTime descending
                        select c;
        bindingSource1.DataSource = itemsUHEE;
        bindingNavigator1.BindingSource = bindingSource1;
        dgv1.DataSource = bindingSource1;
    }

    /// <summary>Deletes every stored record and refreshes the grid.</summary>
    private void btnClean_Click(object sender, EventArgs e)
    {
        using (DAO.DataClassesUHEEDataContext db =
            new UHEEParser.DAO.DataClassesUHEEDataContext(connectionString))
        {
            var itemsUHEE = from c in db.Uhees
                            select c;
            db.Uhees.DeleteAllOnSubmit(itemsUHEE);
            db.SubmitChanges();
        }
        GetData();
    }
    #endregion

    #region Menu Actions
    /// <summary>Requests cancellation of any running job and exits the application.</summary>
    private void exitToolStripMenuItem_Click(object sender, EventArgs e)
    {
        CancelParsing();
        Application.Exit();
    }
    #endregion
}
Feel free to extend this code with anything that would improve it, for example:
- Regex to parse text
- Export to MS Excel
- Or even to convert this app to a Windows Service ;-)
Thank you for reading and happy coding!
History
This is the first version of the application.