Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles / database / SQL-Server

Parsing of Zipped plain text files and saving results to a local database with .NET 3.5

4.00/5 (3 votes)
16 May 2011CPOL1 min read 22.6K   573  
Parsing Zipped plain text files and saving results to a local database with .NET 3.5, LINQ to SQL, BackgroundWorker, and UI.

Sample Image

Introduction

I wrote this application to process around 3500 Zip files, each containing some plain text files. Each plain text file holds an error description. My task was to parse the zipped plain text files and save the extracted data into a local database for later analysis. I used .NET to create this application.

Background

Here is the error info. Errors are evil!

Sample Image

Using the code

I created a Windows Forms application and attached a local MS SQL Server database file. (You can use MS SQL Express to create a database and a table to keep the records.) Generate the database context and you should get this:

Image 3

Implement the possible interfaces and create the parsing class:

C#
/// <summary>
/// Contract for a parser that turns one file into a <c>DAO.Uhee</c> record.
/// </summary>
public interface IParcer
{
    /// <summary>
    /// Parses the given file into a <c>DAO.Uhee</c> record.
    /// </summary>
    /// <param name="file">Path of the file to parse.</param>
    /// <returns>
    /// The parsed record. The implementation shown in this article returns
    /// <c>null</c> when the file does not exist, so callers should check for it.
    /// </returns>
    DAO.Uhee Parse(string file);
}

... and this one:

C#
/// <summary>
/// Tells <c>UHEEParser.GetValue</c> what kind of value a report line carries,
/// which decides how the text after the label is extracted.
/// </summary>
public enum ExpectedDataType
{
    // Date/time value; handled separately because the value itself contains ':' characters.
    DateTime,
    // Plain text value: the segment right after the first ':' is returned.
    String,
    // Extracted exactly like String; not passed by UHEEParser.Parse in this file.
    Integer
}
/// <summary>
/// Parses one extracted error report (a plain text file whose lines are split on ':')
/// into a <c>DAO.Uhee</c> record. The fields are read from fixed line positions.
/// </summary>
public class UHEEParser : IParcer
{
    // Highest line index read by Parse is 16, so a usable report has at least 17 lines.
    const int MinimumLineCount = 17;

    /// <summary>
    /// Reads the report at <paramref name="file"/> and maps its lines to a new record.
    /// </summary>
    /// <param name="file">Path of the extracted report file.</param>
    /// <returns>The populated record, or <c>null</c> when the file does not exist.</returns>
    /// <exception cref="FormatException">
    /// The file has fewer lines than the fixed layout requires, or the date line
    /// cannot be parsed by <c>DateTime.Parse</c>.
    /// </exception>
    public DAO.Uhee Parse(string file)
    {
        if (!File.Exists(file))
        {
            return null;
        }

        // ReadAllLines opens and closes the file itself, so no reader is leaked,
        // and an empty file yields an empty array instead of a single null entry.
        string[] lines = File.ReadAllLines(file);

        if (lines.Length < MinimumLineCount)
        {
            throw new FormatException(
                "Unexpected report layout in '" + file + "': expected at least " +
                MinimumLineCount + " lines but found " + lines.Length + ".");
        }

        DAO.Uhee uhee = new DAO.Uhee();

        uhee.ID = Guid.NewGuid();
        uhee.User = GetValue(lines[2], ExpectedDataType.String);
        uhee.UserDevice = GetValue(lines[3], ExpectedDataType.String);
        uhee.ErrorDateTime =
          DateTime.Parse(GetValue(lines[4], ExpectedDataType.DateTime));
        uhee.AppName = GetValue(lines[7], ExpectedDataType.String);
        uhee.Version = GetValue(lines[8], ExpectedDataType.String);
        uhee.Host = GetValue(lines[10], ExpectedDataType.String);
        uhee.Port = GetValue(lines[11], ExpectedDataType.String);
        uhee.WebApp = GetValue(lines[12], ExpectedDataType.String);
        uhee.CurrentForm = GetValue(lines[14], ExpectedDataType.String);
        uhee.Message = GetValue(lines[15], ExpectedDataType.String);
        uhee.StackTrace = GetValue(lines[16], ExpectedDataType.String);
        uhee.ParsingTime = DateTime.Now;

        return uhee;
    }

    /// <summary>
    /// Extracts the value part of a line by splitting it on ':'.
    /// </summary>
    /// <param name="line">One line of the report.</param>
    /// <param name="dataType">Kind of value expected on the line.</param>
    /// <returns>
    /// For <c>DateTime</c>: everything after the first ':' (the time part contains
    /// colons itself, so all remaining segments are joined back together).
    /// For every other type: only the segment directly after the first ':'.
    /// An empty string when the line is null or contains no ':'.
    /// </returns>
    string GetValue(string line, ExpectedDataType dataType)
    {
        if (line == null)
        {
            return string.Empty;
        }

        string[] strings = line.Split(':');
        if (strings.Length <= 1)
        {
            return string.Empty;
        }

        switch (dataType)
        {
            case ExpectedDataType.DateTime:
                // Re-join every segment after the label. The previous code appended
                // segment 2 twice (minutes instead of seconds) and failed when the
                // value had no seconds part.
                return string.Join(":", strings, 1, strings.Length - 1);
            default:
                // NOTE(review): text values that contain ':' themselves (URLs, paths in a
                // stack trace) are cut at the second colon; kept as-is to preserve the
                // data already stored by earlier runs.
                return strings[1];
        }
    }
}

Now it is time to download the DotNetZip DLL (zip and unzip in C#, VB, and any .NET language: http://dotnetzip.codeplex.com/) and add a reference to it in your project.

Sample Image

The last thing is to put some code under the main form class:

Sample Image

Here is the code:

C#
/// <summary>
/// Main window of the tool. The user picks a folder with ZIP archives; a
/// BackgroundWorker extracts the entries that match the name/extension filters,
/// parses them with UHEEParser and stores the records through LINQ to SQL.
/// </summary>
public partial class FormMain : Form
{
    #region Properties

    // Folder selected by the user that is scanned for archives.
    string zipFolder { set; get; }
    const string searchMask = "*.zip";
    // Scratch folder for extracted entries. Placed under the current user's temp path
    // instead of a hard-coded C:\Windows\Temp, which may not exist or be writable.
    static readonly string unpackDirectory =
        Path.Combine(Path.GetTempPath(), "UnpackDirectory");
    // Lower-cased filters taken from the text boxes when a run starts.
    string fileNameContainsCharacters { set; get; }
    string fileExtention { set; get; }
    // Paths of the extracted files that still have to be parsed.
    List<string> files = new List<string>();
    // Parsed records waiting to be written to the database.
    List<DAO.Uhee> UHEEs = new List<DAO.Uhee>();
    string connectionString { set; get;  }

    BindingSource bindingSource1 = new BindingSource();

    #endregion

    #region Init the app
    /// <summary>
    /// Builds the form, reads the connection string and wires up the background worker.
    /// </summary>
    public FormMain()
    {
        InitializeComponent();

        connectionString = ConfigurationManager.ConnectionStrings[
           "UHEEParser.Properties.Settings.UHEEConnectionString"].ConnectionString;
        // Cancel only makes sense while a run is active; btStart_Click enables it.
        btCancel.Enabled = false;

        bw.WorkerReportsProgress = true;
        bw.WorkerSupportsCancellation = true;
        bw.DoWork += new DoWorkEventHandler(bw_DoWork);
        bw.ProgressChanged += new ProgressChangedEventHandler(bw_ProgressChanged);
        bw.RunWorkerCompleted +=
           new RunWorkerCompletedEventHandler(bw_RunWorkerCompleted);
    }
    #endregion

    #region Form Events

    /// <summary>Clears the log box.</summary>
    private void btCleanLog_Click(object sender, EventArgs e)
    {
        rtbLog.Clear();
    }

    /// <summary>Lets the user pick the folder that contains the ZIP archives.</summary>
    private void btnZIPFolder_Click(object sender, EventArgs e)
    {
        // Keep the previous selection when the dialog is dismissed.
        if (fbd.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        zipFolder = fbd.SelectedPath;
        tbZipFolder.Text = zipFolder;
        btStart.Enabled = true;
    }

    /// <summary>Requests cancellation of the running job.</summary>
    private void btCancel_Click(object sender, EventArgs e)
    {
        CancelParsing();
    }

    /// <summary>Asks the background worker to stop at the next archive boundary.</summary>
    void CancelParsing()
    {
        if (bw.WorkerSupportsCancellation)
        {
            bw.CancelAsync();
        }
    }

    /// <summary>Captures the filters and starts the background job.</summary>
    private void btStart_Click(object sender, EventArgs e)
    {
        if (bw.IsBusy)
        {
            // Do not touch the filters while the worker thread is reading them.
            return;
        }

        if (string.IsNullOrEmpty(zipFolder) || !Directory.Exists(zipFolder))
        {
            WriteLog("Select an existing ZIP folder first.");
            return;
        }

        fileNameContainsCharacters = tbFileNameContains.Text.Trim().ToLower();
        fileExtention = tbFileExtention.Text.Trim().ToLower();

        btCancel.Enabled = true;

        // Start the asynchronous operation.
        bw.RunWorkerAsync();
    }

    #endregion

    #region Background Worker Job

    /// <summary>
    /// Worker-thread entry point: extracts matching entries from every archive,
    /// then parses and stores them. When cancellation is requested the parse and
    /// database steps are skipped. The temp directory is removed in every case.
    /// </summary>
    private void bw_DoWork(object sender, DoWorkEventArgs e)
    {
        BackgroundWorker worker = (BackgroundWorker)sender;

        // Drop anything left behind by a previous run that ended early.
        files.Clear();
        UHEEs.Clear();

        WriteLog("--- Start parsing ---");
        WriteLog("Get ZIP files...");
        // Process the list of files found in the directory.
        string[] fileEntries = Directory.GetFiles(zipFolder, searchMask);
        WriteLog("Get ZIP files... Found: " + fileEntries.Length.ToString());
        WriteLog("Get ZIP files... DONE");

        try
        {
            PrepareUnpackDirectory();
            SetupProgressBar(fileEntries.Length);

            int processed = 0;

            foreach (string fileName in fileEntries)
            {
                if (worker.CancellationPending)
                {
                    WriteLog("Processing file... CANCELED OPERATION: " + fileName);
                    e.Cancel = true;
                    break;
                }

                WriteLog("Processing file... " + fileName);

                ExtractFiles(fileName);

                // Report a real percentage (0-100) of archives handled so far.
                processed++;
                worker.ReportProgress(processed * 100 / fileEntries.Length);
            }

            if (!e.Cancel)
            {
                ParseFiles();
                TransferData();
            }
        }
        finally
        {
            // Clean up even when extraction or parsing threw.
            DeleteUnpackDirectory();
            WriteLog("--- End parsing ---");
        }
    }

    /// <summary>
    /// Makes sure an empty temp directory exists. A failed delete is logged and
    /// the existing directory is reused.
    /// </summary>
    private void PrepareUnpackDirectory()
    {
        if (Directory.Exists(unpackDirectory))
        {
            try
            {
                Directory.Delete(unpackDirectory, true);
                WriteLog("Deleting temp directory... DONE: " + unpackDirectory);
            }
            catch (Exception ex)
            {
                WriteLog("Deleting temp directory... " +
                   unpackDirectory + " ERROR: " + ex.Message);
            }
        }

        if (!Directory.Exists(unpackDirectory))
        {
            Directory.CreateDirectory(unpackDirectory);
            WriteLog("Creating temp directory... DONE: " + unpackDirectory);
        }
    }

    /// <summary>Removes the temp directory; failures are logged, not thrown.</summary>
    private void DeleteUnpackDirectory()
    {
        if (!Directory.Exists(unpackDirectory))
        {
            return;
        }

        try
        {
            Directory.Delete(unpackDirectory, true);
            WriteLog("Deleting temp directory... DONE: " + unpackDirectory);
        }
        catch (Exception ex)
        {
            WriteLog("Deleting temp directory... " +
               unpackDirectory + " ERROR: " + ex.Message);
        }
    }

    /// <summary>
    /// Inserts every parsed record into the database, one submit per record so that
    /// records saved before a failure stay saved. Clears the work lists afterwards.
    /// </summary>
    private void TransferData()
    {
        if (UHEEs.Count > 0)
        {
            WriteLog("There are " + UHEEs.Count.ToString() + " records to transfer");

            int saved = 0;

            try
            {
                using (DAO.DataClassesUHEEDataContext db =
                  new UHEEParser.DAO.DataClassesUHEEDataContext(connectionString))
                {
                    foreach (DAO.Uhee u in UHEEs)
                    {
                        db.Uhees.InsertOnSubmit(u);
                        db.SubmitChanges();
                        saved++;
                    }
                }
            }
            catch (Exception ex)
            {
                WriteLog("Saving UHEE...  ERROR: " + ex.Message);
            }

            // Report what was actually written, not what was planned.
            WriteLog(saved.ToString() + " of " + UHEEs.Count.ToString() +
                     " records have been transferred");
        }
        else
        {
            WriteLog("There are no records to transfer");
        }

        UHEEs.Clear();
        files.Clear();
    }

    /// <summary>
    /// Parses every extracted file and collects the non-null results in UHEEs.
    /// Parser exceptions propagate to the worker and end the run.
    /// </summary>
    private void ParseFiles()
    {
        if (files.Count > 0)
        {
            WriteLog("There are " + files.Count.ToString() + " files to parce.");
            UHEEParser.ENTITIES.UHEEParser parser =
                       new UHEEParser.ENTITIES.UHEEParser();

            foreach (string file in files)
            {
                WriteLog("Parsing file... " + file);

                DAO.Uhee uhee = parser.Parse(file);
                if (uhee != null)
                {
                    UHEEs.Add(uhee);
                }

                WriteLog("Parsing file... " + " DONE: " + file);
            }
        }
        else
        {
            WriteLog("There are no files to parce.");
        }
    }

    /// <summary>Shows the outcome of the run and disables the Cancel button.</summary>
    private void bw_RunWorkerCompleted(object sender,
                    RunWorkerCompletedEventArgs e)
    {
        if (e.Cancelled)
        {
            this.tbProgress.Text = "Canceled!";
        }
        else if (e.Error != null)
        {
            this.tbProgress.Text = ("Error: " + e.Error.Message);
        }
        else
        {
            this.tbProgress.Text = "Done!";
        }

        // The run is over whichever way it ended.
        this.btCancel.Enabled = false;
    }

    /// <summary>Updates the percentage text and advances the bar by one archive.</summary>
    private void bw_ProgressChanged(object sender, ProgressChangedEventArgs e)
    {
        this.tbProgress.Text = (e.ProgressPercentage.ToString() + "%");
        pb1.PerformStep();
    }

    #endregion

    #region Extract files from ZIP
    /// <summary>
    /// Extracts the entries of one archive that match the active filters into the
    /// temp directory and records their paths in <c>files</c>.
    /// </summary>
    /// <param name="zipToUnpack">Path of the archive to read.</param>
    /// <returns>The shared list of all extracted file paths collected so far.</returns>
    private List<string> ExtractFiles(string zipToUnpack)
    {
        // Normalised root with a trailing separator, used for the containment check.
        string unpackRoot = Path.GetFullPath(unpackDirectory)
            .TrimEnd(Path.DirectorySeparatorChar) + Path.DirectorySeparatorChar;

        using (ZipFile zip1 = ZipFile.Read(zipToUnpack))
        {
            foreach (ZipEntry e in zip1)
            {
                string relativePath = e.FileName.Replace('/', Path.DirectorySeparatorChar);
                string completePathExtractedFile =
                    Path.GetFullPath(Path.Combine(unpackDirectory, relativePath));

                // Refuse entries whose name would resolve outside the temp directory
                // (for example names containing "..\" or an absolute path).
                if (!completePathExtractedFile.StartsWith(
                        unpackRoot, StringComparison.OrdinalIgnoreCase))
                {
                    WriteLog("Extracting from ZIP archive... SKIPPED (outside temp directory): " +
                             e.FileName);
                    continue;
                }

                string fileName = Path.GetFileName(completePathExtractedFile);

                if (!MatchesFilters(fileName))
                {
                    continue;
                }

                WriteLog("Extracting from ZIP archive... ");
                e.Extract(unpackDirectory,
                  ExtractExistingFileAction.OverwriteSilently);
                files.Add(completePathExtractedFile);
                WriteLog("Extracting from ZIP archive... DONE: " +
                         completePathExtractedFile);
            }
        }

        return files;
    }

    /// <summary>
    /// Checks a file name against the active filters, ignoring case. Every filter
    /// that is set must match. With no filter set nothing matches, so no entry is
    /// extracted (same as before).
    /// </summary>
    private bool MatchesFilters(string fileName)
    {
        bool hasNameFilter = !string.IsNullOrEmpty(fileNameContainsCharacters);
        bool hasExtensionFilter = !string.IsNullOrEmpty(fileExtention);

        if (!hasNameFilter && !hasExtensionFilter)
        {
            return false;
        }

        if (hasNameFilter &&
            fileName.IndexOf(fileNameContainsCharacters,
                             StringComparison.OrdinalIgnoreCase) < 0)
        {
            return false;
        }

        // The filter text is lower-cased, so compare without regard to case;
        // otherwise "REPORT.TXT" would never match ".txt".
        if (hasExtensionFilter &&
            !fileName.EndsWith(fileExtention, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        return true;
    }
    #endregion

    #region Write Log

    private delegate void stringDelegate(string s);

    /// <summary>
    /// Appends one line to the log box, marshalling to the UI thread when called
    /// from the worker.
    /// </summary>
    private void WriteLog(string text)
    {
        if (rtbLog.InvokeRequired)
        {
            stringDelegate sd = new stringDelegate(WriteLog);
            this.Invoke(sd, new object[] { text });
        }
        else
        {
            // AppendText avoids rebuilding the whole text for every log line.
            rtbLog.AppendText(text + Environment.NewLine);
        }
    }

    #endregion

    #region Setup Forms Controls

    private delegate void integerDelegate(int i);

    /// <summary>
    /// Resets the progress bar so that one PerformStep call equals one archive.
    /// </summary>
    /// <param name="i">Number of archives in the run.</param>
    private void SetupProgressBar(int i)
    {
        if (pb1.InvokeRequired)
        {
            integerDelegate sd = new integerDelegate(SetupProgressBar);
            this.Invoke(sd, new object[] { i });
        }
        else
        {
            pb1.Maximum = i;
            pb1.Value = 0;
            // Do not depend on the designer value: the bar must advance by one per archive.
            pb1.Step = 1;
        }
    }

    #endregion

    #region Statistics

    /// <summary>Reloads the grid from the database.</summary>
    private void btGetData_Click(object sender, EventArgs e)
    {
        GetData();
    }

    /// <summary>Binds all stored records, newest error first, to the grid.</summary>
    void GetData()
    {
        // NOTE(review): the query is deferred and handed to the binding source, so the
        // context is not disposed here; confirm the binding behaviour before changing it.
        DAO.DataClassesUHEEDataContext db =
          new UHEEParser.DAO.DataClassesUHEEDataContext(connectionString);

        var itemsUHEE = from c in db.Uhees
                        orderby c.ErrorDateTime descending
                        select c;

        bindingSource1.DataSource = itemsUHEE;
        bindingNavigator1.BindingSource = bindingSource1;
        dgv1.DataSource = bindingSource1;
    }

    /// <summary>Deletes every stored record and refreshes the grid.</summary>
    private void btnClean_Click(object sender, EventArgs e)
    {
        using (DAO.DataClassesUHEEDataContext db =
          new UHEEParser.DAO.DataClassesUHEEDataContext(connectionString))
        {
            var itemsUHEE = from c in db.Uhees
                            select c;
            db.Uhees.DeleteAllOnSubmit(itemsUHEE);

            db.SubmitChanges();
        }

        GetData();
    }

    #endregion

    #region Menu Actions
    /// <summary>Requests cancellation of a running job and closes the application.</summary>
    private void exitToolStripMenuItem_Click(object sender, EventArgs e)
    {
        CancelParsing();
        Application.Exit();
    }
    #endregion
}

Feel free to add anything to this code that you think would improve it, for example:

  • Regex to parse text
  • Export to MS Excel
  • Or even to convert this app to a Windows Service ;-)

Thank you for reading and happy coding!

History

This is the first version of the application.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)