Over the past few days, I needed to compare two .NET assemblies to see whether they are functionally the same. As you may know, when you build the same project several times, the resulting output .dll or .exe file is different every time. That's because of the ModuleVersionId property (MVID). So, in order to check whether two assemblies are the same, we can disassemble them to IL code and compare the results. The easiest way to disassemble a file is to use the MSIL Disassembler (ildasm.exe). However, it is not installed in one common location. It can be in any of the following locations:
C:\Program Files\Microsoft SDKs\Windows\v6.0\bin\ildasm.exe
C:\Program Files\Microsoft SDKs\Windows\v6.0A\bin\ildasm.exe
C:\Program Files\Microsoft SDKs\Windows\v7.0A\bin\ildasm.exe
C:\Program Files\Microsoft SDKs\Windows\v7.0A\bin\x64\ildasm.exe
C:\Program Files\Microsoft SDKs\Windows\v7.0A\bin\NETFX 4.0 Tools\ildasm.exe
C:\Program Files\Microsoft SDKs\Windows\v7.0A\bin\NETFX 4.0 Tools\x64\ildasm.exe
C:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\bin\ildasm.exe
C:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\bin\x64\ildasm.exe
C:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\bin\NETFX 4.0 Tools\ildasm.exe
C:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\bin\NETFX 4.0 Tools\x64\ildasm.exe
So in my following solution, I have embedded the ildasm.exe file in a class library to be able to use it on any machine. When we disassemble an assembly, we check if the ildasm.exe file exists in the executing assembly folder and if not the file is extracted there from our DLL file. Using the ildasm file, we get the IL code and save it to a temporary file. Then we need to remove the following three rows:
- MVID - as I wrote before this is a unique GUID generated with every build
- Image Base (The image base tells us as to where the program will be loaded in memory by the Windows loader.) - this is different with every build as well
- Time-date stamp - the time and date when the ildasm is run
So we read the temp file content, remove these rows using regular expressions, and then save the content back to the same file. You can find the Disassembler file here:
using System;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Diagnostics;
using System.Text.RegularExpressions;
namespace FileHasher
{
/// <summary>
/// Disassembles .NET assemblies to IL text with a copy of ildasm.exe that is embedded in
/// this assembly, and strips the header comment lines matched by <see cref="regexMVID"/>,
/// <see cref="regexImageBase"/> and <see cref="regexTimeStamp"/> so that the output of
/// two builds can be compared.
/// </summary>
public class Disassembler
{
    /// <summary>Matches the "// MVID: {guid}" comment in ildasm output.</summary>
    public static Regex regexMVID = new Regex("//\\s*MVID\\:\\s*\\{[a-zA-Z0-9\\-]+\\}",
        RegexOptions.Multiline | RegexOptions.Compiled);

    /// <summary>Matches the "// Image base: 0x..." comment in ildasm output.</summary>
    public static Regex regexImageBase = new Regex("//\\s*Image\\s+base\\:\\s0x[0-9A-Fa-f]*",
        RegexOptions.Multiline | RegexOptions.Compiled);

    /// <summary>Matches the "// Time-date stamp: 0x..." comment in ildasm output.</summary>
    public static Regex regexTimeStamp = new Regex("//\\s*Time-date\\s+stamp\\:\\s*0x[0-9A-Fa-f]*",
        RegexOptions.Multiline | RegexOptions.Compiled);

    // The assembly that contains this class (and the embedded ildasm.exe resource).
    private static readonly Lazy<Assembly> currentAssembly = new Lazy<Assembly>(() =>
        typeof(Disassembler).Assembly);

    // Folder of this assembly on disk; ildasm.exe is extracted next to it.
    private static readonly Lazy<string> executingAssemblyPath = new Lazy<string>(() =>
        Path.GetDirectoryName(currentAssembly.Value.Location));

    // Names of all resources embedded in this assembly.
    private static readonly Lazy<string[]> arrResources = new Lazy<string[]>(() =>
        currentAssembly.Value.GetManifestResourceNames());

    // {0} is the path of the assembly to disassemble.
    private const string ildasmArguments = "/all /text \"{0}\"";

    /// <summary>Full path at which the extracted ildasm.exe is expected.</summary>
    public static string ILDasmFileLocation
    {
        get
        {
            return Path.Combine(executingAssemblyPath.Value, "ildasm.exe");
        }
    }

    /// <summary>
    /// Extracts the embedded ildasm.exe to <see cref="ILDasmFileLocation"/> the first time
    /// the type is used (no-op when the file is already there).
    /// </summary>
    static Disassembler()
    {
        ExtractFileToLocation("ildasm.exe", ILDasmFileLocation);
    }

    /// <summary>
    /// Writes the embedded resource <paramref name="embeddedResourceName"/> to
    /// <paramref name="fileName"/>. Does nothing when the target file already exists.
    /// </summary>
    /// <param name="embeddedResourceName">Full manifest resource name.</param>
    /// <param name="fileName">Path of the file to create.</param>
    /// <exception cref="InvalidOperationException">The resource cannot be opened.</exception>
    protected static void SaveFileFromEmbeddedResource(string embeddedResourceName, string fileName)
    {
        if (File.Exists(fileName))
        {
            return;
        }

        // Open the resource BEFORE creating the output file: otherwise a missing resource
        // leaves an empty file behind that later calls would mistake for a valid extraction.
        using (Stream resourceStream =
            currentAssembly.Value.GetManifestResourceStream(embeddedResourceName))
        {
            if (resourceStream == null)
            {
                throw new InvalidOperationException(string.Format(
                    "Cannot open embedded resource {0}", embeddedResourceName));
            }

            try
            {
                using (FileStream outputStream = new FileInfo(fileName).OpenWrite())
                {
                    resourceStream.CopyTo(outputStream);
                }
            }
            catch
            {
                // Do not leave a truncated file behind; the File.Exists check above would
                // otherwise skip extraction forever.
                TryDelete(fileName);
                throw;
            }
        }
    }

    /// <summary>
    /// Finds the embedded resource whose name ends with <paramref name="fileNameInDll"/>
    /// and saves it to <paramref name="outFileName"/>.
    /// </summary>
    /// <param name="fileNameInDll">File name (resource name suffix) to look for.</param>
    /// <param name="outFileName">Path of the file to create.</param>
    /// <exception cref="InvalidOperationException">No matching resource exists.</exception>
    protected static void ExtractFileToLocation(string fileNameInDll, string outFileName)
    {
        string resourcePath = arrResources.Value.FirstOrDefault(resource =>
            resource.EndsWith(fileNameInDll, StringComparison.OrdinalIgnoreCase));
        if (resourcePath == null)
        {
            throw new InvalidOperationException(string.Format("Cannot find {0} in " +
                "the embedded resources of {1}", fileNameInDll, currentAssembly.Value.FullName));
        }
        SaveFileFromEmbeddedResource(resourcePath, outFileName);
    }

    /// <summary>
    /// Disassembles <paramref name="assemblyFilePath"/> and writes the cleaned IL text to a
    /// new temporary file. The caller is responsible for deleting that file.
    /// </summary>
    /// <param name="assemblyFilePath">Path of the assembly to disassemble.</param>
    /// <returns>Path of the temporary file that contains the IL text.</returns>
    /// <exception cref="InvalidOperationException">
    /// The file does not exist, or ildasm exited with a non-zero exit code.
    /// </exception>
    public static string GetDisassembledFile(string assemblyFilePath)
    {
        string ilCode = RunIldasm(assemblyFilePath);

        // The temp file is created only after ildasm succeeded, so a failed run leaks nothing.
        string tempFileName = Path.GetTempFileName();
        try
        {
            File.WriteAllText(tempFileName, ilCode);
        }
        catch
        {
            TryDelete(tempFileName);
            throw;
        }
        return tempFileName;
    }

    /// <summary>
    /// Disassembles <paramref name="assemblyFilePath"/> and returns the cleaned IL text.
    /// </summary>
    /// <param name="assemblyFilePath">Path of the assembly to disassemble.</param>
    /// <returns>IL text without the MVID, image base and time-date stamp comments.</returns>
    /// <exception cref="InvalidOperationException">
    /// The file does not exist, or ildasm exited with a non-zero exit code.
    /// </exception>
    public static string DisassembleFile(string assemblyFilePath)
    {
        return RunIldasm(assemblyFilePath);
    }

    // Runs ildasm on the given file and returns its standard output with the
    // build-specific comment lines removed.
    private static string RunIldasm(string assemblyFilePath)
    {
        if (!File.Exists(assemblyFilePath))
        {
            throw new InvalidOperationException(string.Format
                ("The file {0} does not exist!", assemblyFilePath));
        }

        var startInfo = new ProcessStartInfo(ILDasmFileLocation,
            string.Format(ildasmArguments, assemblyFilePath))
        {
            WindowStyle = ProcessWindowStyle.Hidden,
            CreateNoWindow = true,
            UseShellExecute = false,
            RedirectStandardOutput = true
        };

        using (var process = Process.Start(startInfo))
        {
            // Drain stdout before waiting, otherwise a full pipe buffer blocks the child.
            string output = process.StandardOutput.ReadToEnd();
            process.WaitForExit();

            // Any non-zero code is a failure; crashes surface as negative values.
            if (process.ExitCode != 0)
            {
                throw new InvalidOperationException(
                    string.Format
                    ("Generating IL code for file {0} failed with exit code - {1}. Log: {2}",
                    assemblyFilePath, process.ExitCode, output));
            }

            return RemoveUnneededRows(output);
        }
    }

    // Removes the MVID, image base and time-date stamp comments from IL text.
    private static string RemoveUnneededRows(string ilCode)
    {
        ilCode = regexMVID.Replace(ilCode, string.Empty);
        ilCode = regexImageBase.Replace(ilCode, string.Empty);
        ilCode = regexTimeStamp.Replace(ilCode, string.Empty);
        return ilCode;
    }

    // Best-effort cleanup used on error paths; must not hide the original exception.
    private static void TryDelete(string fileName)
    {
        try
        {
            File.Delete(fileName);
        }
        catch (IOException)
        {
        }
        catch (UnauthorizedAccessException)
        {
        }
    }
}
}
So using this class, we can get the IL code of a .NET assembly. Now we can create a HashCalculator to calculate the hash of the file, so that we can store the hash and compare it with newer files. To calculate the hash, I use the following approach:
- Check if the file ends with .dll or .exe - if yes, this can be a .NET assembly, if no get file hash
- Try to open the assembly with Assembly.LoadFile to see if this is a valid .NET assembly; if there is a BadImageFormatException, this is not a .NET one, so get the file hash
- If this is a .NET assembly, get the IL code file and calculate its hash.
To calculate the file hash, we use MD5CryptoServiceProvider. So, here is the HashCalculator:
using System;
using System.IO;
using System.Reflection;
namespace FileHasher
{
/// <summary>
/// Calculates a Base64-encoded MD5 hash for a file. For .dll/.exe files that are valid
/// .NET assemblies the hash is taken over the disassembled IL text produced by
/// Disassembler.GetDisassembledFile; every other file is hashed byte for byte.
/// </summary>
public class HashCalculator
{
    /// <summary>Path of the file to hash.</summary>
    public string FileName { get; private set; }

    /// <summary>Creates a calculator for <paramref name="fileName"/>.</summary>
    /// <param name="fileName">Path of the file to hash.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null.</exception>
    public HashCalculator(string fileName)
    {
        if (fileName == null)
        {
            throw new ArgumentNullException("fileName");
        }
        this.FileName = fileName;
    }

    /// <summary>Calculates the hash of <see cref="FileName"/>.</summary>
    /// <returns>The MD5 hash, Base64 encoded.</returns>
    public string CalculateFileHash()
    {
        string extension = Path.GetExtension(this.FileName);
        bool mayBeAssembly =
            string.Equals(extension, ".dll", StringComparison.OrdinalIgnoreCase)
            || string.Equals(extension, ".exe", StringComparison.OrdinalIgnoreCase);

        return mayBeAssembly ? GetAssemblyFileHash() : GetFileHash();
    }

    // Hashes the raw bytes of the file.
    private string GetFileHash()
    {
        return CalculateHashFromStream(File.OpenRead(this.FileName));
    }

    // Hashes the IL text when the file is a .NET assembly, the raw bytes otherwise.
    private string GetAssemblyFileHash()
    {
        try
        {
            // AssemblyName.GetAssemblyName only reads the manifest: unlike
            // Assembly.LoadFile it does not load the assembly into the AppDomain and
            // accepts relative paths. It still throws BadImageFormatException when the
            // file is not a valid assembly.
            AssemblyName.GetAssemblyName(this.FileName);
        }
        catch (BadImageFormatException)
        {
            return GetFileHash();
        }

        string tempFileName = Disassembler.GetDisassembledFile(this.FileName);
        try
        {
            return CalculateHashFromStream(File.OpenRead(tempFileName));
        }
        finally
        {
            // File.Delete is a no-op when the file is already gone.
            File.Delete(tempFileName);
        }
    }

    // Computes the Base64-encoded MD5 hash of the stream and disposes the stream.
    private string CalculateHashFromStream(Stream stream)
    {
        using (var bufferedStream = new BufferedStream(stream, 1200000))
        using (var md5 = new System.Security.Cryptography.MD5CryptoServiceProvider())
        {
            return Convert.ToBase64String(md5.ComputeHash(bufferedStream));
        }
    }
}
}
You can find the code here.