Have you ever wondered how utilities like Beyond Compare or DIFF are comparing files? They do it (I guess) by solving the longest common subsequence (LCS) problem.
After reading the Wikipedia article linked above, I obtained an overall view of the problem and I looked at the possible resolutions. So, I decided to implement a Delphi class to do the string
comparison trick, which is the base for the text file comparison.
Let me put it as follows: given two string
s to be compared, I want to highlight in blue the characters added to the first string
and in red the characters removed from it. The common (unchanged) characters will keep the default color.
For example:
String 1 = Delphi allows both structural and object oriented programming.
String 2 = Does Delphi allow object oriented programming?
Highlighted differences:
Does Delphi allows both structural and object oriented programming?
The Delphi class looks like this:
type
TDiff = record
Character: Char;
CharStatus: Char;
end;
TStringComparer = class
……………
public
class function Compare(aString1, aString2: string): TList<tdiff>;
end;</tdiff>
When you call TStringComparer.Compare
, a generic list of TDiff
records is created. A TDiff
record contains a character and whether this character was added (CharStatus = ‘+’
), removed (CharStatus = ‘-’
) or unchanged (CharStatus = ‘=’
) in both string
s under comparison.
Let’s drop two edits (Edit1
, Edit2
), a rich edit (RichEdit1
) and a button (Button1
) on a Delphi form. To highlight the differences, put the following code in the OnClick
event of the button:
procedure TForm1.Button1Click(Sender: TObject);
var
Differences: TList<tdiff>;
Diff: TDiff;
begin
Differences:= TStringComparer.Compare(Edit1.Text, Edit2.Text);
try
RichEdit1.Clear;
RichEdit1.SelStart:= RichEdit1.GetTextLen;
for Diff in Differences do
if Diff.CharStatus = '+' then
begin
RichEdit1.SelAttributes.Color:= clBlue;
RichEdit1.SelText := Diff.Character;
end
else if Diff.CharStatus = '-' then
begin
RichEdit1.SelAttributes.Color:= clRed;
RichEdit1.SelText:= Diff.Character;
end
else
begin
RichEdit1.SelAttributes.Color:= clDefault;
RichEdit1.SelText:= Diff.Character;
end;
finally
Differences.Free;
end;
end;</tdiff>
It looks like in the image below:
For the full implementation, read further down. Note that various optimizations could be added to the code below, but I didn’t implement them. Anyway, I hope this helps. Feedback is welcome! Feel free to find and correct bugs. ;-)
unit StringComparison;
interface
uses
Math, Generics.Collections;
type
TDiff = record
Character: Char;
CharStatus: Char;
end;
TStringComparer = class
strict private
type TIntArray = array of array of Integer;
class function LCSMatrix(X, Y: string): TIntArray;
class procedure ComputeDifferences(aLCSMatrix: TIntArray;
X, Y: string;
i, j: Integer;
aDifferences: TList<TDiff>);
public
class function Compare(aString1, aString2: string): TList<TDiff>;
end;
implementation
class function TStringComparer.LCSMatrix(X, Y: string): TIntArray;
var
m, n,
i, j: Integer;
begin
m:= Length(X);
n:= Length(Y);
SetLength(Result, m + 1, n + 1);
for i := 0 to m do
Result[i, 0] := 0;
for j:= 0 to n do
Result[0, j]:= 0;
for i:= 1 to m do
for j:= 1 to n do
if X[i] = Y[j] then
Result[i, j] := Result[i-1, j-1] + 1
else
Result[i, j]:= Max(Result[i, j-1], Result[i-1, j]);
end;
class procedure TStringComparer.ComputeDifferences(aLCSMatrix: TIntArray;
X, Y: string;
i, j: Integer;
aDifferences: TList<TDiff>);
var
CharDiff: TDiff;
begin
if (i > 0) and (j > 0) and (X[i] = Y[j]) then
begin
ComputeDifferences(aLCSMatrix, X, Y, i-1, j-1, aDifferences);
CharDiff.Character:= X[i];
CharDiff.CharStatus:= '=';
aDifferences.Add(CharDiff);
end
else
if (j > 0) and ((i = 0) or (aLCSMatrix[i,j-1] >= aLCSMatrix[i-1,j])) then
begin
ComputeDifferences(aLCSMatrix, X, Y, i, j-1, aDifferences);
CharDiff.Character:= Y[j];
CharDiff.CharStatus:= '+';
aDifferences.Add(CharDiff);
end
else if (i > 0) and ((j = 0) or (aLCSMatrix[i,j-1] < aLCSMatrix[i-1,j])) then
begin
ComputeDifferences(aLCSMatrix, X, Y, i-1, j, aDifferences);
CharDiff.Character:= X[i];
CharDiff.CharStatus:= '-';
aDifferences.Add(CharDiff);
end;
end;
class function TStringComparer.Compare(aString1, aString2: string): TList<TDiff>;
var
Matrix: TIntArray;
begin
Result:= TList<TDiff>.Create;
Matrix:= LCSMatrix(aString1, aString2);
ComputeDifferences(Matrix,
aString1, aString2,
Length(aString1), Length(aString2),
Result);
end;
end.