Hello
I would like to thank you very much for this article, which has been very helpful to me. Since my application is written in C#, I had to port your code. I also made some optimizations, added handling for lam-alef ligatures, etc. Please find my code below.
using System;
using System.Data;
using System.Text;
namespace GNS.Mobility.Utilities
{
/// <summary>
/// Shapes Arabic text for display: every base letter in U+0621..U+0652 is
/// replaced by the presentation-form glyph matching its position in the word
/// (isolated, initial, medial or final), lam followed by an alef variant is
/// merged into a single lam-alef ligature, characters without a glyph in the
/// table (for example the diacritics U+064B..U+0652) are removed, and the
/// result is put into visual (reversed) order.
/// </summary>
public sealed class Arabization
{
    // Column layout shared by lookupArray and lamalefarray. Column 0 holds the
    // base code point; the remaining columns hold the glyph to use when the
    // letter is joined to ...
    private const int endIdx = 1; // ... the previous letter only
    private const int iniIdx = 2; // ... the next letter only
    private const int midIdx = 3; // ... both neighbours
    private const int isoIdx = 4; // ... neither neighbour

    // Code point of the letter lam, the only letter that forms a ligature here.
    private const int lamUnicode = 0x644;

    // Sorted list of every non-zero code point in lookupArray and lamalefarray,
    // i.e. everything this class accepts as input or can emit as output.
    // Populated by the static constructor and searched by IsArabic.
    private static readonly int[] arabicArray;

    // Lam-alef ligatures. Column 0 is the alef variant following the lam; the
    // other columns are the ligature glyphs, indexed like lookupArray.
    private static readonly int[,] lamalefarray =
    {
        {0x622, 0xfef6, 0xfef5, 0xfef6, 0xfef5},
        {0x623, 0xfef8, 0xfef7, 0xfef8, 0xfef7},
        {0x625, 0xfefa, 0xfef9, 0xfefa, 0xfef9},
        {0x627, 0xfefc, 0xfefb, 0xfefc, 0xfefb},
    };

    // One row per code point from U+0621 to U+0652, in code-point order, so a
    // row is found with (ch - lookupArray[0, 0]). A row whose glyph columns are
    // all 0 marks a character that is dropped from the output.
    private static readonly int[,] lookupArray =
    {
        {0x621, 0xfe80, 0xfe80, 0xfe80, 0xfe80},
        {0x622, 0xfe82, 0xfe81, 0xfe82, 0xfe81},
        {0x623, 0xfe84, 0xfe83, 0xfe84, 0xfe83},
        {0x624, 0xfe86, 0xfe85, 0xfe86, 0xfe85},
        {0x625, 0xfe88, 0xfe87, 0xfe88, 0xfe87},
        {0x626, 0xfe8a, 0xfe8b, 0xfe8c, 0xfe89},
        {0x627, 0xfe8e, 0xfe8d, 0xfe8e, 0xfe8d},
        {0x628, 0xfe90, 0xfe91, 0xfe92, 0xfe8f},
        // Teh marbuta never joins forward, so like every other set2 letter its
        // "medial" glyph is the final form (the table used to hold 0xfe93 here).
        {0x629, 0xfe94, 0xfe93, 0xfe94, 0xfe93},
        {0x62a, 0xfe96, 0xfe97, 0xfe98, 0xfe95},
        {0x62b, 0xfe9a, 0xfe9b, 0xfe9c, 0xfe99},
        {0x62c, 0xfe9e, 0xfe9f, 0xfea0, 0xfe9d},
        {0x62d, 0xfea2, 0xfea3, 0xfea4, 0xfea1},
        {0x62e, 0xfea6, 0xfea7, 0xfea8, 0xfea5},
        {0x62f, 0xfeaa, 0xfea9, 0xfeaa, 0xfea9},
        {0x630, 0xfeac, 0xfeab, 0xfeac, 0xfeab},
        {0x631, 0xfeae, 0xfead, 0xfeae, 0xfead},
        {0x632, 0xfeb0, 0xfeaf, 0xfeb0, 0xfeaf},
        {0x633, 0xfeb2, 0xfeb3, 0xfeb4, 0xfeb1},
        {0x634, 0xfeb6, 0xfeb7, 0xfeb8, 0xfeb5},
        {0x635, 0xfeba, 0xfebb, 0xfebc, 0xfeb9},
        {0x636, 0xfebe, 0xfebf, 0xfec0, 0xfebd},
        {0x637, 0xfec2, 0xfec3, 0xfec4, 0xfec1},
        {0x638, 0xfec6, 0xfec7, 0xfec8, 0xfec5},
        {0x639, 0xfeca, 0xfecb, 0xfecc, 0xfec9},
        {0x63a, 0xfece, 0xfecf, 0xfed0, 0xfecd},
        {0x63b, 0, 0, 0, 0},
        {0x63c, 0, 0, 0, 0},
        {0x63d, 0, 0, 0, 0},
        {0x63e, 0, 0, 0, 0},
        {0x63f, 0, 0, 0, 0},
        {0x640, 0x0640, 0x0640, 0x0640, 0x0640},
        {0x641, 0xfed2, 0xfed3, 0xfed4, 0xfed1},
        {0x642, 0xfed6, 0xfed7, 0xfed8, 0xfed5},
        {0x643, 0xfeda, 0xfedb, 0xfedc, 0xfed9},
        {0x644, 0xfede, 0xfedf, 0xfee0, 0xfedd},
        {0x645, 0xfee2, 0xfee3, 0xfee4, 0xfee1},
        {0x646, 0xfee6, 0xfee7, 0xfee8, 0xfee5},
        {0x647, 0xfeea, 0xfeeb, 0xfeec, 0xfee9},
        {0x648, 0xfeee, 0xfeed, 0xfeee, 0xfeed},
        {0x649, 0xfef0, 0xfeef, 0xfef0, 0xfeef},
        {0x64a, 0xfef2, 0xfef3, 0xfef4, 0xfef1},
        {0x64b, 0, 0, 0, 0},
        {0x64c, 0, 0, 0, 0},
        {0x64d, 0, 0, 0, 0},
        {0x64e, 0, 0, 0, 0},
        {0x64f, 0, 0, 0, 0},
        {0x650, 0, 0, 0, 0},
        {0x651, 0, 0, 0, 0},
        {0x652, 0, 0, 0, 0},
    };

    // Letters that join on both sides. A letter links to its predecessor only
    // when that predecessor is in this set. Must stay sorted (binary search).
    private static readonly int[] set1 =
    {
        0x626, 0x628, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x633,
        0x634, 0x635, 0x636, 0x637, 0x638, 0x639, 0x63a, 0x640,
        0x641, 0x642, 0x643, 0x644, 0x645, 0x646, 0x647, 0x64a
    };

    // Letters that join to the previous letter only. A following set1 or set2
    // letter makes the current letter "linked after". Must stay sorted.
    private static readonly int[] set2 =
    {
        0x622, 0x623, 0x624, 0x625, 0x627, 0x629, 0x62f, 0x630,
        0x631, 0x632, 0x648, 0x649
    };

    /// <summary>
    /// Builds <c>arabicArray</c> from both glyph tables. The lam-alef ligatures
    /// are included so that text produced by <see cref="Arabize"/> is itself
    /// recognised as Arabic by <c>IsArabic</c>.
    /// </summary>
    static Arabization()
    {
        int[] collected = new int[lookupArray.Length + lamalefarray.Length];
        int count = 0;

        // foreach over a rectangular array visits every cell in row-major order.
        foreach (int code in lookupArray)
        {
            if (code != 0)
            {
                collected[count++] = code;
            }
        }

        foreach (int code in lamalefarray)
        {
            if (code != 0)
            {
                collected[count++] = code;
            }
        }

        arabicArray = new int[count];
        Array.Copy(collected, arabicArray, count);
        Array.Sort(arabicArray);
    }

    // All members are static; the private constructor prevents instantiation.
    private Arabization()
    {
    }

    /// <summary>
    /// Returns true when <paramref name="ch"/> is one of the code points listed
    /// in the glyph tables (base letters, presentation forms or ligatures).
    /// </summary>
    private static bool IsArabic(char ch)
    {
        // Every table entry lives in page 0x06 or 0xfe; reject the rest cheaply.
        int page = ch >> 8;
        if (page != 0x06 && page != 0xfe)
        {
            return false;
        }

        return Array.BinarySearch(arabicArray, (int)ch) >= 0;
    }

    /// <summary>Returns true when <paramref name="ch"/> is listed in set1.</summary>
    private static bool IsInSet1(char ch)
    {
        return Array.BinarySearch(set1, (int)ch) >= 0;
    }

    /// <summary>Returns true when <paramref name="ch"/> is listed in set2.</summary>
    private static bool IsInSet2(char ch)
    {
        return Array.BinarySearch(set2, (int)ch) >= 0;
    }

    /// <summary>
    /// Returns true when <paramref name="ch"/> has a row in lookupArray whose
    /// glyph is 0, i.e. the character is removed from the shaped output.
    /// </summary>
    private static bool IsDroppedFromOutput(char ch)
    {
        int row = ch - lookupArray[0, 0];
        return row >= 0
            && row < lookupArray.GetLength(0)
            && lookupArray[row, isoIdx] == 0;
    }

    /// <summary>
    /// Index of the nearest character before <paramref name="from"/> that
    /// survives into the output, or -1 when there is none.
    /// </summary>
    private static int PreviousKeptIndex(string text, int from)
    {
        for (int p = from - 1; p >= 0; p--)
        {
            if (!IsDroppedFromOutput(text[p]))
            {
                return p;
            }
        }

        return -1;
    }

    /// <summary>
    /// Index of the nearest character after <paramref name="from"/> that
    /// survives into the output, or -1 when there is none.
    /// </summary>
    private static int NextKeptIndex(string text, int from)
    {
        for (int n = from + 1; n < text.Length; n++)
        {
            if (!IsDroppedFromOutput(text[n]))
            {
                return n;
            }
        }

        return -1;
    }

    /// <summary>
    /// Row of lamalefarray whose alef variant equals <paramref name="ch"/>,
    /// or -1 when <paramref name="ch"/> does not form a ligature with lam.
    /// </summary>
    private static int LamAlefRow(char ch)
    {
        for (int row = 0; row < lamalefarray.GetLength(0); row++)
        {
            if (lamalefarray[row, 0] == ch)
            {
                return row;
            }
        }

        return -1;
    }

    /// <summary>
    /// Reverses, in place, the <paramref name="length"/> characters of
    /// <paramref name="input"/> that start at <paramref name="index"/>.
    /// </summary>
    private static void StringReverse(StringBuilder input, int index, int length)
    {
        int low = index;
        int high = index + length - 1;
        while (low < high)
        {
            char held = input[high];
            input[high] = input[low];
            input[low] = held;
            low++;
            high--;
        }
    }

    /// <summary>Reverses the whole builder in place.</summary>
    private static void StringReverse(StringBuilder input)
    {
        StringReverse(input, 0, input.Length);
    }

    /// <summary>
    /// Puts shaped text into visual order: the whole buffer is reversed, then
    /// every run of non-Arabic characters is reversed back so that it keeps
    /// its original left-to-right reading order.
    /// </summary>
    private static void ArabicReverse(StringBuilder input)
    {
        StringReverse(input);

        int position = 0;
        while (position < input.Length)
        {
            if (IsArabic(input[position]))
            {
                position++;
                continue;
            }

            int runEnd = position + 1;
            while (runEnd < input.Length && !IsArabic(input[runEnd]))
            {
                runEnd++;
            }

            StringReverse(input, position, runEnd - position);
            position = runEnd;
        }
    }

    /// <summary>
    /// Shapes and visually reorders <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Text in logical order using base Arabic letters.</param>
    /// <returns>
    /// The shaped, reversed text. The input is returned unchanged when it is
    /// null or empty, or when it already contains a presentation-form glyph
    /// (it is then taken to be shaped already).
    /// </returns>
    public static string Arabize(string input)
    {
        if (input == null || input.Length == 0)
        {
            return input;
        }

        StringBuilder shaped = new StringBuilder(input);
        int firstCode = lookupArray[0, 0];
        int rowCount = lookupArray.GetLength(0);

        // i walks the input and k the matching slot in the output. They drift
        // apart each time a character is dropped or a ligature is formed, but
        // shaped[k + m] == input[i + m] always holds for the unprocessed tail.
        for (int i = 0, k = 0; i < input.Length; i++, k++)
        {
            char ch = input[i];
            int row = ch - firstCode;
            if (row < 0 || row >= rowCount)
            {
                if (IsArabic(ch))
                {
                    // A presentation form in the input: already shaped.
                    return input;
                }

                continue;
            }

            if (IsDroppedFromOutput(ch))
            {
                shaped.Remove(k, 1);
                // The next input character now sits at slot k, so cancel the
                // k++ the loop header is about to perform.
                k--;
                continue;
            }

            // Neighbours are looked up past dropped characters: once those are
            // removed the surrounding letters become adjacent and must join.
            int previous = PreviousKeptIndex(input, i);
            int next = NextKeptIndex(input, i);
            bool linkBefore = previous >= 0 && IsInSet1(input[previous]);
            bool linkAfter = next >= 0
                && (IsInSet1(input[next]) || IsInSet2(input[next]));

            if (ch == lamUnicode && next >= 0)
            {
                int ligatureRow = LamAlefRow(input[next]);
                if (ligatureRow >= 0)
                {
                    shaped[k] = (char)lamalefarray[ligatureRow, linkBefore ? midIdx : iniIdx];
                    // Swallow the alef and any dropped characters before it.
                    shaped.Remove(k + 1, next - i);
                    i = next;
                    continue;
                }
            }

            int form;
            if (linkBefore)
            {
                form = linkAfter ? midIdx : endIdx;
            }
            else
            {
                form = linkAfter ? iniIdx : isoIdx;
            }

            shaped[k] = (char)lookupArray[row, form];
        }

        ArabicReverse(shaped);
        return shaped.ToString();
    }
}
}
|