Java tutorial
//package org.amr.arabic; /* * Date : 25th of March 2008 * the class is Arabic string reshaper, this class is targeting Android platform * * By : Ahmed Essam Naiem * E-Mail : ahmed-essam@live.com * Web : www.ahmed-essam.com * * Updated Date : 20 of March 2009 * The class has been updated to include the Lam Alef Reshaping techniques * * By : Amr Ismail gawish * Email : amr.gawish@gmail.com * Web : www.amr-gawish.com * * Updated : 8th of June 2009 * Adding comments and Announcing Open Source * * Updated: 6th of May 2010 * Enahancing Functionality by Amine : bakhtout@gmail.com * * * */ public class ArabicReshaper { /** * The reshaped Word String */ private String _returnString; /** * The Reshaped Word * @return reshaped Word */ public String getReshapedWord() { return _returnString; } public static char DEFINED_CHARACTERS_ORGINAL_ALF_UPPER_MDD = 0x0622; public static char DEFINED_CHARACTERS_ORGINAL_ALF_UPPER_HAMAZA = 0x0623; public static char DEFINED_CHARACTERS_ORGINAL_ALF_LOWER_HAMAZA = 0x0625; public static char DEFINED_CHARACTERS_ORGINAL_ALF = 0x0627; public static char DEFINED_CHARACTERS_ORGINAL_LAM = 0x0644; public static char[][] LAM_ALEF_GLPHIES = { { 15270, 65270, 65269 }, { 15271, 65272, 65271 }, { 1575, 65276, 65275 }, { 1573, 65274, 65273 } }; public static char[] HARAKATE = { '\u064B', '\u064C', '\u064D', '\u064E', '\u064F', '\u0650', '\u0651', '\u0652', '\u0653', '\u0654', '\u0655', '\u0656' }; public static char[][] ARABIC_GLPHIES = { { 1569, 65152, 65163, 65164, 65152, 3 }, { 1570, 65153, 65153, 65154, 65154, 2 }, { 1571, 65155, 65155, 65156, 65156, 2 }, { 1572, 65157, 65157, 65158, 65158, 2 }, { 1573, 65159, 65159, 65160, 65160, 2 }, { 1575, 65165, 65165, 65166, 65166, 2 }, { 1576, 65167, 65169, 65170, 65168, 4 }, { 1577, 65171, 65171, 65172, 65172, 2 }, { 1578, 65173, 65175, 65176, 65174, 4 }, { 1579, 65177, 65179, 65180, 65178, 4 }, { 1580, 65181, 65183, 65184, 65182, 4 }, { 1581, 65185, 65187, 65188, 65186, 4 }, { 1582, 65189, 65191, 65192, 65190, 4 }, { 1583, 65193, 65193, 65194, 65194, 2 }, { 1584, 65195, 65195, 65196, 65196, 2 }, { 1585, 65197, 65197, 65198, 65198, 2 }, { 1586, 65199, 65199, 65200, 65200, 2 }, { 1587, 65201, 65203, 65204, 65202, 4 }, { 1588, 65205, 65207, 65208, 65206, 4 }, { 1589, 65209, 65211, 65212, 65210, 4 }, { 1590, 65213, 65215, 65216, 65214, 4 }, { 1591, 65217, 65219, 65218, 65220, 4 }, { 1592, 65221, 65223, 65222, 65222, 4 }, { 1593, 65225, 65227, 65228, 65226, 4 }, { 1594, 65229, 65231, 65232, 65230, 4 }, { 1601, 65233, 65235, 65236, 65234, 4 }, { 1602, 65237, 65239, 65240, 65238, 4 }, { 1603, 65241, 65243, 65244, 65242, 4 }, { 1604, 65245, 65247, 65248, 65246, 4 }, { 1605, 65249, 65251, 65252, 65250, 4 }, { 1606, 65253, 65255, 65256, 65254, 4 }, { 1607, 65257, 65259, 65260, 65258, 4 }, { 1608, 65261, 65261, 65262, 65262, 2 }, { 1609, 65263, 65263, 65264, 65264, 2 }, { 1574, 65161, 65163, 65163, 65162, 2 }, { 1610, 65265, 65267, 65268, 65266, 4 } }; /** * Searching for the letter and Get the right shape for the character depends on the location specified * @param target The character that needs to get its form * @param location The location of the Form letter * @return The letter with its right shape */ private char getReshapedGlphy(char target, int location) { //Iterate over the 36 characters in the GLPHIES Matrix for (int n = 0; n < ARABIC_GLPHIES.length; n++) { //Check if the character equals the target character if (ARABIC_GLPHIES[n][0] == target) { //Get the right shape for the character, depends on the location return ARABIC_GLPHIES[n][location]; } } //get the same character, If not found in the GLPHIES Matrix return target; } /** * Define which Character Type is This, that has 2,3 or 4 Forms variation? * @param target The character, that needed * @return the integer number indicated the Number of forms the Character has, return 2 otherwise */ private int getGlphyType(char target) { //Iterate over the 36 characters in the GLPHIES Matrix for (int n = 0; n < 36; n++) { //Check if the character equals the target character if (ARABIC_GLPHIES[n][0] == target) //Get the number of Forms that the character has return ARABIC_GLPHIES[n][5]; } //Return the number 2 Otherwise return 2; } private boolean isHaraka(char target) { return getHaraka(target) > 0; } private char getHaraka(char target) { //Iterate over the 36 characters in the GLPHIES Matrix for (int n = 0; n < HARAKATE.length; n++) { //Check if the character equals the target character if (HARAKATE[n] == target) //Get the number of Forms that the character has return HARAKATE[n]; } return 0; } /** * Get LamAlef right Character Presentation of the character * @param candidateAlef The letter that is supposed to Alef * @param candidateLam The letter that is supposed to Lam * @param isEndOfWord Is those characters at the end of the Word, to get its right form * @return Reshaped character of the LamAlef */ private char getLamAlef(char candidateAlef, char candidateLam, boolean isEndOfWord) { //The shift rate, depends if the the end of the word or not! int shiftRate = 1; //The reshaped Lam Alef char reshapedLamAlef = 0; //Check if at the end of the word if (isEndOfWord) shiftRate++; //check if the Lam is matching the candidate Lam if ((int) DEFINED_CHARACTERS_ORGINAL_LAM == (int) candidateLam) { //Check which Alef is matching after the Lam and get Its form if ((int) candidateAlef == (int) DEFINED_CHARACTERS_ORGINAL_ALF_UPPER_MDD) { reshapedLamAlef = LAM_ALEF_GLPHIES[0][shiftRate]; } if ((int) candidateAlef == (int) DEFINED_CHARACTERS_ORGINAL_ALF_UPPER_HAMAZA) { reshapedLamAlef = LAM_ALEF_GLPHIES[1][shiftRate]; } if ((int) candidateAlef == (int) DEFINED_CHARACTERS_ORGINAL_ALF_LOWER_HAMAZA) { reshapedLamAlef = LAM_ALEF_GLPHIES[3][shiftRate]; } if ((int) candidateAlef == (int) DEFINED_CHARACTERS_ORGINAL_ALF) { reshapedLamAlef = LAM_ALEF_GLPHIES[2][shiftRate]; } } //return the ReshapedLamAlef return reshapedLamAlef; } /** * Constructor of the Class * It doesn't support Alef Lam by Default * @param unshapedWord The unShaped Word */ public ArabicReshaper(String unshapedWord) { _returnString = reshapeIt(unshapedWord); } /** * The Enhanced Arabic Reshaper Constructor with Lam Alef Support * @param unshapedWord The unShaped Word * @param supportAlefLam To check If to support AlefLam or Not */ public ArabicReshaper(String unshapedWord, boolean supportAlefLam) { DecomposedWord decomposedWord = new DecomposedWord(unshapedWord); if (!supportAlefLam) { _returnString = reshapeIt(new String(decomposedWord.stripedRegularLetters)); } else { _returnString = reshapeItWithLamAlef(new String(decomposedWord.stripedRegularLetters)); } _returnString = decomposedWord.reconstructWord(_returnString); } class DecomposedWord { char[] stripedHarakates; int[] harakatesPositions; char[] stripedRegularLetters; int[] lettersPositions; DecomposedWord(String unshapedWord) { int wordLength = unshapedWord.length(); int harakatesCount = 0; for (int index = 0; index < wordLength; index++) { if (isHaraka(unshapedWord.charAt(index))) { harakatesCount++; } } harakatesPositions = new int[harakatesCount]; stripedHarakates = new char[harakatesCount]; lettersPositions = new int[wordLength - harakatesCount]; stripedRegularLetters = new char[wordLength - harakatesCount]; harakatesCount = 0; int letterCount = 0; for (int index = 0; index < unshapedWord.length(); index++) { if (isHaraka(unshapedWord.charAt(index))) { harakatesPositions[harakatesCount] = index; stripedHarakates[harakatesCount] = unshapedWord.charAt(index); harakatesCount++; } else { lettersPositions[letterCount] = index; stripedRegularLetters[letterCount] = unshapedWord.charAt(index); letterCount++; } } } String reconstructWord(String reshapedWord) { char[] wordWithHarakates = new char[reshapedWord.length() + stripedHarakates.length]; for (int index = 0; index < lettersPositions.length; index++) { wordWithHarakates[lettersPositions[index]] = reshapedWord.charAt(index); } for (int index = 0; index < harakatesPositions.length; index++) { wordWithHarakates[harakatesPositions[index]] = stripedHarakates[index]; } return new String(wordWithHarakates); } } /** * Main Reshaping function, Doesn't Support LamAlef * @param unshapedWord The unReshaped Word to Reshape * @return The Reshaped Word without the LamAlef Support */ public String reshapeIt(String unshapedWord) { //The reshaped Word to Return StringBuffer reshapedWord = new StringBuffer(""); //The Word length int wordLength = unshapedWord.length(); //The Word Letters char[] wordLetters = new char[wordLength]; //Copy the unreshapedWord to the WordLetters Character Array unshapedWord.getChars(0, wordLength, wordLetters, 0); //for the first letter reshapedWord.append(getReshapedGlphy(wordLetters[0], 2));//2 is the Form when the Letter is at the start of the word //iteration from the second till the second to last for (int i = 1; i < wordLength - 1; i++) { int beforeLast = i - 1; //Check if the Letter Before Last has only 2 Forms, for the current Letter to be as a start for a new Word! if (getGlphyType(wordLetters[beforeLast]) == 2) { //checking if it's only has 2 shapes //If the letter has only 2 shapes, then it doesnt matter which position it is, It'll be always the second form reshapedWord.append(getReshapedGlphy(wordLetters[i], 2)); } else { //Then it should be in the middle which should be placed in its right form [3] reshapedWord.append(getReshapedGlphy(wordLetters[i], 3)); } } //check for the last letter Before last has 2 forms, that means that the last Letter will be alone. if (getGlphyType(wordLetters[wordLength - 2]) == 2) { //If the letter has only 2 shapes, then it doesnt matter which position it is, It'll be always the second form reshapedWord.append(getReshapedGlphy(wordLetters[wordLength - 1], 1)); } else { //Put the right form of the character, 4 for the last letter in the word reshapedWord.append(getReshapedGlphy(wordLetters[wordLength - 1], 4)); } //Return the ReshapedWord return reshapedWord.toString(); } /** * Main Reshaping Function, With LamAlef Support * @param unshapedWord The UnReshaped Word * @return The Shaped Word with Lam Alef Support */ public String reshapeItWithLamAlef(String unshapedWord) { //The reshaped Word to Return StringBuffer reshapedWord = new StringBuffer(""); //The Word length int wordLength = unshapedWord.length(); //The Word Letters char[] wordLetters = new char[wordLength]; //The reshaped Letters char[] reshapedLetters = new char[wordLength]; //Indicator Character, to Tell that lam is exist char lamIndicator = 43;//The '+' //Copy the unreshapedWord to the WordLetters Character Array unshapedWord.getChars(0, wordLength, wordLetters, 0); //Check if the Word Length is 0, then return empty String if (wordLength == 0) { return ""; } //Check if the Word length is 1, then return the Reshaped One letter, which is the same character of input if (wordLength == 1) { return getReshapedGlphy(wordLetters[0], 1) + ""; } //Check if the word length is 2, Check if the Word is LamAlef if (wordLength == 2) { //Assign Candidate Lam char lam = wordLetters[0]; //Assign Candidate Alef char alef = wordLetters[1]; //Check if The word is Lam Alef. if (getLamAlef(alef, lam, true) > 0) { return (char) getLamAlef(alef, lam, true) + " "; } } //For the First Letter reshapedLetters[0] = getReshapedGlphy(wordLetters[0], 2); //The current Letter char currentLetter = wordLetters[0]; /** * The Main Iterator */ //Iterate over the word from the second character till the second to the last for (int i = 1; i < wordLength - 1; i++) { //Check if the Letters are Lam Alef if (getLamAlef(wordLetters[i], currentLetter, true) > 0) { //Check if the Letter before the Lam is 2 Forms Letter, to Make the Lam Alef as its the end of the Word if ((i - 2 < 0) || ((i - 2 >= 0) && (getGlphyType(wordLetters[i - 2]) == 2))) { //Mark the letter of Lam as Lam Indicator reshapedLetters[i - 1] = lamIndicator; //Assign Lam Alef to the Letter of Alef reshapedLetters[i] = (char) getLamAlef(wordLetters[i], currentLetter, true); } else { //The Letter before the Lam is more than 2 Forms Letter //Mark the letter of Lam as Lam Indicator reshapedLetters[i - 1] = lamIndicator; //Assign Lam Alef to the Letter of Alef reshapedLetters[i] = (char) getLamAlef(wordLetters[i], currentLetter, false); } } else { //The Word doesn't have LamAlef int beforeLast = i - 1; //Check if the Letter Before Last has only 2 Forms, for the current Letter to be as a start for a new Word! if (getGlphyType(wordLetters[beforeLast]) == 2) { //If the letter has only 2 shapes, then it doesnt matter which position it is, It'll be always the second form reshapedLetters[i] = getReshapedGlphy(wordLetters[i], 2); } else { //Then it should be in the middle which should be placed in its right form [3] reshapedLetters[i] = getReshapedGlphy(wordLetters[i], 3); } } //Assign the CurrentLetter as the Word Letter currentLetter = wordLetters[i]; } /** * The Last Letters Check */ //Check if the Letters are Lam Alef if (getLamAlef(wordLetters[wordLength - 1], wordLetters[wordLength - 2], true) > 0) { //Check if the Letter before the Lam is 2 Forms Letter, to Make the Lam Alef as its the end of the Word if (getGlphyType(wordLetters[wordLength - 3]) == 2) { //check for the last letter //Mark the letter of Lam as Lam Indicator reshapedLetters[wordLength - 2] = lamIndicator; //Assign Lam Alef to the Letter of Alef reshapedLetters[wordLength - 1] = (char) getLamAlef(wordLetters[wordLength - 1], wordLetters[wordLength - 2], true); } else { //Mark the letter of Lam as Lam Indicator reshapedLetters[wordLength - 2] = lamIndicator; //Assign Lam Alef to the Letter of Alef reshapedLetters[wordLength - 1] = (char) getLamAlef(wordLetters[wordLength - 1], wordLetters[wordLength - 2], false); } } else { //check for the last letter Before last has 2 forms, that means that the last Letter will be alone. if (getGlphyType(wordLetters[wordLength - 2]) == 2) { //If the letter has only 2 shapes, then it doesn't matter which position it is, It'll be always the second form reshapedLetters[wordLength - 1] = getReshapedGlphy(wordLetters[wordLength - 1], 1); } else { //Put the right form of the character, 4 for the last letter in the word reshapedLetters[wordLength - 1] = getReshapedGlphy(wordLetters[wordLength - 1], 4); } } /** * Assign the Final Results of Shaped Word */ //Iterate over the Reshaped Letters and remove the Lam Indicators for (int i = 0; i < reshapedLetters.length; i++) { //Check if the Letter is Lam Indicator if (reshapedLetters[i] != lamIndicator) reshapedWord.append(reshapedLetters[i]); } //Return the Reshaped Word return reshapedWord.toString(); } }