Java tutorial
package languages; /*- * #%L * Hangman Solver * %% * Copyright (C) 2016 Frederik Kammel * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import algorithm.HangmanSolver; import com.github.vatbub.common.core.ArrayListWithSortableKey; import com.github.vatbub.common.core.logging.FOKLogger; import common.AppConfig; import common.AtomicDouble; import org.apache.commons.io.FileUtils; import java.io.File; import java.io.IOException; import java.net.URL; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; import java.util.regex.Pattern; @SuppressWarnings("SameParameterValue") public class TabFile { /** * The values in this *.tab file. */ private final ArrayList<ArrayListWithSortableKey<String>> values = new ArrayList<>(); /** * The column headers of this *.tab file. */ private String[] columnHeaders; /** * Creates a new object representation of the specified *.tab file. * * @param file The {@link URL} pointing to the desired *.tab file. * @throws IOException if the file cannot be read. */ public TabFile(URL file) throws IOException { readFile(file); } /** * Creates an empty *.tab file. */ public TabFile() { createNewFile(); } public TabFile(int columnCount) { createNewFile(columnCount); } public TabFile(String[] columnHeaders) { createNewFile(columnHeaders); } public TabFile(File fileToRead) throws IOException { this(fileToRead.toURI().toURL()); } /** * Creates a new object representation of the specified *.tab file. * * @param originFileName The absolute fileName of the *.tab file. * @throws IOException if the file cannot be read. */ public TabFile(String originFileName) throws IOException { this(new File(originFileName)); } public static void main(String[] args) { if (args[0].equals("optimize")) { Scanner sc = new Scanner(System.in); String targetPath; String originPath; System.out.println("Please enter the path of the original *.tab-files:"); originPath = sc.nextLine(); System.out.println( "Please enter the path where you wish to save the optimized *.tab-files (Directories will be created, existing files with same filenames will be overwritten):"); targetPath = sc.nextLine(); sc.close(); File folder = new File(originPath); File[] listOfFiles = folder.listFiles(); assert listOfFiles != null; for (File file : listOfFiles) { if (!file.getName().equals("LICENSE")) { TabFile origin; try { String originFileName = file.getAbsolutePath(); System.out.print("Reading file '" + originFileName + "'..."); origin = new TabFile(originFileName); System.out.println("Done!"); System.out.print("Optimizing file..."); TabFile res = TabFile.optimizeDictionaries(origin, 2, true); System.out.println("Done!"); String targetFileName = targetPath + File.separator + file.getName(); System.out.println("Saving new file as '" + targetFileName + "'..."); res.save(targetFileName); System.out.println("Done!"); } catch (IOException e) { FOKLogger.log(TabFile.class.getName(), Level.SEVERE, "An error occurred", e); } } } } else if (args[0].equals("merge")) { System.err.println( "Merging dictionaries is not supported anymore. Please checkout commit 1a6fa16 to merge dictionaries."); } } /** * Compares two strings and returns how equal they are as a percentage. The * two strings must be of equal length. * * @param str1 The first string to compare * @param str2 The second string to compare * @return {@code 0} if the two strings are completely different, {@code 1}, * if they are completely equal and values in between if they are * neither completely equal nor completely different. */ public static double stringCorrelation(String str1, String str2) { if (str1.length() != str2.length()) { throw new IllegalArgumentException("str1 and str2 must be of equal length"); } double equalLetters = 0; for (int i = 0; i < str1.length(); i++) { if (str1.substring(i, i + 1).equalsIgnoreCase(str2.substring(i, i + 1))) { equalLetters = equalLetters + 1; } } return equalLetters / str1.length(); } /** * Optimizes the dictionary of the app. As The dictionaries are a resource * of the app, this method is currently only intended to run in a dev * environment.<br> * <br> * Optimization means in this case that words are split up at spaces and * punctuation is deleted. * * @param origin The original {@code TabFile} that is to be optimized. * @param originWordColumnIndex The column index with the words to be optimized. * @param preserveColumnIndex If {@code true}, the words are written to the same column * index as in the origin-file and the values of all other * columns are preserved, if {@code false}, the optimized word * list will be written into the first column (index: 0) and all * other columns will be deleted. * @return A {@code TabFile} with the optimized word list. */ public static TabFile optimizeDictionaries(TabFile origin, int originWordColumnIndex, boolean preserveColumnIndex) { // Create new TabFile with one column String[] colHeads; if (preserveColumnIndex) { colHeads = origin.getColumnHeaders(); } else { colHeads = new String[] { "words" }; } TabFile res = new TabFile(colHeads); for (int lineIndex = 0; lineIndex < origin.getRowCount(); lineIndex++) { ArrayListWithSortableKey<String> line = origin.values.get(lineIndex); // Split at spaces String[] words; try { words = line.get(originWordColumnIndex).split(" "); } catch (ArrayIndexOutOfBoundsException e) { words = "".split(" "); } for (String word : words) { // Remove punctuation word = word.replaceAll("(" + Pattern.quote(".") + "|,)", ""); // Add word to result if (preserveColumnIndex) { String[] tempValues = new String[origin.getColumnCount()]; for (int i = 0; i < origin.getColumnCount(); i++) { if (i != originWordColumnIndex) { tempValues[i] = origin.getValueAt(lineIndex, i); } else { tempValues[i] = word; } } res.addRow(tempValues); } else { res.addRow(new String[] { word }); } } } return res; } public static TabFile mergeDictionaries(TabFile cldrFile, TabFile wiktFile, int columnIndex) { return mergeDictionaries(cldrFile, wiktFile, columnIndex, true); } public static TabFile mergeDictionaries(TabFile cldrFile, TabFile wiktFile, int columnIndex, boolean preserveColumnIndex) { if (cldrFile.getColumnCount() != wiktFile.getColumnCount()) { throw new RuntimeException("cldrFile and wiktFile must have an equal columnCount."); } String[] colHeads; if (preserveColumnIndex) { colHeads = wiktFile.getColumnHeaders(); } else { colHeads = new String[] { "words" }; } TabFile res = new TabFile(colHeads); // Copy wikt file for (int i = 0; i < wiktFile.getRowCount(); i++) { String word = wiktFile.getValueAt(i, columnIndex); // Add word to result if (preserveColumnIndex) { String[] tempValues = new String[cldrFile.getColumnCount()]; for (int t = 0; t < wiktFile.getColumnCount(); t++) { if (t != columnIndex) { tempValues[t] = wiktFile.getValueAt(i, t); } else { tempValues[t] = word; } } res.addRow(tempValues); } else { res.addRow(new String[] { word }); } } // Copy cldr file for (int i = 0; i < cldrFile.getRowCount(); i++) { String word = cldrFile.getValueAt(i, columnIndex); // Only add word if word cannot be found in wikt file List<Integer> index = wiktFile.indexOf(word, columnIndex); if (index.size() == 0) { // Add word to result if (preserveColumnIndex) { String[] tempValues = new String[cldrFile.getColumnCount()]; for (int t = 0; t < cldrFile.getColumnCount(); t++) { if (i != columnIndex) { tempValues[t] = cldrFile.getValueAt(i, t); } else { tempValues[t] = word; } } res.addRow(tempValues); } else { res.addRow(new String[] { word }); } } } res.setColumnHeader("wiktionary-cldr-merge", 0); return res; } /** * Reads the content of the specified *.tab file to this objects variables. * * @param file The file to read. * @throws IOException if the file cannot be read. */ private void readFile(URL file) throws IOException { // open the file Scanner scan = new Scanner(file.openStream(), "UTF-8"); // get the column headers columnHeaders = scan.nextLine().split(" "); while (scan.hasNextLine()) { ArrayListWithSortableKey<String> temp = new ArrayListWithSortableKey<>( Arrays.asList(scan.nextLine().split(" "))); while (temp.size() < this.getColumnCount()) { // Fill it up temp.add(""); } values.add(temp); } scan.close(); } /** * Creates an empty *.tab file. */ private void createNewFile() { columnHeaders = new String[0]; } private void createNewFile(int columnCount) { columnHeaders = new String[columnCount]; } private void createNewFile(String[] columnHeaders) { this.columnHeaders = columnHeaders; } /** * Returns the column header at the given index * * @param index The index of the column header to be returned * @return The column header of the column with the specified index */ public String getColumnHeader(int index) { return columnHeaders[index]; } /** * Sets the column header at the given index. * * @param newHeader The new header * @param index The index of the header to replace. */ public void setColumnHeader(String newHeader, int index) { columnHeaders[index] = newHeader; } /** * Returns an array that contains all column headers. * * @return An array that contains all column headers. */ public String[] getColumnHeaders() { return columnHeaders; } /** * Returns the column count of the *.tab file * * @return The column count of this file */ public int getColumnCount() { return columnHeaders.length; } /** * Returns the row count of the *.tab file excluding the column headers. * * @return The row count of this file */ public int getRowCount() { return values.size(); } /** * Returns the value at the specified position in the grid * * @param row The row index of the desired value * @param column The column index of the desired value * @return The value at the specified position and {@code ""} if the * requested address is outside the bounds (no exception thrown) */ public String getValueAt(int row, int column) { try { return values.get(row).get(column); } catch (ArrayIndexOutOfBoundsException e) { return ""; } } /** * Searches the entire file for the given value. The comparison is case * sensitive. * * @param valueToFind The value to find. * @return The "outer" list is a list of columns, the the "inner" list is a * list of hits. That means that {@code indexOf("someValue").get(0)} * returns a list of row indexes where the value was found in column * 0. */ public List<List<Integer>> indexOf(String valueToFind) { return indexOf(valueToFind, false); } /** * Searches the entire file for the given value. The comparison is case * insensitive. * * @param valueToFind The value to find. * @return The "outer" list is a list of columns, the the "inner" list is a * list of hits. That means that {@code indexOf("someValue").get(0)} * returns a list of row indexes where the value was found in column * 0. */ public List<List<Integer>> indexOfIgnoreCase(String valueToFind) { return indexOf(valueToFind, true); } /** * Searches the entire file for the given value. * * @param valueToFind The value to find. * @param ignoreCase if {@code true}, the string comparison will be case * insensitive. * @return The "outer" list is a list of columns, the the "inner" list is a * list of hits. That means that {@code indexOf("someValue").get(0)} * returns a list of row indexes where the value was found in column * 0. */ public List<List<Integer>> indexOf(String valueToFind, boolean ignoreCase) { List<List<Integer>> res = new ArrayList<>(); for (int i = 0; i < this.getColumnCount(); i++) { res.add(indexOf(valueToFind, i, ignoreCase)); } return res; } /** * Searches for the given value in the given column. The comparison is case * insensitive. * * @param valueToFind The value to find. * @param columnIndex The index of the column to be searched. * @return A list of row indexes where the value was found. */ public List<Integer> indexOfIgnoreCase(String valueToFind, int columnIndex) { return indexOf(valueToFind, columnIndex, true); } /** * Searches for the given value in the given column. The comparison is case * sensitive. * * @param valueToFind The value to find. * @param columnIndex The index of the column to be searched. * @return A list of row indexes where the value was found. */ public List<Integer> indexOf(String valueToFind, int columnIndex) { return indexOf(valueToFind, columnIndex, false); } /** * Searches for the given value in the given column. * * @param valueToFind The value to find. * @param columnIndex The index of the column to be searched. * @param ignoreCase if {@code true}, the string comparison will be case * insensitive. * @return A list of row indexes where the value was found. */ public List<Integer> indexOf(String valueToFind, int columnIndex, boolean ignoreCase) { List<Integer> res = new ArrayList<>(); for (int i = 0; i < this.getRowCount(); i++) { if (ignoreCase) { if (this.getValueAt(i, columnIndex).equalsIgnoreCase(valueToFind)) { res.add(i); } } else { if (this.getValueAt(i, columnIndex).equals(valueToFind)) { res.add(i); } } } return res; } /** * Replaces the old value at the specified positions in the *.tab-file with * the new value. * * @param newValue The new value o fthe given cells * @param columnsAndRows A list of column- and row indexes where the value will be * replaced. See the return value of {@link #indexOf(String)} to * see how the list needs to be built up. * @see #indexOf(String) */ public void setValueAt(String newValue, List<List<Integer>> columnsAndRows) { for (int c = 0; c < columnsAndRows.size(); c++) { setValueAt(newValue, columnsAndRows.get(c), c); } } /** * Replaces the old value at the specified positions in the *.tab-file with * the new value. * * @param newValue The new value of the given cells * @param rows A list of rows the values will be replaced * @param column The column of the cells to be replaced * @see #indexOf(String, int) */ public void setValueAt(String newValue, List<Integer> rows, int column) { for (int row : rows) { setValueAt(newValue, row, column); } } /** * Replaces the old value at the given position in the *.tab-file with the * new Value. This method cannot add rows to the *.tab-file. To add rows, * use {@link #addRow} * * @param newValue The new value of the given cell * @param row The row of the cell to be replaced. * @param column The column of the cell to be replaced. */ public void setValueAt(String newValue, int row, int column) { values.get(row).set(column, newValue); } public void addRow(String[] newValues) { if (newValues.length != getColumnCount()) { throw new ArrayIndexOutOfBoundsException( "The given values-array dows not match the column-count of this file. (The file has " + this.getColumnCount() + " columns and you wanted to add " + newValues.length + " columns)"); } values.add(new ArrayListWithSortableKey<>(Arrays.asList(newValues))); } /** * Gets all values with the given length. * * @param column The column to look for values. * @param length The length of the returned values. * @return A {@link List} with all values in the specified column that have * the specified length. */ public List<String> getValuesWithLength(int column, int length) { List<String> res = new ArrayList<>(); for (int i = 0; i < this.getRowCount(); i++) { if (this.getValueAt(i, column).length() == length) { res.add(this.getValueAt(i, column)); } } return res; } /** * Returns the value that has the highest {@link #stringCorrelation} with * the given {@link String}. * * @param column The column to look for values. * @param value The {@link String} to be compared. Only values with equal * length as {@code value} are returned due to the way * {@link #stringCorrelation} works. * @param ignoredWords Words to be filtered out before doing the comparison. * @return The value in the specified column that has the highest * correlation. */ public String getValueWithHighestCorrelation(int column, String value, List<String> ignoredWords) { ArrayList<Thread> threads = new ArrayList<>(); AtomicInteger currentIndex = new AtomicInteger(0); AtomicInteger maxIndex = new AtomicInteger(-1); AtomicDouble maxCorr = new AtomicDouble(-1); List<String> ignoredWordsCopy = new ArrayList<>(ignoredWords); // split all entries up that contain a space List<String> stringsToSplit = new ArrayList<>(); // Find words to split for (String word : ignoredWordsCopy) { if (word.contains(" ")) { stringsToSplit.add(word); } } // Actually to the splitting for (String word : stringsToSplit) { ignoredWordsCopy.remove(word); ignoredWordsCopy.addAll(Arrays.asList(word.split(" "))); } for (int i = 0; i < AppConfig.getParallelThreadCount(); i++) { threads.add(new Thread(() -> { int index = currentIndex.getAndIncrement(); while (index < getRowCount()) { if (value.length() == getValueAt(index, column).length() && !ignoredWordsCopy.contains(getValueAt(index, column)) && !HangmanSolver.currentWordContainsWrongChar(getValueAt(index, column))) { double corr = stringCorrelation(value, getValueAt(index, column)); if (corr > maxCorr.get()) { maxCorr.set(corr); maxIndex.set(index); } } // Grab the next index index = currentIndex.getAndIncrement(); } })); threads.get(i).start(); } // Wait for threads for (int i = 0; i < AppConfig.getParallelThreadCount(); i++) { try { threads.get(i).join(); } catch (InterruptedException e) { FOKLogger.log(TabFile.class.getName(), Level.SEVERE, "An error occurred", e); } } return getValueAt(maxIndex.get(), column); } /** * Sorts this TabFile. * * @param sortKey The column that will be compared to sort values. */ public void sort(int sortKey) { setSortKey(sortKey); Collections.sort(values); } public void sortDescending(int sortKey) { setSortKey(sortKey); values.sort(Collections.reverseOrder()); } private void setSortKey(int sortKey) { for (ArrayListWithSortableKey<String> line : values) { line.setSortKey(sortKey); } } /** * Saves this TabFile at the specified location. * * @param fileName The absolute qualified filename where the file should be * saved. Existing files will be overwritten. */ public void save(String fileName) { save(new File(fileName)); } /** * Saves this TabFile to the specified {@link File} * * @param destinationFile The {@link File} where this TabFile shall be saved in. * Existing files will be overwritten. */ public void save(File destinationFile) { System.out.print("Generating empty file in memory..."); // Generate the file StringBuilder str = new StringBuilder(); System.out.println("Done!"); // Column headers System.out.print("Processing column headers..."); for (String colHead : columnHeaders) { str.append(colHead); if (!colHead.equals(columnHeaders[columnHeaders.length - 1])) { str.append(" "); } } System.out.println("Done!"); str.append("\n"); System.out.print("Processing table contents..."); // Values for (ArrayListWithSortableKey<String> line : values) { for (String el : line) { str.append(el); if (!el.equals(line.get(line.size() - 1))) { str.append(" "); } } str.append("\n"); // str = str + String.join(" ", line) + "\n"; } System.out.println("Done!"); System.out.print("Writing to disc..."); try { FileUtils.writeStringToFile(destinationFile, str.toString(), "UTF-8"); } catch (IOException e) { FOKLogger.log(TabFile.class.getName(), Level.SEVERE, "An error occurred", e); } System.out.println("Done!"); } }