Java tutorial
/*
 * Copyright 2013 Ali Ok (aliokATapacheDOTorg)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.trnltk.apps.experiments;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.time.StopWatch;
import org.junit.Ignore;
import org.junit.Test;
import org.trnltk.util.Constants;

import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Manual (ignored by default) benchmark that sorts the same large list of strings twice:
 * once with the natural {@link String} ordering and once with a Turkish-locale
 * {@link Collator} at {@link Collator#PRIMARY} strength, printing the elapsed time of each.
 * <p>
 * Try with -Xmx7000m -Xms5000m
 *
 * @deprecated Collator is too slow!
 * On a pretty powerful machine, sorting 35M words took:
 * <ul>
 * <li>0:00:03.429 without the collator</li>
 * <li>0:02:29.257 with the collator with the strength PRIMARY</li>
 * </ul>
 */
@Deprecated
public class TurkishCollatorPerformanceTest {

    /** Number of times the sample word list is appended to build the benchmark input. */
    private static final int REPEAT_COUNT = 1 * 1000 * 1000;

    /**
     * Sample data, one {@code "<word> <count>"} entry per line. Only the word is used;
     * the count is parsed solely to validate the line shape.
     * <p>
     * NOTE(review): in this copy of the file the entry {@code " 1746\n"} has no word token,
     * so it trims down to a single token and is rejected by the validation in
     * {@link #getList()}. Several other words also look like they lost their non-ASCII
     * letters. Presumably an encoding problem -- restore the data from the original source
     * before running the benchmark.
     */
    private static final String WORD_COUNTS = "aklamada 2735\n"
            + "gn 2678\n"
            + "ekilde 2405\n"
            + "zere 2325\n"
            + "tm 2285\n"
            + "10 2252\n"
            + "ylnda 2223\n"
            + "gn 2151\n"
            + "\". 2140\n"
            + "5 2107\n"
            + "4 2018\n"
            + "... 1940\n"
            + "Trkiye'de 1933\n"
            + "ynelik 1925\n"
            + "zerinde 1910\n"
            + "zel 1906\n"
            + "yksek 1812\n"
            + " 1746\n"
            + "kt 1732\n"
            + "yle 1727\n"
            + "u 1693\n"
            + "kan 1653\n"
            + "; 1608\n"
            + "gvenlik 1602\n"
            + "6 1570\n"
            + "srasnda 1569\n"
            + "sz 1554\n"
            + "srail 1539\n"
            + "aklad 1528\n"
            + "wn 1505\n"
            + "qn 1505\n"
            + "xn 1505\n"
            + "zn 1505\n"
            + "n 1505\n"
            + "Dileri 1489\n";

    /**
     * Sorts a freshly built list without and then with the Turkish collator and prints how
     * long each sort took. The list is rebuilt between the two runs so that the second sort
     * does not start from already-sorted input.
     */
    @Test
    @Ignore
    public void testCollatorPerformance() {
        final Collator collator = Collator.getInstance(Constants.TURKISH_LOCALE);
        collator.setStrength(Collator.PRIMARY);

        List<String> biggerList = getList();
        System.out.println("Entry count : " + biggerList.size());

        final StopWatch stopWatch = new StopWatch();

        // Baseline: natural String ordering.
        stopWatch.start();
        Collections.sort(biggerList);
        stopWatch.stop();
        System.out.println("w/o collator, it took " + stopWatch.toString());

        // Same input again, this time ordered by the collator.
        biggerList = getList();
        stopWatch.reset();
        stopWatch.start();
        Collections.sort(biggerList, collator);
        stopWatch.stop();
        System.out.println("w/ collator, it took " + stopWatch.toString());
    }

    /**
     * Builds the benchmark input: the words parsed from {@link #WORD_COUNTS}, repeated
     * {@link #REPEAT_COUNT} times in their original order. Progress is printed after every
     * tenth of the repetitions.
     *
     * @return a new mutable list holding {@code REPEAT_COUNT * <number of words>} entries
     * @throws IllegalArgumentException if a data line does not consist of exactly two
     *                                  space-separated tokens
     */
    private List<String> getList() {
        final List<String> wordList = new ArrayList<String>();
        final Iterable<String> lines = Splitter.on("\n").trimResults().omitEmptyStrings().split(WORD_COUNTS);
        for (String line : lines) {
            final List<String> words = Lists
                    .newArrayList(Splitter.on(" ").trimResults().omitEmptyStrings().split(line));
            // Pass the line as a format argument: using raw data as the format string
            // would misbehave as soon as an entry contained a '%'.
            Validate.isTrue(words.size() == 2, "Expected '<word> <count>' but got: '%s'", line);
            wordList.add(words.get(0));
        }

        // The final size is known up front; presizing avoids repeatedly re-allocating and
        // copying a backing array that ends up holding tens of millions of references.
        final List<String> biggerList = new ArrayList<String>(REPEAT_COUNT * wordList.size());
        for (int i = 0; i < REPEAT_COUNT; i++) {
            biggerList.addAll(wordList);
            if (i % (REPEAT_COUNT / 10) == 0) {
                System.out.println("Adding " + i + ". list to the bigger list");
            }
        }
        return biggerList;
    }
}