List of usage examples for the split method of com.google.common.base.Splitter
@CheckReturnValue public Iterable<String> split(final CharSequence sequence)
From source file:yrun.commands.Ykill.java
public static void main(String[] args) throws IOException { YarnClient yarnClient = YarnClient.createYarnClient(); YarnConfiguration yarnConfiguration = new YarnConfiguration(); yarnClient.init(yarnConfiguration);//from ww w . ja va 2s.co m yarnClient.start(); boolean debug = false; try { Splitter splitter = Splitter.on('_'); for (String arg : args) { List<String> list = toList(splitter.split(arg)); if (list.size() != 3) { System.err.println("Application Id " + arg + " is not a valid application id."); } else { String prefix = list.get(0); if (!prefix.equals("application")) { System.err.println("Application Id " + arg + " is not a valid application id."); } else { try { long clusterTimestamp = Long.parseLong(list.get(1)); int id = Integer.parseInt(list.get(2)); ApplicationId applicationId = ApplicationId.newInstance(clusterTimestamp, id); yarnClient.killApplication(applicationId); System.out.println("Killed\t" + arg + ""); } catch (Exception e) { if (debug) { e.printStackTrace(); } System.err.println("Error while trying to kill " + arg + "."); } } } } } finally { yarnClient.stop(); yarnClient.close(); } }
From source file:com.cedarsoft.serialization.SplittingPerformanceRunner.java
public static void main(String[] args) throws Exception { final String uri = "http://www.cedarsoft.com/some/slashes/1.0.0"; run("String.plit", new Callable<String>() { @Override//from w ww . ja v a 2 s . co m public String call() throws Exception { String[] parts = uri.split("/"); return parts[parts.length - 1]; } }); run("Splitter", new Callable<String>() { @Override public String call() throws Exception { Splitter splitter = Splitter.on("/"); Iterable<String> parts = splitter.split(uri); Iterator<String> iterator = parts.iterator(); while (true) { String current = iterator.next(); if (!iterator.hasNext()) { return current; } } } }); run("static Splitter", new Callable<String>() { @Override public String call() throws Exception { Iterable<String> parts = SPLITTER.split(uri); Iterator<String> iterator = parts.iterator(); while (true) { String current = iterator.next(); if (!iterator.hasNext()) { return current; } } } }); run("indexOf", new Callable<String>() { @Override public String call() throws Exception { int index = uri.lastIndexOf("/"); return uri.substring(index + 1); } }); }
From source file:com.technobium.MultinomialLogisticRegression.java
public static void main(String[] args) throws Exception { // this test trains a 3-way classifier on the famous Iris dataset. // a similar exercise can be accomplished in R using this code: // library(nnet) // correct = rep(0,100) // for (j in 1:100) { // i = order(runif(150)) // train = iris[i[1:100],] // test = iris[i[101:150],] // m = multinom(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, train) // correct[j] = mean(predict(m, newdata=test) == test$Species) // }//from ww w . j ava2 s . c o m // hist(correct) // // Note that depending on the training/test split, performance can be better or worse. // There is about a 5% chance of getting accuracy < 90% and about 20% chance of getting accuracy // of 100% // // This test uses a deterministic split that is neither outstandingly good nor bad RandomUtils.useTestSeed(); Splitter onComma = Splitter.on(","); // read the data List<String> raw = Resources.readLines(Resources.getResource("iris.csv"), Charsets.UTF_8); // holds features List<Vector> data = Lists.newArrayList(); // holds target variable List<Integer> target = Lists.newArrayList(); // for decoding target values Dictionary dict = new Dictionary(); // for permuting data later List<Integer> order = Lists.newArrayList(); for (String line : raw.subList(1, raw.size())) { // order gets a list of indexes order.add(order.size()); // parse the predictor variables Vector v = new DenseVector(5); v.set(0, 1); int i = 1; Iterable<String> values = onComma.split(line); for (String value : Iterables.limit(values, 4)) { v.set(i++, Double.parseDouble(value)); } data.add(v); // and the target target.add(dict.intern(Iterables.get(values, 4))); } // randomize the order ... 
original data has each species all together // note that this randomization is deterministic Random random = RandomUtils.getRandom(); Collections.shuffle(order, random); // select training and test data List<Integer> train = order.subList(0, 100); List<Integer> test = order.subList(100, 150); logger.warn("Training set = {}", train); logger.warn("Test set = {}", test); // now train many times and collect information on accuracy each time int[] correct = new int[test.size() + 1]; for (int run = 0; run < 200; run++) { OnlineLogisticRegression lr = new OnlineLogisticRegression(3, 5, new L2(1)); // 30 training passes should converge to > 95% accuracy nearly always but never to 100% for (int pass = 0; pass < 30; pass++) { Collections.shuffle(train, random); for (int k : train) { lr.train(target.get(k), data.get(k)); } } // check the accuracy on held out data int x = 0; int[] count = new int[3]; for (Integer k : test) { Vector vt = lr.classifyFull(data.get(k)); int r = vt.maxValueIndex(); count[r]++; x += r == target.get(k) ? 
1 : 0; } correct[x]++; if (run == 199) { Vector v = new DenseVector(5); v.set(0, 1); int i = 1; Iterable<String> values = onComma.split("6.0,2.7,5.1,1.6,versicolor"); for (String value : Iterables.limit(values, 4)) { v.set(i++, Double.parseDouble(value)); } Vector vt = lr.classifyFull(v); for (String value : dict.values()) { System.out.println("target:" + value); } int t = dict.intern(Iterables.get(values, 4)); int r = vt.maxValueIndex(); boolean flag = r == t; lr.close(); Closer closer = Closer.create(); try { FileOutputStream byteArrayOutputStream = closer .register(new FileOutputStream(new File("model.txt"))); DataOutputStream dataOutputStream = closer .register(new DataOutputStream(byteArrayOutputStream)); PolymorphicWritable.write(dataOutputStream, lr); } finally { closer.close(); } } } // verify we never saw worse than 95% correct, for (int i = 0; i < Math.floor(0.95 * test.size()); i++) { System.out.println(String.format("%d trials had unacceptable accuracy of only %.0f%%: ", correct[i], 100.0 * i / test.size())); } // nor perfect System.out.println(String.format("%d trials had unrealistic accuracy of 100%%", correct[test.size() - 1])); }
From source file:com.mapr.PurchaseLog.java
public static void main(String[] args) throws IOException { Options opts = new Options(); CmdLineParser parser = new CmdLineParser(opts); try {/*from www . j av a2 s. co m*/ parser.parseArgument(args); } catch (CmdLineException e) { System.err.println("Usage: -count <number>G|M|K [ -users number ] log-file user-profiles"); return; } Joiner withTab = Joiner.on("\t"); // first generate lots of user definitions SchemaSampler users = new SchemaSampler( Resources.asCharSource(Resources.getResource("user-schema.txt"), Charsets.UTF_8).read()); File userFile = File.createTempFile("user", "tsv"); BufferedWriter out = Files.newBufferedWriter(userFile.toPath(), Charsets.UTF_8); for (int i = 0; i < opts.users; i++) { out.write(withTab.join(users.sample())); out.newLine(); } out.close(); // now generate a session for each user Splitter onTabs = Splitter.on("\t"); Splitter onComma = Splitter.on(","); Random gen = new Random(); SchemaSampler intermediate = new SchemaSampler( Resources.asCharSource(Resources.getResource("hit_step.txt"), Charsets.UTF_8).read()); final int COUNTRY = users.getFieldNames().indexOf("country"); final int CAMPAIGN = intermediate.getFieldNames().indexOf("campaign_list"); final int SEARCH_TERMS = intermediate.getFieldNames().indexOf("search_keywords"); Preconditions.checkState(COUNTRY >= 0, "Need country field in user schema"); Preconditions.checkState(CAMPAIGN >= 0, "Need campaign_list field in step schema"); Preconditions.checkState(SEARCH_TERMS >= 0, "Need search_keywords field in step schema"); out = Files.newBufferedWriter(new File(opts.out).toPath(), Charsets.UTF_8); for (String line : Files.readAllLines(userFile.toPath(), Charsets.UTF_8)) { long t = (long) (TimeUnit.MILLISECONDS.convert(30, TimeUnit.DAYS) * gen.nextDouble()); List<String> user = Lists.newArrayList(onTabs.split(line)); // pick session length int n = (int) Math.floor(-30 * Math.log(gen.nextDouble())); for (int i = 0; i < n; i++) { // time on page int dt = (int) Math.floor(-20000 * 
Math.log(gen.nextDouble())); t += dt; // hit specific values JsonNode step = intermediate.sample(); // check for purchase double p = 0.01; List<String> campaigns = Lists.newArrayList(onComma.split(step.get("campaign_list").asText())); List<String> keywords = Lists.newArrayList(onComma.split(step.get("search_keywords").asText())); if ((user.get(COUNTRY).equals("us") && campaigns.contains("5")) || (user.get(COUNTRY).equals("jp") && campaigns.contains("7")) || keywords.contains("homer") || keywords.contains("simpson")) { p = 0.5; } String events = gen.nextDouble() < p ? "1" : "-"; out.write(Long.toString(t)); out.write("\t"); out.write(line); out.write("\t"); out.write(withTab.join(step)); out.write("\t"); out.write(events); out.write("\n"); } } out.close(); }
From source file:edu.cmu.lti.oaqa.knn4qa.apps.ExtractDataAndQueryAsSparseVectors.java
public static void main(String[] args) { String optKeys[] = { CommonParams.MAX_NUM_QUERY_PARAM, MAX_NUM_DATA_PARAM, CommonParams.MEMINDEX_PARAM, IN_QUERIES_PARAM, OUT_QUERIES_PARAM, OUT_DATA_PARAM, TEXT_FIELD_PARAM, TEST_QTY_PARAM, }; String optDescs[] = { CommonParams.MAX_NUM_QUERY_DESC, MAX_NUM_DATA_DESC, CommonParams.MEMINDEX_DESC, IN_QUERIES_DESC, OUT_QUERIES_DESC, OUT_DATA_DESC, TEXT_FIELD_DESC, TEST_QTY_DESC }; boolean hasArg[] = { true, true, true, true, true, true, true, true }; ParamHelper prmHlp = null;/* w w w . j a va2 s.c o m*/ try { prmHlp = new ParamHelper(args, optKeys, optDescs, hasArg); CommandLine cmd = prmHlp.getCommandLine(); Options opt = prmHlp.getOptions(); int maxNumQuery = Integer.MAX_VALUE; String tmpn = cmd.getOptionValue(CommonParams.MAX_NUM_QUERY_PARAM); if (tmpn != null) { try { maxNumQuery = Integer.parseInt(tmpn); } catch (NumberFormatException e) { UsageSpecify(CommonParams.MAX_NUM_QUERY_PARAM, opt); } } int maxNumData = Integer.MAX_VALUE; tmpn = cmd.getOptionValue(MAX_NUM_DATA_PARAM); if (tmpn != null) { try { maxNumData = Integer.parseInt(tmpn); } catch (NumberFormatException e) { UsageSpecify(MAX_NUM_DATA_PARAM, opt); } } String memIndexPref = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM); if (null == memIndexPref) { UsageSpecify(CommonParams.MEMINDEX_PARAM, opt); } String textField = cmd.getOptionValue(TEXT_FIELD_PARAM); if (null == textField) { UsageSpecify(TEXT_FIELD_PARAM, opt); } textField = textField.toLowerCase(); int fieldId = -1; for (int i = 0; i < FeatureExtractor.mFieldNames.length; ++i) if (FeatureExtractor.mFieldNames[i].compareToIgnoreCase(textField) == 0) { fieldId = i; break; } if (-1 == fieldId) { Usage("Wrong field index, should be one of the following: " + String.join(",", FeatureExtractor.mFieldNames), opt); } InMemForwardIndex indx = new InMemForwardIndex( FeatureExtractor.indexFileName(memIndexPref, FeatureExtractor.mFieldNames[fieldId])); BM25SimilarityLucene bm25simil = new 
BM25SimilarityLucene(FeatureExtractor.BM25_K1, FeatureExtractor.BM25_B, indx); String inQueryFile = cmd.getOptionValue(IN_QUERIES_PARAM); String outQueryFile = cmd.getOptionValue(OUT_QUERIES_PARAM); if ((inQueryFile == null) != (outQueryFile == null)) { Usage("You should either specify both " + IN_QUERIES_PARAM + " and " + OUT_QUERIES_PARAM + " or none of them", opt); } String outDataFile = cmd.getOptionValue(OUT_DATA_PARAM); tmpn = cmd.getOptionValue(TEST_QTY_PARAM); int testQty = 0; if (tmpn != null) { try { testQty = Integer.parseInt(tmpn); } catch (NumberFormatException e) { UsageSpecify(TEST_QTY_PARAM, opt); } } ArrayList<DocEntry> testDocEntries = new ArrayList<DocEntry>(); ArrayList<DocEntry> testQueryEntries = new ArrayList<DocEntry>(); ArrayList<TrulySparseVector> testDocVectors = new ArrayList<TrulySparseVector>(); ArrayList<TrulySparseVector> testQueryVectors = new ArrayList<TrulySparseVector>(); if (outDataFile != null) { BufferedWriter out = new BufferedWriter( new OutputStreamWriter(CompressUtils.createOutputStream(outDataFile))); ArrayList<DocEntryExt> docEntries = indx.getDocEntries(); for (int id = 0; id < Math.min(maxNumData, docEntries.size()); ++id) { DocEntry e = docEntries.get(id).mDocEntry; TrulySparseVector v = bm25simil.getDocSparseVector(e, false); if (id < testQty) { testDocEntries.add(e); testDocVectors.add(v); } outputVector(out, v); } out.close(); } Splitter splitOnSpace = Splitter.on(' ').trimResults().omitEmptyStrings(); if (outQueryFile != null) { BufferedReader inpText = new BufferedReader( new InputStreamReader(CompressUtils.createInputStream(inQueryFile))); BufferedWriter out = new BufferedWriter( new OutputStreamWriter(CompressUtils.createOutputStream(outQueryFile))); String queryText = XmlHelper.readNextXMLIndexEntry(inpText); for (int queryQty = 0; queryText != null && queryQty < maxNumQuery; queryText = XmlHelper .readNextXMLIndexEntry(inpText), queryQty++) { Map<String, String> queryFields = null; // 1. 
Parse a query try { queryFields = XmlHelper.parseXMLIndexEntry(queryText); } catch (Exception e) { System.err.println("Parsing error, offending QUERY:\n" + queryText); throw new Exception("Parsing error."); } String fieldText = queryFields.get(FeatureExtractor.mFieldsSOLR[fieldId]); if (fieldText == null) { fieldText = ""; } ArrayList<String> tmpa = new ArrayList<String>(); for (String s : splitOnSpace.split(fieldText)) tmpa.add(s); DocEntry e = indx.createDocEntry(tmpa.toArray(new String[tmpa.size()])); TrulySparseVector v = bm25simil.getDocSparseVector(e, true); if (queryQty < testQty) { testQueryEntries.add(e); testQueryVectors.add(v); } outputVector(out, v); } out.close(); } int testedQty = 0, diffQty = 0; // Now let's do some testing for (int iq = 0; iq < testQueryEntries.size(); ++iq) { DocEntry queryEntry = testQueryEntries.get(iq); TrulySparseVector queryVector = testQueryVectors.get(iq); for (int id = 0; id < testDocEntries.size(); ++id) { DocEntry docEntry = testDocEntries.get(id); TrulySparseVector docVector = testDocVectors.get(id); float val1 = bm25simil.compute(queryEntry, docEntry); float val2 = TrulySparseVector.scalarProduct(queryVector, docVector); ++testedQty; if (Math.abs(val1 - val2) > 1e5) { System.err.println( String.format("Potential mismatch BM25=%f <-> scalar product=%f", val1, val2)); ++diffQty; } } } if (testedQty > 0) System.out.println(String.format("Tested %d Mismatched %d", testedQty, diffQty)); } catch (ParseException e) { Usage("Cannot parse arguments: " + e, prmHlp != null ? prmHlp.getOptions() : null); e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); System.err.println("Terminating due to an exception: " + e); System.exit(1); } }
From source file:org.raml.jaxrs.parser.source.Utilities.java
/**
 * Converts the package name of a class into a relative path with one path element per
 * package segment, e.g. {@code a.b.c} becomes {@code a/b/c}.
 *
 * @param clazz the class whose package name is converted
 * @return the relative path built from the package segments; the empty path when the
 *     package name has no segments
 */
private static Path transformIntoPath(Class<?> clazz) {
    String packageName = clazz.getPackage().getName();

    Path result = Paths.get("");
    for (String segment : Splitter.on('.').omitEmptyStrings().split(packageName)) {
        result = result.resolve(segment);
    }
    return result;
}
From source file:com.joshondesign.bespokeide.Util.java
public static TextArray codeToTextArray(CharSequence methodCode) { final Splitter splitter = Splitter.on('\n'); final Iterable<String> lines = splitter.split(methodCode); String sep = ""; for (String s : lines) { final String s2 = CharMatcher.WHITESPACE.trimLeadingFrom(s); if (s2.length() < s.length()) { sep = s.substring(0, (s.length() - s2.length())); break; }//from ww w .java 2 s . c o m } if ("".equals(sep)) { return new TextArray(new CharSequence[] { methodCode }, ""); } return new TextArray(Splitter.on(sep).split(methodCode), sep); }
From source file:org.onehippo.cms7.essentials.dashboard.utils.PayloadUtils.java
public static List<String> extractValueList(final String value) { if (Strings.isNullOrEmpty(value)) { return Collections.emptyList(); }//w w w. ja va 2s . c om final Splitter splitter = Splitter.on(",").omitEmptyStrings().trimResults(); final Iterable<String> iterable = splitter.split(value); return Lists.newArrayList(iterable); }
From source file:com.facebook.presto.recordservice.RecordServiceConnectorConfig.java
/**
 * Parses a comma-separated list of planner addresses into a set of host addresses.
 *
 * <p>Entries are trimmed and empty entries are skipped. Each entry is parsed with
 * {@code HostAddress.fromString} and given {@code RECORDSERVICE_PLANNER_DEFAULT_PORT}
 * through {@code withDefaultPort}.
 *
 * @param plannerHostPorts comma-separated planner addresses
 * @return the parsed addresses as an immutable set
 */
public static ImmutableSet<HostAddress> parsePlannerHostPorts(String plannerHostPorts) {
    Iterable<String> entries = Splitter.on(',').omitEmptyStrings().trimResults().split(plannerHostPorts);
    Iterable<HostAddress> addresses = transform(entries,
            entry -> HostAddress.fromString(entry).withDefaultPort(RECORDSERVICE_PLANNER_DEFAULT_PORT));
    return ImmutableSet.copyOf(addresses);
}
From source file:org.bitstrings.maven.plugins.portallocator.util.Helpers.java
/**
 * Splits {@code text} on {@code separator}, trimming whitespace from every piece.
 *
 * @param text the text to split
 * @param separator the literal separator string
 * @param omitEmpty when {@code true}, pieces that are empty after trimming are dropped
 * @return a lazily evaluated iterable over the trimmed pieces
 */
public static Iterable<String> iterateOnSplit(String text, String separator, boolean omitEmpty) {
    Splitter splitter = Splitter.on(separator).trimResults();

    if (omitEmpty) {
        // Splitter is immutable: omitEmptyStrings() returns a new instance, so the result
        // must be reassigned. Previously it was discarded and the flag had no effect.
        splitter = splitter.omitEmptyStrings();
    }

    return splitter.split(text);
}