Example usage for com.google.common.base Splitter split

List of usage examples for com.google.common.base Splitter split

Introduction

In this page you can find the example usage for com.google.common.base Splitter split.

Prototype

@CheckReturnValue
public Iterable<String> split(final CharSequence sequence) 

Source Link

Document

Splits sequence into string components and makes them available through an Iterator , which may be lazily evaluated.

Usage

From source file:yrun.commands.Ykill.java

public static void main(String[] args) throws IOException {
    YarnClient yarnClient = YarnClient.createYarnClient();
    YarnConfiguration yarnConfiguration = new YarnConfiguration();
    yarnClient.init(yarnConfiguration);//from   ww  w .  ja  va 2s.co m
    yarnClient.start();
    boolean debug = false;
    try {
        Splitter splitter = Splitter.on('_');
        for (String arg : args) {
            List<String> list = toList(splitter.split(arg));
            if (list.size() != 3) {
                System.err.println("Application Id " + arg + " is not a valid application id.");
            } else {
                String prefix = list.get(0);
                if (!prefix.equals("application")) {
                    System.err.println("Application Id " + arg + " is not a valid application id.");
                } else {
                    try {
                        long clusterTimestamp = Long.parseLong(list.get(1));
                        int id = Integer.parseInt(list.get(2));
                        ApplicationId applicationId = ApplicationId.newInstance(clusterTimestamp, id);
                        yarnClient.killApplication(applicationId);
                        System.out.println("Killed\t" + arg + "");
                    } catch (Exception e) {
                        if (debug) {
                            e.printStackTrace();
                        }
                        System.err.println("Error while trying to kill " + arg + ".");
                    }
                }
            }
        }
    } finally {
        yarnClient.stop();
        yarnClient.close();
    }
}

From source file:com.cedarsoft.serialization.SplittingPerformanceRunner.java

public static void main(String[] args) throws Exception {
    final String uri = "http://www.cedarsoft.com/some/slashes/1.0.0";

    run("String.plit", new Callable<String>() {
        @Override//from  w ww . ja v a  2  s .  co m
        public String call() throws Exception {
            String[] parts = uri.split("/");
            return parts[parts.length - 1];
        }
    });

    run("Splitter", new Callable<String>() {
        @Override
        public String call() throws Exception {
            Splitter splitter = Splitter.on("/");
            Iterable<String> parts = splitter.split(uri);

            Iterator<String> iterator = parts.iterator();
            while (true) {
                String current = iterator.next();
                if (!iterator.hasNext()) {
                    return current;
                }
            }
        }
    });

    run("static Splitter", new Callable<String>() {
        @Override
        public String call() throws Exception {
            Iterable<String> parts = SPLITTER.split(uri);

            Iterator<String> iterator = parts.iterator();
            while (true) {
                String current = iterator.next();
                if (!iterator.hasNext()) {
                    return current;
                }
            }
        }
    });

    run("indexOf", new Callable<String>() {
        @Override
        public String call() throws Exception {
            int index = uri.lastIndexOf("/");
            return uri.substring(index + 1);
        }
    });
}

From source file:com.technobium.MultinomialLogisticRegression.java

public static void main(String[] args) throws Exception {
    // this test trains a 3-way classifier on the famous Iris dataset.
    // a similar exercise can be accomplished in R using this code:
    //    library(nnet)
    //    correct = rep(0,100)
    //    for (j in 1:100) {
    //      i = order(runif(150))
    //      train = iris[i[1:100],]
    //      test = iris[i[101:150],]
    //      m = multinom(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, train)
    //      correct[j] = mean(predict(m, newdata=test) == test$Species)
    //    }//from   ww w . j ava2 s . c  o m
    //    hist(correct)
    //
    // Note that depending on the training/test split, performance can be better or worse.
    // There is about a 5% chance of getting accuracy < 90% and about 20% chance of getting accuracy
    // of 100%
    //
    // This test uses a deterministic split that is neither outstandingly good nor bad

    RandomUtils.useTestSeed();
    Splitter onComma = Splitter.on(",");

    // read the data
    List<String> raw = Resources.readLines(Resources.getResource("iris.csv"), Charsets.UTF_8);

    // holds features
    List<Vector> data = Lists.newArrayList();

    // holds target variable
    List<Integer> target = Lists.newArrayList();

    // for decoding target values
    Dictionary dict = new Dictionary();

    // for permuting data later
    List<Integer> order = Lists.newArrayList();

    for (String line : raw.subList(1, raw.size())) {
        // order gets a list of indexes
        order.add(order.size());

        // parse the predictor variables
        Vector v = new DenseVector(5);
        v.set(0, 1);
        int i = 1;
        Iterable<String> values = onComma.split(line);
        for (String value : Iterables.limit(values, 4)) {
            v.set(i++, Double.parseDouble(value));
        }
        data.add(v);

        // and the target
        target.add(dict.intern(Iterables.get(values, 4)));
    }

    // randomize the order ... original data has each species all together
    // note that this randomization is deterministic
    Random random = RandomUtils.getRandom();
    Collections.shuffle(order, random);

    // select training and test data
    List<Integer> train = order.subList(0, 100);
    List<Integer> test = order.subList(100, 150);
    logger.warn("Training set = {}", train);
    logger.warn("Test set = {}", test);

    // now train many times and collect information on accuracy each time
    int[] correct = new int[test.size() + 1];
    for (int run = 0; run < 200; run++) {
        OnlineLogisticRegression lr = new OnlineLogisticRegression(3, 5, new L2(1));
        // 30 training passes should converge to > 95% accuracy nearly always but never to 100%
        for (int pass = 0; pass < 30; pass++) {
            Collections.shuffle(train, random);
            for (int k : train) {
                lr.train(target.get(k), data.get(k));
            }
        }

        // check the accuracy on held out data
        int x = 0;
        int[] count = new int[3];
        for (Integer k : test) {
            Vector vt = lr.classifyFull(data.get(k));
            int r = vt.maxValueIndex();
            count[r]++;
            x += r == target.get(k) ? 1 : 0;
        }
        correct[x]++;

        if (run == 199) {

            Vector v = new DenseVector(5);
            v.set(0, 1);
            int i = 1;
            Iterable<String> values = onComma.split("6.0,2.7,5.1,1.6,versicolor");
            for (String value : Iterables.limit(values, 4)) {
                v.set(i++, Double.parseDouble(value));
            }

            Vector vt = lr.classifyFull(v);
            for (String value : dict.values()) {
                System.out.println("target:" + value);
            }
            int t = dict.intern(Iterables.get(values, 4));

            int r = vt.maxValueIndex();
            boolean flag = r == t;
            lr.close();

            Closer closer = Closer.create();

            try {
                FileOutputStream byteArrayOutputStream = closer
                        .register(new FileOutputStream(new File("model.txt")));
                DataOutputStream dataOutputStream = closer
                        .register(new DataOutputStream(byteArrayOutputStream));
                PolymorphicWritable.write(dataOutputStream, lr);
            } finally {
                closer.close();
            }
        }
    }

    // verify we never saw worse than 95% correct,
    for (int i = 0; i < Math.floor(0.95 * test.size()); i++) {
        System.out.println(String.format("%d trials had unacceptable accuracy of only %.0f%%: ", correct[i],
                100.0 * i / test.size()));
    }
    // nor perfect
    System.out.println(String.format("%d trials had unrealistic accuracy of 100%%", correct[test.size() - 1]));
}

From source file:com.mapr.PurchaseLog.java

public static void main(String[] args) throws IOException {
    Options opts = new Options();
    CmdLineParser parser = new CmdLineParser(opts);
    try {/*from  www . j  av  a2  s. co m*/
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println("Usage: -count <number>G|M|K [ -users number ]  log-file user-profiles");
        return;
    }

    Joiner withTab = Joiner.on("\t");

    // first generate lots of user definitions
    SchemaSampler users = new SchemaSampler(
            Resources.asCharSource(Resources.getResource("user-schema.txt"), Charsets.UTF_8).read());
    File userFile = File.createTempFile("user", "tsv");
    BufferedWriter out = Files.newBufferedWriter(userFile.toPath(), Charsets.UTF_8);
    for (int i = 0; i < opts.users; i++) {
        out.write(withTab.join(users.sample()));
        out.newLine();
    }
    out.close();

    // now generate a session for each user
    Splitter onTabs = Splitter.on("\t");
    Splitter onComma = Splitter.on(",");

    Random gen = new Random();
    SchemaSampler intermediate = new SchemaSampler(
            Resources.asCharSource(Resources.getResource("hit_step.txt"), Charsets.UTF_8).read());

    final int COUNTRY = users.getFieldNames().indexOf("country");
    final int CAMPAIGN = intermediate.getFieldNames().indexOf("campaign_list");
    final int SEARCH_TERMS = intermediate.getFieldNames().indexOf("search_keywords");
    Preconditions.checkState(COUNTRY >= 0, "Need country field in user schema");
    Preconditions.checkState(CAMPAIGN >= 0, "Need campaign_list field in step schema");
    Preconditions.checkState(SEARCH_TERMS >= 0, "Need search_keywords field in step schema");

    out = Files.newBufferedWriter(new File(opts.out).toPath(), Charsets.UTF_8);

    for (String line : Files.readAllLines(userFile.toPath(), Charsets.UTF_8)) {
        long t = (long) (TimeUnit.MILLISECONDS.convert(30, TimeUnit.DAYS) * gen.nextDouble());
        List<String> user = Lists.newArrayList(onTabs.split(line));

        // pick session length
        int n = (int) Math.floor(-30 * Math.log(gen.nextDouble()));

        for (int i = 0; i < n; i++) {
            // time on page
            int dt = (int) Math.floor(-20000 * Math.log(gen.nextDouble()));
            t += dt;

            // hit specific values
            JsonNode step = intermediate.sample();

            // check for purchase
            double p = 0.01;
            List<String> campaigns = Lists.newArrayList(onComma.split(step.get("campaign_list").asText()));
            List<String> keywords = Lists.newArrayList(onComma.split(step.get("search_keywords").asText()));
            if ((user.get(COUNTRY).equals("us") && campaigns.contains("5"))
                    || (user.get(COUNTRY).equals("jp") && campaigns.contains("7")) || keywords.contains("homer")
                    || keywords.contains("simpson")) {
                p = 0.5;
            }

            String events = gen.nextDouble() < p ? "1" : "-";

            out.write(Long.toString(t));
            out.write("\t");
            out.write(line);
            out.write("\t");
            out.write(withTab.join(step));
            out.write("\t");
            out.write(events);
            out.write("\n");
        }
    }
    out.close();
}

From source file:edu.cmu.lti.oaqa.knn4qa.apps.ExtractDataAndQueryAsSparseVectors.java

public static void main(String[] args) {
    String optKeys[] = { CommonParams.MAX_NUM_QUERY_PARAM, MAX_NUM_DATA_PARAM, CommonParams.MEMINDEX_PARAM,
            IN_QUERIES_PARAM, OUT_QUERIES_PARAM, OUT_DATA_PARAM, TEXT_FIELD_PARAM, TEST_QTY_PARAM, };
    String optDescs[] = { CommonParams.MAX_NUM_QUERY_DESC, MAX_NUM_DATA_DESC, CommonParams.MEMINDEX_DESC,
            IN_QUERIES_DESC, OUT_QUERIES_DESC, OUT_DATA_DESC, TEXT_FIELD_DESC, TEST_QTY_DESC };
    boolean hasArg[] = { true, true, true, true, true, true, true, true };

    ParamHelper prmHlp = null;/*  w w  w . j  a va2 s.c  o m*/

    try {

        prmHlp = new ParamHelper(args, optKeys, optDescs, hasArg);

        CommandLine cmd = prmHlp.getCommandLine();
        Options opt = prmHlp.getOptions();

        int maxNumQuery = Integer.MAX_VALUE;

        String tmpn = cmd.getOptionValue(CommonParams.MAX_NUM_QUERY_PARAM);
        if (tmpn != null) {
            try {
                maxNumQuery = Integer.parseInt(tmpn);
            } catch (NumberFormatException e) {
                UsageSpecify(CommonParams.MAX_NUM_QUERY_PARAM, opt);
            }
        }

        int maxNumData = Integer.MAX_VALUE;
        tmpn = cmd.getOptionValue(MAX_NUM_DATA_PARAM);
        if (tmpn != null) {
            try {
                maxNumData = Integer.parseInt(tmpn);
            } catch (NumberFormatException e) {
                UsageSpecify(MAX_NUM_DATA_PARAM, opt);
            }
        }
        String memIndexPref = cmd.getOptionValue(CommonParams.MEMINDEX_PARAM);
        if (null == memIndexPref) {
            UsageSpecify(CommonParams.MEMINDEX_PARAM, opt);
        }
        String textField = cmd.getOptionValue(TEXT_FIELD_PARAM);
        if (null == textField) {
            UsageSpecify(TEXT_FIELD_PARAM, opt);
        }

        textField = textField.toLowerCase();
        int fieldId = -1;
        for (int i = 0; i < FeatureExtractor.mFieldNames.length; ++i)
            if (FeatureExtractor.mFieldNames[i].compareToIgnoreCase(textField) == 0) {
                fieldId = i;
                break;
            }
        if (-1 == fieldId) {
            Usage("Wrong field index, should be one of the following: "
                    + String.join(",", FeatureExtractor.mFieldNames), opt);
        }

        InMemForwardIndex indx = new InMemForwardIndex(
                FeatureExtractor.indexFileName(memIndexPref, FeatureExtractor.mFieldNames[fieldId]));

        BM25SimilarityLucene bm25simil = new BM25SimilarityLucene(FeatureExtractor.BM25_K1,
                FeatureExtractor.BM25_B, indx);

        String inQueryFile = cmd.getOptionValue(IN_QUERIES_PARAM);
        String outQueryFile = cmd.getOptionValue(OUT_QUERIES_PARAM);
        if ((inQueryFile == null) != (outQueryFile == null)) {
            Usage("You should either specify both " + IN_QUERIES_PARAM + " and " + OUT_QUERIES_PARAM
                    + " or none of them", opt);
        }
        String outDataFile = cmd.getOptionValue(OUT_DATA_PARAM);

        tmpn = cmd.getOptionValue(TEST_QTY_PARAM);
        int testQty = 0;
        if (tmpn != null) {
            try {
                testQty = Integer.parseInt(tmpn);
            } catch (NumberFormatException e) {
                UsageSpecify(TEST_QTY_PARAM, opt);
            }
        }

        ArrayList<DocEntry> testDocEntries = new ArrayList<DocEntry>();
        ArrayList<DocEntry> testQueryEntries = new ArrayList<DocEntry>();
        ArrayList<TrulySparseVector> testDocVectors = new ArrayList<TrulySparseVector>();
        ArrayList<TrulySparseVector> testQueryVectors = new ArrayList<TrulySparseVector>();

        if (outDataFile != null) {
            BufferedWriter out = new BufferedWriter(
                    new OutputStreamWriter(CompressUtils.createOutputStream(outDataFile)));

            ArrayList<DocEntryExt> docEntries = indx.getDocEntries();

            for (int id = 0; id < Math.min(maxNumData, docEntries.size()); ++id) {
                DocEntry e = docEntries.get(id).mDocEntry;
                TrulySparseVector v = bm25simil.getDocSparseVector(e, false);
                if (id < testQty) {
                    testDocEntries.add(e);
                    testDocVectors.add(v);
                }
                outputVector(out, v);
            }

            out.close();

        }

        Splitter splitOnSpace = Splitter.on(' ').trimResults().omitEmptyStrings();

        if (outQueryFile != null) {
            BufferedReader inpText = new BufferedReader(
                    new InputStreamReader(CompressUtils.createInputStream(inQueryFile)));
            BufferedWriter out = new BufferedWriter(
                    new OutputStreamWriter(CompressUtils.createOutputStream(outQueryFile)));

            String queryText = XmlHelper.readNextXMLIndexEntry(inpText);

            for (int queryQty = 0; queryText != null && queryQty < maxNumQuery; queryText = XmlHelper
                    .readNextXMLIndexEntry(inpText), queryQty++) {
                Map<String, String> queryFields = null;
                // 1. Parse a query

                try {
                    queryFields = XmlHelper.parseXMLIndexEntry(queryText);
                } catch (Exception e) {
                    System.err.println("Parsing error, offending QUERY:\n" + queryText);
                    throw new Exception("Parsing error.");
                }

                String fieldText = queryFields.get(FeatureExtractor.mFieldsSOLR[fieldId]);

                if (fieldText == null) {
                    fieldText = "";
                }

                ArrayList<String> tmpa = new ArrayList<String>();
                for (String s : splitOnSpace.split(fieldText))
                    tmpa.add(s);

                DocEntry e = indx.createDocEntry(tmpa.toArray(new String[tmpa.size()]));

                TrulySparseVector v = bm25simil.getDocSparseVector(e, true);
                if (queryQty < testQty) {
                    testQueryEntries.add(e);
                    testQueryVectors.add(v);
                }
                outputVector(out, v);
            }

            out.close();
        }

        int testedQty = 0, diffQty = 0;
        // Now let's do some testing
        for (int iq = 0; iq < testQueryEntries.size(); ++iq) {
            DocEntry queryEntry = testQueryEntries.get(iq);
            TrulySparseVector queryVector = testQueryVectors.get(iq);
            for (int id = 0; id < testDocEntries.size(); ++id) {
                DocEntry docEntry = testDocEntries.get(id);
                TrulySparseVector docVector = testDocVectors.get(id);
                float val1 = bm25simil.compute(queryEntry, docEntry);
                float val2 = TrulySparseVector.scalarProduct(queryVector, docVector);
                ++testedQty;
                if (Math.abs(val1 - val2) > 1e5) {
                    System.err.println(
                            String.format("Potential mismatch BM25=%f <-> scalar product=%f", val1, val2));
                    ++diffQty;
                }
            }
        }
        if (testedQty > 0)
            System.out.println(String.format("Tested %d Mismatched %d", testedQty, diffQty));

    } catch (ParseException e) {
        Usage("Cannot parse arguments: " + e, prmHlp != null ? prmHlp.getOptions() : null);
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }
}

From source file:org.raml.jaxrs.parser.source.Utilities.java

private static Path transformIntoPath(Class<?> clazz) {
    Splitter splitter = Splitter.on('.').omitEmptyStrings();
    Iterable<String> subPackages = splitter.split(clazz.getPackage().getName());

    Path current = Paths.get("");
    for (String subPackage : subPackages) {
        current = current.resolve(subPackage);
    }/*w  w w  .j  ava 2  s  . c  o  m*/

    return current;
}

From source file:com.joshondesign.bespokeide.Util.java

public static TextArray codeToTextArray(CharSequence methodCode) {
    final Splitter splitter = Splitter.on('\n');
    final Iterable<String> lines = splitter.split(methodCode);
    String sep = "";

    for (String s : lines) {
        final String s2 = CharMatcher.WHITESPACE.trimLeadingFrom(s);
        if (s2.length() < s.length()) {
            sep = s.substring(0, (s.length() - s2.length()));
            break;
        }//from   ww w .java 2  s . c  o m
    }
    if ("".equals(sep)) {
        return new TextArray(new CharSequence[] { methodCode }, "");
    }
    return new TextArray(Splitter.on(sep).split(methodCode), sep);
}

From source file:org.onehippo.cms7.essentials.dashboard.utils.PayloadUtils.java

public static List<String> extractValueList(final String value) {
    if (Strings.isNullOrEmpty(value)) {
        return Collections.emptyList();
    }//w  w  w.  ja va  2s .  c  om
    final Splitter splitter = Splitter.on(",").omitEmptyStrings().trimResults();
    final Iterable<String> iterable = splitter.split(value);
    return Lists.newArrayList(iterable);
}

From source file:com.facebook.presto.recordservice.RecordServiceConnectorConfig.java

public static ImmutableSet<HostAddress> parsePlannerHostPorts(String plannerHostPorts) {
    Splitter splitter = Splitter.on(',').omitEmptyStrings().trimResults();
    return ImmutableSet.copyOf(transform(splitter.split(plannerHostPorts),
            (value -> HostAddress.fromString(value).withDefaultPort(RECORDSERVICE_PLANNER_DEFAULT_PORT))));
}

From source file:org.bitstrings.maven.plugins.portallocator.util.Helpers.java

public static Iterable<String> iterateOnSplit(String text, String separator, boolean omitEmpty) {
    final Splitter splitter = Splitter.on(separator).trimResults();

    if (omitEmpty) {
        splitter.omitEmptyStrings();/*  w  w  w  .  ja  v  a  2  s.c o m*/
    }

    return splitter.split(text);
}