Example usage for com.google.common.base Splitter.on

Introduction

On this page you can find usage examples for com.google.common.base Splitter.on.

Prototype

@CheckReturnValue
@GwtIncompatible("java.util.regex")
public static Splitter on(final Pattern separatorPattern) 

Document

Returns a splitter that considers any subsequence matching pattern to be a separator.
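
Below is a minimal, self-contained sketch of this overload. The separator pattern and sample input are illustrative only; note that most examples on this page actually use the String overload Splitter.on(","), which splits on a literal separator.

import java.util.regex.Pattern;

import com.google.common.base.Splitter;

public class SplitterOnPatternExample {
    public static void main(String[] args) {
        // Any subsequence matching the pattern is treated as a separator.
        Iterable<String> parts = Splitter.on(Pattern.compile("\\s*,\\s*")).omitEmptyStrings()
                .split("a, b ,, c");
        for (String part : parts) {
            System.out.println(part); // prints a, b, c on separate lines
        }
    }
}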

Usage

From source file:tv.icntv.log.stb.commons.SplitterIcntv.java
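In this example, Splitter.on(",").limit(17) tokenizes a set-top-box log record into at most 17 key=value pairs, which are converted to a map and copied onto a domain object via reflection.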

public static void main(String[] args) {
    //List<String> list=Splitter.on(",").limit(17).trimResults().omitEmptyStrings().splitToList("catgId=217532, startDate=2014-11-13 11:38:10 324, endReason=, deviceCode=010143002008852, endDate=, contentType=MOVIE, videoType=, id=, programId=2569998, bufferingTotalTime=, programSeriesName=, bufferingCnt=, chargeType=0, epgCode=, outerCode=788109, ipAddress=, programName=010533501273793201411131138105870000/2/0/010533501273793/:/00000032AmlogicMDZ-05-201302261821793//117.151.171.132/2014-11-13 11:38:10 556/2014-11-13 11:37:47 926/2014-11-13 11:37:47 957/2014-11-13 11:38:10 587/1/401/Dispatch/result=0&strategy=0&hlist=centerdispatch\\r");
    List<String> abc = Lists.newArrayList("deviceCode", "catgId", "startDate", "endReason", "endDate",
            "videoType", "programId", "chargeType", "epgCode", "outerCode");
    Map<String, String> maps = SplitterIcntv.toMap(Splitter.on(",").limit(17).split(
            "catgId=, startDate=2014-11-13 11:38:10 324, endReason=, deviceCode=010143002008852, endDate=, contentType=MOVIE, videoType=, id=, programId=2569998, bufferingTotalTime=, programSeriesName=, bufferingCnt=, chargeType=0, epgCode=, outerCode=788109, ipAddress=, programName="),
            "=");//.withKeyValueSeparator("=").split(content);
    ContentViewDomain view = new ContentViewDomain();
    System.out.println(view.toString());
    for (String k : abc) {
        try {
            ReflectUtils.setFieldValue(view.getClass().getDeclaredField(k), view, new String[] { maps.get(k) });
        } catch (NoSuchFieldException e) {
            logger.error("reflect error", e);
        }
    }
    System.out.println(view.getEpgCode());

}

From source file:tv.icntv.grade.film.recommend.TopNJob.java
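Here Splitter.on(",") breaks a comma-separated list of HBase table names out of the configuration; the names are formatted into HDFS paths and handed to a Hadoop job via ToolRunner.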

public static void main(String[] args) throws Exception {
    final Configuration configuration = HBaseConfiguration.create();
    configuration.addResource("grade.xml");
    String tables = configuration.get("hbase.cdn.tables");
    if (Strings.isNullOrEmpty(tables)) {
        return;
    }
    List<String> list = Lists.newArrayList(Splitter.on(",").split(tables));
    List<String> results = Lists.transform(list, new Function<String, String>() {
        @Override
        public String apply(@Nullable java.lang.String input) {
            return String.format(configuration.get("hdfs.directory.base.db"), new Date(), input);
        }
    });

    String[] arrays = new String[] { Joiner.on(",").join(results),
            String.format(configuration.get("hdfs.directory.num.middle"), new Date()),
            String.format(configuration.get("hdfs.directory.num.result"), new Date()) };
    AbstractJob job = new TopNJob();
    //        job.setStart(true);
    int i = ToolRunner.run(configuration, job, arrays);
    System.exit(i);
}

From source file:org.apache.mahout.knn.Vectorize20NewsGroups.java
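This example parses a comma-separated list of legal headers with Splitter.on(",").trimResults(), lower-casing each entry before vectorizing the 20 Newsgroups corpus.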

public static void main(String[] args) throws IOException {
    String weightingCode = args[0];
    boolean normalize = weightingCode.endsWith("c");

    legalHeaders = Sets.newHashSet();
    Iterables.addAll(legalHeaders,
            Iterables.transform(Splitter.on(",").trimResults().split(args[1]), new Function<String, String>() {
                @Override
                public String apply(String s) {
                    return s.toLowerCase();
                }
            }));

    includeQuotes = Boolean.parseBoolean(args[2]);

    CorpusWeighting cw = CorpusWeighting.parse(weightingCode);
    if (cw.needCorpusWeights()) {
        Multiset<String> wordFrequency = HashMultiset.create();
        Set<String> documents = Sets.newHashSet();
        for (String file : Arrays.asList(args).subList(4, args.length)) {
            recursivelyCount(documents, wordFrequency, new File(file));
        }
        cw.setCorpusCounts(wordFrequency, documents.size());
    }

    int dimension = Integer.parseInt(args[3]);

    Configuration conf = new Configuration();
    SequenceFile.Writer sf = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, new Path("output"),
            Text.class, VectorWritable.class);
    PrintWriter csv = new PrintWriter("output.csv");
    for (String file : Arrays.asList(args).subList(4, args.length)) {
        recursivelyVectorize(csv, sf, new File(file), cw, normalize, dimension);
    }
    csv.close();
    sf.close();
}

From source file:org.apache.mahout.knn.tools.Vectorize20NewsGroups.java
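A near-identical variant of the previous example (only the package and output file names differ), again parsing the header list with Splitter.on(",").trimResults().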

public static void main(String[] args) throws IOException {
    String weightingCode = args[0];
    boolean normalize = weightingCode.endsWith("c");

    legalHeaders = Sets.newHashSet();
    Iterables.addAll(legalHeaders,
            Iterables.transform(Splitter.on(",").trimResults().split(args[1]), new Function<String, String>() {
                @Override
                public String apply(String s) {
                    return s.toLowerCase();
                }
            }));

    includeQuotes = Boolean.parseBoolean(args[2]);

    CorpusWeighting cw = CorpusWeighting.parse(weightingCode);
    if (cw.needCorpusWeights()) {
        Multiset<String> wordFrequency = HashMultiset.create();
        Set<String> documents = Sets.newHashSet();
        for (String file : Arrays.asList(args).subList(4, args.length)) {
            recursivelyCount(documents, wordFrequency, new File(file));
        }
        cw.setCorpusCounts(wordFrequency, documents.size());
    }

    int dimension = Integer.parseInt(args[3]);

    Configuration conf = new Configuration();
    SequenceFile.Writer sf = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, new Path("output-file"),
            Text.class, VectorWritable.class);
    PrintWriter csv = new PrintWriter("output-file.csv");
    for (String file : Arrays.asList(args).subList(4, args.length)) {
        recursivelyVectorize(csv, sf, new File(file), cw, normalize, dimension);
    }
    csv.close();
    sf.close();
}

From source file:com.technobium.MultinomialLogisticRegression.java
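This example reuses a single Splitter.on(",") instance to parse the Iris CSV, both when loading the training data and when classifying a hand-written sample row at the end.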

public static void main(String[] args) throws Exception {
    // this test trains a 3-way classifier on the famous Iris dataset.
    // a similar exercise can be accomplished in R using this code:
    //    library(nnet)
    //    correct = rep(0,100)
    //    for (j in 1:100) {
    //      i = order(runif(150))
    //      train = iris[i[1:100],]
    //      test = iris[i[101:150],]
    //      m = multinom(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, train)
    //      correct[j] = mean(predict(m, newdata=test) == test$Species)
    //    }
    //    hist(correct)
    //
    // Note that depending on the training/test split, performance can be better or worse.
    // There is about a 5% chance of getting accuracy < 90% and about 20% chance of getting accuracy
    // of 100%
    //
    // This test uses a deterministic split that is neither outstandingly good nor bad

    RandomUtils.useTestSeed();
    Splitter onComma = Splitter.on(",");

    // read the data
    List<String> raw = Resources.readLines(Resources.getResource("iris.csv"), Charsets.UTF_8);

    // holds features
    List<Vector> data = Lists.newArrayList();

    // holds target variable
    List<Integer> target = Lists.newArrayList();

    // for decoding target values
    Dictionary dict = new Dictionary();

    // for permuting data later
    List<Integer> order = Lists.newArrayList();

    for (String line : raw.subList(1, raw.size())) {
        // order gets a list of indexes
        order.add(order.size());

        // parse the predictor variables
        Vector v = new DenseVector(5);
        v.set(0, 1);
        int i = 1;
        Iterable<String> values = onComma.split(line);
        for (String value : Iterables.limit(values, 4)) {
            v.set(i++, Double.parseDouble(value));
        }
        data.add(v);

        // and the target
        target.add(dict.intern(Iterables.get(values, 4)));
    }

    // randomize the order ... original data has each species all together
    // note that this randomization is deterministic
    Random random = RandomUtils.getRandom();
    Collections.shuffle(order, random);

    // select training and test data
    List<Integer> train = order.subList(0, 100);
    List<Integer> test = order.subList(100, 150);
    logger.warn("Training set = {}", train);
    logger.warn("Test set = {}", test);

    // now train many times and collect information on accuracy each time
    int[] correct = new int[test.size() + 1];
    for (int run = 0; run < 200; run++) {
        OnlineLogisticRegression lr = new OnlineLogisticRegression(3, 5, new L2(1));
        // 30 training passes should converge to > 95% accuracy nearly always but never to 100%
        for (int pass = 0; pass < 30; pass++) {
            Collections.shuffle(train, random);
            for (int k : train) {
                lr.train(target.get(k), data.get(k));
            }
        }

        // check the accuracy on held out data
        int x = 0;
        int[] count = new int[3];
        for (Integer k : test) {
            Vector vt = lr.classifyFull(data.get(k));
            int r = vt.maxValueIndex();
            count[r]++;
            x += r == target.get(k) ? 1 : 0;
        }
        correct[x]++;

        if (run == 199) {

            Vector v = new DenseVector(5);
            v.set(0, 1);
            int i = 1;
            Iterable<String> values = onComma.split("6.0,2.7,5.1,1.6,versicolor");
            for (String value : Iterables.limit(values, 4)) {
                v.set(i++, Double.parseDouble(value));
            }

            Vector vt = lr.classifyFull(v);
            for (String value : dict.values()) {
                System.out.println("target:" + value);
            }
            int t = dict.intern(Iterables.get(values, 4));

            int r = vt.maxValueIndex();
            boolean flag = r == t;
            lr.close();

            Closer closer = Closer.create();

            try {
                FileOutputStream fileOutputStream = closer
                        .register(new FileOutputStream(new File("model.txt")));
                DataOutputStream dataOutputStream = closer
                        .register(new DataOutputStream(fileOutputStream));
                PolymorphicWritable.write(dataOutputStream, lr);
            } finally {
                closer.close();
            }
        }
    }

    // verify we never saw worse than 95% correct,
    for (int i = 0; i < Math.floor(0.95 * test.size()); i++) {
        System.out.println(String.format("%d trials had unacceptable accuracy of only %.0f%%: ", correct[i],
                100.0 * i / test.size()));
    }
    // nor perfect
    System.out.println(String.format("%d trials had unrealistic accuracy of 100%%", correct[test.size() - 1]));
}

From source file:org.icgc.dcc.generator.utils.RegexMatches.java
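Here Splitter.on(",").trimResults() splits the comma-separated examples attached to each regex restriction so that the first example can be printed alongside its pattern.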

/**
 * Utility to generate the code above.
 */
public static void main(String... args) {
    // TreeMap to remove duplicate regexes and keep them sorted by pattern
    val regexes = Maps.<String, String>newTreeMap();
    val splitter = Splitter.on(",").trimResults();

    // Find all the unique regexes
    val fileSchemas = new FileSchemas();
    for (val fileSchema : fileSchemas.getSchemas()) {
        for (val field : fileSchema.getFields()) {
            for (val restriction : field.getRestrictions()) {
                if (restriction.getType() == RestrictionType.REGEX) {
                    val config = restriction.getConfig();
                    val regex = (String) config.get("pattern");
                    val examples = (String) config.get("examples");

                    if (regexes.containsKey(regex) && !isBlank(examples)) {
                        continue;
                    }

                    val example = isBlank(examples) ? "" : splitter.splitToList(examples).get(0);
                    regexes.put(regex, example);
                }
            }
        }
    }

    for (val entry : regexes.entrySet()) {
        val regex = entry.getKey();
        val example = entry.getValue();

        val line = "      .put(\"" + escapeJava(regex) + "\",\"" + example + "\")";
        System.out.println(line);
    }
}

From source file:tv.icntv.grade.film.recommend.CFRecommendJob.java
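As in TopNJob above, Splitter.on(",") extracts the HBase table names from the configuration; the derived paths feed a collaborative-filtering recommendation job.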

public static void main(String[] args) throws Exception {
    final Configuration configuration = HBaseConfiguration.create();
    configuration.addResource("grade.xml");
    String baseCfData = String.format(configuration.get("hdfs.directory.base.score"), new Date());
    String output = String.format(configuration.get("icntv.cf.recommend.directory.target"), new Date());
    String temp = String.format(configuration.get("icntv.cf.recommend.directory.temp"), new Date());
    StringBuilder sb = new StringBuilder();
    sb.append("--input ").append(baseCfData);
    sb.append(" --output ").append(output);
    sb.append(" --numRecommendations ").append(configuration.get("icntv.cf.recommend.num"));
    sb.append(" --similarityClassname ").append(configuration.get("icntv.cf.recommend.similarityClassname"));
    sb.append(" --tempDir ").append(temp);

    String tables = configuration.get("hbase.cdn.tables");

    if (Strings.isNullOrEmpty(tables)) {
        return;
    }
    List<String> list = Lists.newArrayList(Splitter.on(",").split(tables));
    List<String> results = Lists.transform(list, new Function<String, String>() {
        @Override
        public String apply(@Nullable java.lang.String input) {
            return String.format(configuration.get("hdfs.directory.base.db"), new Date(), input);
        }
    });

    int i = ToolRunner.run(configuration, new CFRecommendJob(),
            new String[] { Joiner.on(",").join(results), baseCfData, sb.toString(), output, temp });
    System.exit(i);
}

From source file:com.sina.dip.twill.HelloWorldMultipleRunnablesAnyOrder.java
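In this Apache Twill example, Splitter.on(",") splits the comma-separated YARN application classpath into the individual entries passed to withApplicationClassPaths.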

public static void main(String[] args) {
    String zkStr = "localhost:2181";

    YarnConfiguration yarnConfiguration = new YarnConfiguration();

    final TwillRunnerService twillRunner = new YarnTwillRunnerService(yarnConfiguration, zkStr);

    twillRunner.start();

    String yarnClasspath = yarnConfiguration.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            "/usr/lib/hadoop/*,/usr/lib/hadoop-0.20-mapreduce/*,/usr/lib/hadoop-hdfs/*,/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-yarn/*");

    List<String> applicationClassPaths = Lists.newArrayList();

    Iterables.addAll(applicationClassPaths, Splitter.on(",").split(yarnClasspath));

    final TwillController controller = twillRunner.prepare(new HelloWorldApplication())
            .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out, true)))
            .withApplicationClassPaths(applicationClassPaths)
            .withBundlerClassAcceptor(new HadoopClassExcluder()).start();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                Futures.getUnchecked(controller.terminate());
            } finally {
                twillRunner.stop();
            }
        }
    });

    try {
        controller.awaitTerminated();
    } catch (ExecutionException e) {
        e.printStackTrace();
    }
}

From source file:com.sina.dip.twill.HelloWorldClassDependent.java
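A variant of the previous Twill example without the log handler; the YARN classpath is split the same way.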

public static void main(String[] args) {
    String zkStr = "localhost:2181";

    YarnConfiguration yarnConfiguration = new YarnConfiguration();

    final TwillRunnerService twillRunner = new YarnTwillRunnerService(yarnConfiguration, zkStr);

    twillRunner.start();

    String yarnClasspath = yarnConfiguration.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            "/usr/lib/hadoop/*,/usr/lib/hadoop-0.20-mapreduce/*,/usr/lib/hadoop-hdfs/*,/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-yarn/*");

    List<String> applicationClassPaths = Lists.newArrayList();

    Iterables.addAll(applicationClassPaths, Splitter.on(",").split(yarnClasspath));

    final TwillController controller = twillRunner.prepare(new HelloWorldApplication())
            .withApplicationClassPaths(applicationClassPaths)
            .withBundlerClassAcceptor(new HadoopClassExcluder()).start();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                Futures.getUnchecked(controller.terminate());
            } finally {
                twillRunner.stop();
            }
        }
    });

    try {
        controller.awaitTerminated();
    } catch (ExecutionException e) {
        e.printStackTrace();
    }
}

From source file:com.sina.dip.twill.HelloWorldArguments.java
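Another Twill variant that additionally passes application-level and per-runnable arguments; Splitter.on(",") again tokenizes the YARN classpath.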

public static void main(String[] args) {
    String zkStr = "localhost:2181";

    YarnConfiguration yarnConfiguration = new YarnConfiguration();

    final TwillRunnerService twillRunner = new YarnTwillRunnerService(yarnConfiguration, zkStr);

    twillRunner.start();

    String yarnClasspath = yarnConfiguration.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            "/usr/lib/hadoop/*,/usr/lib/hadoop-0.20-mapreduce/*,/usr/lib/hadoop-hdfs/*,/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-yarn/*");

    List<String> applicationClassPaths = Lists.newArrayList();

    Iterables.addAll(applicationClassPaths, Splitter.on(",").split(yarnClasspath));

    final TwillController controller = twillRunner.prepare(new HelloWorldApplication())
            // Application arguments will be visible to all runnables
            .withApplicationArguments("--arg", "arg-app")
            // Arguments only visible to instance of hello1.
            .withArguments("hello1", "--arg1", "arg-hello1").withArguments("hello1", "--arg2", "arg-hello2")
            // Arguments only visible to instance of hello2.
            .withArguments("hello2", "--arg3", "arg-hello3").withArguments("hello2", "--arg4", "arg-hello4")
            .withArguments("hello2", "--arg5", "arg-hello5")
            .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out, true)))
            .withApplicationClassPaths(applicationClassPaths)
            .withBundlerClassAcceptor(new HadoopClassExcluder()).start();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                Futures.getUnchecked(controller.terminate());
            } finally {
                twillRunner.stop();
            }
        }
    });

    try {
        controller.awaitTerminated();
    } catch (ExecutionException e) {
        e.printStackTrace();
    }
}