List of usage examples for com.google.common.base.Splitter.on

@CheckReturnValue
@GwtIncompatible("java.util.regex")
public static Splitter on(final Pattern separatorPattern)

Returns a splitter that treats any subsequence matching separatorPattern as a separator. (Note: the examples below all call the equivalent String overload, Splitter.on(","), which splits on a literal separator and carries no GWT restriction.)
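Before the examples, a minimal self-contained sketch of the Pattern overload; the class name, input text, and pattern here are illustrative only, not taken from any example below:

import java.util.List;
import java.util.regex.Pattern;

import com.google.common.base.Splitter;

public class SplitterOnPatternDemo {
    public static void main(String[] args) {
        // Any run of whitespace acts as a separator; trim results and drop empties.
        List<String> parts = Splitter.on(Pattern.compile("\\s+"))
                .trimResults()
                .omitEmptyStrings()
                .splitToList("alpha  beta\tgamma ");
        System.out.println(parts); // prints [alpha, beta, gamma]
    }
}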
From source file:tv.icntv.log.stb.commons.SplitterIcntv.java
public static void main(String[] args) {
    // List<String> list = Splitter.on(",").limit(17).trimResults().omitEmptyStrings().splitToList(
    //         "catgId=217532, startDate=2014-11-13 11:38:10 324, endReason=, deviceCode=010143002008852, endDate=, "
    //         + "contentType=MOVIE, videoType=, id=, programId=2569998, bufferingTotalTime=, programSeriesName=, "
    //         + "bufferingCnt=, chargeType=0, epgCode=, outerCode=788109, ipAddress=, "
    //         + "programName=010533501273793201411131138105870000/2/0/010533501273793/:/00000032AmlogicMDZ-05-201302261821793//117.151.171.132/2014-11-13 11:38:10 556/2014-11-13 11:37:47 926/2014-11-13 11:37:47 957/2014-11-13 11:38:10 587/1/401/Dispatch/result=0&strategy=0&hlist=centerdispatch\\r");
    List<String> abc = Lists.newArrayList("deviceCode", "catgId", "startDate", "endReason", "endDate",
            "videoType", "programId", "chargeType", "epgCode", "outerCode");
    // Equivalent alternative: Splitter.on(",").withKeyValueSeparator("=").split(content)
    Map<String, String> maps = SplitterIcntv.toMap(Splitter.on(",").limit(17).split(
            "catgId=, startDate=2014-11-13 11:38:10 324, endReason=, deviceCode=010143002008852, endDate=, "
                    + "contentType=MOVIE, videoType=, id=, programId=2569998, bufferingTotalTime=, "
                    + "programSeriesName=, bufferingCnt=, chargeType=0, epgCode=, outerCode=788109, "
                    + "ipAddress=, programName="), "=");
    ContentViewDomain view = new ContentViewDomain();
    System.out.println(view.toString());
    for (String k : abc) {
        try {
            ReflectUtils.setFieldValue(view.getClass().getDeclaredField(k), view, new String[] { maps.get(k) });
        } catch (NoSuchFieldException e) {
            logger.error("reflect error", e);
        }
    }
    System.out.println(view.getEpgCode());
}
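The commented-out withKeyValueSeparator call above points at Guava's built-in alternative to the custom SplitterIcntv.toMap helper: Splitter.MapSplitter. A minimal sketch, using an illustrative input string rather than the STB log line above:

import java.util.Map;

import com.google.common.base.Splitter;

public class MapSplitterDemo {
    public static void main(String[] args) {
        // withKeyValueSeparator turns a Splitter into a MapSplitter that
        // parses "k=v" entries directly into a Map<String, String>.
        Map<String, String> map = Splitter.on(",")
                .trimResults()
                .withKeyValueSeparator("=")
                .split("catgId=217532, chargeType=0, outerCode=788109");
        System.out.println(map.get("outerCode")); // prints 788109
    }
}

Unlike the limit(17) + toMap approach above, MapSplitter throws IllegalArgumentException on entries without the separator or on duplicate keys, so it suits only well-formed input.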
From source file:tv.icntv.grade.film.recommend.TopNJob.java
public static void main(String[] args) throws Exception {
    final Configuration configuration = HBaseConfiguration.create();
    configuration.addResource("grade.xml");
    String tables = configuration.get("hbase.cdn.tables");
    if (Strings.isNullOrEmpty(tables)) {
        return;
    }
    List<String> list = Lists.newArrayList(Splitter.on(",").split(tables));
    List<String> results = Lists.transform(list, new Function<String, String>() {
        @Override
        public String apply(@Nullable java.lang.String input) {
            return String.format(configuration.get("hdfs.directory.base.db"), new Date(), input);
        }
    });
    String[] arrays = new String[] { Joiner.on(",").join(results),
            String.format(configuration.get("hdfs.directory.num.middle"), new Date()),
            String.format(configuration.get("hdfs.directory.num.result"), new Date()) };
    AbstractJob job = new TopNJob();
    // job.setStart(true);
    int i = ToolRunner.run(configuration, job, arrays);
    System.exit(i);
}
From source file:org.apache.mahout.knn.Vectorize20NewsGroups.java
public static void main(String[] args) throws IOException {
    String weightingCode = args[0];
    boolean normalize = weightingCode.endsWith("c");
    legalHeaders = Sets.newHashSet();
    Iterables.addAll(legalHeaders,
            Iterables.transform(Splitter.on(",").trimResults().split(args[1]), new Function<String, String>() {
                @Override
                public String apply(String s) {
                    return s.toLowerCase();
                }
            }));
    includeQuotes = Boolean.parseBoolean(args[2]);
    CorpusWeighting cw = CorpusWeighting.parse(weightingCode);
    if (cw.needCorpusWeights()) {
        Multiset<String> wordFrequency = HashMultiset.create();
        Set<String> documents = Sets.newHashSet();
        for (String file : Arrays.asList(args).subList(4, args.length)) {
            recursivelyCount(documents, wordFrequency, new File(file));
        }
        cw.setCorpusCounts(wordFrequency, documents.size());
    }
    int dimension = Integer.parseInt(args[3]);
    Configuration conf = new Configuration();
    SequenceFile.Writer sf = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, new Path("output"),
            Text.class, VectorWritable.class);
    PrintWriter csv = new PrintWriter("output.csv");
    for (String file : Arrays.asList(args).subList(4, args.length)) {
        recursivelyVectorize(csv, sf, new File(file), cw, normalize, dimension);
    }
    csv.close();
    sf.close();
}
From source file:org.apache.mahout.knn.tools.Vectorize20NewsGroups.java
public static void main(String[] args) throws IOException {
    String weightingCode = args[0];
    boolean normalize = weightingCode.endsWith("c");
    legalHeaders = Sets.newHashSet();
    Iterables.addAll(legalHeaders,
            Iterables.transform(Splitter.on(",").trimResults().split(args[1]), new Function<String, String>() {
                @Override
                public String apply(String s) {
                    return s.toLowerCase();
                }
            }));
    includeQuotes = Boolean.parseBoolean(args[2]);
    CorpusWeighting cw = CorpusWeighting.parse(weightingCode);
    if (cw.needCorpusWeights()) {
        Multiset<String> wordFrequency = HashMultiset.create();
        Set<String> documents = Sets.newHashSet();
        for (String file : Arrays.asList(args).subList(4, args.length)) {
            recursivelyCount(documents, wordFrequency, new File(file));
        }
        cw.setCorpusCounts(wordFrequency, documents.size());
    }
    int dimension = Integer.parseInt(args[3]);
    Configuration conf = new Configuration();
    SequenceFile.Writer sf = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, new Path("output-file"),
            Text.class, VectorWritable.class);
    PrintWriter csv = new PrintWriter("output-file.csv");
    for (String file : Arrays.asList(args).subList(4, args.length)) {
        recursivelyVectorize(csv, sf, new File(file), cw, normalize, dimension);
    }
    csv.close();
    sf.close();
}
From source file:com.technobium.MultinomialLogisticRegression.java
public static void main(String[] args) throws Exception {
    // This test trains a 3-way classifier on the famous Iris dataset.
    // A similar exercise can be accomplished in R using this code:
    //    library(nnet)
    //    correct = rep(0,100)
    //    for (j in 1:100) {
    //      i = order(runif(150))
    //      train = iris[i[1:100],]
    //      test = iris[i[101:150],]
    //      m = multinom(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, train)
    //      correct[j] = mean(predict(m, newdata=test) == test$Species)
    //    }
    //    hist(correct)
    //
    // Note that depending on the training/test split, performance can be better or worse.
    // There is about a 5% chance of getting accuracy < 90% and about a 20% chance of getting
    // accuracy of 100%.
    //
    // This test uses a deterministic split that is neither outstandingly good nor bad.
    RandomUtils.useTestSeed();
    Splitter onComma = Splitter.on(",");

    // read the data
    List<String> raw = Resources.readLines(Resources.getResource("iris.csv"), Charsets.UTF_8);
    // holds features
    List<Vector> data = Lists.newArrayList();
    // holds target variable
    List<Integer> target = Lists.newArrayList();
    // for decoding target values
    Dictionary dict = new Dictionary();
    // for permuting data later
    List<Integer> order = Lists.newArrayList();

    for (String line : raw.subList(1, raw.size())) {
        // order gets a list of indexes
        order.add(order.size());

        // parse the predictor variables
        Vector v = new DenseVector(5);
        v.set(0, 1);
        int i = 1;
        Iterable<String> values = onComma.split(line);
        for (String value : Iterables.limit(values, 4)) {
            v.set(i++, Double.parseDouble(value));
        }
        data.add(v);

        // and the target
        target.add(dict.intern(Iterables.get(values, 4)));
    }

    // randomize the order ... original data has each species all together
    // note that this randomization is deterministic
    Random random = RandomUtils.getRandom();
    Collections.shuffle(order, random);

    // select training and test data
    List<Integer> train = order.subList(0, 100);
    List<Integer> test = order.subList(100, 150);
    logger.warn("Training set = {}", train);
    logger.warn("Test set = {}", test);

    // now train many times and collect information on accuracy each time
    int[] correct = new int[test.size() + 1];
    for (int run = 0; run < 200; run++) {
        OnlineLogisticRegression lr = new OnlineLogisticRegression(3, 5, new L2(1));
        // 30 training passes should converge to > 95% accuracy nearly always but never to 100%
        for (int pass = 0; pass < 30; pass++) {
            Collections.shuffle(train, random);
            for (int k : train) {
                lr.train(target.get(k), data.get(k));
            }
        }

        // check the accuracy on held-out data
        int x = 0;
        int[] count = new int[3];
        for (Integer k : test) {
            Vector vt = lr.classifyFull(data.get(k));
            int r = vt.maxValueIndex();
            count[r]++;
            x += r == target.get(k) ? 1 : 0;
        }
        correct[x]++;

        if (run == 199) {
            Vector v = new DenseVector(5);
            v.set(0, 1);
            int i = 1;
            Iterable<String> values = onComma.split("6.0,2.7,5.1,1.6,versicolor");
            for (String value : Iterables.limit(values, 4)) {
                v.set(i++, Double.parseDouble(value));
            }
            Vector vt = lr.classifyFull(v);
            for (String value : dict.values()) {
                System.out.println("target:" + value);
            }
            int t = dict.intern(Iterables.get(values, 4));
            int r = vt.maxValueIndex();
            boolean flag = r == t;
            lr.close();

            Closer closer = Closer.create();
            try {
                FileOutputStream fileOutputStream = closer
                        .register(new FileOutputStream(new File("model.txt")));
                DataOutputStream dataOutputStream = closer
                        .register(new DataOutputStream(fileOutputStream));
                PolymorphicWritable.write(dataOutputStream, lr);
            } finally {
                closer.close();
            }
        }
    }

    // verify we never saw worse than 95% correct ...
    for (int i = 0; i < Math.floor(0.95 * test.size()); i++) {
        System.out.println(String.format("%d trials had unacceptable accuracy of only %.0f%%: ", correct[i],
                100.0 * i / test.size()));
    }
    // ... nor perfect (index test.size() is the 100% bucket)
    System.out.println(String.format("%d trials had unrealistic accuracy of 100%%", correct[test.size()]));
}
From source file:org.icgc.dcc.generator.utils.RegexMatches.java
/**
 * Utility to generate the code above.
 */
public static void main(String... args) {
    // TreeMap keys remove duplicates and keep the regexes sorted
    val regexes = Maps.<String, String>newTreeMap();
    val splitter = Splitter.on(",").trimResults();

    // Find all the unique regexes
    val fileSchemas = new FileSchemas();
    for (val fileSchema : fileSchemas.getSchemas()) {
        for (val field : fileSchema.getFields()) {
            for (val restriction : field.getRestrictions()) {
                if (restriction.getType() == RestrictionType.REGEX) {
                    val config = restriction.getConfig();
                    val regex = (String) config.get("pattern");
                    val examples = (String) config.get("examples");
                    if (regexes.containsKey(regex) && !isBlank(examples)) {
                        continue;
                    }
                    val example = isBlank(examples) ? "" : splitter.splitToList(examples).get(0);
                    regexes.put(regex, example);
                }
            }
        }
    }

    for (val entry : regexes.entrySet()) {
        val regex = entry.getKey();
        val example = entry.getValue();
        val line = "    .put(\"" + escapeJava(regex) + "\",\"" + example + "\")";
        System.out.println(line);
    }
}
From source file:tv.icntv.grade.film.recommend.CFRecommendJob.java
public static void main(String[] args) throws Exception {
    final Configuration configuration = HBaseConfiguration.create();
    configuration.addResource("grade.xml");
    String baseCfData = String.format(configuration.get("hdfs.directory.base.score"), new Date());
    String output = String.format(configuration.get("icntv.cf.recommend.directory.target"), new Date());
    String temp = String.format(configuration.get("icntv.cf.recommend.directory.temp"), new Date());
    StringBuilder sb = new StringBuilder();
    sb.append("--input ").append(baseCfData);
    sb.append(" --output ").append(output);
    sb.append(" --numRecommendations ").append(configuration.get("icntv.cf.recommend.num"));
    sb.append(" --similarityClassname ").append(configuration.get("icntv.cf.recommend.similarityClassname"));
    sb.append(" --tempDir ").append(temp);
    String tables = configuration.get("hbase.cdn.tables");
    if (Strings.isNullOrEmpty(tables)) {
        return;
    }
    List<String> list = Lists.newArrayList(Splitter.on(",").split(tables));
    List<String> results = Lists.transform(list, new Function<String, String>() {
        @Override
        public String apply(@Nullable java.lang.String input) {
            return String.format(configuration.get("hdfs.directory.base.db"), new Date(), input);
        }
    });
    int i = ToolRunner.run(configuration, new CFRecommendJob(),
            new String[] { Joiner.on(",").join(results), baseCfData, sb.toString(), output, temp });
    System.exit(i);
}
From source file:com.sina.dip.twill.HelloWorldMultipleRunnablesAnyOrder.java
public static void main(String[] args) {
    String zkStr = "localhost:2181";

    YarnConfiguration yarnConfiguration = new YarnConfiguration();

    final TwillRunnerService twillRunner = new YarnTwillRunnerService(yarnConfiguration, zkStr);

    twillRunner.start();

    String yarnClasspath = yarnConfiguration.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            "/usr/lib/hadoop/*,/usr/lib/hadoop-0.20-mapreduce/*,/usr/lib/hadoop-hdfs/*,"
                    + "/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-yarn/*");

    List<String> applicationClassPaths = Lists.newArrayList();

    Iterables.addAll(applicationClassPaths, Splitter.on(",").split(yarnClasspath));

    final TwillController controller = twillRunner.prepare(new HelloWorldApplication())
            .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out, true)))
            .withApplicationClassPaths(applicationClassPaths)
            .withBundlerClassAcceptor(new HadoopClassExcluder())
            .start();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                Futures.getUnchecked(controller.terminate());
            } finally {
                twillRunner.stop();
            }
        }
    });

    try {
        controller.awaitTerminated();
    } catch (ExecutionException e) {
        e.printStackTrace();
    }
}
From source file:com.sina.dip.twill.HelloWorldClassDependent.java
public static void main(String[] args) {
    String zkStr = "localhost:2181";

    YarnConfiguration yarnConfiguration = new YarnConfiguration();

    final TwillRunnerService twillRunner = new YarnTwillRunnerService(yarnConfiguration, zkStr);

    twillRunner.start();

    String yarnClasspath = yarnConfiguration.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            "/usr/lib/hadoop/*,/usr/lib/hadoop-0.20-mapreduce/*,/usr/lib/hadoop-hdfs/*,"
                    + "/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-yarn/*");

    List<String> applicationClassPaths = Lists.newArrayList();

    Iterables.addAll(applicationClassPaths, Splitter.on(",").split(yarnClasspath));

    final TwillController controller = twillRunner.prepare(new HelloWorldApplication())
            .withApplicationClassPaths(applicationClassPaths)
            .withBundlerClassAcceptor(new HadoopClassExcluder())
            .start();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                Futures.getUnchecked(controller.terminate());
            } finally {
                twillRunner.stop();
            }
        }
    });

    try {
        controller.awaitTerminated();
    } catch (ExecutionException e) {
        e.printStackTrace();
    }
}
From source file:com.sina.dip.twill.HelloWorldArguments.java
public static void main(String[] args) {
    String zkStr = "localhost:2181";

    YarnConfiguration yarnConfiguration = new YarnConfiguration();

    final TwillRunnerService twillRunner = new YarnTwillRunnerService(yarnConfiguration, zkStr);

    twillRunner.start();

    String yarnClasspath = yarnConfiguration.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            "/usr/lib/hadoop/*,/usr/lib/hadoop-0.20-mapreduce/*,/usr/lib/hadoop-hdfs/*,"
                    + "/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-yarn/*");

    List<String> applicationClassPaths = Lists.newArrayList();

    Iterables.addAll(applicationClassPaths, Splitter.on(",").split(yarnClasspath));

    final TwillController controller = twillRunner.prepare(new HelloWorldApplication())
            // Application arguments will be visible to all runnables
            .withApplicationArguments("--arg", "arg-app")
            // Arguments only visible to instances of hello1
            .withArguments("hello1", "--arg1", "arg-hello1")
            .withArguments("hello1", "--arg2", "arg-hello2")
            // Arguments only visible to instances of hello2
            .withArguments("hello2", "--arg3", "arg-hello3")
            .withArguments("hello2", "--arg4", "arg-hello4")
            .withArguments("hello2", "--arg5", "arg-hello5")
            .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out, true)))
            .withApplicationClassPaths(applicationClassPaths)
            .withBundlerClassAcceptor(new HadoopClassExcluder())
            .start();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            try {
                Futures.getUnchecked(controller.terminate());
            } finally {
                twillRunner.stop();
            }
        }
    });

    try {
        controller.awaitTerminated();
    } catch (ExecutionException e) {
        e.printStackTrace();
    }
}