List of usage examples for java.util.regex.Pattern.split
public String[] split(CharSequence input)
From source file:com.cloudera.oryx.rdf.computation.WineQualityIT.java
/**
 * Reads "winequality-white.csv" from TEST_TEMP_INBOUND_DIR and turns every non-empty line
 * into an {@code Example}.
 *
 * Each line is split on ';'. The first 11 tokens are parsed as floats and become numeric
 * features; the token at index 11 is parsed as a float and becomes the numeric target.
 *
 * @return one example per non-empty line, in file order
 * @throws IOException declared by the original signature
 */
private static List<Example> readWineQualityExamples() throws IOException {
    final int featureCount = 11;
    File wineData = new File(TEST_TEMP_INBOUND_DIR, "winequality-white.csv");
    Pattern semicolon = Pattern.compile(";");
    List<Example> examples = Lists.newArrayList();

    for (CharSequence row : new FileLineIterable(wineData)) {
        if (row.length() > 0) {
            String[] columns = semicolon.split(row);

            // Leading columns are the predictors...
            Feature[] predictors = new Feature[featureCount];
            int column = 0;
            while (column < predictors.length) {
                predictors[column] = NumericFeature.forValue(Float.parseFloat(columns[column]));
                column++;
            }

            // ...and the column right after them is the target value.
            Feature target = NumericFeature.forValue(Float.parseFloat(columns[featureCount]));
            examples.add(new Example(target, predictors));
        }
    }
    return examples;
}
From source file:com.ephesoft.dcma.util.EphesoftStringUtil.java
/**
 * Splits the input string around matches of the given split pattern.
 *
 * The split pattern is compiled as a regular expression. In case the split pattern is empty,
 * the input string is split character-wise.
 *
 * @param inputString {@link String} the input string that is to be split.
 * @param splitPattern {@link String} the regular expression on which the input string should be split.
 * @return {@link String[]} the array of strings computed by splitting the input, or null when
 *         {@code isNullOrEmpty(inputString)} holds or splitPattern is null.
 */
public static String[] splitString(final String inputString, final String splitPattern) {
    if (isNullOrEmpty(inputString) || splitPattern == null) {
        return null;
    }
    return Pattern.compile(splitPattern).split(inputString);
}
From source file:utility.Tools.java
public static String[] splitDocName(String word) { word = word.replace("XML", "Xml"); word = word.replace("DOM", "Dom"); word = word.replace("JHotDraw", "Jhotdraw"); word = word.replace("ID", "Id"); String regEx = "[A-Z]"; Pattern p1 = Pattern.compile(regEx); Matcher m1 = p1.matcher(word); boolean startWithUpper = false; startWithUpper = Pattern.matches("[A-Z].*", word); String[] words = p1.split(word); List<String> list = new ArrayList<>(); for (int i = 0; i < words.length; i++) { list.add(words[i]);/*from w ww.ja va2 s . c om*/ } int count = 0; while (m1.find()) { if (count + 1 < words.length) { list.set(count + 1, m1.group() + list.get(count + 1)); ++count; } else { list.add(m1.group()); } } if (startWithUpper && words.length != 0) { list.remove(0); } // for (int i = 0; i < list.size(); ++i) { // list.set(i, list.get(i).toLowerCase()); // } String[] result = list.toArray(new String[1]); return result; }
From source file:utility.Tools.java
/**
 * Splits a camel-case identifier into its words and lower-cases each of them.
 *
 * Before splitting, the acronyms "XML", "DOM" and "ID" and the name "JHotDraw" are rewritten
 * to "Xml", "Dom", "Id" and "Jhotdraw" so that each of them stays together as one word.
 * A new word then starts at every ASCII upper-case letter ({@code A-Z}), so
 * {@code "parseXMLFile"} becomes {@code ["parse", "xml", "file"]}.
 *
 * The word is scanned character by character instead of being matched against
 * {@code "[A-Z].*"}: that pattern fails on input containing a line terminator and used to
 * leave an empty first element in the result. Lower-casing uses the root locale so the
 * result does not depend on the JVM's default locale.
 *
 * @param word the identifier to split; must not be null
 * @return the lower-cased words in order of appearance; a single-element array when the
 *         (rewritten) input contains no upper-case letter after its first character
 */
public static String[] splitCamelWords(String word) {
    word = word.replace("XML", "Xml");
    word = word.replace("DOM", "Dom");
    word = word.replace("JHotDraw", "Jhotdraw");
    word = word.replace("ID", "Id");

    List<String> pieces = new ArrayList<>();
    int pieceStart = 0;
    // Start at 1: an upper-case first character begins the first word, it does not end one.
    for (int i = 1; i < word.length(); i++) {
        char c = word.charAt(i);
        if (c >= 'A' && c <= 'Z') {
            pieces.add(word.substring(pieceStart, i).toLowerCase(java.util.Locale.ROOT));
            pieceStart = i;
        }
    }
    pieces.add(word.substring(pieceStart).toLowerCase(java.util.Locale.ROOT));

    return pieces.toArray(new String[0]);
}
From source file:org.openconcerto.sql.utils.SQLUtils.java
static public void executeScript(final String sql, final DBSystemRoot sysRoot, final Pattern p) throws SQLException { try {// w w w . j a v a2 s.com for (final String s : p.split(sql)) { final String trimmed = s.trim(); if (trimmed.length() > 0) sysRoot.getDataSource().execute(trimmed, null); } } catch (final Exception e) { throw new SQLException("unable to execute " + sql, e); } }
From source file:com.hurence.logisland.botsearch.Trace.java
/** * take a tab separated string representing a trace and converts it to a * Trace object 10.113.140.213 77.67.21.141 (248.98, 41528.56, 381.64, * 34.91)/*from www .j a v a 2s.co m*/ * * @param line * @return */ public static Trace parse(String line) throws IllegalArgumentException { final Pattern tabPattern = Pattern.compile("\t"); final Pattern commaPattern = Pattern.compile(","); String[] fields = tabPattern.split(line); Trace trace = new Trace(); trace.setIpSource(fields[0]); trace.setIpTarget(fields[1]); String vector = fields[2].replace("(", "").replace(")", ""); fields = commaPattern.split(vector); if (fields.length == 4) { trace.setAvgUploadedBytes(Double.parseDouble(fields[0])); trace.setAvgDownloadedBytes(Double.parseDouble(fields[1])); trace.setAvgTimeBetweenTwoFLows(Double.parseDouble(fields[2])); trace.setMostSignificantFrequency(Double.parseDouble(fields[3])); // trace.setId(Integer.toString(trace.hashCode())); } else { throw new IllegalArgumentException("unable to parse Trace from String : " + line); } return trace; }
From source file:org.apache.nutch.indexer.IndexSorterArquivoWeb.java
/** * Sort the documents by score// w ww. j a va2 s . com * @param reader * @param searcher * @return * @throws IOException */ //private static int[] oldToNew(IndexReader reader, Searcher searcher) throws IOException { private static DocScore[] newToOld(IndexReader reader, Searcher searcher) throws IOException { int readerMax = reader.maxDoc(); DocScore[] newToOld = new DocScore[readerMax]; // use site, an indexed, un-tokenized field to get boost //byte[] boosts = reader.norms("site"); TODO MC /* TODO MC */ Document docMeta; Pattern includes = Pattern.compile("\\|"); String value = NutchConfiguration.create().get(INCLUDE_EXTENSIONS_KEY, ""); String includeExtensions[] = includes.split(value); Hashtable<String, Boolean> validExtensions = new Hashtable<String, Boolean>(); for (int i = 0; i < includeExtensions.length; i++) { validExtensions.put(includeExtensions[i], true); System.out.println("extension boosted " + includeExtensions[i]); } /* TODO MC */ for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) { float score; if (reader.isDeleted(oldDoc)) { //score = 0.0f; score = -1f; // TODO MC } else { //score = Similarity.decodeNorm(boosts[oldDoc]); TODO MC /* TODO MC */ docMeta = searcher.doc(oldDoc); if (validExtensions.get(docMeta.get("subType")) == null) { // searched extensions will have higher scores score = -0.5f; } else { score = Integer.parseInt(docMeta.get("inlinks")); /* if (score==0) { score=0.001f; // TODO MC - to not erase } */ } /* TODO MC */ //System.out.println("Score for old document "+oldDoc+" is "+score+" and type "+docMeta.get("subType")); // TODO MC debug remove } DocScore docScore = new DocScore(); docScore.doc = oldDoc; docScore.score = score; newToOld[oldDoc] = docScore; } System.out.println("Sorting " + newToOld.length + " documents."); Arrays.sort(newToOld); //HeapSorter.sort(newToOld); // TODO MC - due to the lack of space /* TODO MC int[] oldToNew = new int[readerMax]; for (int newDoc = 0; newDoc < readerMax; newDoc++) { DocScore docScore = 
newToOld[newDoc]; //oldToNew[docScore.oldDoc] = docScore.score > 0.0f ? newDoc : -1; // TODO MC oldToNew[docScore.oldDoc] = newDoc; // TODO MC } */ /* TODO MC * for (int newDoc = 0; newDoc < readerMax; newDoc++) { DocScore docScore = newToOld[newDoc]; System.out.println("Score for new document "+newDoc+" is "+docScore.score); // TODO MC debug remove } * TODO MC */ //return oldToNew; TODO MC return newToOld; // TODO MC }
From source file:com.gs.obevo.dbmetadata.impl.dialects.AbstractDbMetadataManagerIT.java
/**
 * Loads the content behind {@code filePath} via {@code pathToString} and splits it into
 * batches at every line that consists solely of "GO" (matched case-insensitively).
 *
 * @param filePath location handed to {@code pathToString}
 * @return the pieces of the content between the "GO" lines
 * @throws Exception declared by the original signature
 */
public static String[] splitSql(String filePath) throws Exception {
    final Pattern goLine = Pattern.compile("(?i)^GO$", Pattern.MULTILINE);
    return goLine.split(pathToString(filePath));
}
From source file:com.denimgroup.threadfix.framework.util.CommonPathFinder.java
/**
 * Computes the root shared by all given paths.
 *
 * The separator is chosen from the first item: backslash when that item contains one,
 * forward slash otherwise. Every item is split on that separator and folded through
 * {@code getCommonParts}; the non-empty common parts are then joined again, each one
 * prefixed with the separator. The leading separator is dropped from the result when the
 * first item did not start with one.
 *
 * @param items the paths to compare; may be null
 * @return the common root, or null when {@code items} is null or empty or when
 *         {@code getCommonParts} yields null
 */
@Nullable
private static String parseRoot(@Nullable List<String> items) {
    if (items == null || items.isEmpty()) {
        return null;
    }

    Pattern separatorPattern = null;
    char separator = 0;
    boolean leadingSeparator = false;
    String[] shared = null;

    for (String path : items) {
        if (separatorPattern == null) {
            // The first item decides which separator is used for all of them.
            if (path.indexOf('\\') == -1) {
                separatorPattern = FORWARD_SLASH_PATTERN;
                separator = forwardSlash;
            } else {
                separatorPattern = BACKSLASH_PATTERN;
                separator = backwardSlash;
            }
            leadingSeparator = path.indexOf(separator) == 0;
        }
        shared = getCommonParts(shared, separatorPattern.split(path));
    }

    if (shared == null) {
        return null;
    }

    StringBuilder joined = new StringBuilder();
    for (String part : shared) {
        if (part == null || part.equals("")) {
            continue;
        }
        joined.append(separator).append(part);
    }

    String root = joined.toString();
    if (!leadingSeparator && root.indexOf(separator) == 0) {
        root = root.substring(1);
    }
    return root;
}
From source file:juicebox.data.HiCFileTools.java
/** * Load the list of chromosomes based on given genome id or file * * @param idOrFile string/*from w w w . j a va2s . c o m*/ * @return list of chromosomes */ public static List<Chromosome> loadChromosomes(String idOrFile) { InputStream is = null; try { // Note: to get this to work, had to edit Intellij settings // so that "?*.sizes" are considered sources to be copied to class path is = ChromosomeSizes.class.getResourceAsStream(idOrFile + ".chrom.sizes"); if (is == null) { // Not an ID, see if its a file File file = new File(idOrFile); try { if (file.exists()) { is = new FileInputStream(file); } else { System.err.println("Could not find chromosome sizes file for: " + idOrFile); System.exit(-3); } } catch (Exception e) { e.printStackTrace(); } } List<Chromosome> chromosomes = new ArrayList<Chromosome>(); chromosomes.add(0, null); // Index 0 reserved for "whole genome" pseudo-chromosome Pattern pattern = Pattern.compile("\t"); BufferedReader reader = new BufferedReader(new InputStreamReader(is), HiCGlobals.bufferSize); String nextLine; long genomeLength = 0; int idx = 1; try { while ((nextLine = reader.readLine()) != null) { String[] tokens = pattern.split(nextLine); if (tokens.length == 2) { String name = tokens[0]; int length = Integer.parseInt(tokens[1]); genomeLength += length; chromosomes.add(idx, new Chromosome(idx, name, length)); idx++; } else { System.out.println("Skipping " + nextLine); } } } catch (Exception e) { e.printStackTrace(); } // Add the "pseudo-chromosome" All, representing the whole genome. Units are in kilo-bases chromosomes.set(0, new Chromosome(0, "All", (int) (genomeLength / 1000))); return chromosomes; } finally { if (is != null) { try { is.close(); } catch (Exception e) { e.printStackTrace(); } } } }