Example usage for java.util.regex Pattern split

List of usage examples for java.util.regex Pattern split

Introduction

On this page you can find example usages of the split method of java.util.regex.Pattern.

Prototype

public String[] split(CharSequence input) 

Source Link

Document

Splits the given input sequence around matches of this pattern.

Usage

From source file:com.cloudera.oryx.rdf.computation.WineQualityIT.java

/**
 * Reads {@code winequality-white.csv} from {@code TEST_TEMP_INBOUND_DIR} and converts every non-empty line
 * into an {@link Example}.
 * <p>
 * Lines are split on {@code ';'}. Tokens 0..10 are parsed as floats and become the example's features;
 * token 11 is parsed as a float and passed as the first constructor argument of {@link Example}
 * (presumably the target value -- confirm against {@code Example}).
 *
 * @return one example per non-empty line, in file order
 * @throws IOException declared for reading the data file
 */
private static List<Example> readWineQualityExamples() throws IOException {
    File csvFile = new File(TEST_TEMP_INBOUND_DIR, "winequality-white.csv");
    Pattern semicolon = Pattern.compile(";");
    List<Example> examples = Lists.newArrayList();

    for (CharSequence row : new FileLineIterable(csvFile)) {
        if (row.length() > 0) {
            String[] columns = semicolon.split(row);

            Feature[] predictors = new Feature[11];
            int column = 0;
            while (column < predictors.length) {
                predictors[column] = NumericFeature.forValue(Float.parseFloat(columns[column]));
                column++;
            }

            // The token right after the 11 predictors is handed to Example separately.
            examples.add(new Example(NumericFeature.forValue(Float.parseFloat(columns[11])), predictors));
        }
    }
    return examples;
}

From source file:com.ephesoft.dcma.util.EphesoftStringUtil.java

/**
 * Splits the input string around matches of the given regular expression. When the split pattern is
 * empty, the input string is split character-wise.
 *
 * @param inputString {@link String} the input string that is to be split.
 * @param splitPattern {@link String} the regular expression on which the input string should be split.
 * @return {@code String[]} the tokens computed by splitting the input string, or {@code null} when
 *         {@code inputString} is null/empty or {@code splitPattern} is null.
 */
public static String[] splitString(final String inputString, final String splitPattern) {
    if (isNullOrEmpty(inputString) || splitPattern == null) {
        return null;
    }
    return Pattern.compile(splitPattern).split(inputString);
}

From source file:utility.Tools.java

/**
 * Splits a camel-case name into its words, keeping the original casing of every word.
 * <p>
 * The sequences {@code XML}, {@code DOM}, {@code JHotDraw} and {@code ID} are first rewritten to
 * {@code Xml}, {@code Dom}, {@code Jhotdraw} and {@code Id} so that each of them ends up in a single word.
 * Afterwards every upper-case ASCII letter ({@code [A-Z]}) starts a new word; a leading run without
 * upper-case letters is returned as the first word.
 *
 * @param word the name to split; must not be {@code null}
 * @return the words in order of appearance, e.g. {@code "fooBar"} yields {@code ["foo", "Bar"]}
 */
public static String[] splitDocName(String word) {
    String normalized = word.replace("XML", "Xml")
            .replace("DOM", "Dom")
            .replace("JHotDraw", "Jhotdraw")
            .replace("ID", "Id");

    Pattern upperCase = Pattern.compile("[A-Z]");
    boolean startsWithUpper = Pattern.matches("[A-Z].*", normalized);

    // Splitting drops the capitals themselves; they are glued back on below.
    String[] fragments = upperCase.split(normalized);
    List<String> parts = new ArrayList<>();
    for (String fragment : fragments) {
        parts.add(fragment);
    }

    // The n-th capital belongs in front of fragment n+1; capitals without a fragment become their own word.
    Matcher capitals = upperCase.matcher(normalized);
    int target = 1;
    while (capitals.find()) {
        String capital = capitals.group();
        if (target < fragments.length) {
            parts.set(target, capital + parts.get(target));
            target++;
        } else {
            parts.add(capital);
        }
    }

    // A leading capital leaves an empty fragment in front of the first word.
    if (startsWithUpper && fragments.length != 0) {
        parts.remove(0);
    }

    return parts.toArray(new String[0]);
}

From source file:utility.Tools.java

/**
 * Splits a camel-case name into its words and lower-cases every word.
 * <p>
 * The sequences {@code XML}, {@code DOM}, {@code JHotDraw} and {@code ID} are first rewritten to
 * {@code Xml}, {@code Dom}, {@code Jhotdraw} and {@code Id} so that each of them ends up in a single word.
 * Afterwards every upper-case ASCII letter ({@code [A-Z]}) starts a new word; a leading run without
 * upper-case letters is returned as the first word.
 * <p>
 * Lower-casing uses {@link java.util.Locale#ROOT} so the result does not depend on the JVM's default
 * locale (with a Turkish default locale, {@code "Id"} would otherwise become {@code "\u0131d"}).
 *
 * @param word the name to split; must not be {@code null}
 * @return the lower-cased words in order of appearance, e.g. {@code "fooBar"} yields
 *         {@code ["foo", "bar"]}
 */
public static String[] splitCamelWords(String word) {
    word = word.replace("XML", "Xml");
    word = word.replace("DOM", "Dom");
    word = word.replace("JHotDraw", "Jhotdraw");
    word = word.replace("ID", "Id");

    Pattern upperCase = Pattern.compile("[A-Z]");
    boolean startWithUpper = Pattern.matches("[A-Z].*", word);

    // Splitting drops the capitals themselves; they are glued back on below.
    String[] words = upperCase.split(word);
    List<String> list = new ArrayList<>(words.length + 1);
    for (String fragment : words) {
        list.add(fragment);
    }

    // The n-th capital belongs in front of fragment n+1; capitals without a fragment become their own word.
    Matcher capitals = upperCase.matcher(word);
    int count = 0;
    while (capitals.find()) {
        if (count + 1 < words.length) {
            list.set(count + 1, capitals.group() + list.get(count + 1));
            ++count;
        } else {
            list.add(capitals.group());
        }
    }

    // A leading capital leaves an empty fragment in front of the first word.
    if (startWithUpper && words.length != 0) {
        list.remove(0);
    }

    for (int i = 0; i < list.size(); ++i) {
        list.set(i, list.get(i).toLowerCase(java.util.Locale.ROOT));
    }

    return list.toArray(new String[0]);
}

From source file:org.openconcerto.sql.utils.SQLUtils.java

/**
 * Executes a script by splitting it with the given pattern and running every non-blank piece through the
 * data source of the given system root.
 *
 * @param sql the whole script.
 * @param sysRoot the system root whose data source executes each statement.
 * @param p the pattern separating the statements of the script.
 * @throws SQLException if anything fails while splitting or executing; the original exception is the cause.
 */
static public void executeScript(final String sql, final DBSystemRoot sysRoot, final Pattern p)
        throws SQLException {
    try {
        final String[] pieces = p.split(sql);
        for (int i = 0; i < pieces.length; i++) {
            final String statement = pieces[i].trim();
            if (statement.isEmpty()) {
                // nothing but whitespace between two separators
                continue;
            }
            sysRoot.getDataSource().execute(statement, null);
        }
    } catch (final Exception e) {
        throw new SQLException("unable to execute " + sql, e);
    }
}

From source file:com.hurence.logisland.botsearch.Trace.java

/**
 * take a tab separated string representing a trace and converts it to a
 * Trace object 10.113.140.213   77.67.21.141   (248.98, 41528.56, 381.64,
 * 34.91)/*from  www  .j a v  a  2s.co m*/
 *
 * @param line
 * @return
 */
public static Trace parse(String line) throws IllegalArgumentException {

    final Pattern tabPattern = Pattern.compile("\t");
    final Pattern commaPattern = Pattern.compile(",");

    String[] fields = tabPattern.split(line);
    Trace trace = new Trace();
    trace.setIpSource(fields[0]);
    trace.setIpTarget(fields[1]);

    String vector = fields[2].replace("(", "").replace(")", "");
    fields = commaPattern.split(vector);

    if (fields.length == 4) {
        trace.setAvgUploadedBytes(Double.parseDouble(fields[0]));
        trace.setAvgDownloadedBytes(Double.parseDouble(fields[1]));
        trace.setAvgTimeBetweenTwoFLows(Double.parseDouble(fields[2]));
        trace.setMostSignificantFrequency(Double.parseDouble(fields[3]));

        //   trace.setId(Integer.toString(trace.hashCode()));
    } else {
        throw new IllegalArgumentException("unable to parse Trace from String : " + line);
    }

    return trace;
}

From source file:org.apache.nutch.indexer.IndexSorterArquivoWeb.java

/**
 * Scores every document of the index and sorts the documents by score.
 * <p>
 * One {@code DocScore} is created for each document number below {@code reader.maxDoc()}:
 * <ul>
 * <li>deleted documents get {@code -1};</li>
 * <li>documents whose {@code subType} field is missing or is not one of the extensions configured under
 * {@code INCLUDE_EXTENSIONS_KEY} (a {@code '|'} separated list) get {@code -0.5};</li>
 * <li>all other documents get the value of their {@code inlinks} field, parsed as an integer.</li>
 * </ul>
 * The array is then sorted with {@link Arrays#sort(Object[])}; the resulting order is whatever the natural
 * ordering of {@code DocScore} defines (declared outside this method).
 *
 * @param reader the index reader providing the document count and deletion state
 * @param searcher the searcher used to load the stored fields of each document
 * @return the sorted array of document scores
 * @throws IOException declared for the index accesses
 */
//private static int[] oldToNew(IndexReader reader, Searcher searcher) throws IOException {
private static DocScore[] newToOld(IndexReader reader, Searcher searcher) throws IOException {
    int readerMax = reader.maxDoc();
    DocScore[] newToOld = new DocScore[readerMax];

    // use site, an indexed, un-tokenized field to get boost
    //byte[] boosts = reader.norms("site"); TODO MC
    /* TODO MC */
    Document docMeta;
    Pattern includes = Pattern.compile("\\|");
    String value = NutchConfiguration.create().get(INCLUDE_EXTENSIONS_KEY, "");
    String includeExtensions[] = includes.split(value);
    Hashtable<String, Boolean> validExtensions = new Hashtable<String, Boolean>();
    for (int i = 0; i < includeExtensions.length; i++) {
        validExtensions.put(includeExtensions[i], true);
        System.out.println("extension boosted " + includeExtensions[i]);
    }
    /* TODO MC */

    for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) {
        float score;
        if (reader.isDeleted(oldDoc)) {
            //score = 0.0f;       
            score = -1f; // TODO MC
        } else {
            //score = Similarity.decodeNorm(boosts[oldDoc]); TODO MC
            /* TODO MC */
            docMeta = searcher.doc(oldDoc);
            // Hashtable rejects null keys, so a document without a subType field is checked explicitly
            // and treated like any other non-boosted extension.
            String subType = docMeta.get("subType");
            if (subType == null || validExtensions.get(subType) == null) { // searched extensions will have higher scores 
                score = -0.5f;
            } else {
                score = Integer.parseInt(docMeta.get("inlinks"));
                /*
                if (score==0) {
                   score=0.001f; // TODO MC - to not erase
                }
                */
            }
            /* TODO MC */
            //System.out.println("Score for old document "+oldDoc+" is "+score+" and type "+docMeta.get("subType")); // TODO MC debug remove
        }
        DocScore docScore = new DocScore();
        docScore.doc = oldDoc;
        docScore.score = score;
        newToOld[oldDoc] = docScore;
    }

    System.out.println("Sorting " + newToOld.length + " documents.");
    Arrays.sort(newToOld);
    //HeapSorter.sort(newToOld); // TODO MC - due to the lack of space

    /* TODO MC
    int[] oldToNew = new int[readerMax];
    for (int newDoc = 0; newDoc < readerMax; newDoc++) {
      DocScore docScore = newToOld[newDoc];
      //oldToNew[docScore.oldDoc] = docScore.score > 0.0f ? newDoc : -1; // TODO MC
      oldToNew[docScore.oldDoc] = newDoc; // TODO MC
    } 
    */

    /* TODO MC *
    for (int newDoc = 0; newDoc < readerMax; newDoc++) {
       DocScore docScore = newToOld[newDoc];
       System.out.println("Score for new document "+newDoc+" is "+docScore.score); // TODO MC debug remove
    }
    * TODO MC */

    //return oldToNew; TODO MC
    return newToOld; // TODO MC
}

From source file:com.gs.obevo.dbmetadata.impl.dialects.AbstractDbMetadataManagerIT.java

/**
 * Loads the content behind the given path and splits it into batches at every line that consists solely
 * of {@code GO} (matched case-insensitively).
 *
 * @param filePath the path handed to {@code pathToString} to obtain the SQL text
 * @return the pieces of the SQL text found between the {@code GO} lines
 * @throws Exception declared for loading the content
 */
public static String[] splitSql(String filePath) throws Exception {
    final String script = pathToString(filePath);
    return Pattern.compile("(?i)^GO$", Pattern.MULTILINE).split(script);
}

From source file:com.denimgroup.threadfix.framework.util.CommonPathFinder.java

/**
 * Builds the root path shared by the given paths.
 * <p>
 * The first path decides the separator for all paths: a backslash if that path contains one, a forward
 * slash otherwise. Every path is split on that separator and the pieces are reduced with
 * {@code getCommonParts}; the non-empty pieces that remain are joined again, each preceded by the
 * separator. The leading separator is removed from the result when the first path did not start with one.
 *
 * @param items the paths to inspect, may be {@code null}
 * @return the shared root, or {@code null} if {@code items} is null or empty or if {@code getCommonParts}
 *         yields {@code null}
 */
@Nullable
private static String parseRoot(@Nullable List<String> items) {
    if (items == null || items.isEmpty()) {
        return null;
    }

    // The separator is chosen once, from the first path, and reused for every other path.
    final String firstItem = items.get(0);
    final boolean usesBackslash = firstItem.indexOf('\\') >= 0;
    final Pattern separatorPattern = usesBackslash ? BACKSLASH_PATTERN : FORWARD_SLASH_PATTERN;
    final char separator = usesBackslash ? backwardSlash : forwardSlash;
    final boolean hadLeadingSeparator = firstItem.indexOf(separator) == 0;

    String[] shared = null;
    for (String item : items) {
        shared = getCommonParts(shared, separatorPattern.split(item));
    }

    if (shared == null) {
        return null;
    }

    StringBuilder joined = new StringBuilder();
    for (String part : shared) {
        if (part == null || part.equals("")) {
            continue;
        }
        joined.append(separator).append(part);
    }

    String root = joined.toString();
    if (!hadLeadingSeparator && root.indexOf(separator) == 0) {
        // Joining always prepends the separator; drop it when the input paths were relative.
        root = root.substring(1);
    }
    return root;
}

From source file:juicebox.data.HiCFileTools.java

/**
 * Load the list of chromosomes based on given genome id or file
 *
 * @param idOrFile string/*from   w w  w  .  j a  va2s . c  o m*/
 * @return list of chromosomes
 */
public static List<Chromosome> loadChromosomes(String idOrFile) {

    InputStream is = null;

    try {
        // Note: to get this to work, had to edit Intellij settings
        // so that "?*.sizes" are considered sources to be copied to class path
        is = ChromosomeSizes.class.getResourceAsStream(idOrFile + ".chrom.sizes");

        if (is == null) {
            // Not an ID,  see if its a file
            File file = new File(idOrFile);

            try {
                if (file.exists()) {
                    is = new FileInputStream(file);
                } else {
                    System.err.println("Could not find chromosome sizes file for: " + idOrFile);
                    System.exit(-3);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        List<Chromosome> chromosomes = new ArrayList<Chromosome>();
        chromosomes.add(0, null); // Index 0 reserved for "whole genome" pseudo-chromosome

        Pattern pattern = Pattern.compile("\t");
        BufferedReader reader = new BufferedReader(new InputStreamReader(is), HiCGlobals.bufferSize);
        String nextLine;
        long genomeLength = 0;
        int idx = 1;

        try {
            while ((nextLine = reader.readLine()) != null) {
                String[] tokens = pattern.split(nextLine);
                if (tokens.length == 2) {
                    String name = tokens[0];
                    int length = Integer.parseInt(tokens[1]);
                    genomeLength += length;
                    chromosomes.add(idx, new Chromosome(idx, name, length));
                    idx++;
                } else {
                    System.out.println("Skipping " + nextLine);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Add the "pseudo-chromosome" All, representing the whole genome.  Units are in kilo-bases
        chromosomes.set(0, new Chromosome(0, "All", (int) (genomeLength / 1000)));

        return chromosomes;
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}