Example usage for java.util.regex.Pattern.split

Introduction

This page collects usage examples for the split method of java.util.regex.Pattern.

Prototype

public String[] split(CharSequence input)

Source Link

Document

Splits the given input sequence around matches of this pattern.

Usage

From source file:com.cloudera.oryx.rdf.computation.WineQualityIT.java

/**
 * Loads the examples stored in {@code winequality-white.csv} under {@code TEST_TEMP_INBOUND_DIR}.
 * <p>
 * Every non-empty line is split on {@code ';'}. The first eleven values become numeric features and
 * the twelfth value is passed as the first constructor argument of {@link Example} (presumably the
 * target to predict; confirm against {@code Example}).
 *
 * @return one {@link Example} per non-empty line, in file order
 * @throws IOException declared by the original signature; presumably raised while reading the file
 */
private static List<Example> readWineQualityExamples() throws IOException {
    File csv = new File(TEST_TEMP_INBOUND_DIR, "winequality-white.csv");
    Pattern semicolon = Pattern.compile(";");
    List<Example> examples = Lists.newArrayList();
    for (CharSequence row : new FileLineIterable(csv)) {
        if (row.length() > 0) {
            String[] columns = semicolon.split(row);
            Feature[] predictors = new Feature[11];
            int column = 0;
            while (column < predictors.length) {
                predictors[column] = NumericFeature.forValue(Float.parseFloat(columns[column]));
                column++;
            }
            examples.add(new Example(NumericFeature.forValue(Float.parseFloat(columns[11])), predictors));
        }
    }
    return examples;
}

From source file:com.ephesoft.dcma.util.EphesoftStringUtil.java

/**
 * Splits the input string around matches of the given pattern. The pattern is compiled as a regular
 * expression; when it is empty, the input string is split character-wise.
 * 
 * @param inputString {@link String} the input string that is to be split.
 * @param splitPattern {@link String} the regular expression on which the input string should be split.
 * @return {@link String[]} the pieces of the input string, or null when inputString is empty/null or
 *         splitPattern is null.
 */
public static String[] splitString(final String inputString, final String splitPattern) {
    // Nothing to split, or nothing to split on: signal it with null, as callers expect.
    if (isNullOrEmpty(inputString) || splitPattern == null) {
        return null;
    }
    return Pattern.compile(splitPattern).split(inputString);
}

From source file:utility.Tools.java

/**
 * Splits a camel-case name into its words, keeping the original letter case.
 * <p>
 * The acronyms {@code XML}, {@code DOM}, {@code JHotDraw} and {@code ID} are first rewritten to
 * {@code Xml}, {@code Dom}, {@code Jhotdraw} and {@code Id} so that each counts as a single word.
 * A new word then starts at every ASCII upper-case letter ({@code A-Z}); for example
 * {@code "XMLParserID"} becomes {@code ["Xml", "Parser", "Id"]}.
 *
 * @param word the name to split; must not be null
 * @return the words in order; never empty (an input without upper-case letters, including the
 *         empty string, is returned as a single element)
 */
public static String[] splitDocName(String word) {
    String normalized = word.replace("XML", "Xml")
            .replace("DOM", "Dom")
            .replace("JHotDraw", "Jhotdraw")
            .replace("ID", "Id");

    List<String> parts = new ArrayList<>();
    int start = 0;
    // Start at 1: an upper-case first character opens the first word, it does not close one.
    // This avoids the leading empty element a regex split produces, for any input (also multi-line).
    for (int i = 1; i < normalized.length(); i++) {
        char c = normalized.charAt(i);
        if (c >= 'A' && c <= 'Z') {
            parts.add(normalized.substring(start, i));
            start = i;
        }
    }
    parts.add(normalized.substring(start));

    return parts.toArray(new String[0]);
}

From source file:utility.Tools.java

/**
 * Splits a camel-case name into its words and lower-cases each word.
 * <p>
 * The acronyms {@code XML}, {@code DOM}, {@code JHotDraw} and {@code ID} are first rewritten to
 * {@code Xml}, {@code Dom}, {@code Jhotdraw} and {@code Id} so that each counts as a single word.
 * A new word then starts at every ASCII upper-case letter ({@code A-Z}); for example
 * {@code "XMLParserID"} becomes {@code ["xml", "parser", "id"]}.
 *
 * @param word the name to split; must not be null
 * @return the lower-cased words in order; never empty (an input without upper-case letters,
 *         including the empty string, is returned as a single element)
 */
public static String[] splitCamelWords(String word) {
    String normalized = word.replace("XML", "Xml")
            .replace("DOM", "Dom")
            .replace("JHotDraw", "Jhotdraw")
            .replace("ID", "Id");

    List<String> parts = new ArrayList<>();
    int start = 0;
    // Start at 1: an upper-case first character opens the first word, it does not close one.
    // This avoids the leading empty element a regex split produces, for any input (also multi-line).
    for (int i = 1; i < normalized.length(); i++) {
        char c = normalized.charAt(i);
        if (c >= 'A' && c <= 'Z') {
            // Locale.ROOT keeps the result independent of the JVM default locale
            // (e.g. Turkish would map 'I' to a dotless 'ı').
            parts.add(normalized.substring(start, i).toLowerCase(java.util.Locale.ROOT));
            start = i;
        }
    }
    parts.add(normalized.substring(start).toLowerCase(java.util.Locale.ROOT));

    return parts.toArray(new String[0]);
}

From source file:org.openconcerto.sql.utils.SQLUtils.java

/**
 * Executes a script statement by statement: {@code sql} is split with {@code p}, every piece is
 * trimmed, and each non-empty piece is passed to the data source of {@code sysRoot}.
 *
 * @param sql the whole script
 * @param sysRoot the root whose data source executes the statements
 * @param p the pattern separating two statements
 * @throws SQLException if anything fails while splitting or executing; the original exception is
 *         kept as the cause and the message contains the whole script
 */
static public void executeScript(final String sql, final DBSystemRoot sysRoot, final Pattern p)
        throws SQLException {
    try {
        final String[] pieces = p.split(sql);
        for (int i = 0; i < pieces.length; i++) {
            final String statement = pieces[i].trim();
            if (!statement.isEmpty()) {
                sysRoot.getDataSource().execute(statement, null);
            }
        }
    } catch (final Exception e) {
        throw new SQLException("unable to execute " + sql, e);
    }
}

From source file:com.hurence.logisland.botsearch.Trace.java

/**
 * take a tab separated string representing a trace and converts it to a
 * Trace object 10.113.140.213   77.67.21.141   (248.98, 41528.56, 381.64,
 * 34.91)/*from  www  .j a v  a  2s.co m*/
 *
 * @param line
 * @return
 */
public static Trace parse(String line) throws IllegalArgumentException {

    final Pattern tabPattern = Pattern.compile("\t");
    final Pattern commaPattern = Pattern.compile(",");

    String[] fields = tabPattern.split(line);
    Trace trace = new Trace();
    trace.setIpSource(fields[0]);
    trace.setIpTarget(fields[1]);

    String vector = fields[2].replace("(", "").replace(")", "");
    fields = commaPattern.split(vector);

    if (fields.length == 4) {
        trace.setAvgUploadedBytes(Double.parseDouble(fields[0]));
        trace.setAvgDownloadedBytes(Double.parseDouble(fields[1]));
        trace.setAvgTimeBetweenTwoFLows(Double.parseDouble(fields[2]));
        trace.setMostSignificantFrequency(Double.parseDouble(fields[3]));

        //   trace.setId(Integer.toString(trace.hashCode()));
    } else {
        throw new IllegalArgumentException("unable to parse Trace from String : " + line);
    }

    return trace;
}

From source file:org.apache.nutch.indexer.IndexSorterArquivoWeb.java

/**
 * Scores every document id of the index and returns the scores sorted with
 * {@link Arrays#sort(Object[])}; the resulting order is the one defined by {@code DocScore}
 * (not visible here).
 * <p>
 * Scoring rules:
 * <ul>
 * <li>a deleted document gets {@code -1};</li>
 * <li>a document whose {@code subType} field is missing, or is not one of the {@code '|'}-separated
 * values configured under {@code INCLUDE_EXTENSIONS_KEY}, gets {@code -0.5};</li>
 * <li>any other document gets the integer value of its {@code inlinks} field.</li>
 * </ul>
 *
 * @param reader the index whose documents are scored
 * @param searcher used to fetch each non-deleted document
 * @return one {@code DocScore} per document id, sorted
 * @throws IOException propagated from the reader/searcher calls
 */
//private static int[] oldToNew(IndexReader reader, Searcher searcher) throws IOException {
private static DocScore[] newToOld(IndexReader reader, Searcher searcher) throws IOException {
    int readerMax = reader.maxDoc();
    DocScore[] newToOld = new DocScore[readerMax];

    // use site, an indexed, un-tokenized field to get boost
    //byte[] boosts = reader.norms("site"); TODO MC
    /* TODO MC */
    Document docMeta;
    Pattern includes = Pattern.compile("\\|");
    String value = NutchConfiguration.create().get(INCLUDE_EXTENSIONS_KEY, "");
    String includeExtensions[] = includes.split(value);
    Hashtable<String, Boolean> validExtensions = new Hashtable<String, Boolean>();
    for (int i = 0; i < includeExtensions.length; i++) {
        validExtensions.put(includeExtensions[i], true);
        System.out.println("extension boosted " + includeExtensions[i]);
    }
    /* TODO MC */

    for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) {
        float score;
        if (reader.isDeleted(oldDoc)) {
            //score = 0.0f;       
            score = -1f; // TODO MC
        } else {
            //score = Similarity.decodeNorm(boosts[oldDoc]); TODO MC
            /* TODO MC */
            docMeta = searcher.doc(oldDoc);
            String subType = docMeta.get("subType");
            // Hashtable.get(null) throws NullPointerException, so a document without a subType
            // value is handled explicitly and scored like any other non-boosted extension.
            if (subType == null || validExtensions.get(subType) == null) { // searched extensions will have higher scores 
                score = -0.5f;
            } else {
                score = Integer.parseInt(docMeta.get("inlinks"));
                /*
                if (score==0) {
                   score=0.001f; // TODO MC - to not erase
                }
                */
            }
            /* TODO MC */
            //System.out.println("Score for old document "+oldDoc+" is "+score+" and type "+docMeta.get("subType")); // TODO MC debug remove
        }
        DocScore docScore = new DocScore();
        docScore.doc = oldDoc;
        docScore.score = score;
        newToOld[oldDoc] = docScore;
    }

    System.out.println("Sorting " + newToOld.length + " documents.");
    Arrays.sort(newToOld);
    //HeapSorter.sort(newToOld); // TODO MC - due to the lack of space

    /* TODO MC
    int[] oldToNew = new int[readerMax];
    for (int newDoc = 0; newDoc < readerMax; newDoc++) {
      DocScore docScore = newToOld[newDoc];
      //oldToNew[docScore.oldDoc] = docScore.score > 0.0f ? newDoc : -1; // TODO MC
      oldToNew[docScore.oldDoc] = newDoc; // TODO MC
    } 
    */

    /* TODO MC *
    for (int newDoc = 0; newDoc < readerMax; newDoc++) {
       DocScore docScore = newToOld[newDoc];
       System.out.println("Score for new document "+newDoc+" is "+docScore.score); // TODO MC debug remove
    }
    * TODO MC */

    //return oldToNew; TODO MC
    return newToOld; // TODO MC
}

From source file:com.gs.obevo.dbmetadata.impl.dialects.AbstractDbMetadataManagerIT.java

/**
 * Splits the text obtained from {@code pathToString(filePath)} (presumably the content of that
 * file) into batches, using every line that consists solely of {@code GO}, in any letter case,
 * as the separator.
 *
 * @param filePath location handed to {@code pathToString}
 * @return the text between the {@code GO} lines
 * @throws Exception declared by the original signature; presumably raised by {@code pathToString}
 */
public static String[] splitSql(String filePath) throws Exception {
    final Pattern goLine = Pattern.compile("(?i)^GO$", Pattern.MULTILINE);
    return goLine.split(pathToString(filePath));
}

From source file:com.denimgroup.threadfix.framework.util.CommonPathFinder.java

/**
 * Computes the path prefix shared by all the given paths.
 * <p>
 * The separator is chosen from the first item: backslash if that item contains one, forward slash
 * otherwise. Every item is split on that separator and folded through {@code getCommonParts};
 * the non-empty common parts are then re-joined, each preceded by the separator. The leading
 * separator is dropped again unless the first item itself started with it.
 *
 * @param items the paths to compare; may be null
 * @return the re-joined common parts, or null if {@code items} is null or empty or
 *         {@code getCommonParts} yields null
 */
@Nullable
private static String parseRoot(@Nullable List<String> items) {
    if (items == null || items.isEmpty()) {
        return null;
    }

    String[] commonParts = null;
    boolean startsWithCharacter = false;

    Pattern splitPattern = null;
    char splitChar = 0;

    for (String item : items) {
        // The first item decides which separator is used for the whole list.
        if (splitPattern == null) {
            if (item.indexOf('\\') != -1) {
                splitPattern = BACKSLASH_PATTERN;
                splitChar = backwardSlash;
            } else {
                splitPattern = FORWARD_SLASH_PATTERN;
                splitChar = forwardSlash;
            }
            startsWithCharacter = item.indexOf(splitChar) == 0;
        }

        commonParts = getCommonParts(commonParts, splitPattern.split(item));
    }

    if (commonParts == null) {
        return null;
    }

    StringBuilder builder = new StringBuilder();
    for (String part : commonParts) {
        if (part != null && !part.isEmpty()) {
            builder.append(splitChar).append(part);
        }
    }

    String response = builder.toString();

    // Every part was prefixed with the separator; remove the first one for relative paths.
    if (!startsWithCharacter && response.indexOf(splitChar) == 0) {
        response = response.substring(1);
    }

    return response;
}

From source file:juicebox.data.HiCFileTools.java

/**
 * Load the list of chromosomes based on given genome id or file
 *
 * @param idOrFile string/*from   w w  w  .  j a  va2s . c  o m*/
 * @return list of chromosomes
 */
public static List<Chromosome> loadChromosomes(String idOrFile) {

    InputStream is = null;

    try {
        // Note: to get this to work, had to edit Intellij settings
        // so that "?*.sizes" are considered sources to be copied to class path
        is = ChromosomeSizes.class.getResourceAsStream(idOrFile + ".chrom.sizes");

        if (is == null) {
            // Not an ID,  see if its a file
            File file = new File(idOrFile);

            try {
                if (file.exists()) {
                    is = new FileInputStream(file);
                } else {
                    System.err.println("Could not find chromosome sizes file for: " + idOrFile);
                    System.exit(-3);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        List<Chromosome> chromosomes = new ArrayList<Chromosome>();
        chromosomes.add(0, null); // Index 0 reserved for "whole genome" pseudo-chromosome

        Pattern pattern = Pattern.compile("\t");
        BufferedReader reader = new BufferedReader(new InputStreamReader(is), HiCGlobals.bufferSize);
        String nextLine;
        long genomeLength = 0;
        int idx = 1;

        try {
            while ((nextLine = reader.readLine()) != null) {
                String[] tokens = pattern.split(nextLine);
                if (tokens.length == 2) {
                    String name = tokens[0];
                    int length = Integer.parseInt(tokens[1]);
                    genomeLength += length;
                    chromosomes.add(idx, new Chromosome(idx, name, length));
                    idx++;
                } else {
                    System.out.println("Skipping " + nextLine);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Add the "pseudo-chromosome" All, representing the whole genome.  Units are in kilo-bases
        chromosomes.set(0, new Chromosome(0, "All", (int) (genomeLength / 1000)));

        return chromosomes;
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}