List of usage examples for java.util.regex Pattern split
public String[] split(CharSequence input)
From source file:com.github.lindenb.jvarkit.tools.biostar.Biostar103303.java
private void readGTF(String uri, SAMSequenceDictionary dict) throws IOException { int count_exons = 0; final Set<String> unknown = new HashSet<String>(); LOG.info("Reading " + uri); final Pattern tab = Pattern.compile("[\t]"); final Map<String, GTFGene> transcript2gene = new HashMap<String, GTFGene>(); LineIterator iter = IOUtils.openURIForLineIterator(uri); while (iter.hasNext()) { String line = iter.next(); if (line.startsWith("#")) continue; String tokens[] = tab.split(line); if (tokens.length < 9) continue; if (!tokens[2].equals("exon")) continue; if (dict.getSequence(tokens[0]) == null) { if (!unknown.contains(tokens[0])) { LOG.warn("chromosome in " + line + " not in SAMSequenceDictionary "); unknown.add(tokens[0]);// ww w .j a v a2 s .c o m } continue; } String transcript_id = null, gene_id = null, gene_name = null, exon_id = null; StreamTokenizer st = new StreamTokenizer(new StringReader(tokens[8])); st.wordChars('_', '_'); String key = null; while (st.nextToken() != StreamTokenizer.TT_EOF) { String s = null; switch (st.ttype) { case StreamTokenizer.TT_NUMBER: s = String.valueOf(st.nval); break; case '"': case '\'': case StreamTokenizer.TT_WORD: s = st.sval; break; case ';': break; default: break; } if (s == null) continue; if (key == null) { key = s; } else { if (key.equals("transcript_id")) { transcript_id = s; } else if (key.equals("gene_id")) { gene_id = s; } else if (key.equals("gene_name")) { gene_name = s; } else if (key.equals("exon_id")) { exon_id = s; } key = null; } } if (transcript_id == null || transcript_id.isEmpty()) continue; GTFGene gene = transcript2gene.get(tokens[0] + " " + transcript_id); if (gene == null) { gene = new GTFGene(); gene.transcript_id = transcript_id; gene.gene_id = gene_id; gene.gene_name = gene_name; gene.chrom = tokens[0]; transcript2gene.put(tokens[0] + " " + transcript_id, gene); } GTFGene.Exon exon = gene.createExon(Integer.parseInt(tokens[3]), Integer.parseInt(tokens[4])); exon.exon_id = exon_id; } 
CloserUtil.close(iter); for (GTFGene g : transcript2gene.values()) { Collections.sort(g.exons, new Comparator<GTFGene.Exon>() { @Override public int compare(GTFGene.Exon o1, GTFGene.Exon o2) { return o1.start - o2.start; } }); for (int i = 0; i < g.exons.size(); ++i) { GTFGene.Exon exon = g.exons.get(i); exon.index = i; if (i > 0) { GTFGene.Exon prev = g.exons.get(i - 1); if (prev.end >= exon.start) { throw new IOException("exons " + (i) + " and " + (i + 1) + " overlap in " + g); } } Interval interval = new Interval(g.chrom, exon.start, exon.end); List<GTFGene.Exon> L = exonMap.get(interval); if (L == null) { L = new ArrayList<GTFGene.Exon>(1); exonMap.put(interval, L); } L.add(exon); ++count_exons; } } LOG.info("End Reading " + uri + " N=" + count_exons); }
From source file:org.parosproxy.paros.network.HttpMethodHelper.java
public HttpMethod createRequestMethod(HttpRequestHeader header, HttpBody body) throws URIException { HttpMethod httpMethod = null;//w w w . ja v a2 s. com String method = header.getMethod(); URI uri = header.getURI(); String version = header.getVersion(); if (method == null || method.trim().length() < 3) { throw new URIException("Invalid HTTP method: " + method); } if (method.equalsIgnoreCase(GET)) { //httpMethod = new GetMethod(); // ZAP: avoid discarding HTTP status code 101 that is used for WebSocket upgrade httpMethod = new ZapGetMethod(); } else if (method.equalsIgnoreCase(POST)) { httpMethod = new ZapPostMethod(); } else if (method.equalsIgnoreCase(DELETE)) { httpMethod = new ZapDeleteMethod(); } else if (method.equalsIgnoreCase(PUT)) { httpMethod = new ZapPutMethod(); } else if (method.equalsIgnoreCase(HEAD)) { httpMethod = new ZapHeadMethod(); } else if (method.equalsIgnoreCase(OPTIONS)) { httpMethod = new ZapOptionsMethod(); } else if (method.equalsIgnoreCase(TRACE)) { httpMethod = new ZapTraceMethod(uri.toString()); } else { httpMethod = new GenericMethod(method); } try { httpMethod.setURI(uri); } catch (Exception e1) { logger.error(e1.getMessage(), e1); } HttpMethodParams httpParams = httpMethod.getParams(); // default to use HTTP 1.0 httpParams.setVersion(HttpVersion.HTTP_1_0); if (version.equalsIgnoreCase(HttpHeader.HTTP11)) { httpParams.setVersion(HttpVersion.HTTP_1_1); } // set various headers int pos = 0; // ZAP: changed to always use CRLF, like the HttpHeader Pattern pattern = patternCRLF; String delimiter = header.getLineDelimiter(); // ZAP: Shouldn't happen as the HttpHeader always uses CRLF if (delimiter.equals(LF)) { delimiter = LF; pattern = patternLF; } String msg = header.getHeadersAsString(); String[] split = pattern.split(msg); String token = null; String name = null; String value = null; for (int i = 0; i < split.length; i++) { token = split[i]; if (token.equals("")) { continue; } if ((pos = token.indexOf(":")) < 0) { return null; } name 
= token.substring(0, pos).trim(); value = token.substring(pos + 1).trim(); httpMethod.addRequestHeader(name, value); } // set body if post method or put method if (body != null && body.length() > 0 && (httpMethod instanceof EntityEnclosingMethod)) { EntityEnclosingMethod post = (EntityEnclosingMethod) httpMethod; // post.setRequestEntity(new StringRequestEntity(body.toString())); post.setRequestEntity(new ByteArrayRequestEntity(body.getBytes())); } httpMethod.setFollowRedirects(false); return httpMethod; }
From source file:org.parosproxy.paros.network.HttpMethodHelper.java
public HttpMethod createRequestMethodNew(HttpRequestHeader header, HttpBody body) throws URIException { HttpMethod httpMethod = null;//from w w w . ja va2 s. c o m String method = header.getMethod(); URI uri = header.getURI(); String version = header.getVersion(); httpMethod = new GenericMethod(method); httpMethod.setURI(uri); HttpMethodParams httpParams = httpMethod.getParams(); // default to use HTTP 1.0 httpParams.setVersion(HttpVersion.HTTP_1_0); if (version.equalsIgnoreCase(HttpHeader.HTTP11)) { httpParams.setVersion(HttpVersion.HTTP_1_1); } // set various headers int pos = 0; // ZAP: FindBugs fix - always initialise pattern Pattern pattern = patternCRLF; String delimiter = CRLF; String msg = header.getHeadersAsString(); if ((pos = msg.indexOf(CRLF)) < 0) { if ((pos = msg.indexOf(LF)) < 0) { delimiter = LF; pattern = patternLF; } } else { delimiter = CRLF; pattern = patternCRLF; } String[] split = pattern.split(msg); String token = null; String name = null; String value = null; //String host = null; for (int i = 0; i < split.length; i++) { token = split[i]; if (token.equals("")) { continue; } if ((pos = token.indexOf(":")) < 0) { return null; } name = token.substring(0, pos).trim(); value = token.substring(pos + 1).trim(); httpMethod.addRequestHeader(name, value); } // set body if post method or put method if (body != null && body.length() > 0) { EntityEnclosingMethod generic = (EntityEnclosingMethod) httpMethod; // generic.setRequestEntity(new StringRequestEntity(body.toString())); generic.setRequestEntity(new ByteArrayRequestEntity(body.getBytes())); } httpMethod.setFollowRedirects(false); return httpMethod; }
From source file:org.apache.ctakes.ytex.uima.annotators.NegexAnnotator.java
private List<NegexRule> initializeRules() { List<String> listRules = this.initalizeRuleList(); List<NegexRule> listNegexRules = new ArrayList<NegexRule>(listRules.size()); Iterator<String> iRule = listRules.iterator(); while (iRule.hasNext()) { String rule = iRule.next(); Pattern p = Pattern.compile("[\\t]+"); // Working. String[] ruleTokens = p.split(rule.trim()); if (ruleTokens.length == 2) { // Add the regular expression characters to tokens and asemble // the // rule again. String[] ruleMembers = ruleTokens[0].trim().split(" "); String rule2 = ""; for (int i = 0; i <= ruleMembers.length - 1; i++) { if (!ruleMembers[i].equals("")) { if (ruleMembers.length == 1) { rule2 = ruleMembers[i]; } else { rule2 = rule2 + ruleMembers[i].trim() + "\\s+"; }//from ww w . j av a 2s . c o m } } // Remove the last s+ if (rule2.endsWith("\\s+")) { rule2 = rule2.substring(0, rule2.lastIndexOf("\\s+")); } String rule3 = "(?m)(?i)[[\\p{Punct}&&[^\\]\\[]]|\\s+](" + rule2 + ")[[\\p{Punct}&&[^_]]|\\s+]"; Pattern p2 = Pattern.compile(rule3.trim()); listNegexRules.add(new NegexRule(p2, rule2, ruleTokens[1].trim())); } else { log.warn("could not parse rule:" + rule); } // Matcher m = p2.matcher(sentence); // // while (m.find() == true) { // sentence = m.replaceAll(" " + ruleTokens[1].trim() // + m.group().trim().replaceAll(" ", filler) // + ruleTokens[1].trim() + " "); // } } return listNegexRules; }
From source file:org.prettyx.DistributeServer.Modeling.Sim.java
/**
 * Creates a sim object from the text of a sim element.
 *
 * <p>The first line is searched for parenthesised {@code key:value} groups, and a
 * {@code name} key sets the sim name. Every following line is dispatched on the first
 * keyword it contains, checked in this order: {@code resource}, {@code outputstrategy},
 * {@code model}, {@code efficiency}, {@code summary}, {@code analysis}. A {@code model}
 * or {@code analysis} line starts a brace-delimited block that is captured verbatim up to
 * the line on which the braces balance; if they never balance the captured text is empty
 * and parsing stops.
 *
 * @param simContent the text of the sim element, lines separated by {@code \n}
 */
public Sim(String simContent) {
    final Pattern parenthesized = Pattern.compile("\\(.*?\\)");
    final Pattern colon = Pattern.compile(":");
    final Pattern quote = Pattern.compile("\"");

    String[] content = simContent.split("\n");
    for (int i = 0; i < content.length; i++) {
        String line = content[i];

        if (i == 0) {
            Matcher matcher = parenthesized.matcher(line);
            while (matcher.find()) {
                // A match always includes both parentheses, so stripping them is safe.
                String inner = line.substring(matcher.start() + 1, matcher.end() - 1);
                String[] keyValue = colon.split(inner);
                // Length check: a group without ':' has no value to read.
                if (keyValue.length >= 2 && keyValue[0].trim().equals("name")) {
                    setName(DEPFS.removeSpace(keyValue[1].replace("\"", "")));
                }
            }
        } else if (line.contains("resource")) {
            String[] r = quote.split(line);
            if (r.length >= 2 && r[0].trim().equals("resource")) {
                setResource(DEPFS.removeSpace(r[1]));
            }
        } else if (line.contains("outputstrategy")) {
            Matcher matcher = parenthesized.matcher(line);
            while (matcher.find()) {
                // The last parenthesised group on the line wins.
                outputstrategy = line.substring(matcher.start() + 1, matcher.end() - 1);
            }
        } else if (line.contains("model")) {
            int end = indexOfBlockEnd(content, i);
            setModel(joinLines(content, i, end));
            // Resume on the line after the block. Previously the index overshot by one,
            // so the line directly after the closing brace was never examined.
            i = (end < 0) ? content.length : end;
        } else if (line.contains("efficiency")) {
            Matcher matcher = parenthesized.matcher(line);
            while (matcher.find()) {
                efficiency = line.substring(matcher.start() + 1, matcher.end() - 1);
            }
        } else if (line.contains("summary")) {
            Matcher matcher = parenthesized.matcher(line);
            while (matcher.find()) {
                summary.add(line.substring(matcher.start() + 1, matcher.end() - 1));
            }
        } else if (line.contains("analysis")) {
            int end = indexOfBlockEnd(content, i);
            analysis = joinLines(content, i, end);
            i = (end < 0) ? content.length : end;
        }
    }
}

/** Returns the index of the first line at or after {@code from} on which the braces balance, or -1. */
private static int indexOfBlockEnd(String[] lines, int from) {
    int depth = 0;
    for (int row = from; row < lines.length; row++) {
        String text = lines[row];
        for (int col = 0; col < text.length(); col++) {
            char c = text.charAt(col);
            if (c == '{') {
                depth++;
            } else if (c == '}') {
                depth--;
                if (depth == 0) {
                    return row;
                }
            }
        }
    }
    return -1;
}

/** Joins lines {@code from..to} (inclusive), each followed by a newline; empty when {@code to} is negative. */
private static String joinLines(String[] lines, int from, int to) {
    StringBuilder joined = new StringBuilder();
    for (int row = from; row <= to; row++) {
        joined.append(lines[row]).append("\n");
    }
    return joined.toString();
}
From source file:com.zbt.trie.linklist.SimpleTrie.java
/** * Reads words from a file into the trie. The file is read one line at a * time, a line may contain multiple words separated by whitespace. * /* ww w . j a va2 s . co m*/ * @param file * The file to read * @param strip1 * When added the file will replace any instances of 'strip' from * the file. This can be used to remove quotes or turn commas * into spaces. * @param strip2 * Like strip1 */ public void addFile(String file, String strip1, String strip2) throws IOException { BufferedReader in = new BufferedReader(new FileReader(file)); String line; String[] words; Pattern whitespace = Pattern.compile("\\s+"); Pattern quote1 = Pattern.compile(strip1); Pattern quote2 = Pattern.compile(strip2); String empty = " "; while ((line = in.readLine()) != null) { line = quote1.matcher(line).replaceAll(empty); // line = quote2.matcher(line).replaceAll(empty); words = whitespace.split(line); for (String w : words) if (w.length() > 0) if (!quote2.matcher(w.toLowerCase()).matches()) add(w); } in.close(); }
From source file:org.openanzo.glitter.query.QueryController.java
/** * Pretty print query string/* ww w. j a va2s.com*/ * * @param printFlags * print flags * @param startIndentLevel * @return pretty print version of query */ @SuppressWarnings("all") public String prettyPrintQueryString(EnumSet<QueryStringPrintOptions> printFlags, int startIndentLevel) { QueryResultForm queryResultForm = this.getQueryResultForm(); StringBuilder s = new StringBuilder(); // output the base, if any if (this.baseUri != null) { s.append("BASE <"); s.append(this.baseUri); s.append(">"); printSeparator(printFlags, 0, s); } // add prefixes for all URIs mentioned in the query final BidiMap<String, String> prefix2uri = new TreeBidiMap<String, String>(); Map<String, String> uri2prefix = prefix2uri.inverseBidiMap(); for (Entry<String, URI> e : this.prefixMap.entrySet()) prefix2uri.put(e.getKey(), e.getValue().toString()); for (Entry<String, URI> e : this.queryOptions.entrySet()) prefix2uri.put(e.getKey(), e.getValue().toString()); if (printFlags.contains(QueryStringPrintOptions.GENERATE_NEW_PREFIXES)) { visitURIs(new URIVisitor() { private int p = 0; public boolean visitURI(URI u) { if (!prefix2uri.containsValue(u.getNamespace())) { // TODO - could use next-to-last URI component to name prefix while (prefix2uri.containsKey("p" + ++p)) ; prefix2uri.put("p" + p, u.getNamespace()); } return true; } }, false, true); } // output prefixes MapIterator<String, String> it = prefix2uri.mapIterator(); while (it.hasNext()) { String key = it.next(); String value = it.getValue(); s.append("PREFIX "); s.append(key); s.append(": <"); s.append(value); s.append(">"); printSeparator(printFlags, 0, s); } this.resultForm.prettyPrintQueryPart(printFlags, startIndentLevel, uri2prefix, s); printSeparator(printFlags, startIndentLevel, s); if (isDatasetFromQuery()) { for (URI u : getQueryDataset().getDefaultGraphURIs()) { s.append("FROM "); printURI(u, printFlags, uri2prefix, s); printSeparator(printFlags, startIndentLevel, s); } for (URI u : getQueryDataset().getNamedGraphURIs()) { 
s.append("FROM NAMED "); printURI(u, printFlags, uri2prefix, s); printSeparator(printFlags, startIndentLevel, s); } for (URI u : getQueryDataset().getNamedDatasetURIs()) { s.append("FROM DATASET "); printURI(u, printFlags, uri2prefix, s); printSeparator(printFlags, startIndentLevel, s); } } s.append("WHERE {"); printSeparator(printFlags, startIndentLevel + 1, s); this.queryPattern.prettyPrintQueryPart(printFlags, startIndentLevel + 1, uri2prefix, s); printSeparator(printFlags, startIndentLevel, s); s.append("}"); if (queryResultForm instanceof Projection) { Projection projection = (Projection) queryResultForm; if (!projection.getGroupByVariables().isEmpty()) { printSeparator(printFlags, startIndentLevel, s); projection.prettyPrintGroupByQueryPart(printFlags, startIndentLevel, uri2prefix, s); } } if (this.ordering.size() > 0) { printSeparator(printFlags, startIndentLevel, s); s.append("ORDER BY "); for (int i = 0; i < this.ordering.size(); i++) { OrderingCondition c = this.ordering.get(i); if (i != 0) s.append(' '); c.prettyPrintQueryPart(printFlags, startIndentLevel, uri2prefix, s); } printSeparator(printFlags, startIndentLevel, s); } if (this.limit > -1) { printSeparator(printFlags, startIndentLevel, s); s.append("LIMIT "); s.append(this.limit); } if (this.offset > -1) { printSeparator(printFlags, startIndentLevel, s); s.append("OFFSET "); s.append(this.offset); } if (printFlags.contains(QueryStringPrintOptions.REMOVE_UNUSED_PREFIXES)) { String q = s.toString(); s = new StringBuilder(); String[] lines = q.split("\n"); Pattern p = Pattern.compile("^PREFIX\\s*(\\w+:)", Pattern.CASE_INSENSITIVE); boolean first = true; for (String line : lines) { Matcher m = p.matcher(line); if (m.find()) { Pattern prefix = Pattern.compile("\\W" + m.group(1)); if (prefix.split(q).length <= 2) continue; } if (!first) s.append('\n'); s.append(line); first = false; } } return s.toString(); }
From source file:com.googlecode.clearnlp.component.AbstractStatisticalComponent.java
protected List<Map<String, String[]>> getRules(BufferedReader fin) { Pattern space = Pattern.compile(" "), tab = Pattern.compile("\t"); List<Map<String, String[]>> rules = null; String[] tmp, val; String line;//from w ww. ja v a2 s . c o m int i, ngram; try { ngram = Integer.parseInt(fin.readLine()); rules = new ArrayList<Map<String, String[]>>(ngram); for (i = 0; i < ngram; i++) rules.add(new HashMap<String, String[]>()); while ((line = fin.readLine()) != null) { tmp = tab.split(line); val = space.split(tmp[1]); if (val.length <= ngram) rules.get(val.length - 1).put(tmp[0].trim(), val); } } catch (IOException e) { e.printStackTrace(); } return rules; }
From source file:org.prettyx.DistributeServer.Modeling.Model.java
/** * Using the content of the model element to create a model object * @param string/*w w w .java 2s. c o m*/ */ public Model(String string) { String[] content = string.split("\n"); for (int i = 0; i < content.length; i++) { String line = content[i]; if (i == 0) { Pattern pattern = Pattern.compile("\\(.*?\\)"); Matcher matcher = pattern.matcher(line); while (matcher.find()) { String name = ""; if (matcher.group() != "") { int start = matcher.start(); int end = matcher.end(); name = line.substring(start + 1, end - 1); pattern = Pattern.compile(":"); String[] name1 = pattern.split(name); if (name1.length != 1) { setModelParameter(DEPFS.removeSpace(name1[0]), DEPFS.removeSpace(name1[1].replace("\"", ""))); } } } continue; } else if (line.contains("components")) { while (i < content.length) { // System.out.println(line); if (line.contains("}")) break; line = line.split("//")[0]; if (line.trim() != "") { Pattern pattern = Pattern.compile("\""); String[] r = pattern.split(line); if (r.length >= 4) { String object = DEPFS.removeSpace(r[1]); String name = DEPFS.removeSpace(r[3]); setComponent(object, name); } else if (r.length >= 2) { String object = DEPFS.removeSpace(r[0]); String name = DEPFS.removeSpace(r[1]); setComponent(object, name); } } i++; line = content[i]; } continue; } else if (line.contains("connect")) { while (i < content.length) { // System.out.println(line); if (line.contains("}")) break; line = line.split("//")[0]; if (line.trim() != "") { Pattern pattern = Pattern.compile("\""); String[] r = pattern.split(line); if (r.length >= 4) { String outputVariable = DEPFS.removeSpace(r[1]); String inputVariable = DEPFS.removeSpace(r[3]); setConnect(outputVariable, inputVariable); } } i++; line = content[i]; } continue; } else if (line.contains("feedback")) { while (i < content.length) { // System.out.println(line); if (line.contains("}")) break; line = line.split("//")[0]; if (line.trim() != "") { Pattern pattern = Pattern.compile("\""); String[] r = 
pattern.split(line); if (r.length >= 4) { String outputVariable = DEPFS.removeSpace(r[1]); String inputVariable = DEPFS.removeSpace(r[3]); setFeedback(outputVariable, inputVariable); } } i++; line = content[i]; } continue; } // else if(line.contains("parameter")){ //// System.out.println(line); // // parameter = new Parameter(); // Pattern pattern=Pattern.compile("\\(.*?\\)"); // Matcher matcher = pattern.matcher(line); // while(matcher.find()) { // // String name = matcher.group(); // if(matcher.group() != "") { // int start = matcher.start(); // int end = matcher.end(); // name = line.substring(start + 1, end - 1); // pattern = Pattern.compile(":"); // String[] filePath = pattern.split(name); // if (filePath[0].trim().equals("file")) { // parameter.setParameterFile(DEPFS.removeSpace(filePath[1].replace("\"",""))); // } // } // } // i++; // line = content[i]; //// System.out.println(line); // while (i<content.length){ //// System.out.println(line); // // if(line.contains("}")) break; // line = line.split("//")[0]; // if(line.trim() != ""){ // pattern = Pattern.compile("\""); // String []r = pattern.split(line); // if(r.length >= 4){ // String name = DEPFS.removeSpace(r[1]); // String value = "\"" + DEPFS.removeSpace(r[3]) + "\""; // setParameter(name, value); // // } // else if(r.length == 3){ // String name = DEPFS.removeSpace(r[1]); // String value = DEPFS.removeSpace(r[2]); // setParameter(name, value); // // } // } // i++; // line = content[i]; // // } // continue; // } else if (line.contains("logging")) { while (i < content.length) { // System.out.println(line); line = line.split("//")[0]; if (line.trim() != "") { Pattern pattern = Pattern.compile("\""); String[] r = pattern.split(line); if (r.length == 4) { String name = DEPFS.removeSpace(r[1]); String level = DEPFS.removeSpace(r[3]); setLogging(name, level); } } i++; line = content[i]; if (line.contains("}")) break; } continue; } //iter while until if } }
From source file:net.duckling.ddl.service.export.impl.ExportServiceImpl.java
private String processImageLink(String html, VWBContext context, String path, ArchiveOutputStream out, Map<String, String> id2Title, List<String> allPages, boolean isEpub) { String regex = "[0-9a-zA-Z\\-\\/]+/download/([0-9]+)(\\?func=cache){0,1}"; Pattern p = Pattern.compile(regex); String[] cells = p.split(html); Matcher m = p.matcher(html);//from w w w. j a v a 2 s. c om StringBuilder sb = new StringBuilder(); sb.append(cells[0]); int index = 1; while (m.find()) { int imageId = Integer.parseInt(m.group(1)); String resKey = imageId + "_" + VWBContext.getCurrentTid() + "_" + LynxConstants.TYPE_FILE; String tagname = path.substring(0, path.lastIndexOf("/")); String resPath = this.getRelativeResPath(resKey, tagname); if (null == resPath) {// ? writeAttFile(path, VWBContext.getCurrentTid(), imageId, context, out); resPath = getResNoTagPath(resKey); resPath = (null == resPath) ? "#" : resPath; } sb.append(resPath); sb.append(cells[index++]); } if (index < cells.length) { sb.append(cells[index]); } return sb.toString(); }