List of usage examples for java.util Scanner useDelimiter
public Scanner useDelimiter(String pattern)
From source file:org.apache.openaz.xacml.admin.util.RESTfulPAPEngine.java
/** * Send a request to the PAP Servlet and get the response. * /*from w w w.j a v a 2 s . c o m*/ * The content is either an InputStream to be copied to the Request OutputStream * OR it is an object that is to be encoded into JSON and pushed into the Request OutputStream. * * The Request parameters may be encoded in multiple "name=value" sets, or parameters may be combined by the caller. * * @param method * @param content - EITHER an InputStream OR an Object to be encoded in JSON * @param collectionTypeClass * @param responseContentClass * @param parameters * @return * @throws Exception */ private Object sendToPAP(String method, Object content, Class collectionTypeClass, Class responseContentClass, String... parameters) throws PAPException { HttpURLConnection connection = null; try { String fullURL = papServletURLString; if (parameters != null && parameters.length > 0) { String queryString = ""; for (String p : parameters) { queryString += "&" + p; } fullURL += "?" + queryString.substring(1); } // special case - Status (actually the detailed status) comes from the PDP directly, not the PAP if (method.equals("GET") && content instanceof PDP && responseContentClass == StdPDPStatus.class) { // Adjust the url and properties appropriately fullURL = ((PDP) content).getId() + "?type=Status"; content = null; } URL url = new URL(fullURL); // // Open up the connection // connection = (HttpURLConnection) url.openConnection(); // // Setup our method and headers // connection.setRequestMethod(method); // connection.setRequestProperty("Accept", "text/x-java-properties"); // connection.setRequestProperty("Content-Type", "text/x-java-properties"); connection.setUseCaches(false); // // Adding this in. It seems the HttpUrlConnection class does NOT // properly forward our headers for POST re-direction. It does so // for a GET re-direction. // // So we need to handle this ourselves. 
// connection.setInstanceFollowRedirects(false); connection.setDoOutput(true); connection.setDoInput(true); if (content != null) { if (content instanceof InputStream) { try { // // Send our current policy configuration // try (OutputStream os = connection.getOutputStream()) { int count = IOUtils.copy((InputStream) content, os); if (logger.isDebugEnabled()) { logger.debug("copied to output, bytes=" + count); } } } catch (Exception e) { logger.error("Failed to write content in '" + method + "'", e); throw e; } } else { // The content is an object to be encoded in JSON ObjectMapper mapper = new ObjectMapper(); mapper.writeValue(connection.getOutputStream(), content); } } // // Do the connect // connection.connect(); if (connection.getResponseCode() == 204) { logger.info("Success - no content."); return null; } else if (connection.getResponseCode() == 200) { logger.info("Success. We have a return object."); // get the response content into a String String json = null; // read the inputStream into a buffer (trick found online scans entire input looking for end-of-file) java.util.Scanner scanner = new java.util.Scanner(connection.getInputStream()); scanner.useDelimiter("\\A"); json = scanner.hasNext() ? 
scanner.next() : ""; scanner.close(); logger.info("JSON response from PAP: " + json); // convert Object sent as JSON into local object ObjectMapper mapper = new ObjectMapper(); if (collectionTypeClass != null) { // collection of objects expected final CollectionType javaType = mapper.getTypeFactory() .constructCollectionType(collectionTypeClass, responseContentClass); Object objectFromJSON = mapper.readValue(json, javaType); return objectFromJSON; } else { // single value object expected Object objectFromJSON = mapper.readValue(json, responseContentClass); return objectFromJSON; } } else if (connection.getResponseCode() >= 300 && connection.getResponseCode() <= 399) { // redirection String newURL = connection.getHeaderField("Location"); if (newURL == null) { logger.error( "No Location header to redirect to when response code=" + connection.getResponseCode()); throw new IOException( "No redirect Location header when response code=" + connection.getResponseCode()); } int qIndex = newURL.indexOf("?"); if (qIndex > 0) { newURL = newURL.substring(0, qIndex); } logger.info("Redirect seen. Redirecting " + fullURL + " to " + newURL); return newURL; } else { logger.warn("Unexpected response code: " + connection.getResponseCode() + " message: " + connection.getResponseMessage()); throw new IOException("Server Response: " + connection.getResponseCode() + ": " + connection.getResponseMessage()); } } catch (Exception e) { logger.error("HTTP Request/Response to PAP: " + e, e); throw new PAPException("Request/Response threw :" + e); } finally { // cleanup the connection if (connection != null) { try { // For some reason trying to get the inputStream from the connection // throws an exception rather than returning null when the InputStream does not exist. 
InputStream is = null; try { is = connection.getInputStream(); } catch (Exception e1) { //NOPMD // ignore this } if (is != null) { is.close(); } } catch (IOException ex) { logger.error("Failed to close connection: " + ex, ex); } connection.disconnect(); } } }
From source file:gov.usgs.cida.coastalhazards.rest.data.util.MetadataUtilTest.java
private String loadResourceAsString(String fileName) throws IOException { Scanner scanner = new Scanner(getClass().getClassLoader().getResourceAsStream(fileName)); String contents = scanner.useDelimiter("\\A").next(); scanner.close();//from w w w . j ava 2 s . co m return contents; }
From source file:org.apache.openaz.xacml.std.json.JSONRequest.java
/** * Read characters from the given <code>InputStream</code> and parse them into an XACML * {@link org.apache.openaz.xacml.api.Request} object. * * @param is/* ww w . jav a 2s . c o m*/ * @return * @throws JSONStructureException */ public static Request load(InputStream is) throws JSONStructureException { // TODO - ASSUME that order of members within an object does not matter (Different from XML, in JSON // everything is handled as Maps so order does not matter) // ensure shorthand map is set up if (shorthandMap == null) { initShorthandMap(); } // ensure that we have an instance of the DataTypeFactory for generating AttributeValues by DataType if (dataTypeFactory == null) { try { dataTypeFactory = DataTypeFactory.newInstance(); if (dataTypeFactory == null) { throw new NullPointerException("No DataTypeFactory found"); } } catch (FactoryException e) { throw new JSONStructureException("Unable to find DataTypeFactory, e=" + e); } } // create a new Request object to be filled in StdMutableRequest stdMutableRequest = null; String json = null; ObjectMapper mapper = null; try { // read the inputStream into a buffer (trick found online scans entire input looking for // end-of-file) java.util.Scanner scanner = new java.util.Scanner(is); scanner.useDelimiter("\\A"); json = scanner.hasNext() ? scanner.next() : ""; scanner.close(); mapper = new ObjectMapper().setVisibility(PropertyAccessor.FIELD, Visibility.ANY); // TODO - ASSUME that any duplicated component is a bad thing (probably indicating an error in the // incoming JSON) mapper.configure(JsonParser.Feature.STRICT_DUPLICATE_DETECTION, true); Map<?, ?> root = mapper.readValue(json, Map.class); // // Does the request exist? // Map<?, ?> jsonRequestMap = (Map<?, ?>) root.remove("Request"); if (jsonRequestMap == null) { throw new JSONStructureException("No \"Request\" property found."); } checkUnknown("Top-level message", root); stdMutableRequest = new StdMutableRequest(); // // Is there a Category? 
// Object categoryList = jsonRequestMap.remove("Category"); if (categoryList != null && !(categoryList instanceof List)) { throw new JSONStructureException( "Category must contain list of objects, not '" + categoryList.getClass() + "'"); } if (categoryList != null) { // // Iterate each Category // Iterator<?> iter = ((List<?>) categoryList).iterator(); while (iter.hasNext()) { Object category = iter.next(); if (!(category instanceof Map)) { throw new JSONStructureException( "Category list must contain objects contained within curly braces ({})"); } parseCategory((Map<?, ?>) category, "Category", null, stdMutableRequest); } } // The following may be either a single instance or an array. This allows multiple decisions to // work with the Default Category objects. // Example: // "AccessSubject" : [ {attributes group one}, // {attributes group two} // ] // // Look for default Shorthand AccessSubject // parseDefaultCategory(jsonRequestMap, "AccessSubject", "urn:oasis:names:tc:xacml:1.0:subject-category:access-subject", stdMutableRequest); // // Provide backward compatibility for our PEP's // parseDefaultCategory(jsonRequestMap, "Subject", "urn:oasis:names:tc:xacml:1.0:subject-category:access-subject", stdMutableRequest); // // Look for default Shorthand Action // parseDefaultCategory(jsonRequestMap, "Action", "urn:oasis:names:tc:xacml:3.0:attribute-category:action", stdMutableRequest); // // Look for default Shorthand Resource // parseDefaultCategory(jsonRequestMap, "Resource", "urn:oasis:names:tc:xacml:3.0:attribute-category:resource", stdMutableRequest); // // Look for default Shorthand Environment // parseDefaultCategory(jsonRequestMap, "Environment", "urn:oasis:names:tc:xacml:3.0:attribute-category:environment", stdMutableRequest); // // Look for default Shorthand RecipientSubject // parseDefaultCategory(jsonRequestMap, "RecipientSubject", "urn:oasis:names:tc:xacml:1.0:subject-category:recipient-subject", stdMutableRequest); // // Look for default Shorthand 
IntermediarySubject // parseDefaultCategory(jsonRequestMap, "IntermediarySubject", "urn:oasis:names:tc:xacml:1.0:subject-category:intermediary-subject", stdMutableRequest); // // Look for default Shorthand Codebase // parseDefaultCategory(jsonRequestMap, "Codebase", "urn:oasis:names:tc:xacml:1.0:subject-category:codebase", stdMutableRequest); // // Look for default Shorthand RequestingMachine // parseDefaultCategory(jsonRequestMap, "RequestingMachine", "urn:oasis:names:tc:xacml:1.0:subject-category:requesting-machine", stdMutableRequest); // // MultiRequest // Map<?, ?> multiRequests = (Map<?, ?>) jsonRequestMap.remove("MultiRequests"); if (multiRequests != null) { if (!(multiRequests instanceof Map)) { throw new JSONStructureException("MultiRequests must be object structure, not single value"); } List<?> requestReferenceList = (List<?>) multiRequests.remove("RequestReference"); if (requestReferenceList == null) { throw new JSONStructureException("MultiRequest must contain a RequestReference element"); } if (requestReferenceList.size() < 1) { throw new JSONStructureException( "MultiRequest must contain at least one element in the RequestReference list"); } checkUnknown("MultiRequest", multiRequests); for (Object requestReferenceMapObject : requestReferenceList) { if (!(requestReferenceMapObject instanceof Map)) { throw new JSONStructureException("MultiRequest RequestReference must be object"); } Map<?, ?> requestReferenceMap = (Map<?, ?>) requestReferenceMapObject; // each object within the list must contain a ReferenceId and only a ReferenceId Object referenceIdListObject = requestReferenceMap.remove("ReferenceId"); if (referenceIdListObject == null) { throw new JSONStructureException( "MultiRequest RequestReference list element must contain ReferenceId"); } List<?> referenceIdList = (List<?>) referenceIdListObject; if (referenceIdList.size() == 0) { // the spec does not disallow empty list RequestReference objects continue; } checkUnknown("RequestReference", 
requestReferenceMap); // create reference corresponding to RequestReference list element StdMutableRequestReference requestReference = new StdMutableRequestReference(); for (Object referenceId : referenceIdList) { // add attributes to the reference // Since the order of the JSON is not constrained, we could process this section // before the section containing attribute being referenced, // so we cannot do a cross-check here to verify that the attribute reference exists. // That will happen later when the PDP attempts to find the attribute. StdRequestAttributesReference requestAttributesReference = new StdRequestAttributesReference( (String) referenceId); requestReference.add(requestAttributesReference); } stdMutableRequest.add(requestReference); } } // // ReturnPolicyIdList // // If omitted this is set to a default of false by the StdMutableRequest constructor. // Object returnPolicyIdList = jsonRequestMap.remove("ReturnPolicyIdList"); Boolean returnPolicyIdListBoolean = makeBoolean(returnPolicyIdList, "ReturnPolicyIdList"); if (returnPolicyIdList != null) { stdMutableRequest.setReturnPolicyIdList(returnPolicyIdListBoolean); } // // CombinedDecision // // If omitted this is set to a default of false by the StdMutableRequest constructor. // Object combinedDecision = jsonRequestMap.remove("CombinedDecision"); Boolean combinedDecisionBoolean = makeBoolean(combinedDecision, "CombinedDecision"); if (combinedDecision != null) { stdMutableRequest.setCombinedDecision(combinedDecisionBoolean); } // // XPath // // The JSON spec says that this has a default value, implying that if it is missing in the Request // we should fill it in. // However the XML (DOM) version does not do that. If the value is missing it leaves the // requestDefaults object blank. // We are following the XML approach and ignoring the Default value for this field in the spec. // TODO - Assume that no value for XPathVersion means "leave as null", not "fill in the default // value from spec. 
This violates the JSON spec Object xPath = jsonRequestMap.remove("XPathVersion"); if (xPath != null) { // XPath is given in the JSON input if (!(xPath instanceof String)) { throw new JSONStructureException("XPathVersion not a URI passed as a String"); } URI xPathUri = null; try { xPathUri = new URI(xPath.toString()); } catch (Exception e) { throw new JSONStructureException("XPathVersion not a valid URI: '" + xPath + "'", e); } StdRequestDefaults requestDefaults = new StdRequestDefaults(xPathUri); stdMutableRequest.setRequestDefaults(requestDefaults); } checkUnknown("Request", jsonRequestMap); } catch (JsonParseException e) { // try to point to problem area in JSON input, if possible JsonLocation location = e.getLocation(); String locationOfError = "(unavailable)"; if (location != null && location != JsonLocation.NA) { String jsonText = json; if (location.getLineNr() > 1) { String[] jsonArray = jsonText.split("\\r?\\n|\\r"); jsonText = jsonArray[location.getLineNr()]; } if (location.getCharOffset() < jsonText.length()) { if (location.getCharOffset() > 0) { locationOfError = jsonText.substring((int) location.getCharOffset() - 1); } if (locationOfError.length() > 30) { locationOfError = locationOfError.substring(0, 30); } } } throw new JSONStructureException("Unable to parse JSON starting at text'" + locationOfError + "', input was '" + json + "', exception: " + e, e); } catch (JsonMappingException e) { throw new JSONStructureException("Unable to map JSON '" + json + "', exception: " + e, e); } catch (IOException e) { throw new JSONStructureException("Unable to read JSON input, exception: " + e, e); } // all done return new StdRequest(stdMutableRequest); }
From source file:decision_tree_learning.Matrix.java
public void loadArff(String filename) throws Exception, FileNotFoundException { m_data = new ArrayList<double[]>(); m_attr_name = new ArrayList<String>(); m_str_to_enum = new ArrayList<TreeMap<String, Integer>>(); m_enum_to_str = new ArrayList<TreeMap<Integer, String>>(); boolean READDATA = false; Scanner s = new Scanner(new File(filename)); while (s.hasNext()) { String line = s.nextLine().trim(); if (line.length() > 0 && line.charAt(0) != '%') { if (!READDATA) { Scanner t = new Scanner(line); String firstToken = t.next().toUpperCase(); if (firstToken.equals("@RELATION")) { String datasetName = t.nextLine(); }//from w w w . j av a2 s . co m if (firstToken.equals("@ATTRIBUTE")) { TreeMap<String, Integer> ste = new TreeMap<String, Integer>(); m_str_to_enum.add(ste); TreeMap<Integer, String> ets = new TreeMap<Integer, String>(); m_enum_to_str.add(ets); Scanner u = new Scanner(line); if (line.indexOf("'") != -1) u.useDelimiter("'"); u.next(); String attributeName = u.next(); if (line.indexOf("'") != -1) attributeName = "'" + attributeName + "'"; m_attr_name.add(attributeName); int vals = 0; String type = u.next().trim().toUpperCase(); if (type.equals("REAL") || type.equals("CONTINUOUS") || type.equals("INTEGER") || type.equals("NUMERIC")) { } else { try { String values = line.substring(line.indexOf("{") + 1, line.indexOf("}")); Scanner v = new Scanner(values); v.useDelimiter(","); while (v.hasNext()) { String value = v.next().trim(); if (value.length() > 0) { ste.put(value, new Integer(vals)); ets.put(new Integer(vals), value); vals++; } } } catch (Exception e) { throw new Exception("Error parsing line: " + line + "\n" + e.toString()); } } } if (firstToken.equals("@DATA")) { READDATA = true; } } else { double[] newrow = new double[cols()]; int curPos = 0; try { Scanner t = new Scanner(line); t.useDelimiter(","); while (t.hasNext()) { String textValue = t.next().trim(); //System.out.println(textValue); if (textValue.length() > 0) { double doubleValue; int vals = 
m_enum_to_str.get(curPos).size(); //Missing instances appear in the dataset as a double defined as MISSING if (textValue.equals("?")) { missing_val = true; doubleValue = MISSING; } // Continuous values appear in the instance vector as they are else if (vals == 0) { doubleValue = Double.parseDouble(textValue); } // Discrete values appear as an index to the "name" // of that value in the "attributeValue" structure else { doubleValue = m_str_to_enum.get(curPos).get(textValue); if (doubleValue == -1) { throw new Exception( "Error parsing the value '" + textValue + "' on line: " + line); } } newrow[curPos] = doubleValue; curPos++; } } } catch (Exception e) { throw new Exception("Error parsing line: " + line + "\n" + e.toString()); } m_data.add(newrow); } } } if (hasMissing()) postmodifyMetadata(); }
From source file:de.ingrid.interfaces.csw.server.cswt.impl.GenericServerCSWT.java
/** * Get a Document from a class path location. The actual name of the file is * retrieved from the config.properties file. * * With variant a specific variant (like a localization) can be retrieved. * The file name is extended by the variant in the form * [name]_[variant].[extension]./*from www . ja v a 2 s.c o m*/ * * If the variant could not be retrieved, the base file is returned as a * fall back. * * The content is cached. The cache can be controlled by the * config.properties entry 'cache.enable'. * * @param key * One of the keys config.properties, defining the actual * filename to be retrieved. * @param variant * The variant of the file. * @return The Document instance */ protected Document getDocument(String key, String variant) { // fetch the document from the file system if it is not cached String filename = ApplicationProperties.getMandatory(key); String filenameVariant = filename; if (variant != null && variant.length() > 0) { if (filename.contains(FilenameUtils.EXTENSION_SEPARATOR_STR)) { filenameVariant = FilenameUtils.getBaseName(filename) + "_" + variant + FilenameUtils.EXTENSION_SEPARATOR_STR + FilenameUtils.getExtension(filename); } else { filenameVariant = FilenameUtils.getBaseName(filename) + "_" + variant; } } Document doc = null; Scanner scanner = null; try { URL resource = this.getClass().getClassLoader().getResource(filenameVariant); if (resource == null) { log.warn("Document '" + filenameVariant + "' could not be found in class path."); resource = this.getClass().getClassLoader().getResource(filename); } String path = resource.getPath().replaceAll("%20", " "); File file = new File(path); scanner = new Scanner(file); scanner.useDelimiter("\\A"); String content = scanner.next(); scanner.close(); doc = StringUtils.stringToDocument(content); } catch (Exception e) { log.error("Error reading document configured in configuration key '" + key + "': " + filename + ", " + variant, e); throw new RuntimeException("Error reading document configured in 
configuration key '" + key + "': " + filename + ", " + variant, e); } finally { if (scanner != null) { scanner.close(); } } return doc; }
From source file:gtu._work.ui.JSFMakerUI.java
/**
 * Converts HTML table markup found on the clipboard into tab/newline separated text and puts
 * the result back on the clipboard (and prints it to stdout).
 *
 * The clipboard text is split on the literal "&lt;tr&gt;"; within each piece, cells are extracted
 * by alternating the scanner delimiter between the cell start tag and the cell end tag. For each
 * cell, the value="..." attribute of an inner tag is used when present, otherwise the cell text
 * with tags and all whitespace removed. Empty cells become a single space.
 */
void resetPasteClipboardHtmlToJtable() {
    String content = ClipboardUtil.getInstance().getContents();
    // Opening <td ...> / <th ...> tag, either case, with optional attributes.
    Pattern tdStartPattern = Pattern.compile("<[tT][dDhH][^>]*>");
    // Closing </td> / </th> tag.
    Pattern tdEndPattern = Pattern.compile("</[tT][dDhH]>");
    // Any tag carrying a value="..." attribute; group 1 captures the attribute value.
    Pattern innerPattern_HasTag = Pattern.compile("<[\\w:]+\\s[^>]*value=\"([^\"]*)\"[^>]*>",
            Pattern.MULTILINE);
    Matcher innerMatcher = null;
    Scanner scan = new Scanner(content);
    Scanner tdScan = null;
    String currentContent = null;
    String tdContent = null;
    StringBuilder sb = new StringBuilder();
    // NOTE(review): rows are split on the exact lowercase literal "<tr>" only, while the cell
    // patterns are case-insensitive and allow attributes - confirm this is intended.
    scan.useDelimiter("<tr>");
    for (; scan.hasNext();) {
        boolean anyMatcher = false;
        tdScan = new Scanner(scan.next());
        tdScan.useDelimiter(tdStartPattern);
        while (tdScan.hasNext()) {
            // Switch to the end-tag delimiter so the next token runs up to the cell's end tag.
            tdScan.useDelimiter(tdEndPattern);
            if (tdScan.hasNext()) {
                // Strip the cell's own start tag from the token.
                tdContent = tdScan.next().replaceAll(tdStartPattern.pattern(), "");
                {
                    innerMatcher = innerPattern_HasTag.matcher(tdContent.toString());
                    if (innerMatcher.find()) {
                        currentContent = StringUtils.defaultIfEmpty(innerMatcher.group(1), " ");
                        // System.out.format("1[%s]\n", currentContent);
                        sb.append(currentContent + "\t");
                        // NOTE(review): this path skips both "anyMatcher = true" and the
                        // delimiter reset below, so a row consisting only of value-attribute
                        // cells gets no trailing newline - confirm whether that is intended.
                        continue;
                    }
                    // Remove remaining tags, then all whitespace, from the cell text.
                    currentContent = tdContent.toString().replaceAll("<[\\w:=,.#;/'?\"\\s\\{\\}\\(\\)\\[\\]]+>",
                            "");
                    currentContent = currentContent.replaceAll("[\\s\t\n]", "");
                    currentContent = StringUtils.defaultIfEmpty(currentContent, " ");
                    // System.out.format("2[%s]\n", currentContent);
                    sb.append(currentContent + "\t");
                    anyMatcher = true;
                }
            }
            // Back to the start-tag delimiter for the loop condition / next cell.
            tdScan.useDelimiter(tdStartPattern);
        }
        // Terminate the row only if at least one plain-text cell was emitted.
        if (anyMatcher) {
            sb.append("\n");
        }
    }
    scan.close();
    ClipboardUtil.getInstance().setContents(sb);
    System.out.println("####################################");
    System.out.println(sb);
    System.out.println("####################################");
}
From source file:com.rockhoppertech.music.scale.Scale.java
/**
 * Sets the name of this scale. If the given text contains commas, the first comma-separated
 * token becomes the name and every following token is added to the aliases.
 *
 * Tokens are used verbatim (not trimmed). A text that yields no token at all, such as a
 * lone ",", is stored unchanged as the name.
 *
 * @param name
 *            the name to set, optionally followed by comma-separated aliases
 */
public void setName(final String name) {
    if (name.contains(",")) {
        Scanner s = new Scanner(name);
        try {
            s.useDelimiter(",");
            // Guard the first next(): input made only of delimiters has no token and would
            // otherwise throw NoSuchElementException.
            this.name = s.hasNext() ? s.next() : name;
            while (s.hasNext()) {
                aliases.add(s.next());
            }
        } finally {
            s.close();
        }
    } else {
        this.name = name;
    }
}
From source file:it.polito.tellmefirst.web.rest.clients.ClientEpub.java
private HashMap<String, String> parseEpub(File file) throws IOException, TMFVisibleException { LOG.debug("[parseEpub] - BEGIN"); ZipFile fi = new ZipFile(file); for (Enumeration e = fi.entries(); e.hasMoreElements();) { ZipEntry entry = (ZipEntry) e.nextElement(); if (entry.getName().endsWith("ncx")) { InputStream tocMaybeDirty = fi.getInputStream(entry); Scanner scanner = new Scanner(tocMaybeDirty, "UTF-8").useDelimiter("\\A"); String theString = scanner.hasNext() ? scanner.next() : ""; tocMaybeDirty.close();//from w ww . j a v a 2s . c om scanner.close(); String res = theString.replaceAll(">[\\s]*?<", "><"); InputStream toc = new ByteArrayInputStream(res.getBytes(StandardCharsets.UTF_8)); try { DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); Document doc = dBuilder.parse(toc); toc.close(); if (doc.hasChildNodes()) { findNavMap(doc.getChildNodes()); } } catch (Exception ex) { LOG.error("Unable to navigate the TOC"); } removeEmptyTOC(epub); //search anchors in links and split Set set = epub.entrySet(); Iterator i = set.iterator(); while (i.hasNext()) { Map.Entry me = (Map.Entry) i.next(); if (me.getValue().toString().contains("#")) { String[] parts = me.getValue().toString().split("#"); String anchor = parts[1]; epub.put(me.getKey().toString(), anchor); } } } if (entry.getName().endsWith("opf")) { //manage files because order is important InputStream content = fi.getInputStream(entry); Scanner scanner = new Scanner(content, "UTF-8").useDelimiter("\\A"); String contentString = scanner.hasNext() ? 
scanner.next() : ""; content.close(); scanner.close(); String filenameRegex = "href=\"(.*.htm(|l))\".*media-type=\"application/xhtml"; Pattern pattern = Pattern.compile(filenameRegex); Matcher matcher = pattern.matcher(contentString); Integer count = 0; while (matcher.find()) { files.put(count, matcher.group(1)); count++; } } if (entry.getName().endsWith("html") || entry.getName().endsWith("htm") || entry.getName().endsWith("xhtml")) { InputStream htmlFile = fi.getInputStream(entry); Scanner scanner = new Scanner(htmlFile, "UTF-8").useDelimiter("\\A"); String htmlString = scanner.hasNext() ? scanner.next() : ""; String regex1 = htmlString.replaceAll("^[^_]*?<body>", ""); //remove head String regex2 = regex1.replaceAll("</body>.*$", ""); //remove tail String htmlCleaned = regex2.replaceAll("<a.*?/>", ""); //anchor with one tag String[] bits = entry.getName().split("/"); String fileName = bits[bits.length - 1]; htmls.put(fileName, htmlCleaned); } } fi.close(); Integer i; for (i = 0; i < files.size(); i++) { stringBuilder.append("<p id=\"" + files.get(i) + "\"></p>"); // "anchor" also the heads of each files stringBuilder.append(htmls.get(files.get(i))); } String htmlAll = stringBuilder.toString(); /* We have all needed files, start to split For each link -> made a chunk Start from the bottom */ Metadata metadata = new Metadata(); Parser parser = new HtmlParser(); ListIterator<Map.Entry<String, String>> iter = new ArrayList<>(epub.entrySet()).listIterator(epub.size()); while (iter.hasPrevious()) { Map.Entry<String, String> me = iter.previous(); try { ContentHandler contenthandler = new BodyContentHandler(10 * htmlAll.length()); Scanner sc = new Scanner(htmlAll); sc.useDelimiter("id=\"" + me.getValue().toString() + "\">"); htmlAll = sc.next(); InputStream stream = new ByteArrayInputStream(sc.next().getBytes(StandardCharsets.UTF_8)); parser.parse(stream, contenthandler, metadata, new ParseContext()); String chapterText = 
contenthandler.toString().toLowerCase().replaceAll("\\d+.*", ""); String chapterTextWithoutNo = chapterText.replaceAll("\\d+.*", ""); // Remove the Project Gutenberg meta information from the text String chapterTextCleaned = chapterTextWithoutNo.split("end of the project gutenberg ebook")[0]; epub.put(me.getKey().toString(), chapterTextCleaned); } catch (Exception ex) { LOG.error("Unable to parse content for index: " + me.getKey() + ", this chapter will be deleted"); removeChapter(epub, me.getKey().toString()); } } /* I remove the Project Gutenberg license chapter from the Map, because it is useless for the classification and it generates a Lucene Exception in case of the Italian language (the license text is always in English). You can use this method in order to remove each chapter that is useless for classifying your Epub document. */ removeChapter(epub, "A Word from Project Gutenberg"); removeEmptyItems(epub); //If the Epub file has a bad structure, I try to use the basic Epub extractor of Tika. if (epub.size() == 0) { LOG.info("The Epub file has a bad structure. Try to use the Tika extractor"); epub.put("All text", autoParseAll(file)); } removeEmptyItems(epub); if (epub.size() == 0) { LOG.error("Unable to extract text from this Epub"); throw new TMFVisibleException("Unable to extract any text from this Epub."); } removeDownloadedFile(TEMPORARY_PATH); LOG.debug("[parseEpub] - END"); return epub; }
From source file:com.gatf.executor.core.AcceptanceTestContext.java
private void initSoapContextAndHttpHeaders() throws Exception { Field[] declaredFields = HttpHeaders.class.getDeclaredFields(); for (Field field : declaredFields) { if (java.lang.reflect.Modifier.isStatic(field.getModifiers()) && field.getType().equals(String.class)) { httpHeaders.put(field.get(null).toString().toLowerCase(), field.get(null).toString()); }/*from ww w . j a va 2 s.co m*/ } File file = null; if (gatfExecutorConfig.getWsdlLocFile() != null && !gatfExecutorConfig.getWsdlLocFile().trim().isEmpty()) file = getResourceFile(gatfExecutorConfig.getWsdlLocFile()); if (file != null) { Scanner s = new Scanner(file); s.useDelimiter("\n"); List<String> list = new ArrayList<String>(); while (s.hasNext()) { list.add(s.next().replace("\r", "")); } s.close(); for (String wsdlLoc : list) { if (!wsdlLoc.trim().isEmpty()) { String[] wsdlLocParts = wsdlLoc.split(","); logger.info("Started Parsing WSDL location - " + wsdlLocParts[1]); Wsdl wsdl = Wsdl.parse(wsdlLocParts[1]); for (QName bindingName : wsdl.getBindings()) { SoapBuilder builder = wsdl.getBuilder(bindingName); for (SoapOperation operation : builder.getOperations()) { String request = builder.buildInputMessage(operation); DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); Document soapMessage = db.parse(new ByteArrayInputStream(request.getBytes())); if (gatfExecutorConfig.isDistributedLoadTests()) { soapStrMessages.put(wsdlLocParts[0] + operation.getOperationName(), request); } soapMessages.put(wsdlLocParts[0] + operation.getOperationName(), soapMessage); if (operation.getSoapAction() != null) { soapActions.put(wsdlLocParts[0] + operation.getOperationName(), operation.getSoapAction()); } logger.info("Adding message for SOAP operation - " + operation.getOperationName()); } soapEndpoints.put(wsdlLocParts[0], builder.getServiceUrls().get(0)); logger.info("Adding SOAP Service endpoint - " + builder.getServiceUrls().get(0)); } logger.info("Done Parsing 
WSDL location - " + wsdlLocParts[1]); } } } }
From source file:com.knowprocess.bpm.bdd.BpmSpec.java
protected String getJson(String messageResource) { InputStream is = null;//from w ww .j a v a 2s . c o m Reader source = null; Scanner scanner = null; String json = null; try { is = getClass().getResourceAsStream(messageResource); // assertNotNull("Unable to load test resource: " + messageResource, // is); source = new InputStreamReader(is); scanner = new Scanner(source); json = scanner.useDelimiter("\\A").next(); } catch (NullPointerException e) { // assume message supplied directly json = messageResource; } finally { try { scanner.close(); } catch (Exception e) { ; } } return json; }