List of usage examples for javax.xml.parsers DocumentBuilderFactory newDocumentBuilder
public abstract DocumentBuilder newDocumentBuilder() throws ParserConfigurationException;
From source file:DOMDump.java
static public void main(String[] arg) { String filename = null;//w w w . j av a 2 s . c om boolean validate = false; if (arg.length == 1) { filename = arg[0]; } else if (arg.length == 2) { if (!arg[0].equals("-v")) usage(); validate = true; filename = arg[1]; } else { usage(); } DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setValidating(validate); dbf.setNamespaceAware(true); dbf.setIgnoringElementContentWhitespace(true); // Parse the input to produce a parse tree with its root // in the form of a Document object Document doc = null; try { DocumentBuilder builder = dbf.newDocumentBuilder(); builder.setErrorHandler(new MyErrorHandler()); InputSource is = new InputSource(filename); doc = builder.parse(is); } catch (SAXException e) { System.exit(1); } catch (ParserConfigurationException e) { System.err.println(e); System.exit(1); } catch (IOException e) { System.err.println(e); System.exit(1); } // Use a TreeDumper to list the tree TreeDumper td = new TreeDumper(); td.dump(doc); }
From source file:ValidateLicenseHeaders.java
/** * ValidateLicenseHeaders jboss-src-root * /* w ww. ja v a2 s . c o m*/ * @param args */ public static void main(String[] args) throws Exception { if (args.length == 0 || args[0].startsWith("-h")) { log.info("Usage: ValidateLicenseHeaders [-addheader] jboss-src-root"); System.exit(1); } int rootArg = 0; if (args.length == 2) { if (args[0].startsWith("-add")) addDefaultHeader = true; else { log.severe("Uknown argument: " + args[0]); log.info("Usage: ValidateLicenseHeaders [-addheader] jboss-src-root"); System.exit(1); } rootArg = 1; } File jbossSrcRoot = new File(args[rootArg]); if (jbossSrcRoot.exists() == false) { log.info("Src root does not exist, check " + jbossSrcRoot.getAbsolutePath()); System.exit(1); } URL u = Thread.currentThread().getContextClassLoader() .getResource("META-INF/services/javax.xml.parsers.DocumentBuilderFactory"); System.err.println(u); // Load the valid copyright statements for the licenses File licenseInfo = new File(jbossSrcRoot, "varia/src/etc/license-info.xml"); if (licenseInfo.exists() == false) { log.severe("Failed to find the varia/src/etc/license-info.xml under the src root"); System.exit(1); } DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder db = factory.newDocumentBuilder(); Document doc = db.parse(licenseInfo); NodeList licenses = doc.getElementsByTagName("license"); for (int i = 0; i < licenses.getLength(); i++) { Element license = (Element) licenses.item(i); String key = license.getAttribute("id"); ArrayList headers = new ArrayList(); licenseHeaders.put(key, headers); NodeList copyrights = license.getElementsByTagName("terms-header"); for (int j = 0; j < copyrights.getLength(); j++) { Element copyright = (Element) copyrights.item(j); copyright.normalize(); String id = copyright.getAttribute("id"); // The id will be blank if there is no id attribute if (id.length() == 0) continue; String text = getElementContent(copyright); if (text == null) continue; // Replace all duplicate whitespace 
and '*' with a single space text = text.replaceAll("[\\s*]+", " "); if (text.length() == 1) continue; text = text.toLowerCase().trim(); // Replace any copyright date0-date1,date2 with copyright ... text = text.replaceAll(COPYRIGHT_REGEX, "..."); LicenseHeader lh = new LicenseHeader(id, text); headers.add(lh); } } log.fine(licenseHeaders.toString()); File[] files = jbossSrcRoot.listFiles(dotJavaFilter); log.info("Root files count: " + files.length); processSourceFiles(files, 0); log.info("Processed " + totalCount); log.info("Updated jboss headers: " + jbossCount); // Files with no headers details log.info("Files with no headers: " + noheaders.size()); FileWriter fw = new FileWriter("NoHeaders.txt"); for (Iterator iter = noheaders.iterator(); iter.hasNext();) { File f = (File) iter.next(); fw.write(f.getAbsolutePath()); fw.write('\n'); } fw.close(); // Files with unknown headers details log.info("Files with invalid headers: " + invalidheaders.size()); fw = new FileWriter("InvalidHeaders.txt"); for (Iterator iter = invalidheaders.iterator(); iter.hasNext();) { File f = (File) iter.next(); fw.write(f.getAbsolutePath()); fw.write('\n'); } fw.close(); // License usage summary log.info("Creating HeadersSummary.txt"); fw = new FileWriter("HeadersSummary.txt"); for (Iterator iter = licenseHeaders.entrySet().iterator(); iter.hasNext();) { Map.Entry entry = (Map.Entry) iter.next(); String key = (String) entry.getKey(); fw.write("+++ License type=" + key); fw.write('\n'); List list = (List) entry.getValue(); Iterator jiter = list.iterator(); while (jiter.hasNext()) { LicenseHeader lh = (LicenseHeader) jiter.next(); fw.write('\t'); fw.write(lh.id); fw.write(", count="); fw.write("" + lh.count); fw.write('\n'); } } fw.close(); }
From source file:AwsConsoleApp.java
public static void main(String[] args) throws Exception { System.out.println("==========================================="); System.out.println("Welcome to the AWS VPN connection creator"); System.out.println("==========================================="); init();//from w w w . j a v a 2 s .c om List<String> CIDRblocks = new ArrayList<String>(); String vpnType = null; String vpnGatewayId = null; String customerGatewayId = null; String customerGatewayInfoPath = null; String routes = null; options.addOption("h", "help", false, "show help."); options.addOption("vt", "vpntype", true, "Set vpn tunnel type e.g. (ipec.1)"); options.addOption("vgw", "vpnGatewayId", true, "Set AWS VPN Gateway ID e.g. (vgw-eca54d85)"); options.addOption("cgw", "customerGatewayId", true, "Set AWS Customer Gateway ID e.g. (cgw-c16e87a8)"); options.addOption("r", "staticroutes", true, "Set static routes e.g. cutomer subnet 10.77.77.0/24"); options.addOption("vi", "vpninfo", true, "path to vpn info file c:\\temp\\customerGatewayInfo.xml"); CommandLineParser parser = new BasicParser(); CommandLine cmd = null; // Parse command line options try { cmd = parser.parse(options, args); if (cmd.hasOption("h")) help(); if (cmd.hasOption("vt")) { log.log(Level.INFO, "Using cli argument -vt=" + cmd.getOptionValue("vt")); vpnType = cmd.getOptionValue("vt"); // Whatever you want to do with the setting goes here } else { log.log(Level.SEVERE, "Missing vt option"); help(); } if (cmd.hasOption("vgw")) { log.log(Level.INFO, "Using cli argument -vgw=" + cmd.getOptionValue("vgw")); vpnGatewayId = cmd.getOptionValue("vgw"); } else { log.log(Level.SEVERE, "Missing vgw option"); help(); } if (cmd.hasOption("cgw")) { log.log(Level.INFO, "Using cli argument -cgw=" + cmd.getOptionValue("cgw")); customerGatewayId = cmd.getOptionValue("cgw"); } else { log.log(Level.SEVERE, "Missing cgw option"); help(); } if (cmd.hasOption("r")) { log.log(Level.INFO, "Using cli argument -r=" + cmd.getOptionValue("r")); routes = 
cmd.getOptionValue("r"); String[] routeItems = routes.split(","); CIDRblocks = Arrays.asList(routeItems); } else { log.log(Level.SEVERE, "Missing r option"); help(); } if (cmd.hasOption("vi")) { log.log(Level.INFO, "Using cli argument -vi=" + cmd.getOptionValue("vi")); customerGatewayInfoPath = cmd.getOptionValue("vi"); } else { log.log(Level.SEVERE, "Missing vi option"); help(); } } catch (ParseException e) { log.log(Level.SEVERE, "Failed to parse comand line properties", e); help(); } /* * Amazon VPC * Create and delete VPN tunnel to customer VPN hardware */ try { //String vpnType = "ipsec.1"; //String vpnGatewayId = "vgw-eca54d85"; //String customerGatewayId = "cgw-c16e87a8"; //List<String> CIDRblocks = new ArrayList<String>(); //CIDRblocks.add("10.77.77.0/24"); //CIDRblocks.add("172.16.1.0/24"); //CIDRblocks.add("172.18.1.0/24"); //CIDRblocks.add("10.66.66.0/24"); //CIDRblocks.add("10.8.1.0/24"); //String customerGatewayInfoPath = "c:\\temp\\customerGatewayInfo.xml"; Boolean staticRoutesOnly = true; List<String> connectionIds = new ArrayList<String>(); List<String> connectionIdList = new ArrayList<String>(); connectionIdList = vpnExists(connectionIds); if (connectionIdList.size() == 0) { CreateVpnConnectionRequest vpnReq = new CreateVpnConnectionRequest(vpnType, customerGatewayId, vpnGatewayId); CreateVpnConnectionResult vpnRes = new CreateVpnConnectionResult(); VpnConnectionOptionsSpecification vpnspec = new VpnConnectionOptionsSpecification(); vpnspec.setStaticRoutesOnly(staticRoutesOnly); vpnReq.setOptions(vpnspec); System.out.println("Creating VPN connection"); vpnRes = ec2.createVpnConnection(vpnReq); String vpnConnId = vpnRes.getVpnConnection().getVpnConnectionId(); String customerGatewayInfo = vpnRes.getVpnConnection().getCustomerGatewayConfiguration(); //System.out.println("Customer Gateway Info:" + customerGatewayInfo); // Write Customer Gateway Info to file System.out.println("Writing Customer Gateway Info to file:" + customerGatewayInfoPath); try 
(PrintStream out = new PrintStream(new FileOutputStream(customerGatewayInfoPath))) { out.print(customerGatewayInfo); } System.out.println("Creating VPN routes"); for (String destCIDR : CIDRblocks) { CreateVpnConnectionRouteRequest routeReq = new CreateVpnConnectionRouteRequest(); CreateVpnConnectionRouteResult routeRes = new CreateVpnConnectionRouteResult(); routeReq.setDestinationCidrBlock(destCIDR); routeReq.setVpnConnectionId(vpnConnId); routeRes = ec2.createVpnConnectionRoute(routeReq); } // Parse XML file File file = new File(customerGatewayInfoPath); DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); Document document = db.parse(customerGatewayInfoPath); XPathFactory xPathfactory = XPathFactory.newInstance(); XPath xpath = xPathfactory.newXPath(); XPathExpression exprGetipAddress = xpath .compile("/vpn_connection/ipsec_tunnel/vpn_gateway/tunnel_outside_address/ip_address"); NodeList vpnGateway = (NodeList) exprGetipAddress.evaluate(document, XPathConstants.NODESET); if (vpnGateway != null) { for (int i = 0; i < vpnGateway.getLength(); i++) { String vpnGatewayIP = vpnGateway.item(i).getTextContent(); System.out .println("AWS vpnGatewayIP for tunnel " + Integer.toString(i) + " " + vpnGatewayIP); } } System.out.println("=============================================="); XPathExpression exprGetKey = xpath.compile("/vpn_connection/ipsec_tunnel/ike/pre_shared_key"); NodeList presharedKeyList = (NodeList) exprGetKey.evaluate(document, XPathConstants.NODESET); if (presharedKeyList != null) { for (int i = 0; i < presharedKeyList.getLength(); i++) { String pre_shared_key = presharedKeyList.item(i).getTextContent(); System.out.println( "AWS pre_shared_key for tunnel " + Integer.toString(i) + " " + pre_shared_key); } } System.out.println("Creating VPN creation completed!"); } else { boolean yn; Scanner scan = new Scanner(System.in); System.out.println("Enter yes or no to delete VPN connection: "); String input 
= scan.next(); String answer = input.trim().toLowerCase(); while (true) { if (answer.equals("yes")) { yn = true; break; } else if (answer.equals("no")) { yn = false; System.exit(0); } else { System.out.println("Sorry, I didn't catch that. Please answer yes/no"); } } // Delete all existing VPN connections System.out.println("Deleting AWS VPN connection(s)"); for (String vpnConID : connectionIdList) { DeleteVpnConnectionResult delVPNres = new DeleteVpnConnectionResult(); DeleteVpnConnectionRequest delVPNreq = new DeleteVpnConnectionRequest(); delVPNreq.setVpnConnectionId(vpnConID); delVPNres = ec2.deleteVpnConnection(delVPNreq); System.out.println("Successfully deleted AWS VPN conntion: " + vpnConID); } } } catch (AmazonServiceException ase) { System.out.println("Caught Exception: " + ase.getMessage()); System.out.println("Reponse Status Code: " + ase.getStatusCode()); System.out.println("Error Code: " + ase.getErrorCode()); System.out.println("Request ID: " + ase.getRequestId()); } }
From source file:TreeDumper2.java
static public void main(String[] arg) { String filename = null;/*from ww w . j a v a 2 s. c o m*/ boolean validate = false; if (arg.length == 1) { filename = arg[0]; } else if (arg.length == 2) { if (!arg[0].equals("-v")) usage(); validate = true; filename = arg[1]; } else { usage(); } DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setValidating(validate); dbf.setNamespaceAware(true); dbf.setIgnoringElementContentWhitespace(true); // Parse the input to produce a parse tree with its root // in the form of a Document object Document doc = null; try { DocumentBuilder builder = dbf.newDocumentBuilder(); builder.setErrorHandler(new MyErrorHandler()); InputSource is = new InputSource(filename); doc = builder.parse(is); } catch (SAXException e) { System.exit(1); } catch (ParserConfigurationException e) { System.err.println(e); System.exit(1); } catch (IOException e) { System.err.println(e); System.exit(1); } // Use a TreeDumper to list the tree TreeDumper2 td = new TreeDumper2(); td.dump(doc); }
From source file:client.QueryLastFm.java
License:asdf
public static void main(String[] args) throws Exception { // isAlreadyInserted("asdfs","jas,jnjkah"); // FileWriter fw = new FileWriter(".\\tracks.csv"); OutputStream track_os = new FileOutputStream(".\\tracks.csv"); PrintWriter out = new PrintWriter(new OutputStreamWriter(track_os, "UTF-8")); OutputStream track_id_os = new FileOutputStream(".\\track_id_sim_track_id.csv"); PrintWriter track_id_out = new PrintWriter(new OutputStreamWriter(track_id_os, "UTF-8")); track_id_out.print(""); ByteArrayInputStream input;/*from ww w.ja v a2s . co m*/ Document doc = null; CloseableHttpClient httpclient = HttpClients.createDefault(); String trackName = ""; String artistName = ""; String sourceMbid = ""; out.print("ID");// first row first column out.print(","); out.print("TrackName");// first row second column out.print(","); out.println("Artist");// first row third column track_id_out.print("source");// first row second column track_id_out.print(","); track_id_out.println("target");// first row third column // track_id_out.print(","); // track_id_out.println("type");// first row third column // out.flush(); // out.close(); // fw.close(); // os.close(); try { URI uri = new URIBuilder().setScheme("http").setHost("ws.audioscrobbler.com").setPath("/2.0/") .setParameter("method", "track.getsimilar").setParameter("artist", "cher") .setParameter("track", "believe").setParameter("limit", "100") .setParameter("api_key", "88858618961414f8bec919bddd057044").build(); // new URIBuilder(). HttpGet request = new HttpGet(uri); // request. 
// This is useful for last.fm logging and preventing them from blocking this client request.setHeader(HttpHeaders.USER_AGENT, "nileshmore@gatech.edu - ClassAssignment at GeorgiaTech Non-commercial use"); HttpGet httpGet = new HttpGet( "http://ws.audioscrobbler.com/2.0/?method=track.getsimilar&artist=cher&track=believe&limit=4&api_key=88858618961414f8bec919bddd057044"); CloseableHttpResponse response = httpclient.execute(request); int statusCode = response.getStatusLine().getStatusCode(); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); // The underlying HTTP connection is still held by the response object // to allow the response content to be streamed directly from the network socket. // In order to ensure correct deallocation of system resources // the user MUST call CloseableHttpResponse#close() from a finally clause. // Please note that if response content is not fully consumed the underlying // connection cannot be safely re-used and will be shut down and discarded // by the connection manager. 
try { if (statusCode == 200) { HttpEntity entity1 = response.getEntity(); BufferedReader br = new BufferedReader( new InputStreamReader((response.getEntity().getContent()))); Document document = builder.parse((response.getEntity().getContent())); Element root = document.getDocumentElement(); root.normalize(); // Need to focus and resolve this part NodeList nodes; nodes = root.getChildNodes(); nodes = root.getElementsByTagName("track"); if (nodes.getLength() == 0) { // System.out.println("empty"); return; } Node trackNode; for (int k = 0; k < nodes.getLength(); k++) // can access all tracks now { trackNode = nodes.item(k); NodeList trackAttributes = trackNode.getChildNodes(); // check if mbid is present in track attributes // System.out.println("Length " + (trackAttributes.item(5).getNodeName().compareToIgnoreCase("mbid") == 0)); if ((trackAttributes.item(5).getNodeName().compareToIgnoreCase("mbid") == 0)) { if (((Element) trackAttributes.item(5)).hasChildNodes()) ;// System.out.println("Go aHead"); else continue; } else continue; for (int n = 0; n < trackAttributes.getLength(); n++) { Node attribute = trackAttributes.item(n); if ((attribute.getNodeName().compareToIgnoreCase("name")) == 0) { // System.out.println(((Element)attribute).getFirstChild().getNodeValue()); trackName = ((Element) attribute).getFirstChild().getNodeValue(); // make string encoding as UTF-8 ************ } if ((attribute.getNodeName().compareToIgnoreCase("mbid")) == 0) { // System.out.println(n + " " + ((Element)attribute).getFirstChild().getNodeValue()); sourceMbid = attribute.getFirstChild().getNodeValue(); } if ((attribute.getNodeName().compareToIgnoreCase("artist")) == 0) { NodeList ArtistNodeList = attribute.getChildNodes(); for (int j = 0; j < ArtistNodeList.getLength(); j++) { Node Artistnode = ArtistNodeList.item(j); if ((Artistnode.getNodeName().compareToIgnoreCase("name")) == 0) { // System.out.println(((Element)Artistnode).getFirstChild().getNodeValue()); artistName = ((Element) 
Artistnode).getFirstChild().getNodeValue(); } } } } out.print(sourceMbid); out.print(","); out.print(trackName); out.print(","); out.println(artistName); // out.print(","); findSimilarTracks(track_id_out, sourceMbid, trackName, artistName); } track_id_out.flush(); out.flush(); out.close(); track_id_out.close(); track_os.close(); // fw.close(); Element trac = (Element) nodes.item(0); // trac.normalize(); nodes = trac.getChildNodes(); // System.out.println(nodes.getLength()); for (int i = 0; i < nodes.getLength(); i++) { Node node = nodes.item(i); // System.out.println(node.getNodeName()); if ((node.getNodeName().compareToIgnoreCase("name")) == 0) { // System.out.println(((Element)node).getFirstChild().getNodeValue()); } if ((node.getNodeName().compareToIgnoreCase("mbid")) == 0) { // System.out.println(((Element)node).getFirstChild().getNodeValue()); } if ((node.getNodeName().compareToIgnoreCase("artist")) == 0) { // System.out.println("Well"); NodeList ArtistNodeList = node.getChildNodes(); for (int j = 0; j < ArtistNodeList.getLength(); j++) { Node Artistnode = ArtistNodeList.item(j); if ((Artistnode.getNodeName().compareToIgnoreCase("name")) == 0) { /* System.out.println(((Element)Artistnode).getFirstChild().getNodeValue());*/ } /*System.out.println(Artistnode.getNodeName());*/ } } } /*if(node instanceof Element){ //a child element to process Element child = (Element) node; String attribute = child.getAttribute("width"); }*/ // System.out.println(root.getAttribute("status")); NodeList tracks = root.getElementsByTagName("track"); Element track = (Element) tracks.item(0); // System.out.println(track.getTagName()); track.getChildNodes(); } else { System.out.println("failed with status" + response.getStatusLine()); } // input = (ByteArrayInputStream)entity1.getContent(); // do something useful with the response body // and ensure it is fully consumed } finally { response.close(); } } finally { System.out.println("Exited succesfully."); httpclient.close(); } }
From source file:com.cladonia.security.signature.SignatureGenerator.java
public static void main(String args[]) throws Exception { // use this if you want to configure logging, normally would put this in a static block, // but this is just for testing (see jre\lib\logging.properties) org.apache.commons.logging.Log log = org.apache.commons.logging.LogFactory .getLog(SignatureGenerator.class.getName()); //System.out.println("Using the logger: "+log.getClass().getName()); //log.debug("Debug is on"); //log.warn("Warning is on"); //log.error("Error is on"); log.info("**** Testing Signature Generator *****"); //All the parameters for the keystore String keystoreType = "JKS"; String keystoreFile = "data/keystore.jks"; String keystorePass = "xmlexchanger"; String privateKeyAlias = "exchanger"; String privateKeyPass = "xmlexchanger"; String certificateAlias = "exchanger"; // set the keystore and private key properties KeyBuilder.setParams(keystoreType, keystoreFile, keystorePass, privateKeyAlias, privateKeyPass, certificateAlias);//from ww w. j a v a2s .c o m // get the private key for signing. PrivateKey privateKey = KeyBuilder.getPrivateKey(); // get the cert X509Certificate cert = KeyBuilder.getCertificate(); // ************* create a sample to be signed ****************** javax.xml.parsers.DocumentBuilderFactory dbf = javax.xml.parsers.DocumentBuilderFactory.newInstance(); //XML Signature needs to be namespace aware dbf.setNamespaceAware(true); javax.xml.parsers.DocumentBuilder db = dbf.newDocumentBuilder(); org.w3c.dom.Document document = db.newDocument(); //Build a sample document. 
It will look something like: //<!-- Comment before --> //<cladonia:Exchanger xmlns:cladonia="http://www.exchangerxml.com"> //</cladonia:Exchanger> document.appendChild(document.createComment(" Comment before ")); Element root = document.createElementNS("http://www.exchangerxml.com", "cladonia:Exchanger"); root.setAttributeNS(null, "attr1", "test1"); root.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:foo", "http://www.exchangerxml.com/#foo"); root.setAttributeNS("http://example.org/#foo", "foo:attr1", "foo's test"); root.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:cladonia", "http://www.exchangerxml.com"); document.appendChild(root); Element firstchild = document.createElementNS("http://www.exchangerxml.com", "cladonia:Editor"); firstchild.appendChild(document.createTextNode("simple text\n")); firstchild.setAttributeNS(null, "Id", "CladoniaId"); root.appendChild(firstchild); //******************** End of sample to be signed************************* // *************** Signature 1 // create SignatureGenerator using private key, cert and the dom (i.e an enveloped signature) SignatureGenerator gen = new SignatureGenerator(privateKey, cert, document); // set the c14n algorithm (Exclusive) gen.setC14nAlgorithm(SignatureGenerator.TRANSFORM_C14N_EXCL_WITH_COMMENTS); // set the xpath transform gen.setXpath("//cladonia:Editor"); // set the id gen.setId("CladoniaId"); // sign the document document = gen.sign(null); // output the enveloped signature FileOutputStream fos = new FileOutputStream("c:\\temp\\sigout.xml"); XMLUtils.outputDOMc14nWithComments(document, fos); fos.close(); System.out.println("Created Signature 1 - an enveloped signature"); // ************** Signature 2 // now sign the previous output as an example of a detached signature SignatureGenerator gen2 = new SignatureGenerator(privateKey, cert, "file:///c:/temp/sigout.xml"); // set the c14n algorithm gen2.setC14nAlgorithm(SignatureGenerator.TRANSFORM_C14N_WITH_COMMENTS); // sign the document 
Document document2 = gen2.sign(null); // output the detached signature FileOutputStream fos2 = new FileOutputStream("c:\\temp\\sigout2.xml"); XMLUtils.outputDOMc14nWithComments(document2, fos2); fos2.close(); System.out.println("Created Signature 2 - a detached signature"); System.out.println(""); }
From source file:com.crawler.app.run.CrawlSiteController.java
/**
 * Configures a crawler4j CrawlController, reads the seed and paging settings
 * for the "site102" element from the XML file at pathXmlFile, adds one seed
 * URL per page and starts the crawl with CrawlSite as the crawler class.
 *
 * Any exception raised while reading the configuration, adding seeds or
 * running the crawl is caught and reported on standard output only.
 *
 * @param args unused (the storage folder and crawler count are hard-coded)
 */
public static void main(String[] args) throws Exception {
    logger.info("Start...: ");
    /*
     * if (args.length != 2) { logger.info("Needed parameters: ");
     * logger.info("\t rootFolder (it will contain intermediate crawl data)");
     * logger.info("\t numberOfCralwers (number of concurrent threads)");
     * return; }
     */

    /*
     * crawlStorageFolder is a folder where intermediate crawl data is
     * stored.
     */
    String crawlStorageFolder = "D:\\/Java\\/storage"; // "/crawler4j/storage"; // args[0];

    /*
     * numberOfCrawlers shows the number of concurrent threads that should
     * be initiated for crawling.
     */
    // int numberOfCrawlers = Integer.parseInt(args[1]);
    int numberOfCrawlers = 1;

    CrawlConfig config = new CrawlConfig();
    config.setCrawlStorageFolder(crawlStorageFolder);

    /*
     * Be polite: Make sure that we don't send more than 1 request per
     * second (1000 milliseconds between requests).
     */
    config.setPolitenessDelay(1000);
    // config.setFollowRedirects(false);

    /*
     * You can set the maximum crawl depth here. The default value is -1 for
     * unlimited depth
     */
    config.setMaxDepthOfCrawling(1); // ( use -1 for unlimited depth )

    /*
     * You can set the maximum number of pages to crawl. The default value
     * is -1 for unlimited number of pages
     */
    config.setMaxPagesToFetch(-1); // ( use -1 for unlimited pages )

    /*
     * Do you want crawler4j to crawl also binary data ? example: the
     * contents of pdf, or the metadata of images etc
     */
    config.setIncludeBinaryContentInCrawling(false);

    /*
     * Do you need to set a proxy? If so, you can use:
     * config.setProxyHost("proxyserver.example.com");
     * config.setProxyPort(8080);
     *
     * If your proxy also needs authentication:
     * config.setProxyUsername(username); config.setProxyPassword(password);
     */

    /*
     * This config parameter can be used to set your crawl to be resumable
     * (meaning that you can resume the crawl from a previously
     * interrupted/crashed crawl). Note: if you enable resuming feature and
     * want to start a fresh crawl, you need to delete the contents of
     * rootFolder manually.
     */
    config.setResumableCrawling(false);

    /*
     * Override the user agent string
     */
    config.setUserAgentString("Crawler");

    /*
     * Instantiate the controller for this crawl.
     */
    PageFetcher pageFetcher = new PageFetcher(config);
    RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
    // Local change: robots.txt handling is switched off
    robotstxtConfig.setEnabled(false);
    RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
    CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);

    /*
     * For each crawl, you need to add some seed urls. These are the first
     * URLs that are fetched and then the crawler starts following links
     * which are found in these pages
     */
    // controller.addSeed("http://careerbuilder.vn/");
    try {
        // Name of the XML element holding this site's settings
        String tag_size = "site102";
        int sizeIDXML = -1;
        String provinceYESNO, linkCrawlerBegin, linkCrawlerPage;
        // -1 means "not configured yet"; the values are overwritten from the XML below
        int pageNumberBegin = -1, pageNumberEnd = -1, pageLoopInit = -1, pageLoop = -1;
        File fXmlFile = new File(pathXmlFile);
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        org.w3c.dom.Document doc = dBuilder.parse(fXmlFile);
        org.w3c.dom.NodeList nList = doc.getElementsByTagName(tag_size);
        // NOTE(review): item(0) is null when the document has no such element;
        // the resulting NullPointerException ends up in the catch block below.
        org.w3c.dom.Node nNode = nList.item(0);
        if (nNode.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
            org.w3c.dom.Element eElement = (org.w3c.dom.Element) nNode;
            sizeIDXML = Integer.parseInt(eElement.getAttribute("id"));
            String pageDefine = eElement.getElementsByTagName("pageDefine").item(0).getTextContent();
            // read config
            ReadConfigPageNumberEnd(eElement);
            // Case 1: pageDefine is YES - the page count comes from the XML
            // (pageNumberTotal) instead of being looked up via getPageNumberEnd
            if (!pageDefine.isEmpty() && pageDefine.toUpperCase().equals("YES")) {
                org.w3c.dom.NodeList nListOnewebsite = eElement.getElementsByTagName("website");
                org.w3c.dom.Element eElementOnewebsite = (org.w3c.dom.Element) nListOnewebsite.item(0);
                linkCrawlerBegin = eElementOnewebsite.getElementsByTagName("linkCrawlerBegin").item(0)
                        .getTextContent();
                linkCrawlerPage = eElementOnewebsite.getElementsByTagName("linkCrawlerPage").item(0)
                        .getTextContent();
                int pageNumberTotal = Integer.parseInt(
                        eElementOnewebsite.getElementsByTagName("pageNumberTotal").item(0).getTextContent());
                pageNumberBegin = Integer.parseInt(
                        eElementOnewebsite.getElementsByTagName("pageNumberBegin").item(0).getTextContent());
                pageLoopInit = Integer.parseInt(
                        eElementOnewebsite.getElementsByTagName("pageLoopInit").item(0).getTextContent());
                pageLoop = Integer
                        .parseInt(eElementOnewebsite.getElementsByTagName("pageLoop").item(0).getTextContent());
                if (!linkCrawlerBegin.isEmpty()) {
                    controller.addSeed(linkCrawlerBegin);
                    //pageNumberEnd = getPageNumberEnd(linkCrawlerBegin);
                    if (pageNumberTotal > 1) {
                        // i only counts the added page seeds for the progress output
                        int i = 0;
                        for (; pageNumberTotal >= pageNumberBegin; pageNumberBegin++) {
                            // "%s" in the page link template is replaced by the running
                            // page value, which advances by pageLoop per page
                            String convertlinkCrawlerPage = linkCrawlerPage.replace("%s",
                                    String.valueOf(pageLoopInit));
                            controller.addSeed(convertlinkCrawlerPage);
                            pageLoopInit += pageLoop;
                            i++;
                            System.out.println(i);
                        }
                    }
                }
            } else {
                provinceYESNO = eElement.getElementsByTagName("provinceYESNO").item(0).getTextContent();
                if (!provinceYESNO.isEmpty() && provinceYESNO.toUpperCase().equals("YES")) {
                    // Case 2: the site is configured with one <province> element per region
                    org.w3c.dom.NodeList nListProvince = eElement.getElementsByTagName("province");
                    for (int index = 0; index < nListProvince.getLength(); index++) {
                        org.w3c.dom.Element eElementProvince = (org.w3c.dom.Element) nListProvince.item(index);
                        linkCrawlerBegin = eElementProvince.getElementsByTagName("linkCrawlerBegin").item(0)
                                .getTextContent();
                        linkCrawlerPage = eElementProvince.getElementsByTagName("linkCrawlerPage").item(0)
                                .getTextContent();
                        // NOTE(review): when one of these elements is empty the
                        // value left over from the previous province (already
                        // advanced by its loop) is reused - confirm this is intended.
                        if (!eElementProvince.getElementsByTagName("pageNumberBegin").item(0).getTextContent()
                                .isEmpty()) {
                            pageNumberBegin = Integer.parseInt(eElementProvince
                                    .getElementsByTagName("pageNumberBegin").item(0).getTextContent());
                        }
                        if (!eElementProvince.getElementsByTagName("pageLoopInit").item(0).getTextContent()
                                .isEmpty()) {
                            pageLoopInit = Integer.parseInt(eElementProvince
                                    .getElementsByTagName("pageLoopInit").item(0).getTextContent());
                        }
                        if (!eElementProvince.getElementsByTagName("pageLoop").item(0).getTextContent()
                                .isEmpty()) {
                            pageLoop = Integer.parseInt(
                                    eElementProvince.getElementsByTagName("pageLoop").item(0).getTextContent());
                        }
                        if (!linkCrawlerBegin.isEmpty()) {
                            controller.addSeed(linkCrawlerBegin);
                            // The last page number is obtained from the start link
                            pageNumberEnd = getPageNumberEnd(linkCrawlerBegin);
                            if (pageNumberEnd > 1) {
                                for (; pageNumberBegin <= pageNumberEnd; pageNumberBegin++) {
                                    String convertlinkCrawlerPage = linkCrawlerPage.replace("%s",
                                            String.valueOf(pageLoopInit));
                                    controller.addSeed(convertlinkCrawlerPage);
                                    pageLoopInit += pageLoop;
                                }
                            }
                        }
                    }
                } else if (!provinceYESNO.isEmpty() && provinceYESNO.toUpperCase().equals("NO")) {
                    // Case 3: no provinces - a single <website> element describes the site
                    org.w3c.dom.NodeList nListOnewebsite = eElement.getElementsByTagName("website");
                    org.w3c.dom.Element eElementOnewebsite = (org.w3c.dom.Element) nListOnewebsite.item(0);
                    // Read the start link, page link template and paging settings
                    linkCrawlerBegin = eElementOnewebsite.getElementsByTagName("linkCrawlerBegin").item(0)
                            .getTextContent();
                    linkCrawlerPage = eElementOnewebsite.getElementsByTagName("linkCrawlerPage").item(0)
                            .getTextContent();
                    if (!eElementOnewebsite.getElementsByTagName("pageNumberBegin").item(0).getTextContent()
                            .isEmpty()) {
                        pageNumberBegin = Integer.parseInt(eElementOnewebsite
                                .getElementsByTagName("pageNumberBegin").item(0).getTextContent());
                    }
                    if (!eElementOnewebsite.getElementsByTagName("pageLoopInit").item(0).getTextContent()
                            .isEmpty()) {
                        pageLoopInit = Integer.parseInt(eElementOnewebsite.getElementsByTagName("pageLoopInit")
                                .item(0).getTextContent());
                    }
                    if (!eElementOnewebsite.getElementsByTagName("pageLoop").item(0).getTextContent()
                            .isEmpty()) {
                        pageLoop = Integer.parseInt(
                                eElementOnewebsite.getElementsByTagName("pageLoop").item(0).getTextContent());
                    }
                    if (!linkCrawlerBegin.isEmpty()) {
                        controller.addSeed(linkCrawlerBegin);
                        // The last page number is obtained from the start link
                        pageNumberEnd = getPageNumberEnd(linkCrawlerBegin);
                        if (pageNumberEnd > 1) {
                            for (; pageNumberBegin <= pageNumberEnd; pageNumberBegin++) {
                                String convertlinkCrawlerPage = linkCrawlerPage.replace("%s",
                                        String.valueOf(pageLoopInit));
                                controller.addSeed(convertlinkCrawlerPage);
                                pageLoopInit += pageLoop;
                            }
                        }
                    }
                }
            }
            // Publish the site tag and id through CrawlSite's static fields,
            // then start the crawl
            CrawlSite.tag_size = tag_size;
            CrawlSite.siteIDXML = sizeIDXML;
            controller.start(CrawlSite.class, numberOfCrawlers);
        }
    } catch (Exception ex) {
        // NOTE(review): this also catches failures raised by controller.start,
        // not only XML problems, and drops the stack trace.
        System.out.print("can't read config xml, review xml file !!");
        System.out.print(ex.getMessage());
    }
}
From source file:com.occamlab.te.parsers.ImageParser.java
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Parameters: xml_url image_url"); return;/*from ww w .j a va2s . c om*/ } java.net.URL xml_url; try { xml_url = new java.net.URL(args[0]); } catch (Exception e) { jlogger.log(Level.INFO, "Error building xmlurl, will prefix file://", e); xml_url = new java.net.URL("file://" + args[0]); } java.net.URL image_url; try { image_url = new java.net.URL(args[1]); } catch (Exception e) { jlogger.log(Level.INFO, "Error building xmlurl, will prefix file://", e); image_url = new java.net.URL("file://" + args[1]); } DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = db.parse(xml_url.openStream()); // Element instruction = (Element) // doc.getElementsByTagNameNS("http://www.occamlab.com/te/parsers", // "ImageParser").item(0); Element instruction = (Element) doc.getDocumentElement(); PrintWriter logger = new PrintWriter(System.out); InputStream image_is = image_url.openConnection().getInputStream(); Document result = parse(image_is, instruction, logger); logger.flush(); if (result != null) { TransformerFactory tf = TransformerFactory.newInstance(); try { tf.setAttribute("http://saxon.sf.net/feature/strip-whitespace", "all"); } catch (IllegalArgumentException e) { jlogger.log(Level.INFO, "setAttribute(\"http://saxon.sf.net/feature/strip-whitespace\", \"all\");", e); } Transformer t = tf.newTransformer(); t.setOutputProperty(OutputKeys.INDENT, "yes"); t.transform(new DOMSource(result), new StreamResult(System.out)); } System.exit(0); }
From source file:InlineSchemaValidator.java
/** Main program entry point. */ public static void main(String[] argv) { // is there anything to do? if (argv.length == 0) { printUsage();/*from ww w . j av a 2 s . co m*/ System.exit(1); } // variables Vector schemas = null; Vector instances = null; HashMap prefixMappings = null; HashMap uriMappings = null; String docURI = argv[argv.length - 1]; String schemaLanguage = DEFAULT_SCHEMA_LANGUAGE; int repetition = DEFAULT_REPETITION; boolean schemaFullChecking = DEFAULT_SCHEMA_FULL_CHECKING; boolean honourAllSchemaLocations = DEFAULT_HONOUR_ALL_SCHEMA_LOCATIONS; boolean validateAnnotations = DEFAULT_VALIDATE_ANNOTATIONS; boolean generateSyntheticAnnotations = DEFAULT_GENERATE_SYNTHETIC_ANNOTATIONS; boolean memoryUsage = DEFAULT_MEMORY_USAGE; // process arguments for (int i = 0; i < argv.length - 1; ++i) { String arg = argv[i]; if (arg.startsWith("-")) { String option = arg.substring(1); if (option.equals("l")) { // get schema language name if (++i == argv.length) { System.err.println("error: Missing argument to -l option."); } else { schemaLanguage = argv[i]; } continue; } if (option.equals("x")) { if (++i == argv.length) { System.err.println("error: Missing argument to -x option."); continue; } String number = argv[i]; try { int value = Integer.parseInt(number); if (value < 1) { System.err.println("error: Repetition must be at least 1."); continue; } repetition = value; } catch (NumberFormatException e) { System.err.println("error: invalid number (" + number + ")."); } continue; } if (arg.equals("-a")) { // process -a: xpath expressions for schemas if (schemas == null) { schemas = new Vector(); } while (i + 1 < argv.length - 1 && !(arg = argv[i + 1]).startsWith("-")) { schemas.add(arg); ++i; } continue; } if (arg.equals("-i")) { // process -i: xpath expressions for instance documents if (instances == null) { instances = new Vector(); } while (i + 1 < argv.length - 1 && !(arg = argv[i + 1]).startsWith("-")) { instances.add(arg); ++i; } continue; } if 
(arg.equals("-nm")) { String prefix; String uri; while (i + 2 < argv.length - 1 && !(prefix = argv[i + 1]).startsWith("-") && !(uri = argv[i + 2]).startsWith("-")) { if (prefixMappings == null) { prefixMappings = new HashMap(); uriMappings = new HashMap(); } prefixMappings.put(prefix, uri); HashSet prefixes = (HashSet) uriMappings.get(uri); if (prefixes == null) { prefixes = new HashSet(); uriMappings.put(uri, prefixes); } prefixes.add(prefix); i += 2; } continue; } if (option.equalsIgnoreCase("f")) { schemaFullChecking = option.equals("f"); continue; } if (option.equalsIgnoreCase("hs")) { honourAllSchemaLocations = option.equals("hs"); continue; } if (option.equalsIgnoreCase("va")) { validateAnnotations = option.equals("va"); continue; } if (option.equalsIgnoreCase("ga")) { generateSyntheticAnnotations = option.equals("ga"); continue; } if (option.equalsIgnoreCase("m")) { memoryUsage = option.equals("m"); continue; } if (option.equals("h")) { printUsage(); continue; } System.err.println("error: unknown option (" + option + ")."); continue; } } try { // Create new instance of inline schema validator. InlineSchemaValidator inlineSchemaValidator = new InlineSchemaValidator(prefixMappings, uriMappings); // Parse document containing schemas and validation roots DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(inlineSchemaValidator); Document doc = db.parse(docURI); // Create XPath factory for selecting schema and validation roots XPathFactory xpf = XPathFactory.newInstance(); XPath xpath = xpf.newXPath(); xpath.setNamespaceContext(inlineSchemaValidator); // Select schema roots from the DOM NodeList[] schemaNodes = new NodeList[schemas != null ? 
schemas.size() : 0]; for (int i = 0; i < schemaNodes.length; ++i) { XPathExpression xpathSchema = xpath.compile((String) schemas.elementAt(i)); schemaNodes[i] = (NodeList) xpathSchema.evaluate(doc, XPathConstants.NODESET); } // Select validation roots from the DOM NodeList[] instanceNodes = new NodeList[instances != null ? instances.size() : 0]; for (int i = 0; i < instanceNodes.length; ++i) { XPathExpression xpathInstance = xpath.compile((String) instances.elementAt(i)); instanceNodes[i] = (NodeList) xpathInstance.evaluate(doc, XPathConstants.NODESET); } // Create SchemaFactory and configure SchemaFactory factory = SchemaFactory.newInstance(schemaLanguage); factory.setErrorHandler(inlineSchemaValidator); try { factory.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking); } catch (SAXNotRecognizedException e) { System.err.println("warning: SchemaFactory does not recognize feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println("warning: SchemaFactory does not support feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")"); } try { factory.setFeature(HONOUR_ALL_SCHEMA_LOCATIONS_ID, honourAllSchemaLocations); } catch (SAXNotRecognizedException e) { System.err.println("warning: SchemaFactory does not recognize feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: SchemaFactory does not support feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")"); } try { factory.setFeature(VALIDATE_ANNOTATIONS_ID, validateAnnotations); } catch (SAXNotRecognizedException e) { System.err.println( "warning: SchemaFactory does not recognize feature (" + VALIDATE_ANNOTATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: SchemaFactory does not support feature (" + VALIDATE_ANNOTATIONS_ID + ")"); } try { factory.setFeature(GENERATE_SYNTHETIC_ANNOTATIONS_ID, generateSyntheticAnnotations); } catch (SAXNotRecognizedException e) { 
System.err.println("warning: SchemaFactory does not recognize feature (" + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println("warning: SchemaFactory does not support feature (" + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")"); } // Build Schema from sources Schema schema; { DOMSource[] sources; int size = 0; for (int i = 0; i < schemaNodes.length; ++i) { size += schemaNodes[i].getLength(); } sources = new DOMSource[size]; if (size == 0) { schema = factory.newSchema(); } else { int count = 0; for (int i = 0; i < schemaNodes.length; ++i) { NodeList nodeList = schemaNodes[i]; int nodeListLength = nodeList.getLength(); for (int j = 0; j < nodeListLength; ++j) { sources[count++] = new DOMSource(nodeList.item(j)); } } schema = factory.newSchema(sources); } } // Setup validator and input source. Validator validator = schema.newValidator(); validator.setErrorHandler(inlineSchemaValidator); try { validator.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking); } catch (SAXNotRecognizedException e) { System.err.println( "warning: Validator does not recognize feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: Validator does not support feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")"); } try { validator.setFeature(HONOUR_ALL_SCHEMA_LOCATIONS_ID, honourAllSchemaLocations); } catch (SAXNotRecognizedException e) { System.err.println( "warning: Validator does not recognize feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: Validator does not support feature (" + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")"); } try { validator.setFeature(VALIDATE_ANNOTATIONS_ID, validateAnnotations); } catch (SAXNotRecognizedException e) { System.err .println("warning: Validator does not recognize feature (" + VALIDATE_ANNOTATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println("warning: 
Validator does not support feature (" + VALIDATE_ANNOTATIONS_ID + ")"); } try { validator.setFeature(GENERATE_SYNTHETIC_ANNOTATIONS_ID, generateSyntheticAnnotations); } catch (SAXNotRecognizedException e) { System.err.println("warning: Validator does not recognize feature (" + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")"); } catch (SAXNotSupportedException e) { System.err.println( "warning: Validator does not support feature (" + GENERATE_SYNTHETIC_ANNOTATIONS_ID + ")"); } // Validate instance documents for (int i = 0; i < instanceNodes.length; ++i) { NodeList nodeList = instanceNodes[i]; int nodeListLength = nodeList.getLength(); for (int j = 0; j < nodeListLength; ++j) { DOMSource source = new DOMSource(nodeList.item(j)); source.setSystemId(docURI); inlineSchemaValidator.validate(validator, source, docURI, repetition, memoryUsage); } } } catch (SAXParseException e) { // ignore } catch (Exception e) { System.err.println("error: Parse error occurred - " + e.getMessage()); if (e instanceof SAXException) { Exception nested = ((SAXException) e).getException(); if (nested != null) { e = nested; } } e.printStackTrace(System.err); } }
From source file:com.adobe.aem.demomachine.gui.AemDemo.java
public static void main(String[] args) { String demoMachineRootFolder = null; // Command line options for this tool Options options = new Options(); options.addOption("f", true, "Path to Demo Machine root folder"); CommandLineParser parser = new BasicParser(); try {//w ww . j a v a2 s. com CommandLine cmd = parser.parse(options, args); if (cmd.hasOption("f")) { demoMachineRootFolder = cmd.getOptionValue("f"); } } catch (ParseException ex) { logger.error(ex.getMessage()); } // Let's grab the version number for the core Maven file String mavenFilePath = (demoMachineRootFolder != null ? demoMachineRootFolder : System.getProperty("user.dir")) + File.separator + "java" + File.separator + "core" + File.separator + "pom.xml"; File mavenFile = new File(mavenFilePath); if (mavenFile.exists() && !mavenFile.isDirectory()) { try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document document; document = builder.parse(mavenFile); NodeList list = document.getElementsByTagName("version"); if (list != null && list.getLength() > 0) { aemDemoMachineVersion = list.item(0).getFirstChild().getNodeValue(); } } catch (Exception e) { logger.error("Can't parse Maven pom.xml file"); } } // Let's check if we have a valid build.xml file to work with... String buildFilePath = (demoMachineRootFolder != null ? 
demoMachineRootFolder : System.getProperty("user.dir")) + File.separator + "build.xml"; logger.debug("Trying to load build file from " + buildFilePath); buildFile = new File(buildFilePath); if (buildFile.exists() && !buildFile.isDirectory()) { // Launching the main window EventQueue.invokeLater(new Runnable() { public void run() { try { UIManager.getLookAndFeelDefaults().put("defaultFont", new Font("Arial", Font.BOLD, 14)); AemDemo window = new AemDemo(); window.frameMain.setVisible(true); } catch (Exception e) { e.printStackTrace(); } } }); } else { logger.error("No valid build.xml file to work with"); System.exit(-1); } }