List of usage examples for java.util.List.toArray(T[])
<T> T[] toArray(T[] a);
From source file:esiptestbed.mudrod.ontology.process.LocalOntology.java
public static void main(String[] args) throws Exception { // boolean options Option helpOpt = new Option("h", "help", false, "show this help message"); // argument options Option ontDirOpt = Option.builder(ONT_DIR).required(true).numberOfArgs(1).hasArg(true) .desc("A directory containing .owl files.").argName(ONT_DIR).build(); // create the options Options options = new Options(); options.addOption(helpOpt);/*from w ww . j ava2 s .c o m*/ options.addOption(ontDirOpt); String ontDir; CommandLineParser parser = new DefaultParser(); try { CommandLine line = parser.parse(options, args); if (line.hasOption(ONT_DIR)) { ontDir = line.getOptionValue(ONT_DIR).replace("\\", "/"); } else { ontDir = LocalOntology.class.getClassLoader().getResource("ontology").getFile(); } if (!ontDir.endsWith("/")) { ontDir += "/"; } } catch (Exception e) { LOG.error("Error whilst processing main method of LocalOntology.", e); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("LocalOntology: 'ontDir' argument is mandatory. ", options, true); return; } File fileDir = new File(ontDir); //Fail if the input is not a directory. if (fileDir.isDirectory()) { List<String> owlFiles = new ArrayList<>(); for (File owlFile : fileDir.listFiles()) { owlFiles.add(owlFile.toString()); } MudrodEngine mEngine = new MudrodEngine(); Properties props = mEngine.loadConfig(); Ontology ontology = new OntologyFactory(props).getOntology(); //convert to correct iput for ontology loading. String[] owlArray = new String[owlFiles.size()]; owlArray = owlFiles.toArray(owlArray); ontology.load(owlArray); String[] terms = new String[] { "Glacier ice" }; //Demonstrate that we can do basic ontology heirarchy navigation and log output. 
for (Iterator<OntClass> i = getParser().rootClasses(getModel()); i.hasNext();) { //print Ontology Class Hierarchy OntClass c = i.next(); renderHierarchy(System.out, c, new LinkedList<>(), 0); for (Iterator<OntClass> subClass = c.listSubClasses(true); subClass.hasNext();) { OntClass sub = subClass.next(); //This means that the search term is present as an OntClass if (terms[0].equalsIgnoreCase(sub.getLabel(null))) { //Add the search term(s) above to the term cache. for (int j = 0; j < terms.length; j++) { addSearchTerm(terms[j], sub); } //Query the ontology and return subclasses of the search term(s) for (int k = 0; k < terms.length; k++) { Iterator<String> iter = ontology.subclasses(terms[k]); while (iter.hasNext()) { LOG.info("Subclasses >> " + iter.next()); } } //print any synonymic relationships to demonstrate that we can //undertake synonym-based query expansion for (int l = 0; l < terms.length; l++) { Iterator<String> iter = ontology.synonyms(terms[l]); while (iter.hasNext()) { LOG.info("Synonym >> " + iter.next()); } } } } } mEngine.end(); } }
From source file:gov.nasa.jpl.mudrod.ontology.process.LocalOntology.java
public static void main(String[] args) throws Exception { // boolean options Option helpOpt = new Option("h", "help", false, "show this help message"); // argument options Option ontDirOpt = OptionBuilder.hasArg(true).withArgName(ONT_DIR) .withDescription("A directory containing .owl files.").isRequired(false).create(); // create the options Options options = new Options(); options.addOption(helpOpt);//from w w w . j av a 2 s. c o m options.addOption(ontDirOpt); String ontDir; CommandLineParser parser = new GnuParser(); try { CommandLine line = parser.parse(options, args); if (line.hasOption(ONT_DIR)) { ontDir = line.getOptionValue(ONT_DIR).replace("\\", "/"); } else { ontDir = LocalOntology.class.getClassLoader().getResource("ontology").getFile(); } if (!ontDir.endsWith("/")) { ontDir += "/"; } } catch (Exception e) { LOG.error("Error whilst processing main method of LocalOntology.", e); HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("LocalOntology: 'ontDir' argument is mandatory. ", options, true); return; } File fileDir = new File(ontDir); //Fail if the input is not a directory. if (fileDir.isDirectory()) { List<String> owlFiles = new ArrayList<>(); for (File owlFile : fileDir.listFiles()) { owlFiles.add(owlFile.toString()); } MudrodEngine mEngine = new MudrodEngine(); Properties props = mEngine.loadConfig(); Ontology ontology = new OntologyFactory(props).getOntology(); //convert to correct iput for ontology loading. String[] owlArray = new String[owlFiles.size()]; owlArray = owlFiles.toArray(owlArray); ontology.load(owlArray); String[] terms = new String[] { "Glacier ice" }; //Demonstrate that we can do basic ontology heirarchy navigation and log output. 
for (Iterator<OntClass> i = getParser().rootClasses(getModel()); i.hasNext();) { //print Ontology Class Hierarchy OntClass c = i.next(); renderHierarchy(System.out, c, new LinkedList<>(), 0); for (Iterator<OntClass> subClass = c.listSubClasses(true); subClass.hasNext();) { OntClass sub = subClass.next(); //This means that the search term is present as an OntClass if (terms[0].equalsIgnoreCase(sub.getLabel(null))) { //Add the search term(s) above to the term cache. for (int j = 0; j < terms.length; j++) { addSearchTerm(terms[j], sub); } //Query the ontology and return subclasses of the search term(s) for (int k = 0; k < terms.length; k++) { Iterator<String> iter = ontology.subclasses(terms[k]); while (iter.hasNext()) { LOG.info("Subclasses >> " + iter.next()); } } //print any synonymic relationships to demonstrate that we can //undertake synonym-based query expansion for (int l = 0; l < terms.length; l++) { Iterator<String> iter = ontology.synonyms(terms[l]); while (iter.hasNext()) { LOG.info("Synonym >> " + iter.next()); } } } } } mEngine.end(); } }
From source file:com.aliyun.openservices.odps.console.ODPSConsole.java
public static void main(String[] args) throws ODPSConsoleException { List<String> options = new ArrayList<String>(); // ?confignull String config = prepareOptions(args, options); // session context ExecutionContext sessionContext = ExecutionContext.load(config); checkSDKEnviron();/* w w w . j a va 2 s . c om*/ DefaultOutputWriter writer = sessionContext.getOutputWriter(); writer.writeDebug("ODPSConsole Start"); try { // apache.commons.logging??? System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog"); // sessionContextcommand // ??command???? AbstractCommand oa = CommandParserUtils.parseOptions( config == null ? args : (String[]) options.toArray(new String[0]), sessionContext); oa.run(); } catch (OdpsException e) { writer.writeError(ODPSConsoleConstants.FAILED_MESSAGE + e.getMessage()); // debug??stack? writer.writeDebug(StringUtils.stringifyException(e)); System.exit(1); } catch (ODPSConsoleException e) { writer.writeError(ODPSConsoleConstants.FAILED_MESSAGE + e.getMessage()); writer.writeDebug(StringUtils.stringifyException(e)); System.exit(e.getExitCode()); } catch (Exception e) { // , e.printStackTrace(); System.exit(1); } sessionContext.getOutputWriter().writeDebug("ODPSConsole End"); // System.exit(0); }
From source file:io.s4.MainApp.java
public static void main(String args[]) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("corehome").hasArg().withDescription("core home").create("c")); options.addOption(/*from ww w.j av a 2 s .c o m*/ OptionBuilder.withArgName("appshome").hasArg().withDescription("applications home").create("a")); options.addOption(OptionBuilder.withArgName("s4clock").hasArg().withDescription("s4 clock").create("d")); options.addOption(OptionBuilder.withArgName("seedtime").hasArg() .withDescription("event clock initialization time").create("s")); options.addOption( OptionBuilder.withArgName("extshome").hasArg().withDescription("extensions home").create("e")); options.addOption( OptionBuilder.withArgName("instanceid").hasArg().withDescription("instance id").create("i")); options.addOption( OptionBuilder.withArgName("configtype").hasArg().withDescription("configuration type").create("t")); CommandLineParser parser = new GnuParser(); CommandLine commandLine = null; String clockType = "wall"; try { commandLine = parser.parse(options, args); } catch (ParseException pe) { System.err.println(pe.getLocalizedMessage()); System.exit(1); } int instanceId = -1; if (commandLine.hasOption("i")) { String instanceIdStr = commandLine.getOptionValue("i"); try { instanceId = Integer.parseInt(instanceIdStr); } catch (NumberFormatException nfe) { System.err.println("Bad instance id: %s" + instanceIdStr); System.exit(1); } } if (commandLine.hasOption("c")) { coreHome = commandLine.getOptionValue("c"); } if (commandLine.hasOption("a")) { appsHome = commandLine.getOptionValue("a"); } if (commandLine.hasOption("d")) { clockType = commandLine.getOptionValue("d"); } if (commandLine.hasOption("e")) { extsHome = commandLine.getOptionValue("e"); } String configType = "typical"; if (commandLine.hasOption("t")) { configType = commandLine.getOptionValue("t"); } long seedTime = 0; if (commandLine.hasOption("s")) { seedTime = 
Long.parseLong(commandLine.getOptionValue("s")); } File coreHomeFile = new File(coreHome); if (!coreHomeFile.isDirectory()) { System.err.println("Bad core home: " + coreHome); System.exit(1); } File appsHomeFile = new File(appsHome); if (!appsHomeFile.isDirectory()) { System.err.println("Bad applications home: " + appsHome); System.exit(1); } if (instanceId > -1) { System.setProperty("instanceId", "" + instanceId); } else { System.setProperty("instanceId", "" + S4Util.getPID()); } List loArgs = commandLine.getArgList(); if (loArgs.size() < 1) { // System.err.println("No bean configuration file specified"); // System.exit(1); } // String s4ConfigXml = (String) loArgs.get(0); // System.out.println("s4ConfigXml is " + s4ConfigXml); ClassPathResource propResource = new ClassPathResource("s4-core.properties"); Properties prop = new Properties(); if (propResource.exists()) { prop.load(propResource.getInputStream()); } else { System.err.println("Unable to find s4-core.properties. It must be available in classpath"); System.exit(1); } ApplicationContext coreContext = null; String configBase = coreHome + File.separatorChar + "conf" + File.separatorChar + configType; String configPath = ""; List<String> coreConfigUrls = new ArrayList<String>(); File configFile = null; // load clock configuration configPath = configBase + File.separatorChar + clockType + "-clock.xml"; coreConfigUrls.add(configPath); // load core config xml configPath = configBase + File.separatorChar + "s4-core-conf.xml"; configFile = new File(configPath); if (!configFile.exists()) { System.err.printf("S4 core config file %s does not exist\n", configPath); System.exit(1); } coreConfigUrls.add(configPath); String[] coreConfigFiles = new String[coreConfigUrls.size()]; coreConfigUrls.toArray(coreConfigFiles); String[] coreConfigFileUrls = new String[coreConfigFiles.length]; for (int i = 0; i < coreConfigFiles.length; i++) { coreConfigFileUrls[i] = "file:" + coreConfigFiles[i]; } coreContext = new 
FileSystemXmlApplicationContext(coreConfigFileUrls, coreContext); ApplicationContext context = coreContext; Clock s4Clock = (Clock) context.getBean("clock"); if (s4Clock instanceof EventClock && seedTime > 0) { EventClock s4EventClock = (EventClock) s4Clock; s4EventClock.updateTime(seedTime); System.out.println("Intializing event clock time with seed time " + s4EventClock.getCurrentTime()); } PEContainer peContainer = (PEContainer) context.getBean("peContainer"); Watcher w = (Watcher) context.getBean("watcher"); w.setConfigFilename(configPath); // load extension modules String[] configFileNames = getModuleConfigFiles(extsHome, prop); if (configFileNames.length > 0) { String[] configFileUrls = new String[configFileNames.length]; for (int i = 0; i < configFileNames.length; i++) { configFileUrls[i] = "file:" + configFileNames[i]; } context = new FileSystemXmlApplicationContext(configFileUrls, context); } // load application modules configFileNames = getModuleConfigFiles(appsHome, prop); if (configFileNames.length > 0) { String[] configFileUrls = new String[configFileNames.length]; for (int i = 0; i < configFileNames.length; i++) { configFileUrls[i] = "file:" + configFileNames[i]; } context = new FileSystemXmlApplicationContext(configFileUrls, context); // attach any beans that implement ProcessingElement to the PE // Container String[] processingElementBeanNames = context.getBeanNamesForType(ProcessingElement.class); for (String processingElementBeanName : processingElementBeanNames) { Object bean = context.getBean(processingElementBeanName); try { Method getS4ClockMethod = bean.getClass().getMethod("getS4Clock"); if (getS4ClockMethod.getReturnType().equals(Clock.class)) { if (getS4ClockMethod.invoke(bean) == null) { Method setS4ClockMethod = bean.getClass().getMethod("setS4Clock", Clock.class); setS4ClockMethod.invoke(bean, coreContext.getBean("clock")); } } } catch (NoSuchMethodException mnfe) { // acceptable } System.out.println("Adding processing element with bean 
name " + processingElementBeanName + ", id " + ((ProcessingElement) bean).getId()); peContainer.addProcessor((ProcessingElement) bean, processingElementBeanName); } } }
From source file:org.apache.s4.MainApp.java
public static void main(String args[]) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("corehome").hasArg().withDescription("core home").create("c")); options.addOption(//from w w w . jav a2 s .c o m OptionBuilder.withArgName("appshome").hasArg().withDescription("applications home").create("a")); options.addOption(OptionBuilder.withArgName("s4clock").hasArg().withDescription("s4 clock").create("d")); options.addOption(OptionBuilder.withArgName("seedtime").hasArg() .withDescription("event clock initialization time").create("s")); options.addOption( OptionBuilder.withArgName("extshome").hasArg().withDescription("extensions home").create("e")); options.addOption( OptionBuilder.withArgName("instanceid").hasArg().withDescription("instance id").create("i")); options.addOption( OptionBuilder.withArgName("configtype").hasArg().withDescription("configuration type").create("t")); CommandLineParser parser = new GnuParser(); CommandLine commandLine = null; String clockType = "wall"; try { commandLine = parser.parse(options, args); } catch (ParseException pe) { System.err.println(pe.getLocalizedMessage()); System.exit(1); } int instanceId = -1; if (commandLine.hasOption("i")) { String instanceIdStr = commandLine.getOptionValue("i"); try { instanceId = Integer.parseInt(instanceIdStr); } catch (NumberFormatException nfe) { System.err.println("Bad instance id: %s" + instanceIdStr); System.exit(1); } } if (commandLine.hasOption("c")) { coreHome = commandLine.getOptionValue("c"); } if (commandLine.hasOption("a")) { appsHome = commandLine.getOptionValue("a"); } if (commandLine.hasOption("d")) { clockType = commandLine.getOptionValue("d"); } if (commandLine.hasOption("e")) { extsHome = commandLine.getOptionValue("e"); } String configType = "typical"; if (commandLine.hasOption("t")) { configType = commandLine.getOptionValue("t"); } long seedTime = 0; if (commandLine.hasOption("s")) { seedTime = 
Long.parseLong(commandLine.getOptionValue("s")); } File coreHomeFile = new File(coreHome); if (!coreHomeFile.isDirectory()) { System.err.println("Bad core home: " + coreHome); System.exit(1); } File appsHomeFile = new File(appsHome); if (!appsHomeFile.isDirectory()) { System.err.println("Bad applications home: " + appsHome); System.exit(1); } if (instanceId > -1) { System.setProperty("instanceId", "" + instanceId); } else { System.setProperty("instanceId", "" + S4Util.getPID()); } List loArgs = commandLine.getArgList(); if (loArgs.size() < 1) { // System.err.println("No bean configuration file specified"); // System.exit(1); } // String s4ConfigXml = (String) loArgs.get(0); // System.out.println("s4ConfigXml is " + s4ConfigXml); ClassPathResource propResource = new ClassPathResource("s4-core.properties"); Properties prop = new Properties(); if (propResource.exists()) { prop.load(propResource.getInputStream()); } else { System.err.println("Unable to find s4-core.properties. It must be available in classpath"); System.exit(1); } ApplicationContext coreContext = null; String configBase = coreHome + File.separatorChar + "conf" + File.separatorChar + configType; String configPath = ""; List<String> coreConfigUrls = new ArrayList<String>(); File configFile = null; // load clock configuration configPath = configBase + File.separatorChar + clockType + "-clock.xml"; coreConfigUrls.add(configPath); // load core config xml configPath = configBase + File.separatorChar + "s4-core-conf.xml"; configFile = new File(configPath); if (!configFile.exists()) { System.err.printf("S4 core config file %s does not exist\n", configPath); System.exit(1); } coreConfigUrls.add(configPath); String[] coreConfigFiles = new String[coreConfigUrls.size()]; coreConfigUrls.toArray(coreConfigFiles); String[] coreConfigFileUrls = new String[coreConfigFiles.length]; for (int i = 0; i < coreConfigFiles.length; i++) { coreConfigFileUrls[i] = "file:" + coreConfigFiles[i]; } coreContext = new 
FileSystemXmlApplicationContext(coreConfigFileUrls, coreContext); ApplicationContext context = coreContext; Clock clock = (Clock) context.getBean("clock"); if (clock instanceof EventClock && seedTime > 0) { EventClock s4EventClock = (EventClock) clock; s4EventClock.updateTime(seedTime); System.out.println("Intializing event clock time with seed time " + s4EventClock.getCurrentTime()); } PEContainer peContainer = (PEContainer) context.getBean("peContainer"); Watcher w = (Watcher) context.getBean("watcher"); w.setConfigFilename(configPath); // load extension modules String[] configFileNames = getModuleConfigFiles(extsHome, prop); if (configFileNames.length > 0) { String[] configFileUrls = new String[configFileNames.length]; for (int i = 0; i < configFileNames.length; i++) { configFileUrls[i] = "file:" + configFileNames[i]; } context = new FileSystemXmlApplicationContext(configFileUrls, context); } // load application modules configFileNames = getModuleConfigFiles(appsHome, prop); if (configFileNames.length > 0) { String[] configFileUrls = new String[configFileNames.length]; for (int i = 0; i < configFileNames.length; i++) { configFileUrls[i] = "file:" + configFileNames[i]; } context = new FileSystemXmlApplicationContext(configFileUrls, context); // attach any beans that implement ProcessingElement to the PE // Container String[] processingElementBeanNames = context.getBeanNamesForType(AbstractPE.class); for (String processingElementBeanName : processingElementBeanNames) { AbstractPE bean = (AbstractPE) context.getBean(processingElementBeanName); bean.setClock(clock); try { bean.setSafeKeeper((SafeKeeper) context.getBean("safeKeeper")); } catch (NoSuchBeanDefinitionException ignored) { // no safe keeper = no checkpointing / recovery } // if the application did not specify an id, use the Spring bean name if (bean.getId() == null) { bean.setId(processingElementBeanName); } System.out.println("Adding processing element with bean name " + processingElementBeanName + ", id 
" + ((AbstractPE) bean).getId()); peContainer.addProcessor((AbstractPE) bean); } } }
From source file:ch.sdi.core.impl.ftp.FTPClientExample.java
public static void main(String[] aArgs) throws UnknownHostException { List<String> args = new ArrayList<String>(Arrays.asList(aArgs)); args.add("-s"); // store file on sesrver args.add("-b"); // binary transfer mode args.add("-#"); args.add("192.168.99.1"); args.add("heri"); // user args.add("heri"); // pw args.add("/var/www/log4j2.xml"); URL url = ClassLoader.getSystemResource("sdimain_test.properties"); // URL url = ClassLoader.getSystemResource( "log4j2.xml" ); args.add(url.getFile());/* w ww .j a va 2s . c o m*/ FTPClientExample example = new FTPClientExample(); try { example.init(args.toArray(new String[args.size()])); example.run(); } catch (Throwable t) { myLog.error("Exception caught", t); myLog.info(USAGE); System.exit(1); } }
From source file:com.lenovo.tensorhusky.common.utils.Shell.java
public static void main(String[] args) throws IOException { List<String> command = new ArrayList<String>(); command.add("ping"); command.add("-a"); command.add("baidu.com"); ShellCommandExecutor exe = new ShellCommandExecutor(command.toArray(new String[command.size()])); exe.execute();// www . ja v a 2s. c o m System.out.println(exe.getOutput()); }
From source file:com.music.tools.ScaleTester.java
public static void main(String[] args) { System.out.println(/*from w w w .j a v a 2 s .c om*/ "Usage: java ScaleTester <fundamental frequency> <chromatic scale size> <scale size> <use ET>"); final AudioFormat af = new AudioFormat(sampleRate, 16, 1, true, true); try { fundamentalFreq = getArgument(args, 0, FUNDAMENTAL_FREQUENCY, Double.class); int pitchesInChromaticScale = getArgument(args, 1, CHROMATIC_SCALE_SILZE, Integer.class); List<Double> harmonicFrequencies = new ArrayList<>(); List<String> ratios = new ArrayList<>(); Set<Double> frequencies = new HashSet<Double>(); frequencies.add(fundamentalFreq); int octaveMultiplier = 2; for (int i = 2; i < 100; i++) { // Exclude the 7th harmonic TODO exclude the 11th as well? // http://www.phy.mtu.edu/~suits/badnote.html if (i % 7 == 0) { continue; } double actualFreq = fundamentalFreq * i; double closestTonicRatio = actualFreq / (fundamentalFreq * octaveMultiplier); if (closestTonicRatio < 1 || closestTonicRatio > 2) { octaveMultiplier *= 2; } double closestTonic = actualFreq - actualFreq % (fundamentalFreq * octaveMultiplier); double normalizedFreq = fundamentalFreq * (actualFreq / closestTonic); harmonicFrequencies.add(actualFreq); frequencies.add(normalizedFreq); if (frequencies.size() == pitchesInChromaticScale) { break; } } System.out.println("Harmonic (overtone) frequencies: " + harmonicFrequencies); System.out.println("Transposed harmonic frequencies: " + frequencies); List<Double> chromaticScale = new ArrayList<>(frequencies); Collections.sort(chromaticScale); // find the "perfect" interval (e.g. 
perfect fifth) int perfectIntervalIndex = 0; int idx = 0; for (Iterator<Double> it = chromaticScale.iterator(); it.hasNext();) { Double noteFreq = it.next(); long[] fraction = findCommonFraction(noteFreq / fundamentalFreq); fractionCache.put(noteFreq, fraction); if (fraction[0] == 3 && fraction[1] == 2) { perfectIntervalIndex = idx; System.out.println("Perfect interval (3/2) idx: " + perfectIntervalIndex); } idx++; ratios.add(Arrays.toString(fraction)); } System.out.println("Ratios to fundemental frequency: " + ratios); if (getBooleanArgument(args, 4, USE_ET)) { chromaticScale = temper(chromaticScale); } System.out.println(); System.out.println("Chromatic scale: " + chromaticScale); Set<Double> scaleSet = new HashSet<Double>(); scaleSet.add(chromaticScale.get(0)); idx = 0; List<Double> orderedInCircle = new ArrayList<>(); // now go around the circle of perfect intervals and put the notes // in order while (orderedInCircle.size() < chromaticScale.size()) { orderedInCircle.add(chromaticScale.get(idx)); idx += perfectIntervalIndex; idx = idx % chromaticScale.size(); } System.out.println("Pitches Ordered in circle of perfect intervals: " + orderedInCircle); List<Double> scale = new ArrayList<Double>(scaleSet); int currentIdxInCircle = orderedInCircle.size() - 1; // start with // the last // note in the // circle int scaleSize = getArgument(args, 3, SCALE_SIZE, Integer.class); while (scale.size() < scaleSize) { double pitch = orderedInCircle.get(currentIdxInCircle % orderedInCircle.size()); if (!scale.contains(pitch)) { scale.add(pitch); } currentIdxInCircle++; } Collections.sort(scale); System.out.println("Scale: " + scale); SourceDataLine line = AudioSystem.getSourceDataLine(af); line.open(af); line.start(); Double[] scaleFrequencies = scale.toArray(new Double[scale.size()]); // first play the whole scale WaveMelodyGenerator.playScale(line, scaleFrequencies); // then generate a random melody in the scale WaveMelodyGenerator.playMelody(line, scaleFrequencies); 
line.drain(); line.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:com.finderbots.miner2.pinterest.PinterestCrawlAndMinerTool.java
public static void main(String[] args) { Options options = new Options(); CmdLineParser parser = new CmdLineParser(options); try {// w w w . j a va2 s . c o m parser.parseArgument(args); } catch (CmdLineException e) { System.err.println(e.getMessage()); printUsageAndExit(parser); } // Before we get too far along, see if the domain looks valid. String domain = options.getDomain(); String urlsFile = options.getUrlsFile(); if (domain != null) { validateDomain(domain, parser); } else { if (urlsFile == null) { System.err.println( "Either a target domain should be specified or a file with a list of urls needs to be provided"); printUsageAndExit(parser); } } if (domain != null && urlsFile != null) { System.out.println("Warning: Both domain and urls file list provided - using domain"); } String outputDirName = options.getOutputDir(); if (options.isDebugLogging()) { System.setProperty("bixo.root.level", "DEBUG"); } else { System.setProperty("bixo.root.level", "INFO"); } if (options.getLoggingAppender() != null) { // Set console vs. DRFA vs. something else System.setProperty("bixo.appender", options.getLoggingAppender()); } String logsDir = options.getLogsDir(); if (!logsDir.endsWith("/")) { logsDir = logsDir + "/"; } try { JobConf conf = new JobConf(); Path outputPath = new Path(outputDirName); FileSystem fs = outputPath.getFileSystem(conf); // First check if the user wants to clean if (options.isCleanOutputDir()) { if (fs.exists(outputPath)) { fs.delete(outputPath, true); } } // See if the user isn't starting from scratch then set up the // output directory and create an initial urls subdir. if (!fs.exists(outputPath)) { fs.mkdirs(outputPath); // Create a "0-<timestamp>" sub-directory with just a /crawldb subdir // In the /crawldb dir the input file will have a single URL for the target domain. 
Path curLoopDir = CrawlDirUtils.makeLoopDir(fs, outputPath, 0); String curLoopDirName = curLoopDir.getName(); setLoopLoggerFile(logsDir + curLoopDirName, 0); Path crawlDbPath = new Path(curLoopDir, CrawlConfig.CRAWLDB_SUBDIR_NAME); if (domain != null) { importOneDomain(domain, crawlDbPath, conf); } else { importUrls(urlsFile, crawlDbPath); } } Path latestDirPath = CrawlDirUtils.findLatestLoopDir(fs, outputPath); if (latestDirPath == null) { System.err.println("No previous cycle output dirs exist in " + outputDirName); printUsageAndExit(parser); } Path crawlDbPath = new Path(latestDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME); // Set up the start and end loop counts. int startLoop = CrawlDirUtils.extractLoopNumber(latestDirPath); int endLoop = startLoop + options.getNumLoops(); // Set up the UserAgent for the fetcher. UserAgent userAgent = new UserAgent(options.getAgentName(), CrawlConfig.EMAIL_ADDRESS, CrawlConfig.WEB_ADDRESS); // You also get to customize the FetcherPolicy FetcherPolicy defaultPolicy; if (options.getCrawlDuration() != 0) { defaultPolicy = new AdaptiveFetcherPolicy(options.getEndCrawlTime(), options.getCrawlDelay()); } else { defaultPolicy = new FetcherPolicy(); } defaultPolicy.setMaxContentSize(CrawlConfig.MAX_CONTENT_SIZE); defaultPolicy.setRequestTimeout(10L * 1000L);//10 seconds // COMPLETE for crawling a single site, EFFICIENT for many sites if (options.getCrawlPolicy().equals(Options.IMPOLITE_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.IMPOLITE); } else if (options.getCrawlPolicy().equals(Options.EFFICIENT_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.EFFICIENT); } else if (options.getCrawlPolicy().equals(Options.COMPLETE_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.COMPLETE); } // It is a good idea to set up a crawl duration when running long crawls as you may // end up in situations where the fetch slows down due to a 'long tail' and by // specifying a crawl 
duration you know exactly when the crawl will end. int crawlDurationInMinutes = options.getCrawlDuration(); boolean hasEndTime = crawlDurationInMinutes != Options.NO_CRAWL_DURATION; long targetEndTime = hasEndTime ? System.currentTimeMillis() + (crawlDurationInMinutes * CrawlConfig.MILLISECONDS_PER_MINUTE) : FetcherPolicy.NO_CRAWL_END_TIME; // By setting up a url filter we only deal with urls that we want to // instead of all the urls that we extract. BaseUrlFilter urlFilter = null; List<String> patterns = null; String regexUrlFiltersFile = options.getRegexUrlFiltersFile(); if (regexUrlFiltersFile != null) { patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlFiltersFile); } else { patterns = RegexUrlDatumFilter.getDefaultUrlFilterPatterns(); if (domain != null) { String domainPatterStr = "+(?i)^(http|https)://([a-z0-9]*\\.)*" + domain; patterns.add(domainPatterStr); } else { String protocolPatterStr = "+(?i)^(http|https)://*"; patterns.add(protocolPatterStr); //Log.warn("Defaulting to basic url regex filtering (just suffix and protocol"); } } urlFilter = new RegexUrlDatumFilter(patterns.toArray(new String[patterns.size()])); // get a list of patterns which tell the miner which URLs to include or exclude. patterns.clear(); RegexUrlStringFilter urlsToMineFilter = null; String regexUrlsToMineFiltersFile = options.getRegexUrlToMineFile(); AnalyzeHtml analyzer = null; if (regexUrlsToMineFiltersFile != null) { patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlsToMineFiltersFile); urlsToMineFilter = new RegexUrlStringFilter(patterns.toArray(new String[patterns.size()])); analyzer = new AnalyzeHtml(urlsToMineFilter); } // OK, now we're ready to start looping, since we've got our current // settings for (int curLoop = startLoop + 1; curLoop <= endLoop; curLoop++) { // Adjust target end time, if appropriate. 
if (hasEndTime) { int remainingLoops = (endLoop - curLoop) + 1; long now = System.currentTimeMillis(); long perLoopTime = (targetEndTime - now) / remainingLoops; defaultPolicy.setCrawlEndTime(now + perLoopTime); } Path curLoopDirPath = CrawlDirUtils.makeLoopDir(fs, outputPath, curLoop); String curLoopDirName = curLoopDirPath.getName(); setLoopLoggerFile(logsDir + curLoopDirName, curLoop); Flow flow = PinterestCrawlAndMinerWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, analyzer, options); flow.complete(); // Writing out .dot files is a good way to verify your flows. flow.writeDOT("valid-flow.dot"); // Update crawlDbPath to point to the latest crawl db crawlDbPath = new Path(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME); } } catch (PlannerException e) { e.writeDOT("failed-flow.dot"); System.err.println("PlannerException: " + e.getMessage()); e.printStackTrace(System.err); System.exit(-1); } catch (Throwable t) { System.err.println("Exception running tool: " + t.getMessage()); t.printStackTrace(System.err); System.exit(-1); } }
From source file:com.finderbots.miner2.tomatoes.RTCriticsCrawlAndMinerTool.java
public static void main(String[] args) { Options options = new Options(); CmdLineParser parser = new CmdLineParser(options); try {// w w w . j a v a2 s . com parser.parseArgument(args); } catch (CmdLineException e) { System.err.println(e.getMessage()); printUsageAndExit(parser); } // Before we get too far along, see if the domain looks valid. String domain = options.getDomain(); String urlsFile = options.getUrlsFile(); if (domain != null) { validateDomain(domain, parser); } else { if (urlsFile == null) { System.err.println( "Either a target domain should be specified or a file with a list of urls needs to be provided"); printUsageAndExit(parser); } } if (domain != null && urlsFile != null) { System.out.println("Warning: Both domain and urls file list provided - using domain"); } String outputDirName = options.getOutputDir(); if (options.isDebugLogging()) { System.setProperty("bixo.root.level", "DEBUG"); } else { System.setProperty("bixo.root.level", "INFO"); } if (options.getLoggingAppender() != null) { // Set console vs. DRFA vs. something else System.setProperty("bixo.appender", options.getLoggingAppender()); } String logsDir = options.getLogsDir(); if (!logsDir.endsWith("/")) { logsDir = logsDir + "/"; } try { JobConf conf = new JobConf(); Path outputPath = new Path(outputDirName); FileSystem fs = outputPath.getFileSystem(conf); // First check if the user wants to clean if (options.isCleanOutputDir()) { if (fs.exists(outputPath)) { fs.delete(outputPath, true); } } // See if the user isn't starting from scratch then set up the // output directory and create an initial urls subdir. if (!fs.exists(outputPath)) { fs.mkdirs(outputPath); // Create a "0-<timestamp>" sub-directory with just a /crawldb subdir // In the /crawldb dir the input file will have a single URL for the target domain. 
Path curLoopDir = CrawlDirUtils.makeLoopDir(fs, outputPath, 0); String curLoopDirName = curLoopDir.getName(); setLoopLoggerFile(logsDir + curLoopDirName, 0); Path crawlDbPath = new Path(curLoopDir, CrawlConfig.CRAWLDB_SUBDIR_NAME); if (domain != null) { importOneDomain(domain, crawlDbPath, conf); } else { importUrls(urlsFile, crawlDbPath); } } Path latestDirPath = CrawlDirUtils.findLatestLoopDir(fs, outputPath); if (latestDirPath == null) { System.err.println("No previous cycle output dirs exist in " + outputDirName); printUsageAndExit(parser); } Path crawlDbPath = new Path(latestDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME); // Set up the start and end loop counts. int startLoop = CrawlDirUtils.extractLoopNumber(latestDirPath); int endLoop = startLoop + options.getNumLoops(); // Set up the UserAgent for the fetcher. UserAgent userAgent = new UserAgent(options.getAgentName(), CrawlConfig.EMAIL_ADDRESS, CrawlConfig.WEB_ADDRESS); // You also get to customize the FetcherPolicy FetcherPolicy defaultPolicy; if (options.getCrawlDuration() != 0) { defaultPolicy = new AdaptiveFetcherPolicy(options.getEndCrawlTime(), options.getCrawlDelay()); } else { defaultPolicy = new FetcherPolicy(); } defaultPolicy.setMaxContentSize(CrawlConfig.MAX_CONTENT_SIZE); defaultPolicy.setRequestTimeout(10L * 1000L);//10 seconds // COMPLETE for crawling a single site, EFFICIENT for many sites if (options.getCrawlPolicy().equals(Options.IMPOLITE_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.IMPOLITE); } else if (options.getCrawlPolicy().equals(Options.EFFICIENT_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.EFFICIENT); } else if (options.getCrawlPolicy().equals(Options.COMPLETE_CRAWL_POLICY)) { defaultPolicy.setFetcherMode(FetcherPolicy.FetcherMode.COMPLETE); } // It is a good idea to set up a crawl duration when running long crawls as you may // end up in situations where the fetch slows down due to a 'long tail' and by // specifying a crawl 
duration you know exactly when the crawl will end. int crawlDurationInMinutes = options.getCrawlDuration(); boolean hasEndTime = crawlDurationInMinutes != Options.NO_CRAWL_DURATION; long targetEndTime = hasEndTime ? System.currentTimeMillis() + (crawlDurationInMinutes * CrawlConfig.MILLISECONDS_PER_MINUTE) : FetcherPolicy.NO_CRAWL_END_TIME; // By setting up a url filter we only deal with urls that we want to // instead of all the urls that we extract. BaseUrlFilter urlFilter = null; List<String> patterns = null; String regexUrlFiltersFile = options.getRegexUrlFiltersFile(); if (regexUrlFiltersFile != null) { patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlFiltersFile); } else { patterns = RegexUrlDatumFilter.getDefaultUrlFilterPatterns(); if (domain != null) { String domainPatterStr = "+(?i)^(http|https)://([a-z0-9]*\\.)*" + domain; patterns.add(domainPatterStr); } else { String protocolPatterStr = "+(?i)^(http|https)://*"; patterns.add(protocolPatterStr); //Log.warn("Defaulting to basic url regex filtering (just suffix and protocol"); } } urlFilter = new RegexUrlDatumFilter(patterns.toArray(new String[patterns.size()])); // get a list of patterns which tell the miner which URLs to include or exclude. patterns.clear(); RegexUrlStringFilter urlsToMineFilter = null; String regexUrlsToMineFiltersFile = options.getRegexUrlToMineFile(); MineRTCriticsPreferences prefsAnalyzer = null; if (regexUrlsToMineFiltersFile != null) { patterns = RegexUrlDatumFilter.getUrlFilterPatterns(regexUrlsToMineFiltersFile); urlsToMineFilter = new RegexUrlStringFilter(patterns.toArray(new String[patterns.size()])); prefsAnalyzer = new MineRTCriticsPreferences(urlsToMineFilter); } // OK, now we're ready to start looping, since we've got our current // settings for (int curLoop = startLoop + 1; curLoop <= endLoop; curLoop++) { // Adjust target end time, if appropriate. 
if (hasEndTime) { int remainingLoops = (endLoop - curLoop) + 1; long now = System.currentTimeMillis(); long perLoopTime = (targetEndTime - now) / remainingLoops; defaultPolicy.setCrawlEndTime(now + perLoopTime); } Path curLoopDirPath = CrawlDirUtils.makeLoopDir(fs, outputPath, curLoop); String curLoopDirName = curLoopDirPath.getName(); setLoopLoggerFile(logsDir + curLoopDirName, curLoop); Flow flow = RTCriticsCrawlAndMinerWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, prefsAnalyzer, options); flow.complete(); // Writing out .dot files is a good way to verify your flows. flow.writeDOT("valid-flow.dot"); // Update crawlDbPath to point to the latest crawl db crawlDbPath = new Path(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME); } } catch (PlannerException e) { e.writeDOT("failed-flow.dot"); System.err.println("PlannerException: " + e.getMessage()); e.printStackTrace(System.err); System.exit(-1); } catch (Throwable t) { System.err.println("Exception running tool: " + t.getMessage()); t.printStackTrace(System.err); System.exit(-1); } }