List of usage examples for org.apache.hadoop.conf.Configuration.getLong
public long getLong(String name, long defaultValue)
Gets the value of the name property as a long.
From source file: gov.jgi.meta.exec.BlastCommand.java
License:Open Source License
/**
 * Builds a blast command from the values held in the supplied configuration.
 * <p/>
 * String settings (blast.commandline, blast.commandpath, formatdb.commandline,
 * formatdb.commandpath, blast.tmpdir) replace the current field value only when
 * the key is present. The remaining settings (blast.cleanup,
 * blast.effectivedatabasesize, blast.usescaledevalue, blast.useeffectivesize,
 * blast.useevalue) fall back to the defaults passed below.
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws IOException if executable can not be found
 */
public BlastCommand(Configuration config) throws IOException {
    log.info("initializing");
    log.info("initializing new blast command");

    // Each string override is applied only when the key is actually set.
    final String blastLine = config.get("blast.commandline");
    if (blastLine != null) {
        commandLine = blastLine;
    }
    final String blastPath = config.get("blast.commandpath");
    if (blastPath != null) {
        commandPath = blastPath;
    }
    final String formatdbLine = config.get("formatdb.commandline");
    if (formatdbLine != null) {
        formatdbCommandLine = formatdbLine;
    }
    final String formatdbPath = config.get("formatdb.commandpath");
    if (formatdbPath != null) {
        formatdbCommandPath = formatdbPath;
    }
    final String tmpOverride = config.get("blast.tmpdir");
    if (tmpOverride != null) {
        tmpDir = tmpOverride;
    }

    // Scalar settings with explicit defaults.
    docleanup = config.getBoolean("blast.cleanup", true);
    effectiveSize = config.getLong("blast.effectivedatabasesize", 0);
    useScaledEValue = config.getBoolean("blast.usescaledevalue", false);
    useEffectiveSize = config.getBoolean("blast.useeffectivesize", false);
    useEValue = config.getFloat("blast.useevalue", 10F);

    // Sanity check: every configured path must exist before continuing.
    checkFileExists(commandLine);
    checkFileExists(commandPath);
    checkDirExists(tmpDir);

    // All good: create a working space inside tmpDir.
    tmpDirFile = MetaUtils.createTempDir("blast_", tmpDir);
    log.info("done initializing: tmp dir = " + tmpDirFile);
}
From source file:gov.nasa.jpl.memex.nutch.protocol.selenium.handlers.login.LoginHandler26.java
License:Apache License
public void processDriver(WebDriver driver) { try {// w w w.j av a 2 s. c om String accumulatedData = ""; driver.findElement(By.tagName("body")).getAttribute("innerHTML"); Configuration conf = NutchConfiguration.create(); new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); List<WebElement> atags = driver.findElements(By.tagName("a")); int numberofajaxlinks = atags.size(); for (int i = 0; i < numberofajaxlinks; i++) { if (atags.get(i).getAttribute("href") != null && atags.get(i).getAttribute("href").equals("javascript:void(null);")) { atags.get(i).click(); if (i == numberofajaxlinks - 1) { // append everything to the driver in the last round JavascriptExecutor jsx = (JavascriptExecutor) driver; jsx.executeScript( "document.body.innerHTML=document.body.innerHTML " + accumulatedData + ";"); continue; } accumulatedData += driver.findElement(By.tagName("body")).getAttribute("innerHTML"); // refreshing the handlers as the page was interacted with driver.navigate().refresh(); new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); atags = driver.findElements(By.tagName("a")); } } } catch (Exception e) { LOG.info(StringUtils.stringifyException(e)); } }
From source file:gov.nasa.jpl.memex.nutch.protocol.selenium.handlers.login.LoginHandler30.java
License:Apache License
public void processDriver(WebDriver driver) { try {//from w ww . ja v a2 s . co m if (currenturl.contains("http://forum.xencentral.com") || currenturl.contains("http://www.theoutdoorstrader.com") || currenturl.contains("http://xenforo.com") || currenturl.contains("http://worldwidesurvival.com")) { Configuration conf = NutchConfiguration.create(); new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); if (currenturl.contains("http://xenforo.com")) { WebElement visibleele = driver.findElement(By.xpath("//a[@href='login/']")); if (visibleele != null) visibleele.click(); else return; } WebElement passele = driver.findElement(By.xpath("//input[@type='password']")); WebElement userele = driver.findElement(By.xpath("//input[@name='login']")); WebElement submitele = driver.findElement(By.xpath("//input[@type='submit']")); if (passele != null && userele != null && submitele != null) { passele.sendKeys("Keys"); userele.sendKeys("Username"); submitele.click(); } else { System.out.println("fetch of " + currenturl + " failed with: Http code=403, url=" + currenturl); } driver.close(); } else { return; } } catch (Exception e) { LOG.info(StringUtils.stringifyException(e)); } }
From source file:gov.nasa.jpl.memex.nutch.protocol.selenium.handlers.login.LoginHandler35.java
License:Apache License
public void processDriver(WebDriver driver) { String cur = driver.getCurrentUrl(); if (cur.contains("/members/") || cur.contains("slickguns.com/alerts/")) { Configuration conf = NutchConfiguration.create(); new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); // Wait for the page to load try {// ww w. j av a2 s . co m WebElement uname = driver.findElement(By.cssSelector("input[type=\"text\"]")); // handling multiple text boxes not needed for these pages WebElement remember = driver.findElement(By.cssSelector("input[type=\"checkbox\"]")); WebElement pwd = driver.findElement(By.cssSelector("input[type=\"password\"]")); uname.sendKeys("team35csci572@outlook.com"); // login info for all 403s pwd.sendKeys("qwer1234"); remember.click(); // Check Remember Me option :) pwd.submit(); } catch (Exception e) { } driver.navigate().refresh(); // new WebDriverWait(driver, 5)) // wait for click and any ending client process to compete execution (we are being generous here) } driver.close(); }
From source file:gov.nasa.jpl.memex.nutch.protocol.selenium.handlers.MultiplePatterns.CS572LoginAndClickHandler.java
License:Apache License
/**
 * Locates the username and password inputs by element id and the submit
 * control by XPath, then hands all three to {@code loginActionScript}.
 *
 * @param conf           configuration supplying libselenium.page.load.delay
 * @param driver         the browser session to operate on
 * @param usernameString id of the username input
 * @param passwordString id of the password input
 * @param XPathString    XPath of the submit control
 * @param isEmail        passed through unchanged to {@code loginActionScript}
 */
private void loginScriptIDIDXPath(Configuration conf, WebDriver driver, String usernameString,
        String passwordString, String XPathString, boolean isEmail) {
    long pageLoadDelay = conf.getLong("libselenium.page.load.delay", 3);
    new WebDriverWait(driver, pageLoadDelay);

    // Arguments are evaluated left to right, so the lookup order is username, password, submit.
    loginActionScript(driver,
            driver.findElement(By.id(usernameString)),
            driver.findElement(By.id(passwordString)),
            driver.findElement(By.xpath(XPathString)),
            isEmail);
}
From source file:gov.nasa.jpl.memex.nutch.protocol.selenium.handlers.MultiplePatterns.CS572LoginAndClickHandler.java
License:Apache License
/**
 * Locates the username input, password input and submit control by element
 * id, then hands all three to {@code loginActionScript}.
 *
 * @param conf           configuration supplying libselenium.page.load.delay
 * @param driver         the browser session to operate on
 * @param usernameString id of the username input
 * @param passwordString id of the password input
 * @param submitString   id of the submit control
 * @param isEmail        passed through unchanged to {@code loginActionScript}
 */
private void loginScriptIDIDID(Configuration conf, WebDriver driver, String usernameString,
        String passwordString, String submitString, boolean isEmail) {
    long pageLoadDelay = conf.getLong("libselenium.page.load.delay", 3);
    new WebDriverWait(driver, pageLoadDelay);

    By userLocator = By.id(usernameString);
    By passwordLocator = By.id(passwordString);
    By submitLocator = By.id(submitString);

    WebElement userField = driver.findElement(userLocator);
    WebElement passwordField = driver.findElement(passwordLocator);
    WebElement submitControl = driver.findElement(submitLocator);

    loginActionScript(driver, userField, passwordField, submitControl, isEmail);
}
From source file:gov.nasa.jpl.memex.nutch.protocol.selenium.handlers.MultiplePatterns.CS572LoginAndClickHandler.java
License:Apache License
/**
 * Locates the username and password inputs by their name attribute and the
 * submit control by XPath, then hands all three to {@code loginActionScript}.
 *
 * @param conf           configuration supplying libselenium.page.load.delay
 * @param driver         the browser session to operate on
 * @param usernameString name attribute of the username input
 * @param passwordString name attribute of the password input
 * @param XPathString    XPath of the submit control
 * @param isEmail        passed through unchanged to {@code loginActionScript}
 */
private void loginScriptNameNameXPath(Configuration conf, WebDriver driver, String usernameString,
        String passwordString, String XPathString, boolean isEmail) {
    final long pageLoadDelay = conf.getLong("libselenium.page.load.delay", 3);
    new WebDriverWait(driver, pageLoadDelay);

    final WebElement userField = driver.findElement(By.name(usernameString));
    final WebElement passwordField = driver.findElement(By.name(passwordString));
    final WebElement submitControl = driver.findElement(By.xpath(XPathString));

    loginActionScript(driver, userField, passwordField, submitControl, isEmail);
}
From source file:gov.nasa.jpl.memex.nutch.protocol.selenium.handlers.MultiplePatterns.CS572LoginAndClickHandler.java
License:Apache License
private void clickAllAjaxLinks(Configuration conf, WebDriver driver) { int count = 0; new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); List<WebElement> atags = driver.findElements(By.tagName("a")); int numberofajaxlinks = atags.size(); for (int i = 0; i < numberofajaxlinks; i++) { if ((atags.get(i).getAttribute("href") != null) && (atags.get(i).getAttribute("href").contains("javascript:void") || atags.get(i).getAttribute("href").equals("javascript:;") || (atags.get(i).getAttribute("onclick") != null && atags.get(i).getAttribute("onclick").contains("return false"))) ) {//from w w w .j av a 2s . c o m // System.out.println(atags.get(i).getAttribute("onClick")); try { atags.get(i).click(); try { Thread.sleep(1000); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } count++; recordHtmlBodyWithDynamicChange(driver); } catch (Exception e) { LOG.info(StringUtils.stringifyException(e)); continue; } } if ((i == numberofajaxlinks - 1) || (count > 18)) { //Don't over-click JavascriptExecutor jsx = (JavascriptExecutor) driver; Iterator<String> itr = accumulatedDataArrayList.iterator(); while (itr.hasNext()) { String accumulatedData = itr.next(); try { jsx.executeScript("document.body.innerHTML = document.body.innerHTML + " + "'" + accumulatedData + "'" + ";"); } catch (Exception ee) { System.out.println(ee); } } accumulatedDataArrayList.clear(); break; } if (refreshFlag) { refreshFlag = false; // refresh page to refresh handler to the un-touched state driver.navigate().refresh(); new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); atags = driver.findElements(By.tagName("a")); } } // System.exit(0); }
From source file:gov.nasa.jpl.memex.nutch.protocol.selenium.handlers.PageNavigation.PageNavigationUK2.java
License:Apache License
public void processDriver(WebDriver driver) { try {// w ww . ja v a 2 s .c o m String accumulatedData = driver.findElement(By.tagName("body")).getAttribute("innerHTML"); Configuration conf = NutchConfiguration.create(); new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); //handle ajax content List<WebElement> atags = driver.findElements(By.tagName("a")); int numberofajaxlinks = atags.size(); for (int i = 0; i < numberofajaxlinks; i++) { if (atags.get(i).getAttribute("href") != null && atags.get(i).getAttribute("href").equals("javascript:void(null);")) { atags.get(i).click(); if (i == numberofajaxlinks - 1) { //append everything to the driver in the last round JavascriptExecutor jsx = (JavascriptExecutor) driver; jsx.executeScript( "document.body.innerHTML=document.body.innerHTML " + accumulatedData + ";"); continue; } accumulatedData += driver.findElement(By.tagName("body")).getAttribute("innerHTML"); //refreshing the handlers as the page was interacted with driver.navigate().refresh(); new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); atags = driver.findElements(By.tagName("a")); } } //handle content behind a form List<WebElement> inputTags = driver.findElements(By.tagName("input")); int numberOfInput = inputTags.size(); for (int i = 0; i < numberOfInput; i++) { if (inputTags.get(i).getAttribute("type") != null && inputTags.get(i).getAttribute("type").equals("text")) { //take the first text input as the search input inputTags.get(i).sendKeys("weapon"); inputTags.get(i).submit(); accumulatedData += driver.findElement(By.tagName("body")).getAttribute("innerHTML"); JavascriptExecutor jsxInput = (JavascriptExecutor) driver; //append the data to the driver jsxInput.executeScript( "document.body.innerHTML=document.body.innerHTML " + accumulatedData + ";"); break; } } } catch (Exception e) { LOG.info(StringUtils.stringifyException(e)); } }
From source file:gov.nasa.jpl.memex.nutch.protocol.selenium.handlers.Pagination.PaginationHandler9.java
License:Apache License
public void processDriver(WebDriver driver) { List<String> accumulatedData = new ArrayList<>(); //used to keep image tags from each page after click boolean paginationFound = false; boolean nextPageFound = false; System.err//from w w w .ja v a 2 s .co m .println("DallasGunsPaginationHandler: Entered default dallasguns.com/guns_online page!!!!!!!!!,"); String startPage = driver.getCurrentUrl(); driver.findElement(By.tagName("body")).getAttribute("innerHTML"); Configuration conf = NutchConfiguration.create(); try { long end = System.currentTimeMillis() + 5000; while (System.currentTimeMillis() < end) { //Check if this page has pagination element, if so, need to process List<WebElement> testDivs = null; testDivs = driver.findElements(new By.ById("pagination")); if (testDivs != null && testDivs.size() > 0) { WebElement paginationTest = testDivs.get(0); if (paginationTest != null) { paginationFound = true; System.err.println("DallasGunsPaginationHandler: " + driver.getCurrentUrl() + " found pagination!!!!!!!!!!!!!!!!"); break; } } } WebElement nextPage = null; List<WebElement> paginationDivs = null; List<WebElement> links = null; WebElement span = null; if (paginationFound) { paginationDivs = driver.findElements(new By.ById("pagination")); if (paginationDivs.size() > 0) { span = paginationDivs.get(0).findElement(new By.ByTagName("span")); if (span != null) { // System.err.println("DallasGunsPaginationHandler: found span!!!!!"); links = span.findElements(new By.ByTagName("a")); if (links != null && links.size() > 0) { System.err.println("DallasGunsPaginationHandler: found pagination links!!!!!"); nextPage = links.get(links.size() - 1); if (nextPage.getAttribute("title").equals("Next Page")) { System.err.println("DallasGunsPaginationHandler: found Next Page link at " + driver.getCurrentUrl() + "!!!!!!"); nextPageFound = true; } } } } } while (nextPageFound) { nextPage.click(); new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); 
System.err.println("DallasGunsPaginationHandler: Arrived at new URL: " + driver.getCurrentUrl() + " from click!!!!!!!"); List<WebElement> imgs = driver.findElements(new By.ByTagName("img")); if (imgs != null && imgs.size() > 0) { System.err.println("DallasGunsPaginationHandler: Found new Images at " + driver.getCurrentUrl() + "!!!!!!!"); for (int j = 0; j < imgs.size(); j++) { String newImgSrc = imgs.get(j).getAttribute("src"); //String newImageTag = "<a href='"+newImgSrc+"'> another gun img </a>"; accumulatedData.add(newImgSrc); //accumulatedData+=newImageTag; System.err.println("DallasGunsPaginationHandler: updated accumulatedData with + " + newImgSrc + "!!!!!!!!!"); } } paginationDivs = driver.findElements(new By.ById("pagination")); if (paginationDivs.size() > 0) { span = paginationDivs.get(0).findElement(new By.ByTagName("span")); if (span != null) { // System.err.println("DallasGunsPaginationHandler: found span!!!!!"); links = span.findElements(new By.ByTagName("a")); if (links != null && links.size() > 0) { System.err.println("DallasGunsPaginationHandler: found pagination links!!!!!"); nextPage = links.get(links.size() - 1); if (nextPage.getAttribute("title").equals("Next Page")) { System.err.println("DallasGunsPaginationHandler: found Next Page link at " + driver.getCurrentUrl() + "!!!!!!"); nextPageFound = true; } else { nextPageFound = false; } } else { nextPageFound = false; } } else { nextPageFound = false; } } else { nextPageFound = false; } } if (accumulatedData.size() > 0) { //append images data to driver so that it can be processed by parser //navigating back to original page driver.get(startPage); System.err.println("DallasGunsPaginationHandler: navigated back to start page " + driver.getCurrentUrl() + "!!!!!!"); new WebDriverWait(driver, conf.getLong("libselenium.page.load.delay", 3)); System.err.println("DallasGunsPaginationHandler: appending new data!!!!!!!!"); JavascriptExecutor jsx = (JavascriptExecutor) driver; for (String src : 
accumulatedData) { jsx.executeScript("var aTag = document.createElement('a'); aTag.setAttribute('href',\"" + src + "\"); aTag.innerText = \"gun image\"; document.body.appendChild(aTag);"); //jsx.executeScript("aTag.setAttribute('href','"+src+"');"); //jsx.executeScript("aTag.innerText = 'gun image');"); //jsx.executeScript("document.body.appendChild(aTag);"); } //jsx.executeScript("document.body.innerHTML+= "+ accumulatedData+";"); } System.err.println("DallasGunsIndexHandler: Finished Pagination Handler at " + driver.getCurrentUrl() + "!!!!!!!!!!!!"); } catch (Exception e) { System.err.println("ERROR: DallasGunsIndexHandler @" + driver.getCurrentUrl() + e.getMessage()); } }