// Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package Logic; import Main.Database; import static java.lang.System.setProperty; import java.util.List; import org.openqa.selenium.By; import static org.openqa.selenium.By.xpath; import org.openqa.selenium.ElementNotVisibleException; import org.openqa.selenium.JavascriptExecutor; import org.openqa.selenium.NoSuchElementException; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebDriverException; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; import org.openqa.selenium.Keys; /** * * @author jason * * this class utilizes Selenium to grabs job results from GlassDoor by * navigating through the webpages and grabbing job data by appending to * database and Excel. */ public class GlassdoorScraper implements Scraper { String glassDoorUrl = "https://www.glassdoor.com/index.htm"; List<WebElement> glassDoorLinksList; List<WebElement> glassDoorDatesList; WebDriver driver = null; WebDriverWait wait = null; String title, link, date; Database database = new Database(); String query, location; /** * * @param query - the desired search result * @param location - the city where the user is looking the city for. 
* * Use SELENIUM to comb through Glassdoor's website */ public GlassdoorScraper(String query, String location) { this.query = query; this.location = location; } public String getUrl() { return glassDoorUrl; } public void fetchJobs(String url) { try { setProperty("webdriver.chrome.driver", "C:\\Users\\jason\\Desktop\\Selenium Jars\\chromedriver.exe"); //setProperty("webdriver.chrome.driver", "C:\\Users\\jleung\\Desktop\\chromedriver.exe"); driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get(url); //waits at least 10 seconds for each element before timesout. wait = new WebDriverWait(driver, 60); //query search WebElement querySearch = wait .until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//input[@id='KeywordSearch']"))); querySearch.sendKeys(query); //location search WebElement locationSearch = wait.until( ExpectedConditions.visibilityOfElementLocated(By.xpath("//input[@id='LocationSearch']"))); locationSearch.clear(); locationSearch.sendKeys(location); locationSearch.sendKeys(Keys.ENTER); //submit button WebElement submitBtn = wait .until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//button[@type='submit']"))); submitBtn.click(); //calls upon helper method to grab links and place into list grabGlassDoorLinks(); //iterates to the next tabs of glassdoor jobs to grab their urls as well. iterateGlassDoor(); } catch (NoSuchElementException ex) { return; } } //grabs all glassdoor urls, and adds it to the linkedList. 
private void grabGlassDoorLinks() { wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//li[@class='jobListing']//div//a"))); glassDoorLinksList = driver.findElements(xpath("//li[@class='jobListing']//div//span//a")); System.out.println("Size of links list is " + glassDoorLinksList.size()); glassDoorDatesList = driver.findElements(By.xpath("//span[@class='minor nowrap']")); System.out.println("Size of Dates List is " + glassDoorDatesList.size()); for (int i = 0; i < glassDoorLinksList.size(); i++) { title = glassDoorLinksList.get(i).getText(); link = glassDoorLinksList.get(i).getAttribute("href"); date = glassDoorDatesList.get(i).getText(); database.addToDataBase(title, link, date, "glassDoorJobs"); } } //helper method to go through the tabs starting with the second tab private void iterateGlassDoor() { for (int i = 2; i < 50; i++) { try { ((JavascriptExecutor) driver).executeScript("scroll(0,2300)"); driver.findElement(xpath("//a[text()=" + i + "]")).click(); wait.until(ExpectedConditions .visibilityOfElementLocated(By.xpath("//li[@class='jobListing']//div//a"))); grabGlassDoorLinks(); } //handles the email popup. catch (WebDriverException ex) { try { driver.findElement(xpath("//button[@title='Close (Esc)']")).click(); wait.until(ExpectedConditions .visibilityOfElementLocated(By.xpath("//li[@class='jobListing']//div//a"))); grabGlassDoorLinks(); } catch (ElementNotVisibleException ex2) { return; } } } } }