Logic.GlassdoorScraper.java Source code

Java tutorial

Introduction

Here is the source code for Logic.GlassdoorScraper.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package Logic;

import Main.Database;
import static java.lang.System.setProperty;
import java.util.List;
import org.openqa.selenium.By;
import static org.openqa.selenium.By.xpath;
import org.openqa.selenium.ElementNotVisibleException;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.NoSuchElementException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebDriverException;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.openqa.selenium.Keys;

/**
 * Scrapes job postings from Glassdoor with Selenium.
 *
 * The scraper opens Chrome, submits a keyword/location search on the Glassdoor
 * landing page, walks through the numbered result pages and hands the title,
 * link and date of every listing it finds to {@link Database}.
 *
 * @author jason
 */
public class GlassdoorScraper implements Scraper {

    /** Name of the system property Selenium reads to locate the chromedriver binary. */
    private static final String CHROME_DRIVER_PROPERTY = "webdriver.chrome.driver";
    /** Fallback chromedriver location, used only when the property is not already set. */
    private static final String DEFAULT_CHROME_DRIVER_PATH
            = "C:\\Users\\jason\\Desktop\\Selenium Jars\\chromedriver.exe";
    /** Upper bound, in seconds, that an explicit wait blocks before timing out. */
    private static final long WAIT_TIMEOUT_SECONDS = 60;
    /** First pagination link to click; the first page is scraped right after the search. */
    private static final int FIRST_FOLLOW_UP_PAGE = 2;
    /** Exclusive upper bound on the page numbers that are tried. */
    private static final int PAGE_LIMIT = 50;
    /** Locator used to detect that a result page has rendered its listings. */
    private static final String LISTING_READY_XPATH = "//li[@class='jobListing']//div//a";
    /** Locator of the anchors that carry each listing's title and href. */
    private static final String LISTING_LINK_XPATH = "//li[@class='jobListing']//div//span//a";
    /** Locator of the spans that carry each listing's date text. */
    private static final String LISTING_DATE_XPATH = "//span[@class='minor nowrap']";
    /** Locator of the close button of the popup that can cover the result list. */
    private static final String POPUP_CLOSE_XPATH = "//button[@title='Close (Esc)']";
    /** Table name passed to the database for every stored listing. */
    private static final String TABLE_NAME = "glassDoorJobs";

    String glassDoorUrl = "https://www.glassdoor.com/index.htm";
    List<WebElement> glassDoorLinksList;
    List<WebElement> glassDoorDatesList;
    WebDriver driver = null;
    WebDriverWait wait = null;
    String title, link, date;
    Database database = new Database();
    String query, location;

    /**
     * Creates a scraper for one search.
     *
     * @param query the text typed into the keyword search box
     * @param location the text typed into the location search box
     */
    public GlassdoorScraper(String query, String location) {
        this.query = query;
        this.location = location;
    }

    /**
     * @return the Glassdoor landing page URL held by this scraper
     */
    public String getUrl() {
        return glassDoorUrl;
    }

    /**
     * Runs the search on the given page and stores every listing found on the
     * first result page and on the following numbered pages.
     *
     * The browser is always shut down before this method returns, whether the
     * scrape finished, ran out of pages or failed with an exception.
     *
     * @param url the page to open; it must expose the KeywordSearch and
     *            LocationSearch inputs and a submit button
     */
    public void fetchJobs(String url) {
        try {
            // Respect a chromedriver location configured by the caller (e.g. -D flag);
            // fall back to the historical hard-coded path only when none is set.
            if (System.getProperty(CHROME_DRIVER_PROPERTY) == null) {
                setProperty(CHROME_DRIVER_PROPERTY, DEFAULT_CHROME_DRIVER_PATH);
            }
            driver = new ChromeDriver();
            driver.manage().window().maximize();
            driver.get(url);

            // Each explicit wait below blocks for up to WAIT_TIMEOUT_SECONDS.
            wait = new WebDriverWait(driver, WAIT_TIMEOUT_SECONDS);

            // Keyword box.
            WebElement querySearch = wait
                    .until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//input[@id='KeywordSearch']")));
            querySearch.sendKeys(query);

            // Location box: clear it first so only the requested location is submitted.
            WebElement locationSearch = wait.until(
                    ExpectedConditions.visibilityOfElementLocated(By.xpath("//input[@id='LocationSearch']")));
            locationSearch.clear();
            locationSearch.sendKeys(location);
            locationSearch.sendKeys(Keys.ENTER);

            // Submit button.
            WebElement submitBtn = wait
                    .until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//button[@type='submit']")));
            submitBtn.click();

            // First result page, then the numbered follow-up pages.
            grabGlassDoorLinks();
            iterateGlassDoor();
        } catch (NoSuchElementException ex) {
            // A lookup found nothing to act on; whatever was scraped so far is
            // already stored, so simply stop.
        } finally {
            // Always release the browser and the chromedriver process.
            if (driver != null) {
                driver.quit();
                driver = null;
                wait = null;
            }
        }
    }

    /**
     * Stores every listing of the currently displayed result page in the database.
     *
     * Titles and links come from the listing anchors, dates from the date spans;
     * the two lists are paired by position.
     */
    private void grabGlassDoorLinks() {
        wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath(LISTING_READY_XPATH)));
        glassDoorLinksList = driver.findElements(xpath(LISTING_LINK_XPATH));
        System.out.println("Size of links list is " + glassDoorLinksList.size());
        glassDoorDatesList = driver.findElements(By.xpath(LISTING_DATE_XPATH));
        System.out.println("Size of Dates List is " + glassDoorDatesList.size());

        int dateCount = glassDoorDatesList.size();
        for (int i = 0; i < glassDoorLinksList.size(); i++) {
            WebElement anchor = glassDoorLinksList.get(i);
            title = anchor.getText();
            link = anchor.getAttribute("href");
            // The page may render fewer date spans than links; keep the listing
            // with an empty date instead of failing with an index error.
            date = i < dateCount ? glassDoorDatesList.get(i).getText() : "";
            database.addToDataBase(title, link, date, TABLE_NAME);
        }
    }

    /**
     * Clicks through the numbered pagination links, starting with page 2, and
     * scrapes each page that loads.
     *
     * Iteration stops once neither the next page link nor the popup close
     * button can be used, or when the page limit is reached.
     */
    private void iterateGlassDoor() {
        for (int page = FIRST_FOLLOW_UP_PAGE; page < PAGE_LIMIT; page++) {
            try {
                // Scroll down so the pagination bar is in view before clicking.
                ((JavascriptExecutor) driver).executeScript("scroll(0,2300)");
                driver.findElement(xpath("//a[text()=" + page + "]")).click();
                wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath(LISTING_READY_XPATH)));
                grabGlassDoorLinks();
            } catch (WebDriverException ex) {
                // Either the email popup got in the way or there is no such page.
                // Try to dismiss the popup and scrape whatever page is showing.
                // NOTE(review): the failed page click is not retried after the
                // popup is closed - confirm whether that is intended.
                try {
                    driver.findElement(xpath(POPUP_CLOSE_XPATH)).click();
                    wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath(LISTING_READY_XPATH)));
                    grabGlassDoorLinks();
                } catch (NoSuchElementException ex2) {
                    // No popup to close, so the failure was not caused by it:
                    // there are no further pages to visit.
                    return;
                } catch (ElementNotVisibleException ex2) {
                    return;
                }
            }
        }
    }
}