net.parser.JobParser.java Source code

Java tutorial

Introduction

Here is the source code for net.parser.JobParser.java

Source

package net.parser;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.domain.Category;
import net.domain.County;
import net.domain.Employer;
import net.domain.Job;
import net.domain.JobType;
import net.domain.Qualification;
import net.domain.Skill;
import net.enums.CategoryEnum;
import net.enums.CountyEnum;
import net.enums.JobTypeEnum;
import net.enums.QualificationEnum;
import net.enums.SkillEnum;

import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;

@Service
public class JobParser {

    private Logger log = Logger.getLogger(JobParser.class);

    private String html;
    private Document doc;

    public String getTitle() {

        return doc.select("#page-title h1 span").text();
    }

    public Set<County> getCounties() {

        String jobIds = doc.select("#job-position .details dd").eq(1).html();

        County county = null;
        if (jobIds == null || jobIds.equals("")) {
            return null;
        } else {
            Set<County> counties = new HashSet<>();
            String[] a = jobIds.split("<br />");
            for (int i = 0; i < a.length; i++) {
                county = new County();
                county.setId(CountyEnum.getId(a[i].trim()));
                counties.add(county);
            }
            return counties;
        }
    }

    public Set<Category> getCategories() {

        Elements elements = doc.select("#job-position .details dd a");

        Category category = null;
        if (elements.size() == 0) {
            return null;
        } else {
            Set<Category> categories = new HashSet<>();
            for (Element element : elements) {
                category = new Category();
                category.setId(CategoryEnum.getId(element.text()));
                categories.add(category);
            }
            return categories;
        }
    }

    public Set<JobType> getJobTypes() {

        String jobTypesStr = getPropertyString("Vrsta zaposlenja:", "#job-benefits dt");

        JobType jobType = null;
        if (jobTypesStr == null || jobTypesStr.equals("")) {
            return null;
        } else {
            Set<JobType> jobTypes = new HashSet<>();
            String[] jt = jobTypesStr.split(",");
            for (int i = 0; i < jt.length; i++) {
                jobType = new JobType();
                jobType.setId(JobTypeEnum.getId(jt[i].trim()));
                jobTypes.add(jobType);
            }
            return jobTypes;
        }
    }

    public Set<Qualification> getQualification() {

        String qual = getPropertyString("Stru?na sprema:", "#job-expect dt");

        Qualification qualification = null;
        if (qual == null || qual.equals("")) {
            return null;
        } else {
            Set<Qualification> qualifications = new HashSet<>();
            String[] qualArray = qual.split(",");
            for (int i = 0; i < qualArray.length; i++) {
                qualification = new Qualification();
                qualification.setId(QualificationEnum.getId(qualArray[i].trim()));
                qualifications.add(qualification);
            }
            return qualifications;
        }
    }

    public Date getApplicationDeadline() {

        String appDeadline = doc.select("#page-title .font14 strong").text();
        Date date = null;
        try {
            date = new SimpleDateFormat("dd.MM.yyyy.").parse(appDeadline);
        } catch (ParseException e) {
            log.error("ParseException. Method: getApplicationDeadline: " + e.toString());
            e.printStackTrace();
        }

        return date;
    }

    public boolean checkCloseBeforeDeadline() {

        return true;
    }

    public String getDescription() {

        String desc = doc.select("#job-description p").html();
        desc = desc.replace("<br />", "\n");

        return desc;
    }

    public String getConditions() {

        String cond = doc.select("#job-expect p").html();
        cond = cond.replace("<br />", "\n");

        return cond;
    }

    public Set<Skill> getTechSkills() {

        String cond = getConditions();
        Set<Skill> techSkills = new HashSet<>();

        if (cond == null) {
            return null;
        } else {
            cond = cond.toLowerCase();
        }

        Skill skill = null;
        String skillName = "";
        for (SkillEnum se : SkillEnum.values()) {
            skillName = se.getName().toLowerCase();
            if (cond.contains(skillName)) {
                if (skillName.equals("java") || skillName.equals("ember")) {
                    Pattern pattern = Pattern.compile("([^a-zA-Z]|[ ]|^)" + skillName + "([^a-zA-Z]|[ ]|$|\\r\\n)");
                    Matcher matcher = pattern.matcher(cond);

                    if (matcher.find()) {
                        skill = new Skill();
                        skill.setId(se.getId());
                        techSkills.add(skill);
                    }
                } else {
                    skill = new Skill();
                    skill.setId(se.getId());
                    techSkills.add(skill);
                }
            }
        }
        if (techSkills.size() == 0) {
            return null;
        }

        return techSkills;
    }

    public String getYearsOfExperience() {

        String years = getPropertyString("Potrebne godine iskustva:", "#job-expect dt");
        return years;
    }

    public String getLanguages() {

        String languages = getPropertyString("Jezici:", "#job-expect dt");
        return languages;
    }

    public String getDrivingLicense() {

        String drivingLicence = getPropertyString("Voza?ka dozvola:", "#job-expect dt");
        return drivingLicence;
    }

    public String getSkills() {

        String skills = getPropertyString("Vjetine:", "#job-expect dt");
        return skills;
    }

    private String getPropertyString(String helpStr, String cssQuery) {

        Elements elements = doc.select(cssQuery);
        String propertyString = null;

        for (Element element : elements) {
            if (element.getElementsByTag("strong").text().equals(helpStr)) {
                propertyString = element.nextElementSibling().text();
                break;
            }
        }

        return propertyString;
    }

    public String getEmployeeOffer() {

        String offer = doc.select("#job-benefits p").html();

        if (offer.equals("") || offer == null) {
            return null;
        }

        offer = offer.replace("<br />", "\n");
        return offer;
    }

    // gets job id and job link, employer id 
    public List<Job> getJobIdAndLink(List<Job> jobs) {
        Job job = null;
        Employer employer = null;
        Elements elements = doc.select(".searchlist").eq(0).select(".job .details");

        for (Element jobElement : elements) {

            Elements aElements = jobElement.select("a");
            job = new Job();
            employer = new Employer();

            Element aJob = aElements.get(0);
            String linkJob = aJob.attr("href");
            linkJob = linkJob.replaceFirst("www", "m");
            job.setLink(linkJob);

            linkJob = linkJob.substring(linkJob.indexOf("/") + 1, linkJob.length());
            linkJob = linkJob.substring(linkJob.indexOf("/") + 1, linkJob.length());
            linkJob = linkJob.substring(linkJob.indexOf("/") + 1, linkJob.length());
            linkJob = linkJob.substring(linkJob.indexOf("/") + 1, linkJob.length());
            String id = linkJob.substring(0, linkJob.indexOf("/"));

            job.setId(Integer.parseInt(id));

            Element aEmpoyer = null;
            try {
                aEmpoyer = aElements.get(1);
                String linkEmployer = aEmpoyer.attr("href");
                employer.setLink(linkEmployer);

                linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length());
                linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length());
                linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length());
                linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length());
                id = linkEmployer.substring(0, linkEmployer.indexOf("/"));

                employer.setId(Integer.parseInt(id));
            } catch (IndexOutOfBoundsException e) {
                employer.setId(0);
            }

            employer.setName(getEmployerName(jobElement, employer.getId()));

            job.setEmployer(employer);
            jobs.add(job);
        }

        return jobs;
    }

    public List<Job> getFeaturedJobIdAndLink(List<Job> jobs) {

        Job job = null;
        Employer employer = null;
        Elements elements = doc.select("#featured-jobs li");

        for (Element jobElement : elements) {

            employer = new Employer();
            Elements empElements = jobElement.select(".logo");

            if (empElements.size() == 1) {
                employer.setName(empElements.get(0).attr("title"));
                employer.setId(0);
            } else {
                String linkEmployer = empElements.get(0).attr("href");
                employer.setLink(linkEmployer);

                linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length());
                linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length());
                linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length());
                linkEmployer = linkEmployer.substring(linkEmployer.indexOf("/") + 1, linkEmployer.length());
                String id = linkEmployer.substring(0, linkEmployer.indexOf("/"));

                employer.setId(Integer.parseInt(id));
                employer.setName(empElements.get(1).attr("title"));
            }

            Elements aElements = jobElement.select("span a");
            for (Element aJob : aElements) {
                job = new Job();
                String linkJob = aJob.attr("href");
                linkJob = linkJob.replaceFirst("www", "m");
                job.setLink(linkJob);

                String id = aJob.attr("data-id");
                job.setId(Integer.parseInt(id));

                job.setEmployer(employer);
                jobs.add(job);
            }
        }
        return jobs;
    }

    public boolean featuredJobsExist() {

        Elements elements = doc.select("#featured-jobs");

        try {
            elements.get(0);
            return true;
        } catch (IndexOutOfBoundsException e) {
            return false;
        }
    }

    public boolean regionalJobFirst() {

        Elements elements = doc.select(".searchlist").eq(0);
        for (Element element : elements) {
            if (element.select("h2 span").text().contains("Regionalni poslovi")) {
                return true;
            } else {
                return false;
            }
        }
        return false;
    }

    private String getEmployerName(Element jobElement, int id) {

        String name = null;
        if (id == 0) {
            name = jobElement.select("strong").eq(0).text();
        } else {
            name = jobElement.select(".employer").text();
        }

        return name;
    }

    public String getEmployerAddress() {

        Elements elements = doc.select("#employer-profile .details li");
        String address = null;

        for (Element element : elements) {
            if (element.text().contains("Adresa:")) {
                address = element.text().replace("Adresa: ", "");
            }
        }

        return address;
    }

    public boolean checkIfLastPage() {

        Elements elements1 = doc.select(".searchlist").eq(1);

        if (elements1.size() == 0) {
            Elements elements2 = doc.select(".searchlist").eq(0);
            for (Element element : elements2) {
                if (element.select("h2 span").text().contains("Regionalni poslovi")) {
                    return true;
                } else {
                    return false;
                }
            }
            return true;
        } else {
            return true;
        }
    }

    public String getHtml() {
        return html;
    }

    public void setHtml(String html) {
        this.html = html;
        doc = Jsoup.parse(this.html);
    }
}