Example usage for org.jsoup.nodes Element select

List of usage examples for org.jsoup.nodes Element select

Introduction

On this page you can find example usages of the select method of org.jsoup.nodes.Element.

Prototype

public Elements select(String cssQuery) 

Source Link

Documentation

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Parses an Untis substitution schedule table and adds the substitutions found in it to
 * <code>day</code>.
 * <p>
 * Two table layouts are handled:
 * <ul>
 * <li>If the "class in extra line" option is set in <code>data</code>, every
 * <code>td.inline_header</code> cell names a class and the rows following it (up to the next
 * inline header) are the substitutions of that class. Any exception raised while parsing such a
 * group is printed and the group is abandoned, so the remaining groups are still parsed.</li>
 * <li>Otherwise every data row is parsed on its own and the affected classes are derived from the
 * row's "class" column, falling back to <code>defaultClass</code>.</li>
 * </ul>
 * In both layouts a cell may be continued in the following row(s); such continuation rows are
 * merged into the cell text and are not parsed as substitutions of their own.
 *
 * @param table        the <code>table</code> Element from the HTML document
 * @param data         {@link SubstitutionScheduleData#getData()}
 * @param day          the {@link SubstitutionScheduleDay} where the substitutions will be stored
 * @param defaultClass the class that should be set if there is no class column in the table
 * @throws JSONException              if the column configuration in <code>data</code> is missing or has
 *                                    fewer entries than the table has columns (second layout only)
 * @throws CredentialInvalidException not raised directly here; declared for the helper methods
 *                                    invoked during parsing
 * @throws IllegalArgumentException   if the column configuration contains an unknown column type
 *                                    (second layout only)
 */
private void parseSubstitutionScheduleTable(Element table, JSONObject data, SubstitutionScheduleDay day,
        String defaultClass) throws JSONException, CredentialInvalidException {
    if (data.optBoolean(PARAM_CLASS_IN_EXTRA_LINE) || data.optBoolean("class_in_extra_line")) { // backwards compatibility
        for (Element element : table.select("td.inline_header")) {
            String className = getClassName(element.text(), data);
            if (isValidClass(className)) {
                try {
                    Element zeile = element.parent().nextElementSibling();
                    // select() never returns null, so test for an empty result instead: a row
                    // without any <td> cell directly below the class header is skipped.
                    if (zeile != null && zeile.select("td").isEmpty()) {
                        zeile = zeile.nextElementSibling();
                    }
                    int skipLines = 0;
                    while (zeile != null && !zeile.select("td").attr("class").equals("list inline_header")) {
                        if (skipLines > 0) {
                            // this row was already merged into the previous one as a continuation
                            skipLines--;
                            zeile = zeile.nextElementSibling();
                            continue;
                        }

                        Substitution v = new Substitution();

                        int i = 0;
                        for (Element spalte : zeile.select("td")) {
                            String text = spalte.text();
                            if (isEmpty(text)) {
                                i++;
                                continue;
                            }

                            // A following row that has the same number of cells and whose only
                            // text sits in this column is treated as a continuation of this cell.
                            int skipLinesForThisColumn = 0;
                            Element nextLine = zeile.nextElementSibling();
                            boolean continueSkippingLines = true;
                            while (continueSkippingLines) {
                                if (nextLine != null && nextLine.children().size() == zeile.children().size()) {
                                    Element columnInNextLine = nextLine.child(spalte.elementSiblingIndex());
                                    if (columnInNextLine.text().replaceAll("\u00A0", "").trim()
                                            .equals(nextLine.text().replaceAll("\u00A0", "").trim())) {
                                        // Continued in the next line
                                        text += " " + columnInNextLine.text();
                                        skipLinesForThisColumn++;
                                        nextLine = nextLine.nextElementSibling();
                                    } else {
                                        continueSkippingLines = false;
                                    }
                                } else {
                                    continueSkippingLines = false;
                                }
                            }
                            if (skipLinesForThisColumn > skipLines) {
                                skipLines = skipLinesForThisColumn;
                            }

                            String type = data.getJSONArray(PARAM_COLUMNS).getString(i);

                            switch (type) {
                            case "lesson":
                                v.setLesson(text);
                                break;
                            case "subject":
                                handleSubject(v, spalte);
                                break;
                            case "previousSubject":
                                v.setPreviousSubject(text);
                                break;
                            case "type":
                                v.setType(text);
                                v.setColor(colorProvider.getColor(text));
                                break;
                            case "type-entfall":
                                if (text.equals("x")) {
                                    v.setType("Entfall");
                                    v.setColor(colorProvider.getColor("Entfall"));
                                } else {
                                    v.setType("Vertretung");
                                    v.setColor(colorProvider.getColor("Vertretung"));
                                }
                                break;
                            case "room":
                                handleRoom(v, spalte);
                                break;
                            case "teacher":
                                handleTeacher(v, spalte, data);
                                break;
                            case "previousTeacher":
                                v.setPreviousTeachers(splitTeachers(text, data));
                                break;
                            case "desc":
                                v.setDesc(text);
                                break;
                            case "desc-type":
                                v.setDesc(text);
                                String recognizedType = recognizeType(text);
                                v.setType(recognizedType);
                                v.setColor(colorProvider.getColor(recognizedType));
                                break;
                            case "previousRoom":
                                v.setPreviousRoom(text);
                                break;
                            case "substitutionFrom":
                                v.setSubstitutionFrom(text);
                                break;
                            case "teacherTo":
                                v.setTeacherTo(text);
                                break;
                            case "ignore":
                                break;
                            case "date": // used by UntisSubstitutionParser
                                break;
                            default:
                                throw new IllegalArgumentException("Unknown column type: " + type);
                            }
                            i++;
                        }

                        autoDetectType(data, zeile, v);

                        v.getClasses().add(className);

                        // rows without a lesson are not substitutions
                        if (v.getLesson() != null && !v.getLesson().equals("")) {
                            day.addSubstitution(v);
                        }

                        zeile = zeile.nextElementSibling();
                    }
                } catch (Exception e) {
                    // Best effort: a broken class group must not abort parsing of the other
                    // groups. Only Exceptions are swallowed; JVM Errors propagate.
                    e.printStackTrace();
                }
            }
        }
    } else {
        // "type-entfall" only falls back to "Vertretung" when there is no dedicated type column
        boolean hasType = false;
        for (int i = 0; i < data.getJSONArray(PARAM_COLUMNS).length(); i++) {
            if (data.getJSONArray(PARAM_COLUMNS).getString(i).equals("type")) {
                hasType = true;
                break;
            }
        }

        // Compiled once instead of once per row.
        // Detect things like "7"
        Pattern singlePattern = Pattern.compile("(\\d+)");
        // Detect things like "5-12"
        Pattern rangePattern = Pattern.compile("(\\d+) ?- ?(\\d+)");
        // Extracts the leading number of a class name
        Pattern pattern2 = Pattern.compile("^(\\d+).*");

        int skipLines = 0;
        for (Element zeile : table.select("tr.list.odd:not(:has(td.inline_header)), "
                + "tr.list.even:not(:has(td.inline_header)), " + "tr:has(td[align=center]):gt(0)")) {
            if (skipLines > 0) {
                // this row was already merged into the previous one as a continuation
                skipLines--;
                continue;
            }

            Substitution v = new Substitution();
            String klassen = defaultClass != null ? defaultClass : "";
            int i = 0;
            for (Element spalte : zeile.select("td")) {
                String text = spalte.text();

                String type = data.getJSONArray(PARAM_COLUMNS).getString(i);
                // an empty "type-entfall" cell still carries information (see below)
                if (isEmpty(text) && !type.equals("type-entfall")) {
                    i++;
                    continue;
                }

                // A following row that has the same number of cells and whose only text sits
                // in this column is treated as a continuation of this cell.
                int skipLinesForThisColumn = 0;
                Element nextLine = zeile.nextElementSibling();
                boolean continueSkippingLines = true;
                while (continueSkippingLines) {
                    if (nextLine != null && nextLine.children().size() == zeile.children().size()) {
                        Element columnInNextLine = nextLine.child(spalte.elementSiblingIndex());
                        if (columnInNextLine.text().replaceAll("\u00A0", "").trim()
                                .equals(nextLine.text().replaceAll("\u00A0", "").trim())) {
                            // Continued in the next line
                            text += " " + columnInNextLine.text();
                            skipLinesForThisColumn++;
                            nextLine = nextLine.nextElementSibling();
                        } else {
                            continueSkippingLines = false;
                        }
                    } else {
                        continueSkippingLines = false;
                    }
                }
                if (skipLinesForThisColumn > skipLines) {
                    skipLines = skipLinesForThisColumn;
                }

                switch (type) {
                case "lesson":
                    v.setLesson(text);
                    break;
                case "subject":
                    handleSubject(v, spalte);
                    break;
                case "previousSubject":
                    v.setPreviousSubject(text);
                    break;
                case "type":
                    v.setType(text);
                    v.setColor(colorProvider.getColor(text));
                    break;
                case "type-entfall":
                    if (text.equals("x")) {
                        v.setType("Entfall");
                        v.setColor(colorProvider.getColor("Entfall"));
                    } else if (!hasType) {
                        v.setType("Vertretung");
                        v.setColor(colorProvider.getColor("Vertretung"));
                    }
                    break;
                case "room":
                    handleRoom(v, spalte);
                    break;
                case "previousRoom":
                    v.setPreviousRoom(text);
                    break;
                case "desc":
                    v.setDesc(text);
                    break;
                case "desc-type":
                    v.setDesc(text);
                    String recognizedType = recognizeType(text);
                    v.setType(recognizedType);
                    v.setColor(colorProvider.getColor(recognizedType));
                    break;
                case "teacher":
                    handleTeacher(v, spalte, data);
                    break;
                case "previousTeacher":
                    v.setPreviousTeachers(splitTeachers(text, data));
                    break;
                case "substitutionFrom":
                    v.setSubstitutionFrom(text);
                    break;
                case "teacherTo":
                    v.setTeacherTo(text);
                    break;
                case "class":
                    klassen = getClassName(text, data);
                    break;
                case "ignore":
                    break;
                case "date": // used by UntisSubstitutionParser
                    break;
                default:
                    throw new IllegalArgumentException("Unknown column type: " + type);
                }
                i++;
            }

            // rows without a lesson are not substitutions
            if (v.getLesson() == null || v.getLesson().equals("")) {
                continue;
            }

            autoDetectType(data, zeile, v);

            List<String> affectedClasses;

            Matcher singleMatcher = singlePattern.matcher(klassen);
            Matcher rangeMatcher = rangePattern.matcher(klassen);

            if (rangeMatcher.matches()) {
                // e.g. "5-12": every known class whose leading number lies in the range
                affectedClasses = new ArrayList<>();
                int min = Integer.parseInt(rangeMatcher.group(1));
                int max = Integer.parseInt(rangeMatcher.group(2));
                try {
                    for (String klasse : getAllClasses()) {
                        Matcher matcher2 = pattern2.matcher(klasse);
                        if (matcher2.matches()) {
                            int num = Integer.parseInt(matcher2.group(1));
                            if (min <= num && num <= max) {
                                affectedClasses.add(klasse);
                            }
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else if (singleMatcher.matches()) {
                // e.g. "7": every known class whose leading number equals that number
                affectedClasses = new ArrayList<>();
                int grade = Integer.parseInt(singleMatcher.group(1));
                try {
                    for (String klasse : getAllClasses()) {
                        Matcher matcher2 = pattern2.matcher(klasse);
                        if (matcher2.matches() && grade == Integer.parseInt(matcher2.group(1))) {
                            affectedClasses.add(klasse);
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else {
                if (data.optBoolean(PARAM_CLASSES_SEPARATED, true)
                        && data.optBoolean("classes_separated", true)) { // backwards compatibility
                    affectedClasses = Arrays.asList(klassen.split(", "));
                } else {
                    // Classes are written without separator: a known class is affected if its
                    // characters occur in order at the start of / within the cell text.
                    affectedClasses = new ArrayList<>();
                    try {
                        for (String klasse : getAllClasses()) { // TODO: is there a better way?
                            StringBuilder regex = new StringBuilder();
                            for (char character : klasse.toCharArray()) {
                                // quote every character so that regex metacharacters in class
                                // names (".", "(", "+", "?", ...) are matched literally
                                regex.append(Pattern.quote(String.valueOf(character)));
                                regex.append(".*");
                            }
                            if (klassen.matches(regex.toString())) {
                                affectedClasses.add(klasse);
                            }
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }

            for (String klasse : affectedClasses) {
                if (isValidClass(klasse)) {
                    v.getClasses().add(klasse);
                }
            }

            if (data.optBoolean(PARAM_MERGE_WITH_DIFFERENT_TYPE, false)) {
                // Do not add a second entry that differs only in its type; instead upgrade
                // the existing entry to "Vertretung" if the new one has that type.
                boolean found = false;
                for (Substitution subst : day.getSubstitutions()) {
                    if (subst.equalsExcludingType(v)) {
                        found = true;

                        // constant-first comparison: the type may still be null at this point
                        if ("Vertretung".equals(v.getType())) {
                            subst.setType("Vertretung");
                            subst.setColor(colorProvider.getColor("Vertretung"));
                        }

                        break;
                    }
                }
                if (!found) {
                    day.addSubstitution(v);
                }
            } else {
                day.addSubstitution(v);
            }
        }
    }
}

From source file:de.geeksfactory.opacclient.apis.BiBer1992.java

/**
 * Parses the HTML of a search result page into a {@link SearchRequestResult}.
 * <p>
 * Every hit occupies <code>rows_per_hit</code> consecutive <code>tr[valign]</code> rows of the
 * result table; only the first row of each hit is evaluated. The number of rows per hit is guessed
 * from the page and can be overridden with the <code>rows_per_hit</code> entry of the library
 * configuration. At most <code>numOfResultsPerPage</code> hits are returned.
 *
 * @param html the HTML source of the result page
 * @param page the page number, passed through unchanged to the returned result
 * @return the parsed hits together with the total hit count announced on the page
 *         (<code>-1</code> if the page does not state it, <code>0</code> if the page reports
 *         that nothing was found)
 */
private SearchRequestResult parse_search(String html, int page) {
    List<SearchResult> results = new ArrayList<>();
    Document doc = Jsoup.parse(html);

    if (doc.select("h3").text().contains("Es wurde nichts gefunden")) {
        return new SearchRequestResult(results, 0, page);
    }

    Elements trList = doc.select("form table tr[valign]"); // <tr valign="top">

    // Guess the layout: a single row, or checkboxes in both of the first two rows, means that
    // every row is a hit of its own; otherwise a hit spans two rows.
    int rows_per_hit = 2;
    if (trList.size() == 1 || (trList.size() > 1 && trList.get(0).select("input[type=checkbox]").size() > 0
            && trList.get(1).select("input[type=checkbox]").size() > 0)) {
        rows_per_hit = 1;
    }

    // An explicit configuration value wins over the guess. Non-positive values are ignored
    // because they would lead to a division by zero / invalid row indices below.
    int configuredRowsPerHit = data.optInt("rows_per_hit", rows_per_hit);
    if (configuredRowsPerHit > 0) {
        rows_per_hit = configuredRowsPerHit;
    }

    // Overall search results
    // are very differently layouted, but have always the text:
    // "....Treffer Gesamt (nnn)"
    int results_total;
    Pattern pattern = Pattern.compile("Treffer Gesamt \\(([0-9]+)\\)");
    Matcher matcher = pattern.matcher(html);
    if (matcher.find()) {
        results_total = Integer.parseInt(matcher.group(1));
    } else {
        results_total = -1;
    }

    // limit to numOfResultsPerPage entries
    int numOfEntries = Math.min(trList.size() / rows_per_hit, numOfResultsPerPage);

    for (int i = 0; i < numOfEntries; i++) {
        // first row of the i-th hit
        Element tr = trList.get(i * rows_per_hit);
        SearchResult sr = new SearchResult();

        // ID as href tag
        Elements links = tr.select("td a");
        if (links.size() > 0) {
            sr.setId(links.get(0).attr("href"));
        } else {
            // no ID as href found, look for the ID in the input form
            Elements inputs = tr.select("td input");
            if (inputs.size() > 0) {
                String nameID = inputs.get(0).attr("name").trim();
                String hrefID = "/" + opacDir + "/ftitle" + opacSuffix + "?LANG=de&FUNC=full&" + nameID
                        + "=YES";
                sr.setId(hrefID);
            }
        }

        // media type
        Elements images = tr.select("td img");
        if (images.size() > 0) {
            setMediaTypeFromImageFilename(sr, images.get(0).attr("src"));
        }

        // description
        StringBuilder desc = new StringBuilder();
        try {
            // array "searchtable" list the column numbers of the
            // description
            JSONArray searchtable = data.getJSONArray("searchtable");
            for (int j = 0; j < searchtable.length(); j++) {
                int colNum = searchtable.getInt(j);
                if (j > 0) {
                    desc.append("<br />");
                }
                Element cell = tr.child(colNum);
                String c = cell.html();
                // a cell consisting only of a link to the detail page: use the link text
                // NOTE(review): the text is taken from all such links in the row, not only
                // from this cell - confirm that this is intended
                if (cell.childNodes().size() == 1 && cell.select("a[href*=ftitle.]").size() > 0) {
                    c = tr.select("a[href*=ftitle.]").text();
                }
                desc.append(c);
            }
        } catch (Exception e) {
            // best effort: keep whatever part of the description was collected so far
            e.printStackTrace();
        }
        // remove links "<a ...>...</a>
        // needed for Friedrichshafen: "Warenkorb", "Vormerkung"
        // Herford: "Medienkorb"
        sr.setInnerhtml(desc.toString().replaceAll("<a .*?</a>", ""));

        // status from the font classes used in the row; no status is set if neither occurs
        boolean hasGreenFont = tr.select("font.p04x09b").size() > 0;
        boolean hasRedFont = tr.select("font.p02x09b").size() > 0;
        if (hasGreenFont && hasRedFont) {
            sr.setStatus(Status.YELLOW);
        } else if (hasGreenFont) {
            sr.setStatus(Status.GREEN);
        } else if (hasRedFont) {
            sr.setStatus(Status.RED);
        }

        // number: i already counts hits (not rows), so it must not be divided by
        // rows_per_hit again - that produced duplicate numbers for two-row hits
        sr.setNr(i);
        results.add(sr);
    }

    // m_resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}

From source file:at.ac.tuwien.dsg.quelle.cloudDescriptionParsers.impl.AmazonCloudJSONDescriptionParser.java

public CloudProvider getCloudProviderDescription() {

    //used to fast add cost properties
    //key is ServiceUnit name = its ID, such as m1.large
    Map<String, CloudOfferedService> units = new HashMap<>();

    //key is ServiceUnit name = its ID, such as m1.large
    Map<String, List<ElasticityCapability.Dependency>> costDependencies = new HashMap<>();

    CloudProvider cloudProvider = new CloudProvider("Amazon EC2", CloudProvider.Type.IAAS);

    cloudProvider.withUuid(UUID.randomUUID());

    //other misc Amazon Services 
    {//  www.ja  v  a 2s .  c om
        //create EBS instance
        CloudOfferedService ebsStorageUtility = new CloudOfferedService("IaaS", "Storage", "EBS");
        ebsStorageUtility.withUuid(UUID.randomUUID());

        cloudProvider.addCloudOfferedService(ebsStorageUtility);
        {
            List<ElasticityCapability.Dependency> qualityCapabilityTargets = new ArrayList<>();

            // utility quality
            Quality stdQuality = new Quality("Standard I/O Performance");
            Metric storageIOPS = new Metric("Storage", "IOPS");
            storageIOPS.setType(Metric.MetricType.QUALITY);
            stdQuality.addProperty(storageIOPS, new MetricValue("100"));
            qualityCapabilityTargets.add(new ElasticityCapability.Dependency(stdQuality,
                    ElasticityCapability.Type.OPTIONAL_ASSOCIATION).withVolatility(new Volatility(1, 1)));
            // utility.addQualityProperty(stdQuality);

            // utility quality
            Quality highQuality = new Quality("High I/O Performance");
            highQuality.addProperty(new Metric("Storage", "IOPS"), new MetricValue("4000"));
            qualityCapabilityTargets.add(new ElasticityCapability.Dependency(highQuality,
                    ElasticityCapability.Type.OPTIONAL_ASSOCIATION).withVolatility(new Volatility(1, 1)));
            // utility.addQualityProperty(highQuality);

            {
                ElasticityCapability characteristic = new ElasticityCapability("Quality");

                characteristic.setPhase(ElasticityCapability.Phase.INSTANTIATION_TIME);

                for (ElasticityCapability.Dependency d : qualityCapabilityTargets) {

                    characteristic.addCapabilityDependency(d);
                }

                ebsStorageUtility.addElasticityCapability(characteristic);
            }

            List<ElasticityCapability.Dependency> costCapabilityTargets = new ArrayList<>();

            CostFunction costFunctionForStdPerformance = new CostFunction("StandardIOPerformanceCost");
            costCapabilityTargets.add(new ElasticityCapability.Dependency(costFunctionForStdPerformance,
                    ElasticityCapability.Type.OPTIONAL_ASSOCIATION).withVolatility(new Volatility(1, 1)));
            {
                // currently Cost is cost unit agnostic?
                CostElement costPerGB = new CostElement("StorageCost",
                        new Metric("diskSize", "GB", Metric.MetricType.COST), CostElement.Type.USAGE)
                                .withBillingCycle(CostElement.BillingCycle.HOUR);
                //convert from month to hour
                costPerGB.addBillingInterval(new MetricValue(1), 0.1 / 30 / 24);
                costFunctionForStdPerformance.addCostElement(costPerGB);
            }

            {
                CostElement costPerIO = new CostElement("I/OCost",
                        new Metric("diskIOCount", "#", Metric.MetricType.COST), CostElement.Type.USAGE);
                costPerIO.addBillingInterval(new MetricValue(1), 0.1);
                costFunctionForStdPerformance.addCostElement(costPerIO);
            }
            costFunctionForStdPerformance.addAppliedIfServiceInstanceUses(stdQuality);
            // utility.addCostFunction(costFunctionForStdPerformance);

            CostFunction costFunctionForMaxPerformance = new CostFunction("HighIOPerformanceCost");
            costCapabilityTargets.add(new ElasticityCapability.Dependency(costFunctionForMaxPerformance,
                    ElasticityCapability.Type.OPTIONAL_ASSOCIATION).withVolatility(new Volatility(1, 1)));
            {
                // currently Cost is cost unit agnostic?
                CostElement costPerGB = new CostElement("StorageCost",
                        new Metric("diskSize", "GB", Metric.MetricType.COST), CostElement.Type.USAGE)
                                .withBillingCycle(CostElement.BillingCycle.HOUR);
                costPerGB.addBillingInterval(new MetricValue(1), 0.125 / 30 / 24);
                costFunctionForMaxPerformance.addCostElement(costPerGB);
            }

            {
                CostElement costPerIO = new CostElement("I/OCost",
                        new Metric("diskIOCount", "#", Metric.MetricType.COST), CostElement.Type.USAGE);
                costPerIO.addBillingInterval(new MetricValue(1), 0.1);
                costFunctionForMaxPerformance.addCostElement(costPerIO);
            }
            costFunctionForMaxPerformance.addAppliedIfServiceInstanceUses(highQuality);
            // utility.addCostFunction(costFunctionForMaxPerformance);

            {

                ElasticityCapability characteristic = new ElasticityCapability("PerformanceCost");
                characteristic.setPhase(ElasticityCapability.Phase.INSTANTIATION_TIME);
                for (ElasticityCapability.Dependency d : costCapabilityTargets) {
                    characteristic.addCapabilityDependency(d);
                }

                ebsStorageUtility.addElasticityCapability(characteristic);
            }
        }
    }

    {
        //Monitoring
        {
            CloudOfferedService utility = new CloudOfferedService("MaaS", "Monitoring", "Monitoring");
            utility.withUuid(UUID.randomUUID());
            cloudProvider.addCloudOfferedService(utility);

            List<ElasticityCapability.Dependency> qualityCapabilityTargets = new ArrayList<>();
            List<ElasticityCapability.Dependency> costCapabilityTargets = new ArrayList<>();

            //utility quality
            Quality stdQuality = new Quality("StdMonitoringFreq");
            stdQuality.addProperty(new Metric("monitoredFreq", "min"), new MetricValue(5));

            qualityCapabilityTargets.add(new ElasticityCapability.Dependency(stdQuality,
                    ElasticityCapability.Type.OPTIONAL_ASSOCIATION).withVolatility(new Volatility(0, 0)));

            //utility quality
            Quality higherQuality = new Quality("HighMonitoringFreq");
            higherQuality.addProperty(new Metric("monitoredFreq", "min"), new MetricValue(1));

            qualityCapabilityTargets.add(new ElasticityCapability.Dependency(higherQuality,
                    ElasticityCapability.Type.OPTIONAL_ASSOCIATION).withVolatility(new Volatility(0, 0)));

            //  quality elasticity
            {
                ElasticityCapability characteristic = new ElasticityCapability("MonitoringQuality");
                characteristic.setPhase(ElasticityCapability.Phase.INSTANTIATION_TIME);

                for (ElasticityCapability.Dependency d : qualityCapabilityTargets) {
                    characteristic.addCapabilityDependency(d);
                }
                utility.addElasticityCapability(characteristic);
            }

            CostFunction costFunctionForStdMonitoring = new CostFunction("StdMonitoringFreqCost");
            {
                //currently Cost is cost unit agnostic?
                CostElement monCost = new CostElement("MonitoringCost",
                        new Metric("monitoringCost", "$/hour", Metric.MetricType.COST), CostElement.Type.USAGE);
                monCost.addBillingInterval(new MetricValue(1), 0.0);
                costFunctionForStdMonitoring.addCostElement(monCost);
                costFunctionForStdMonitoring.addAppliedIfServiceInstanceUses(stdQuality);
                costCapabilityTargets.add(new ElasticityCapability.Dependency(costFunctionForStdMonitoring,
                        ElasticityCapability.Type.OPTIONAL_ASSOCIATION).withVolatility(new Volatility(0, 0)));
            }

            CostFunction costFunctionForCustomMonitoring = new CostFunction("HighMonitoringFreqCost");
            {
                CostElement monCost = new CostElement("MonitoringCost",
                        new Metric("monitoringCost", "$/month", Metric.MetricType.COST),
                        CostElement.Type.USAGE);
                monCost.addBillingInterval(new MetricValue(1), 3.5);
                costFunctionForCustomMonitoring.addCostElement(monCost);
                costFunctionForCustomMonitoring.addAppliedIfServiceInstanceUses(higherQuality);
                costCapabilityTargets.add(new ElasticityCapability.Dependency(costFunctionForCustomMonitoring,
                        ElasticityCapability.Type.OPTIONAL_ASSOCIATION).withVolatility(new Volatility(0, 0)));
            }

            //cost   elasticity
            {
                ElasticityCapability characteristic = new ElasticityCapability("MonitoringCost");
                characteristic.setPhase(ElasticityCapability.Phase.INSTANTIATION_TIME);

                for (ElasticityCapability.Dependency d : costCapabilityTargets) {
                    characteristic.addCapabilityDependency(d);
                }
                utility.addElasticityCapability(characteristic);
            }

        }
    }

    {
        //Amazon SQS 
        {
            CloudOfferedService sqs = new CloudOfferedService("PaaS", "CommunicationServices", "SimpleQueue");
            sqs.withUuid(UUID.randomUUID());
            cloudProvider.addCloudOfferedService(sqs);

            //utility quality
            Resource resource = new Resource("MessagingService");
            resource.addProperty(new Metric("message", "queue"), new MetricValue(""));

            sqs.addResourceProperty(resource);

            CostFunction messagingCost = new CostFunction("MessagingCostFct");
            {
                //currently Cost is cost unit agnostic?
                {
                    CostElement d = new CostElement("MessagingCost",
                            new Metric("messages", "#", Metric.MetricType.COST), CostElement.Type.USAGE);
                    d.addBillingInterval(new MetricValue(1), 0.5);
                    messagingCost.addCostElement(d);
                }
            }

            sqs.addCostFunction(messagingCost);
        }
    }

    //put old instance types
    try {
        Document doc = Jsoup.connect(amazonPreviousInstanceTypesURL).get();
        Elements tableElements = doc.select("div.aws-table*").get(5).getElementsByTag("table");

        Elements tableHeaderEles = tableElements.select("thead tr th");
        System.out.println("headers");
        for (int i = 0; i < tableHeaderEles.size(); i++) {
            System.out.println(tableHeaderEles.get(i).text());
        }
        System.out.println();

        Elements tableRowElements = tableElements.select(":not(thead) tr");
        Elements headers = tableRowElements.get(0).select("td");

        //at i = 0 is the HEADER of the table
        for (int i = 1; i < tableRowElements.size(); i++) {

            //for each row we create another ServiceUnit
            ServiceUnitBuilder builder = new ServiceUnitBuilder("IaaS", "VM");

            Element row = tableRowElements.get(i);
            System.out.println("row");

            Elements rowItems = row.select("td");
            for (int j = 0; j < rowItems.size(); j++) {
                //* marks notes, such as 1 *1 (note 1)
                String value = rowItems.get(j).text().split("\\*")[0];

                //do not know why, for large VMs amazon says 24 x 2,048 GB
                value = value.replaceAll(",", "");
                String propertyName = headers.get(j).text();
                if (builder.getPropertyNames().contains(propertyName)) {
                    builder.addProperty(propertyName, value);
                } else {
                    log.error("Property {} not found in property builder", propertyName);
                }
            }
            CloudOfferedService unit = builder.getUnit();
            unit.withUuid(UUID.randomUUID());
            cloudProvider.addCloudOfferedService(unit);
            units.put(unit.getName(), unit);

            System.out.println();
        }

    } catch (Exception e) {
        e.printStackTrace();
    }

    //put new instance types
    try {
        Document doc = Jsoup.connect(amazonInstanceTypesURL).get();
        Elements tableElements = doc.select("div.aws-table*").get(8).getElementsByTag("table");

        Elements tableHeaderEles = tableElements.select("thead tr th");
        System.out.println("headers");
        for (int i = 0; i < tableHeaderEles.size(); i++) {
            System.out.println(tableHeaderEles.get(i).text());
        }
        System.out.println();

        Elements tableRowElements = tableElements.select(":not(thead) tr");
        Elements headers = tableRowElements.get(0).select("td");

        //at i = 0 is the HEADER of the table
        for (int i = 1; i < tableRowElements.size(); i++) {

            //for each row we create another ServiceUnit
            ServiceUnitBuilder builder = new ServiceUnitBuilder("IaaS", "VM");

            Element row = tableRowElements.get(i);
            System.out.println("row");

            Elements rowItems = row.select("td");
            for (int j = 0; j < rowItems.size(); j++) {
                //* marks notes, such as 1 *1 (note 1)
                String value = rowItems.get(j).text().split("\\*")[0];

                //do not know why, for large VMs amazon says 24 x 2,048 GB
                value = value.replaceAll(",", "");
                String propertyName = headers.get(j).text();
                if (builder.getPropertyNames().contains(propertyName)) {
                    builder.addProperty(propertyName, value);
                } else {
                    log.error("Property {} not found in property builder", propertyName);
                }
            }
            CloudOfferedService unit = builder.getUnit();
            unit.withUuid(UUID.randomUUID());
            cloudProvider.addCloudOfferedService(unit);
            units.put(unit.getName(), unit);

            System.out.println();
        }

    } catch (Exception e) {
        log.error(e.getMessage(), e);
    }

    //spot price http://spot-price.s3.amazonaws.com/spot.js
    //on demand http://a0.awsstatic.com/pricing/1/ec2/linux-od.min.js
    //Reserved light a0.awsstatic.com/pricing/1/ec2/linux-ri-light.min.js 
    //Reserved medium a0.awsstatic.com/pricing/1/ec2/linux-ri-medium.min.js
    //Reserved heavy a0.awsstatic.com/pricing/1/ec2/linux-ri-heavy.min.js .
    //get on demand price
    //spot price http://a0.awsstatic.com/pricing/1/ec2/linux-od.min.js
    try {
        //get reserved light utilization
        addReservedCostOptions("LightUtilization", amazonInstancesReservedLightUtilizationCostURL,
                cloudProvider, units, costDependencies);
    } catch (IOException | ParseException ex) {
        Logger.getLogger(AmazonCloudJSONDescriptionParser.class.getName()).log(Level.SEVERE, null, ex);
    }

    try {
        addReservedCostOptions("MediumUtilization", amazonInstancesReservedMediumUtilizationCostURL,
                cloudProvider, units, costDependencies);
        //            addReservedCostOptions("MediumUtilization", "http://s3.amazonaws.com/aws-assets-pricing-prod/pricing/ec2/SF-Summit-2014/medium_linux.js", cloudProvider, units, costDependencies);
    } catch (IOException | ParseException ex) {
        Logger.getLogger(AmazonCloudJSONDescriptionParser.class.getName()).log(Level.SEVERE, null, ex);
    }

    try {
        addReservedCostOptions("HeavyUtilization", amazonInstancesReservedHeavyUtilizationCostURL,
                cloudProvider, units, costDependencies);
        //            addReservedCostOptions("HeavyUtilization", "http://s3.amazonaws.com/aws-assets-pricing-prod/pricing/ec2/SF-Summit-2014/heavy_linux.js", cloudProvider, units, costDependencies);
    } catch (IOException | ParseException ex) {
        Logger.getLogger(AmazonCloudJSONDescriptionParser.class.getName()).log(Level.SEVERE, null, ex);
    }

    try {
        addOndemandCostOptions(amazonInstancesOndemandCostURL, cloudProvider, units, costDependencies);
        //            addReservedCostOptions("HeavyUtilization", "http://s3.amazonaws.com/aws-assets-pricing-prod/pricing/ec2/SF-Summit-2014/heavy_linux.js", cloudProvider, units, costDependencies);
    } catch (IOException | ParseException ex) {
        Logger.getLogger(AmazonCloudJSONDescriptionParser.class.getName()).log(Level.SEVERE, null, ex);
    }

    try {
        addSpotCostOptions(amazonInstancesSpotCostURL, cloudProvider, units, costDependencies);
    } catch (IOException | ParseException ex) {
        Logger.getLogger(AmazonCloudJSONDescriptionParser.class.getName()).log(Level.SEVERE, null, ex);
    }
    //add for each unit its cost elasticity dependencies
    {
        for (String suName : costDependencies.keySet()) {

            //currently, due to the Neo4J access style implemented by me, I need unique names for Cost elasticity dependencies.
            ElasticityCapability characteristic = new ElasticityCapability("Cost_" + suName);
            characteristic.setPhase(ElasticityCapability.Phase.INSTANTIATION_TIME);

            for (ElasticityCapability.Dependency d : costDependencies.get(suName)) {
                characteristic.addCapabilityDependency(d);
            }
            CloudOfferedService unit = units.get(suName);
            unit.addElasticityCapability(characteristic);
        }
    }

    //remove units to see outcome
    //        cloudProvider.setServiceUnits(cloudProvider.getServiceUnits().subList(4, 6));
    return cloudProvider;

}

From source file: org.shareok.data.sagedata.SageJournalIssueDateProcessor.java

@SuppressWarnings("empty-statement")
public void retrieveSageJournalVolIssueDates(Map<String, String> processedJournalsMap) {
    List<String> processedJournals = new ArrayList<>();
    //        JSONObject jsonObj = getSavedSageJournalVolIssueDateInformation();
    try {
        Map<String, Map<String, String>> journalMap = getSavedSageJournalVolIssueDateInformation();
        if (null == journalMap) {
            journalMap = new HashMap<>();
        }
        Document doc = null;
        try {
            doc = Jsoup.connect("http://journals.sagepub.com/action/showPublications?pageSize=20&startPage=199")
                    .userAgent(
                            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36")
                    .cookie("auth", "token").timeout(300000).get();
            Elements trs = doc.select("form#browsePublicationsForm").get(0).select("table").get(0)
                    .select("tbody").get(0).select("tr");
            for (Element tr : trs) {
                Element link = tr.select("td").get(1).select("a").get(0);
                String journalName = link.text();
                String journalLink = SageDataUtil.SAGE_HTTP_PREFIX + link.attr("href");
                String[] linkInfo = journalLink.split("/");
                String journalIssuesLink = SageDataUtil.SAGE_HTTP_PREFIX + "/loi/"
                        + linkInfo[linkInfo.length - 1];
                if (null == journalMap.get(journalName)) {
                    Map<String, String> infoMap = new HashMap<>();
                    infoMap.put("homeLink", journalLink);
                    infoMap.put("issueLink", journalIssuesLink);
                    journalMap.put(journalName, infoMap);
                } else {
                    Map<String, String> infoMap = journalMap.get(journalName);
                    if (null == infoMap.get("homeLink")) {
                        infoMap.put("homeLink", journalLink);
                    }
                    if (null == infoMap.get("issueLink")) {
                        infoMap.put("issueLink", journalIssuesLink);
                    }
                }
            }
            int kk = 0;
            mainLoop: for (String journal : journalMap.keySet()) {
                System.out.println("Print out journal " + journal + " information :");
                if (null != processedJournalsMap && (journal == null ? processedJournalsMap.get(journal) == null
                        : journal.equals(processedJournalsMap.get(journal)))) {
                    System.out.println("Journal : has already been processed!");
                    continue;
                }
                //                    if(journal.contains("Christian Education")){
                //                        System.out.println("Journal name : International Journal of Health Services, cannot be processed!");
                ////                        continue;
                //                    }
                //                    if(journal.contains("Plastic Surgery")){
                //                        System.out.println("Journal name : International Journal of Health Services, cannot be processed!");
                //                        continue;
                //                    }
                Map<String, String> journalInfoMap = journalMap.get(journal);
                for (String key : journalInfoMap.keySet()) {
                    if (key.equals("issueLink")) {
                        Document loiDdoc = null;
                        try {
                            loiDdoc = Jsoup.connect(journalInfoMap.get(key)).userAgent(
                                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36")
                                    .cookie("auth", "token").timeout(300000).get();
                        } catch (HttpStatusException ex) {
                            ex.printStackTrace();
                            break;
                        }
                        Thread.sleep(2200);
                        if (null != loiDdoc) {
                            Map<String, Map<String, String>> dataMap;
                            if (null != journalMap.get(journal).get("data")) {
                                dataMap = DataUtil.getMapFromJson(journalMap.get(journal).get("data"));
                            } else {
                                dataMap = new HashMap<>();
                            }
                            Elements decaseDivs = loiDdoc.select("div.decade");
                            if (null != decaseDivs && decaseDivs.size() > 0) {
                                for (Element decade : decaseDivs) {
                                    Elements yearsDiv = decade.select("div.years").get(0).children();
                                    if (null != yearsDiv && yearsDiv.size() > 0) {
                                        for (Element yearEle : yearsDiv) {
                                            Elements volumesDiv = yearEle.select("div.volumes").get(0)
                                                    .children();
                                            if (null != volumesDiv && volumesDiv.size() > 0) {
                                                for (Element volumeEle : volumesDiv) {
                                                    String volume = volumeEle.select("a").get(0).text().trim()
                                                            .split("Volume")[1].trim();
                                                    Elements issueInfoDivEles = volumeEle
                                                            .select("div.js_issue");
                                                    if (null != issueInfoDivEles
                                                            && issueInfoDivEles.size() > 0) {
                                                        for (Element issueInfoDiv : issueInfoDivEles) {
                                                            String issueText = issueInfoDiv.select("a").get(0)
                                                                    .text();
                                                            issueText = issueText.split(", ")[0]
                                                                    .split("Issue")[1].trim();
                                                            String oldIssueDate = "";
                                                            String issueDate = "";
                                                            if (NO_ARTICLE_PUB_DATE_JOURNALS_LIST
                                                                    .contains(journal)) {
                                                                issueDate = "01 " + issueInfoDiv
                                                                        .select("span.loiIssueCoverDateText")
                                                                        .get(0).text().trim();
                                                                oldIssueDate = issueDate;
                                                                //                                                            if(issueDate.contains("Winter")){
                                                                //                                                                issueDate = issueDate.replaceAll("Winter", "October");
                                                                //                                                            }
                                                                //                                                            if(issueDate.contains("Fall") || issueDate.contains("Autumn")){
                                                                //                                                                issueDate = issueDate.replaceAll("Fall", "September");
                                                                //                                                                issueDate = issueDate.replaceAll("Autumn", "September");
                                                                //                                                            }
                                                                //                                                            if(issueDate.contains("Summer")){
                                                                //                                                                issueDate = issueDate.replaceAll("Summer", "April");
                                                                //                                                            }
                                                                //                                                            if(issueDate.contains("Spring")){
                                                                //                                                                issueDate = issueDate.replaceAll("Spring", "January");
                                                                //                                                            }
                                                                //                                                            try{                                                            
                                                                //                                                                // for date string like "01 July-October 2016"
                                                                //                                                                if(issueDate.contains("-")){
                                                                //                                                                    String[] dateInfo = issueDate.split("-");
                                                                //                                                                    issueDate = dateInfo[0] + " " + dateInfo[1].split(" ")[1];
                                                                //                                                                }
                                                                //                                                                // for date string like "01 July/October 2016"
                                                                //                                                                if(issueDate.contains("/")){
                                                                //                                                                    String[] dataInfo = issueDate.split("/");
                                                                //                                                                    issueDate = dataInfo[0] + " " + dataInfo[1].split(" ")[1];
                                                                //                                                                }
                                                                //                                                            }
                                                                //                                                            catch(ArrayIndexOutOfBoundsException ex){
                                                                //                                                                System.out.println("Journal name: "+journal);
                                                                //                                                                System.out.println("Volume: "+volume+", issue: "+issueText);
                                                                //                                                                System.out.println("This date string cannot be parsed: "+oldIssueDate);
                                                                //                                                                ex.printStackTrace();
                                                                //                                                                continue;
                                                                //                                                            }
                                                                try {
                                                                    issueDate = "01 " + issueInfoDiv.select(
                                                                            "span.loiIssueCoverDateText").get(0)
                                                                            .text().trim();
                                                                    oldIssueDate = issueDate;
                                                                    issueDate = DataHandlersUtil
                                                                            .convertFullMonthDateStringFormat(
                                                                                    issueDate);
                                                                } catch (ParseException ex) {
                                                                    //                                                                if(!journal.contains("OMEGA - Journal of Death and Dying")){
                                                                    //                                                                    continue;
                                                                    //                                                                }
                                                                    System.out.println(
                                                                            "Journal name: " + journal);
                                                                    System.out.println("Volume: " + volume
                                                                            + ", issue: " + issueText);
                                                                    System.out.println(
                                                                            "This date string cannot be parsed: "
                                                                                    + oldIssueDate);
                                                                    ex.printStackTrace();
                                                                    continue;
                                                                }

                                                            } else {
                                                                try {
                                                                    Element issueLinkEle = issueInfoDiv
                                                                            .select("a").get(0);
                                                                    String issueLink = issueLinkEle
                                                                            .attr("href");
                                                                    Document issueDoc = null;
                                                                    try {
                                                                        issueDoc = Jsoup.connect(issueLink)
                                                                                .userAgent(
                                                                                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36")
                                                                                .cookie("auth", "token")
                                                                                .timeout(300000).get();
                                                                    } catch (HttpStatusException ex) {
                                                                        ex.printStackTrace();
                                                                        break mainLoop;
                                                                    }
                                                                    Thread.sleep(2200);
                                                                    Elements articleDivs = issueDoc
                                                                            .select("div.art_title, .linkable");
                                                                    String articleLink = SageDataUtil.SAGE_HTTP_PREFIX
                                                                            + articleDivs.get(0)
                                                                                    .select("a.ref, .nowrap")
                                                                                    .get(0).attr("href");
                                                                    if (articleLink.contains("pdf/")) {
                                                                        System.out.println("journal: " + journal
                                                                                + " volume=" + volume
                                                                                + " issue=" + issueText
                                                                                + " has ONLY PDF links!");
                                                                        try {
                                                                            issueDate = issueInfoDiv.select(
                                                                                    "span.loiIssueCoverDateText")
                                                                                    .get(0).text().trim();
                                                                            oldIssueDate = issueDate;
                                                                            if (issueDate.contains("Winter")) {
                                                                                issueDate = issueDate
                                                                                        .replaceAll("Winter",
                                                                                                "December");
                                                                            }
                                                                            if (issueDate.contains("Fall")
                                                                                    || issueDate.contains(
                                                                                            "Autumn")) {
                                                                                issueDate = issueDate
                                                                                        .replaceAll("Fall",
                                                                                                "September");
                                                                                issueDate = issueDate
                                                                                        .replaceAll("Autumn",
                                                                                                "September");
                                                                            }
                                                                            if (issueDate.contains("Summer")) {
                                                                                issueDate = issueDate
                                                                                        .replaceAll("Summer",
                                                                                                "June");
                                                                            }
                                                                            if (issueDate.contains("Spring")) {
                                                                                issueDate = issueDate
                                                                                        .replaceAll("Spring",
                                                                                                "March");
                                                                            }
                                                                            if (issueDate.contains("/")) {
                                                                                String[] dataInfo = issueDate
                                                                                        .split("/");
                                                                                String dateInfo1 = dataInfo[0]
                                                                                        .trim();
                                                                                String date;
                                                                                String month1;
                                                                                String[] dateInfo1Arr = dateInfo1
                                                                                        .split(" ");
                                                                                if (dateInfo1Arr.length == 2) {
                                                                                    date = dateInfo1Arr[0];
                                                                                    month1 = dateInfo1Arr[1];
                                                                                } else {
                                                                                    date = "01";
                                                                                    month1 = dataInfo[0].trim();
                                                                                }
                                                                                String month2 = dataInfo[1]
                                                                                        .split("\\s+")[0];
                                                                                String year = dataInfo[1]
                                                                                        .split("\\s+")[1];
                                                                                String date1 = DataHandlersUtil
                                                                                        .convertFullMonthDateStringFormat(
                                                                                                date + " "
                                                                                                        + month1
                                                                                                        + " "
                                                                                                        + year);
                                                                                String date2 = DataHandlersUtil
                                                                                        .convertFullMonthDateStringFormat(
                                                                                                date + " "
                                                                                                        + month2
                                                                                                        + " "
                                                                                                        + year);
                                                                                issueDate = date1 + "::"
                                                                                        + date2;
                                                                            }
                                                                            //  The Journal of Psychiatry & Law dd MMMM-MMMM yyyy pattern
                                                                            else if (issueDate.contains("-")) {
                                                                                if (journal.equals(
                                                                                        "OMEGA - Journal of Death and Dying")) {
                                                                                    Document articleDoc = null;
                                                                                    try {
                                                                                        articleDoc = Jsoup
                                                                                                .connect(
                                                                                                        articleLink)
                                                                                                .userAgent(
                                                                                                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36")
                                                                                                .cookie("auth",
                                                                                                        "token")
                                                                                                .timeout(300000)
                                                                                                .get();
                                                                                    } catch (HttpStatusException ex) {
                                                                                        ex.printStackTrace();
                                                                                        break mainLoop;
                                                                                    }
                                                                                    Thread.sleep(2200);
                                                                                    Element pubDateDiv = articleDoc
                                                                                            .select("div.published-dates")
                                                                                            .get(0);
                                                                                    issueDate = pubDateDiv
                                                                                            .text()
                                                                                            .split("Issue published:")[1]
                                                                                                    .trim();
                                                                                    oldIssueDate = issueDate;
                                                                                    issueDate = DataHandlersUtil
                                                                                            .convertFullMonthDateStringFormat(
                                                                                                    issueDate);
                                                                                } else {
                                                                                    String[] dataInfo = issueDate
                                                                                            .split("-");
                                                                                    String dateInfo1 = dataInfo[0]
                                                                                            .trim();
                                                                                    String date;
                                                                                    String month1;
                                                                                    String[] dateInfo1Arr = dateInfo1
                                                                                            .split(" ");
                                                                                    if (dateInfo1Arr.length == 2) {
                                                                                        date = dateInfo1Arr[0]
                                                                                                .trim();
                                                                                        month1 = dateInfo1Arr[1]
                                                                                                .trim();
                                                                                    } else {
                                                                                        date = "01";
                                                                                        month1 = dataInfo[0]
                                                                                                .trim();
                                                                                    }
                                                                                    String month2 = dataInfo[1]
                                                                                            .split("\\s+")[0];
                                                                                    String year = dataInfo[1]
                                                                                            .split("\\s+")[1];
                                                                                    String date1 = DataHandlersUtil
                                                                                            .convertFullMonthDateStringFormat(
                                                                                                    date + " "
                                                                                                            + month1
                                                                                                            + " "
                                                                                                            + year);
                                                                                    String date2 = DataHandlersUtil
                                                                                            .convertFullMonthDateStringFormat(
                                                                                                    date + " "
                                                                                                            + month2
                                                                                                            + " "
                                                                                                            + year);
                                                                                    issueDate = date1 + "::"
                                                                                            + date2;
                                                                                }
                                                                            } else {
                                                                                issueDate = "01 " + issueDate;
                                                                                issueDate = DataHandlersUtil
                                                                                        .convertFullMonthDateStringFormat(
                                                                                                issueDate);
                                                                            }
                                                                        } catch (ParseException
                                                                                | ArrayIndexOutOfBoundsException ex) {
                                                                            System.out.println(
                                                                                    "Journal name: " + journal);
                                                                            System.out.println("Volume: "
                                                                                    + volume + ", issue: "
                                                                                    + issueText);
                                                                            System.out.println(
                                                                                    "This date string cannot be parsed: "
                                                                                            + issueDate);
                                                                            ex.printStackTrace();
                                                                            continue;
                                                                        }
                                                                    } else {
                                                                        Document articleDoc = null;
                                                                        try {
                                                                            articleDoc = Jsoup
                                                                                    .connect(articleLink)
                                                                                    .userAgent(
                                                                                            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36")
                                                                                    .cookie("auth", "token")
                                                                                    .timeout(300000).get();
                                                                        } catch (HttpStatusException ex) {
                                                                            ex.printStackTrace();
                                                                            break mainLoop;
                                                                        }
                                                                        Thread.sleep(2200);
                                                                        Element pubDateDiv = articleDoc
                                                                                .select("div.published-dates")
                                                                                .get(0);
                                                                        issueDate = pubDateDiv.text()
                                                                                .split("Issue published:")[1]
                                                                                        .trim();
                                                                        oldIssueDate = issueDate;
                                                                        issueDate = DataHandlersUtil
                                                                                .convertFullMonthDateStringFormat(
                                                                                        issueDate);
                                                                    }

                                                                } catch (Exception ex) {
                                                                    logger.error(
                                                                            "Cannot get the issue date for journal ="
                                                                                    + journal + " volume="
                                                                                    + volume + " issue="
                                                                                    + issueText + " date="
                                                                                    + oldIssueDate,
                                                                            ex);
                                                                    continue;
                                                                }
                                                            }
                                                            if (DataHandlersUtil.datesCompare(issueDate,
                                                                    "2010-01-01") < 0) {
                                                                if (dataMap.size() > 0) {
                                                                    ObjectMapper mapper = new ObjectMapper();
                                                                    String json = mapper
                                                                            .writeValueAsString(dataMap);
                                                                    journalInfoMap.put("data", json);
                                                                }
                                                                processedJournals.add(journal);
                                                                continue mainLoop;
                                                            }
                                                            try {
                                                                if (null != dataMap && dataMap.size() > 0
                                                                        && null != dataMap.get(volume)
                                                                        && null != dataMap.get(volume)
                                                                                .get(issueText)) {
                                                                    continue;
                                                                } else {
                                                                    Map<String, String> issueMap = dataMap
                                                                            .get(volume);
                                                                    if (null == issueMap) {
                                                                        issueMap = new HashMap<>();
                                                                        issueMap.put(issueText, issueDate);
                                                                        dataMap.put(volume, issueMap);
                                                                    } else {
                                                                        issueMap.put(issueText, issueDate);
                                                                    }
                                                                    System.out.println("This is vol. " + volume
                                                                            + " and issue " + issueText
                                                                            + " and date " + issueDate);
                                                                }
                                                            } catch (Exception ex) {
                                                                System.out.println(
                                                                        "Cannot add the pub date info into data map for vol. "
                                                                                + volume + " and issue "
                                                                                + issueText + " and date "
                                                                                + issueDate);
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }

                                }
                            }
                            if (dataMap.size() > 0) {
                                ObjectMapper mapper = new ObjectMapper();
                                String json = mapper.writeValueAsString(dataMap);
                                journalInfoMap.put("data", json);
                            }
                        }

                    }
                }
                processedJournals.add(journal);
                if (kk > 100) {
                    break;
                }
                kk++;
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
        ObjectMapper mapper = new ObjectMapper();
        String json = mapper.writeValueAsString(journalMap);
        String sageJournalIssueDateInfoFilePath = ShareokdataManager.getSageJournalIssueDateInfoFilePath();
        File sageFile = new File(sageJournalIssueDateInfoFilePath);
        if (sageFile.exists()) {
            String sageJournalIssueDateInfoFilePathOld = sageJournalIssueDateInfoFilePath.split("\\.")[0] + "_"
                    + DataHandlersUtil.getCurrentTimeString() + ".json";
            sageFile.renameTo(new File(sageJournalIssueDateInfoFilePathOld));
        }
        DocumentProcessorUtil.outputStringToFile(json,
                ShareokdataManager.getSageJournalIssueDateInfoFilePath());
        System.out.println("processed journals = " + mapper.writeValueAsString(processedJournals));
    } catch (Exception ex) {
        logger.error("Cannot process the issue dates.", ex);
    }
}

From source file:gr.scify.newsum.ui.ViewActivity.java

/**
 * Loads the topics of the current category and wires the summary screen to them.
 *
 * <p>Reads the category from the launching intent, fetches its topics and collects their
 * titles, IDs and printable dates. A spinner is then filled with the titles on the UI thread;
 * whenever its selection changes, the listener loads the summary for the selected topic (or the
 * custom category page), renders it as HTML and records the topic as visited. Finally the
 * initial selection is applied, the help dialog is shown and the waiting dialog is closed.
 */
@Override
public void run() {
    // take the String from the TopicActivity
    Bundle extras = getIntent().getExtras();
    Category = extras.getString(CATEGORY_INTENT_VAR);

    // Make sure we have updated the data source
    NewSumUiActivity.setDataSource(this);

    // Get user sources
    String sUserSources = Urls.getUserVisibleURLsAsString(ViewActivity.this);

    // get Topics from TopicActivity (avoid multiple server calls)
    TopicInfo[] tiTopics = TopicActivity.getTopics(sUserSources, Category, this);

    // Also get Topic Titles, to display to adapter
    final String[] saTopicTitles = new String[tiTopics.length];
    // Also get Topic IDs
    final String[] saTopicIDs = new String[tiTopics.length];
    // Also get Dates, in order to show in summary title
    final String[] saTopicDates = new String[tiTopics.length];
    // DeHTML titles
    for (int iCnt = 0; iCnt < tiTopics.length; iCnt++) {
        // update Titles Array
        saTopicTitles[iCnt] = Html.fromHtml(tiTopics[iCnt].getTitle()).toString();
        // update IDs Array
        saTopicIDs[iCnt] = tiTopics[iCnt].getID();
        // update Date Array
        saTopicDates[iCnt] = tiTopics[iCnt].getPrintableDate(NewSumUiActivity.getDefaultLocale());
    }
    // get the value of the TopicIDs list size (to use in swipe)
    saTopicIDsLength = saTopicIDs.length;
    final TextView title = (TextView) findViewById(R.id.title);
    // Fill topic spinner
    final ArrayAdapter<CharSequence> adapter = new ArrayAdapter<CharSequence>(this,
            android.R.layout.simple_spinner_item, saTopicTitles);

    final TextView tx = (TextView) findViewById(R.id.textView1);

    // Get active topic
    int iTopicNum;
    // If we have returned from a pause
    if (iPrvSelectedItem >= 0)
        // use previous selection before pause
        iTopicNum = iPrvSelectedItem;
    // else
    else
        // use selection from topic page
        iTopicNum = extras.getInt(TOPIC_ID_INTENT_VAR);
    final int num = iTopicNum;

    // create an invisible spinner just to control the summaries of the
    // category (i will use it later on Swipe)
    final Spinner spinner = (Spinner) findViewById(R.id.spinner1);
    adapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item);
    runOnUiThread(new Runnable() {

        @Override
        public void run() {
            spinner.setAdapter(adapter);

            // Scroll view init
            final ScrollView scroll = (ScrollView) findViewById(R.id.scrollView1);
            final String[] saTopicTitlesArg = saTopicTitles;
            final String[] saTopicIDsArg = saTopicIDs;
            final String[] SaTopicDatesArg = saTopicDates;

            // Add selection event
            // NOTE(review): this listener performs blocking network calls (Jsoup, getSummary);
            // confirm that is acceptable on the thread that delivers selection events.
            spinner.setOnItemSelectedListener(new OnItemSelectedListener() {
                public void onItemSelected(AdapterView<?> arg0, View arg1, int arg2, long arg3) {
                    // Changing summary
                    loading = true;
                    showWaitingDialog();
                    // Update visibility of rating bar
                    final RatingBar rb = (RatingBar) findViewById(R.id.ratingBar);
                    rb.setRating(0.0f);
                    rb.setVisibility(View.VISIBLE);
                    final TextView rateLbl = (TextView) findViewById(R.id.rateLbl);
                    rateLbl.setVisibility(View.VISIBLE);
                    scroll.scrollTo(0, 0);

                    String UserSources = Urls.getUserVisibleURLsAsString(ViewActivity.this);

                    String[] saTopicIDs = saTopicIDsArg;

                    // track summary views per category and topic title
                    if (getAnalyticsPref()) {
                        EasyTracker.getTracker().sendEvent(VIEW_SUMMARY_ACTION, Category,
                                saTopicTitlesArg[arg2], 0L);
                    }

                    if (sCustomCategory.trim().length() > 0) {
                        // NOTE(review): when a custom category is configured but a different
                        // category is shown, neither branch below updates sText — confirm
                        // this is intended.
                        if (Category.equals(sCustomCategory)) {
                            Context ctxCur = NewSumUiActivity.getAppContext(ViewActivity.this);
                            String sCustomCategoryURL = ctxCur.getResources()
                                    .getString(R.string.custom_category_url);
                            // Check if specific element needs to be read
                            String sElementID = ctxCur.getResources()
                                    .getString(R.string.custom_category_elementId);
                            // If an element needs to be selected
                            if (sElementID.trim().length() > 0) {
                                try {
                                    // Label of the link back to the original page
                                    String sViewOriginalPage = ctxCur.getResources()
                                            .getString(R.string.custom_category_visit_source);
                                    // Init text by a link to the original page
                                    sText = "<p><a href='" + sCustomCategoryURL + "'>" + sViewOriginalPage
                                            + "</a></p>";
                                    // Get document
                                    Document doc = Jsoup.connect(sCustomCategoryURL).get();
                                    Element eCur = doc.getElementById(sElementID);
                                    if (eCur == null) {
                                        // getElementById yields null when the page has no such
                                        // element; treat that like an unavailable page instead
                                        // of failing with a NullPointerException.
                                        sText = ctxCur.getResources()
                                                .getString(R.string.custom_category_unavailable);
                                    } else if (eCur.tagName().equalsIgnoreCase("table")) {
                                        // Get table rows
                                        Elements eRows = eCur.select("tr");

                                        // For each row
                                        StringBuilder sTextBuf = new StringBuilder();
                                        for (Element eCurRow : eRows) {
                                            // Append content
                                            // TODO: Use HTML if possible. Now problematic (crashes when we click on link)
                                            sTextBuf.append("<p>").append(eCurRow.text()).append("</p>");
                                        }
                                        // Return as string
                                        sText = sText + sTextBuf.toString();
                                    } else {
                                        // else get text
                                        sText = eCur.text();
                                    }

                                } catch (IOException e) {
                                    // Show unavailable text
                                    sText = ctxCur.getResources()
                                            .getString(R.string.custom_category_unavailable);
                                    e.printStackTrace();
                                }

                            } else
                                sText = Utils.getFromHttp(sCustomCategoryURL, false);
                        }

                    } else {
                        // call getSummary with (sTopicID, sUserSources). Use "All" for
                        // all Sources
                        String[] Summary = NewSumServiceClient.getSummary(saTopicIDs[arg2], UserSources);
                        // check if Summary exists, otherwise display message
                        if (Summary.length == 0) { // DONE APPLICATION HANGS, DOES NOT
                            // WORK. Updated: Probably OK
                            nothingFound = true;
                            AlertDialog.Builder al = new AlertDialog.Builder(ViewActivity.this);
                            al.setMessage(R.string.shouldReloadSummaries);
                            al.setNeutralButton("Ok", new DialogInterface.OnClickListener() {
                                public void onClick(DialogInterface arg0, int arg1) {
                                    // Reset cache
                                    CacheController.clearCache();
                                    // Restart main activity
                                    startActivity(new Intent(getApplicationContext(), NewSumUiActivity.class)
                                            .setFlags(Intent.FLAG_ACTIVITY_CLEAR_TOP));
                                }
                            });
                            al.setCancelable(false);
                            al.show();
                            // Return to home activity
                            loading = false;
                            return;
                        }
                        // Generate Summary text for normal categories
                        sText = generateSummaryText(Summary, ViewActivity.this);
                        pText = generatesummarypost(Summary, ViewActivity.this);
                    }

                    // Update HTML
                    tx.setText(Html.fromHtml(sText));
                    // Allow links to be followed into browser
                    tx.setMovementMethod(LinkMovementMethod.getInstance());
                    // Also Add Date to Topic Title inside Summary
                    title.setText(saTopicTitlesArg[arg2] + " : " + SaTopicDatesArg[arg2]);

                    // Update size
                    updateTextSize();

                    // Update visited topics
                    TopicActivity.addVisitedTopicID(saTopicIDs[arg2]);
                    // Done
                    loading = false;
                    closeWaitingDialog();
                }

                @Override
                public void onNothingSelected(AdapterView<?> arg0) {
                }

            });

            runOnUiThread(new Runnable() {

                @Override
                public void run() {
                    // Get active topic
                    spinner.setSelection(num);
                }
            });

        }
    });

    runOnUiThread(new Runnable() {

        @Override
        public void run() {
            showHelpDialog();
        }
    });

    closeWaitingDialog();
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Builds a search field from an HTML form control.
 *
 * @param name  display name to give the field
 * @param hint  hint text; only applied to text fields
 * @param input the form control element
 * @return a text field for {@code <input type="text">}, a dropdown field (one value per
 *         {@code <option>}) for {@code <select>}, or {@code null} for any other element
 */
private SearchField createSearchField(String name, String hint, Element input) {
    final String tag = input.tagName();

    if (tag.equals("select")) {
        DropdownSearchField dropdown = new DropdownSearchField();
        dropdown.setDisplayName(name);
        dropdown.setId(input.attr("name"));
        // Each <option> contributes its value attribute and its visible text.
        for (Element choice : input.select("option")) {
            dropdown.addDropdownValue(choice.attr("value"), choice.text());
        }
        return dropdown;
    }

    if (tag.equals("input") && input.attr("type").equals("text")) {
        TextSearchField textField = new TextSearchField();
        textField.setDisplayName(name);
        textField.setHint(hint);
        textField.setId(input.attr("name"));
        return textField;
    }

    // Anything else is not supported.
    return null;
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Converts a dropdown element into a {@link DropdownSearchField} and appends it to
 * {@code fields}.
 *
 * <p>If the dropdown's parent contains a hidden input, that input's value becomes the field ID
 * and the field data is marked as a restriction; otherwise the dropdown's own {@code name}
 * attribute is used. The display name is the text of the label(s) found in the parent.
 *
 * @param dropdownElement the dropdown element whose options are read
 * @param fields          list that receives the new field
 * @throws JSONException declared because the field data is built from a JSON string
 */
private void parseDropdown(Element dropdownElement, List<SearchField> fields) throws JSONException {
    Elements choices = dropdownElement.select("option");
    DropdownSearchField result = new DropdownSearchField();

    // Look the hidden inputs up once and reuse the result for both the test and the ID.
    Elements hiddenInputs = dropdownElement.parent().select("input[type=hidden]");
    boolean restricted = hiddenInputs.size() > 0;

    result.setId(restricted ? hiddenInputs.attr("value") : dropdownElement.attr("name"));
    result.setData(new JSONObject(restricted ? "{\"restriction\": true}" : "{\"restriction\": false}"));

    for (Element choice : choices) {
        result.addDropdownValue(choice.attr("value"), choice.text());
    }

    result.setDisplayName(dropdownElement.parent().select("label").text());
    fields.add(result);
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Logs in and collects the account's lent items, reservations, pending fees and validity.
 *
 * <p>Three account pages are requested: {@code typ=1} (lent media), {@code typ=6} (ordered
 * media) and {@code typ=7} (prebooked media). For each, the first page is parsed and further
 * pages linked from the first {@code .box-right} element are fetched and parsed as well. The
 * counts shown in the {@code #label1}, {@code #label6} and {@code #label7} elements are only
 * used for sanity assertions.
 *
 * @param acc the account to log in with
 * @return the collected account data, or {@code null} if the login failed
 * @throws IOException        presumably when one of the HTTP requests fails
 * @throws JSONException      declared by this method; source not visible from here
 * @throws OpacErrorException declared by this method; source not visible from here
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    start(); // TODO: Is this necessary?

    int resultNum;

    if (!login(acc)) {
        return null;
    }

    // Lent media
    String html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=1", ENCODING);
    List<LentItem> medien = new ArrayList<>();
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    parse_medialist(medien, doc, 1);
    if (doc.select(".box-right").size() > 0) {
        for (Element link : doc.select(".box-right").first().select("a")) {
            String href = link.attr("abs:href");
            Map<String, String> hrefq = getQueryParamsFirst(href);
            if (hrefq == null || hrefq.get("methodToCall") == null) {
                continue;
            }
            // Page 1 was parsed above; only follow links to the other pages.
            if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) {
                html = httpGet(href, ENCODING);
                parse_medialist(medien, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos")));
            }
        }
    }
    if (doc.select("#label1").size() > 0) {
        resultNum = 0;
        String rNum = doc.select("#label1").first().text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1");
        // If the label has no "(n)" part, replaceAll returns the label unchanged; only parse
        // when the result really is a number so the sanity check cannot crash the method.
        if (rNum.matches("[0-9]+")) {
            resultNum = Integer.parseInt(rNum);
        }

        assert (resultNum == medien.size());
    }

    // Ordered media ("Bestellungen")
    html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=6", ENCODING);
    List<ReservedItem> reserved = new ArrayList<>();
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    parse_reslist("6", reserved, doc, 1);
    Elements label6 = doc.select("#label6");
    if (doc.select(".box-right").size() > 0) {
        for (Element link : doc.select(".box-right").first().select("a")) {
            String href = link.attr("abs:href");
            Map<String, String> hrefq = getQueryParamsFirst(href);
            if (hrefq == null || hrefq.get("methodToCall") == null) {
                // NOTE(review): the lent-media loop above uses `continue` here; confirm
                // whether stopping at the first such link is intended.
                break;
            }
            if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) {
                html = httpGet(href, ENCODING);
                parse_reslist("6", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos")));
            }
        }
    }

    // Prebooked media ("Vormerkungen")
    html = httpGet(opac_url + "/userAccount.do?methodToCall=showAccount&typ=7", ENCODING);
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    parse_reslist("7", reserved, doc, 1);
    if (doc.select(".box-right").size() > 0) {
        for (Element link : doc.select(".box-right").first().select("a")) {
            String href = link.attr("abs:href");
            Map<String, String> hrefq = getQueryParamsFirst(href);
            if (hrefq == null || hrefq.get("methodToCall") == null) {
                // NOTE(review): see the ordered-media loop; `break` vs `continue` differs
                // from the lent-media loop.
                break;
            }
            if (hrefq.get("methodToCall").equals("pos") && !"1".equals(hrefq.get("anzPos"))) {
                html = httpGet(href, ENCODING);
                parse_reslist("7", reserved, Jsoup.parse(html), Integer.parseInt(hrefq.get("anzPos")));
            }
        }
    }
    if (label6.size() > 0 && doc.select("#label7").size() > 0) {
        // The reserved list holds ordered and prebooked items, so both counts are summed.
        resultNum = 0;
        String rNum = label6.text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1");
        if (rNum.matches("[0-9]+")) {
            resultNum = Integer.parseInt(rNum);
        }
        rNum = doc.select("#label7").text().trim().replaceAll(".*\\(([0-9]*)\\).*", "$1");
        if (rNum.matches("[0-9]+")) {
            resultNum += Integer.parseInt(rNum);
        }
        assert (resultNum == reserved.size());
    }

    AccountData res = new AccountData(acc.getId());

    if (doc.select("#label8").size() > 0) {
        String text = doc.select("#label8").first().text().trim();
        if (text.matches("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)")) {
            text = text.replaceAll("Geb.+hren[^\\(]+\\(([0-9.,]+)[^0-9A-Z]*(|EUR|CHF|Fr)\\)", "$1 $2");
            res.setPendingFees(text);
        }
    }
    Pattern p = Pattern.compile("[^0-9.]*", Pattern.MULTILINE);
    if (doc.select(".box3").size() > 0) {
        for (Element box : doc.select(".box3")) {
            if (box.select("strong").size() == 1) {
                String text = box.select("strong").text();
                // The umlaut of this heading was lost in the previous literal, so it could
                // never match real page text. Match it tolerantly (as the fee pattern above
                // does) while still accepting the old spelling.
                if (text.matches("Jahresgeb.*hren")) {
                    text = box.text();
                    // Keep only digits and dots, i.e. the date part of the box text.
                    text = p.matcher(text).replaceAll("");
                    res.setValidUntil(text);
                }
            }

        }
    }

    res.setLent(medien);
    res.setReservations(reserved);
    return res;
}

From source file:ExtractorContentTest.java

/**
 * Reports whether the given row has no header text at all.
 *
 * @param row the element whose {@code th} descendants are inspected
 * @return {@code true} if every {@code th} selected under {@code row} has empty
 *         text (also when no {@code th} is selected); {@code false} as soon as
 *         one with non-empty text is found
 */
private boolean isEmpty(Element row) {
    boolean allBlank = true;
    for (Element cell : row.select("th")) {
        if (cell.text().length() > 0) {
            // One populated header cell is enough to decide.
            allBlank = false;
            break;
        }
    }
    return allBlank;
}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule7.java

/**
 * Extracts editors from the {@code dd} elements that follow a {@code dt}
 * containing "Authors/Editors" or "Author/Editor".
 *
 * <p>Each matching {@code dd} is either parsed as a single person or, when its
 * HTML contains {@code <br />} separators, as one person per separated piece.
 * Anchors found alongside a person are attached as e-mail (href containing
 * {@code @}) or as website. If a {@code dd} contains more than two " - "
 * separators, the whole document is handed to {@link EditorsRule5} instead.
 *
 * @param url the document URL, used only in the warning log message
 * @param doc the parsed document to search
 * @return the list of parsed editors, or {@code null} if no matching element
 *         exists or no editor could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();

    Elements editors = doc.select("dt:contains(Authors/Editors) ~ dd, dt:contains(Author/Editor) ~ dd");
    if (editors.isEmpty()) {
        return null;
    }

    boolean skip = false;
    for (Element editor : editors) {
        // A dd directly preceded by a dt with a different heading starts a run
        // of entries that belong to another section and must be skipped.
        Element prev = editor.previousElementSibling();
        if (prev != null && prev.tagName().equals("dt") && !isEditorsHeading(prev.text())) {
            skip = true;
        }

        if (skip) {
            // Stop skipping once the next sibling is an editors heading again.
            Element following = editor.nextElementSibling();
            if (following != null && isEditorsHeading(following.text())) {
                skip = false;
            }
            continue;
        }

        if (StringUtils.countMatches(editor.text(), " - ") > 2) {
            Log.log("warning", url + ": This editor may be a list of editors separated by  - ");
            EditorsRule5 ed5 = new EditorsRule5();

            return ed5.run(url, doc);
        }

        String[] splitted = editor.html().split("<br />|<br clear=\"none\" />");

        if (splitted.length < 2) {
            // Single entry: parse the whole dd as one person.
            String editorText = editor.text();
            if (!editorText.equals("WHATWG:") && !editorText.equals("W3C:")) {
                Person result = NameParser.parse(editorText);
                if (result != null) {
                    applyAnchorLinks(result, editor.select("a"));
                    editorList.add(result);
                }
            }
            // NOTE: the original used 'continue' here for the label / unparsable
            // cases, which also bypassed the trailing dt check below; that is kept.
            if (editorText.equals("WHATWG:") || editorText.equals("W3C:")
                    || NameParser.parse(editorText) == null) {
                continue;
            }
        } else {
            // Several entries separated by line breaks: parse each piece on its own.
            for (String split : splitted) {
                if (split.isEmpty() || split.equals("WHATWG:") || split.equals("W3C:")) {
                    continue;
                }
                Document newdoc = Jsoup.parse(split.replaceAll("\n", ""));
                Person result = NameParser.parse(newdoc.text());
                if (result == null) {
                    continue;
                }

                applyAnchorLinks(result, newdoc.select("a"));
                editorList.add(result);
            }
        }

        // A following dt marks the start of the next definition group.
        Element next = editor.nextElementSibling();
        if (next != null && next.tag().getName().equals("dt")) {
            break;
        }
    }

    if (editorList.isEmpty()) {
        return null;
    }

    return editorList;
}

/**
 * Tells whether the given heading text starts with "authors/editors" or
 * "author/editor", ignoring case and surrounding whitespace.
 *
 * <p>Lower-casing uses {@code Locale.ROOT} so the comparison does not depend
 * on the JVM default locale.
 */
private boolean isEditorsHeading(String text) {
    String normalized = text.trim().toLowerCase(java.util.Locale.ROOT);
    return normalized.startsWith("authors/editors") || normalized.startsWith("author/editor");
}

/**
 * Attaches every non-empty {@code href} of the given anchors to the person:
 * as e-mail (with "mailto:" removed) when it contains {@code @}, otherwise
 * as website.
 */
private void applyAnchorLinks(Person person, Elements anchors) {
    for (Element anchor : anchors) {
        String href = anchor.attr("href");
        if (href.isEmpty()) {
            continue;
        }
        if (href.contains("@")) {
            person.setEmail(href.replace("mailto:", ""));
        } else {
            person.addWebsite(href);
        }
    }
}