Example usage for java.util.regex PatternSyntaxException printStackTrace

List of usage examples for the printStackTrace() method of java.util.regex.PatternSyntaxException

Introduction

On this page you can find usage examples for the printStackTrace() method of java.util.regex.PatternSyntaxException.

Prototype

public void printStackTrace() 

Source Link

Document

Prints this throwable and its backtrace to the standard error stream.

Usage

From source file:org.paxle.filter.blacklist.impl.Blacklist.java

/**
 * Compiles the given regular expression, registers it in the blacklist map
 * under its source string and persists the list through {@code store()}.
 * The write lock is held for the whole operation.
 *
 * @param pattern the regular expression to add
 * @return the result of {@code store()}, or {@code false} if a
 *         {@link PatternSyntaxException} was raised while adding the pattern
 * @see org.paxle.filter.blacklist.IBlacklist#addPattern(String)
 */
public boolean addPattern(String pattern) {
    lock.writeLock().lock();
    try {
        final Pattern compiled = Pattern.compile(pattern);
        blacklist.put(pattern, compiled);
        // Persist the updated blacklist; the store's verdict is our result.
        final boolean stored = this.store();
        return stored;
    } catch (PatternSyntaxException syntaxError) {
        syntaxError.printStackTrace();
        return false;
    } finally {
        lock.writeLock().unlock();
    }
}

From source file:fr.eurecom.nerd.core.proxy.SaploClient.java

/**
 * Turns the tags reported by the extractor into {@code TEntity} objects and
 * computes, for every tag, its character offsets inside {@code text}.
 *
 * <p>The n-th report of a label is searched as a whole word ({@code \b}
 * anchors) among the occurrences of that label in the text; positions already
 * present in the result (per {@code containsAtIndex}) do not count towards n.
 * A tag whose label cannot be found in the text produces no entity.
 *
 * @param tags  the tags returned by the service
 * @param text  the document the tags were extracted from
 * @param otype the ontology used to map the service category to a NERD type
 * @return the located entities, in tag order
 */
private List<TEntity> parse(List<SaploTag> tags, String text, OntologyType otype) {
    List<TEntity> result = new LinkedList<TEntity>();
    // how many times each label has been reported so far
    Map<String, Integer> map = new HashMap<String, Integer>();

    for (SaploTag tag : tags) {
        String label = tag.getTagWord();
        String type = tag.getCategory().toString();
        String nerdType = OntoFactory.mapper.getNerdType(otype, label, SOURCE, type).toString();
        Double confidence = tag.getRelevance();

        Integer seen = map.get(label);
        int occurrence = (seen == null) ? 1 : seen + 1;
        map.put(label, occurrence);

        // The label is literal text, not a regular expression: quote it so that
        // metacharacters such as '+', '(' or '.' neither break compilation nor
        // change what is matched. String.valueOf keeps the previous handling of
        // a null label (it was searched as the text "null").
        Pattern p = Pattern.compile("\\b" + Pattern.quote(String.valueOf(label)) + "\\b");
        Matcher m = p.matcher(text);

        //logic to compute the startchar and endchar of the entity within the text
        Integer startchar = null, endchar = null;
        for (int j = 0; j < occurrence && m.find(); j++) {
            startchar = m.start(0);
            endchar = m.end(0);
            // an occurrence that is already taken does not count
            if (containsAtIndex(result, startchar, endchar))
                j--;
        }

        if (startchar != null && endchar != null) {
            TEntity extraction = new TEntity(label, type, null, nerdType, startchar, endchar,
                    confidence, SOURCE);

            result.add(extraction);
        }
    }

    return result;
}

From source file:fr.eurecom.nerd.core.proxy.WikimetaClient.java

/**
 * Parses the JSON answer of the service and converts its named entities into
 * {@code TEntity} objects, computing each entity's character offsets inside
 * {@code text}.
 *
 * <p>The entities are read from the {@code "Named Entities"} array of the
 * third element of the top-level {@code "document"} array. The n-th report of
 * an entity string is searched as a whole word among its occurrences in the
 * text; positions already present in the result (per {@code containsAtIndex})
 * do not count towards n. If the JSON is malformed, the stack trace is printed
 * and the entities collected so far are returned.
 *
 * @param json  the raw JSON document returned by the service
 * @param text  the document the entities were extracted from
 * @param otype the ontology used to map the service type to a NERD type
 * @return the located entities, in the order they appear in the JSON
 * @throws IOException declared by the signature; not thrown by any statement
 *                     visible in this method
 */
public List<TEntity> parse(String json, String text, OntologyType otype) throws IOException {
    List<TEntity> result = new LinkedList<TEntity>();
    // how many times each entity string has been reported so far
    Map<String, Integer> map = new HashMap<String, Integer>();

    try {
        JSONObject o = new JSONObject(json);
        JSONArray jadocument = o.getJSONArray("document");

        // the third item of the document array holds the named entities
        JSONObject jodocument = jadocument.getJSONObject(2);
        JSONArray jsonarray = jodocument.getJSONArray("Named Entities");

        for (int i = 0; i < jsonarray.length(); i++) {
            JSONObject jo = jsonarray.getJSONObject(i);
            String entity = jo.getString("EN");
            String rawType = jo.getString("type");
            String type = rawType.equals("") ? null : rawType;
            String nerdType = OntoFactory.mapper.getNerdType(otype, entity, SOURCE, type).toString();
            String uri = jo.getString("URI");

            Integer seen = map.get(entity);
            int occurrence = (seen == null) ? 1 : seen + 1;
            map.put(entity, occurrence);

            // The entity is literal text, not a regular expression: quote it so
            // that metacharacters such as '+', '(' or '.' neither break
            // compilation nor change what is matched. String.valueOf keeps the
            // previous handling of a null entity (searched as the text "null").
            Pattern p = Pattern.compile("\\b" + Pattern.quote(String.valueOf(entity)) + "\\b");
            Matcher m = p.matcher(text);

            //logic to compute the startchar and endchar of the entity within the text
            Integer startchar = null, endchar = null;
            for (int j = 0; j < occurrence && m.find(); j++) {
                startchar = m.start(0);
                endchar = m.end(0);
                // an occurrence that is already taken does not count
                if (containsAtIndex(result, startchar, endchar))
                    j--;
            }

            // an empty score string means "no score": fall back to 0.0
            double confidence = 0.0;
            String rawConfidence = jo.getString("confidenceScore");
            if (!rawConfidence.equals(""))
                confidence = Double.parseDouble(rawConfidence);

            if (startchar != null && endchar != null) {
                TEntity extraction = new TEntity(entity, type, uri, nerdType, startchar, endchar,
                        confidence, SOURCE);

                result.add(extraction);
            }
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return result;
}

From source file:fr.eurecom.nerd.core.proxy.ExtractivClient.java

/**
 * Sends {@code text} to the extraction service, parses the JSON answer and
 * converts the reported entities into {@code TEntity} objects, computing each
 * entity's character offsets inside {@code text}.
 *
 * <p>The n-th report of a label is searched as a whole word among its
 * occurrences in the text; positions already present in the result (per
 * {@code containsAtIndex}) do not count towards n. Every entity gets the fixed
 * confidence 0.5. URI, input and file-not-found errors are printed and the
 * entities collected so far are returned.
 *
 * @param text       the document to analyse
 * @param serviceKey the key passed to the service request
 * @param otype      the ontology used to map the service type to a NERD type
 * @return the located entities, possibly empty
 */
private List<TEntity> parse(String text, String serviceKey, OntologyType otype) {
    List<TEntity> result = new LinkedList<TEntity>();
    try {
        URI endpoint = new URI(EXTRACTIV_SERVER_LOCATION);
        HttpMethodBase extractivRequest = getExtractivProcessString(endpoint, text, serviceKey);
        InputStream extractivResults = fetchHttpRequest(extractivRequest);
        try {
            Readable jsonReadable = new InputStreamReader(extractivResults);
            ExtractivJSONParser jsonParser = new ExtractivJSONParser(jsonReadable);

            // how many times each label has been reported so far
            Map<String, Integer> map = new HashMap<String, Integer>();
            for (Document document : jsonParser)
                for (com.extractiv.Entity item : document.getEntities()) {
                    String label = item.asString();
                    String type = item.getType();
                    String nerdType = OntoFactory.mapper.getNerdType(otype, label, SOURCE, type).toString();
                    String uri = (item.getLinks().size() > 0) ? item.getLinks().get(0) : "null";

                    Integer seen = map.get(label);
                    int occurrence = (seen == null) ? 1 : seen + 1;
                    map.put(label, occurrence);

                    // The label is literal text, not a regular expression: quote
                    // it so that metacharacters such as '+', '(' or '.' neither
                    // break compilation nor change what is matched.
                    // String.valueOf keeps the previous handling of a null label
                    // (it was searched as the text "null").
                    Pattern p = Pattern.compile("\\b" + Pattern.quote(String.valueOf(label)) + "\\b");
                    Matcher m = p.matcher(text);

                    //logic to compute the startchar and endchar of the entity within the text
                    Integer startchar = null, endchar = null;
                    for (int j = 0; j < occurrence && m.find(); j++) {
                        startchar = m.start(0);
                        endchar = m.end(0);
                        // an occurrence that is already taken does not count
                        if (containsAtIndex(result, startchar, endchar))
                            j--;
                    }

                    Double confidence = 0.5;

                    if (startchar != null && endchar != null) {
                        TEntity extraction = new TEntity(label, type, uri, nerdType, startchar,
                                endchar, confidence, SOURCE);

                        result.add(extraction);
                    }
                }
        } finally {
            // The response stream was previously never closed.
            if (extractivResults != null) {
                try {
                    extractivResults.close();
                } catch (java.io.IOException ignored) {
                    // nothing useful can be done if closing fails; the entities
                    // have already been collected
                }
            }
        }
    } catch (URISyntaxException e) {
        e.printStackTrace();
    } catch (BadInputException e) {
        e.printStackTrace();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }

    return result;
}

From source file:net.bioclipse.pubchem.business.PubChemManager.java

/**
 * Downloads the resource at {@code URL} with an HTTP GET request and returns
 * the response body as a string (decoded with the platform default charset,
 * as before).
 *
 * <p>The response stream is closed and the connection released even when
 * reading fails. A response without a body stream yields the empty string.
 *
 * @param URL     the address to download from
 * @param accepts value sent as both the {@code Accept} and the
 *                {@code Content-Type} request header; {@code null} to send neither
 * @param monitor progress monitor; {@code null} is replaced by a
 *                {@code NullProgressMonitor}
 * @return the response body, or {@code ""} if there was none
 * @throws IOException        if the request or reading the response fails
 * @throws BioclipseException if a {@code PatternSyntaxException} or
 *                            {@code MalformedURLException} is raised
 * @throws CoreException      declared by the signature; not thrown by any
 *                            statement visible in this method
 */
private String downloadAsString(String URL, String accepts, IProgressMonitor monitor)
        throws IOException, BioclipseException, CoreException {
    if (monitor == null)
        monitor = new NullProgressMonitor();

    String fileContent = "";
    try {
        monitor.subTask("Downloading from " + URL);
        HttpClient client = new HttpClient();
        GetMethod method = new GetMethod(URL);
        try {
            if (accepts != null) {
                method.setRequestHeader("Accept", accepts);
                method.setRequestHeader("Content-Type", accepts);
            }
            client.executeMethod(method);

            InputStream responseStream = method.getResponseBodyAsStream();
            // guard against a response that carries no body stream
            if (responseStream != null) {
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                try {
                    int nRead;
                    byte[] data = new byte[16384];
                    while ((nRead = responseStream.read(data, 0, data.length)) != -1) {
                        buffer.write(data, 0, nRead);
                    }
                    buffer.flush();
                } finally {
                    responseStream.close();
                }
                fileContent = new String(buffer.toByteArray());
            }
        } finally {
            // always hand the connection back, also when the transfer failed
            method.releaseConnection();
        }
        monitor.worked(1);
    } catch (PatternSyntaxException exception) {
        exception.printStackTrace();
        throw new BioclipseException("Invalid Pattern.", exception);
    } catch (MalformedURLException exception) {
        exception.printStackTrace();
        throw new BioclipseException("Invalid URL.", exception);
    }
    return fileContent;
}

From source file:org.paxle.filter.blacklist.impl.Blacklist.java

/**
 * New blacklist object. Every pattern is compiled and kept under its source
 * string; patterns that are not valid regular expressions are logged and
 * skipped. Finally the blacklist store is updated with this object.
 *
 * @param name the name of the blacklist
 * @param patterns the patterns the list consists of
 * @param blacklistStore the store for the blacklists
 * @throws InvalidBlacklistnameException declared by the signature; no statement
 *         visible in this constructor throws it directly
 * @throws IllegalArgumentException if the store already holds a blacklist with this name
 */
public Blacklist(String name, Collection<String> patterns, IBlacklistStore blacklistStore)
        throws InvalidBlacklistnameException {
    this.name = name;
    this.blacklistStore = blacklistStore;
    // check for uniqueness of this object
    if (this.blacklistStore.getBlacklist(this.name) != null) {
        throw new IllegalArgumentException("Blacklist-object does already exist!");
    }
    blacklist = new ConcurrentHashMap<String, Pattern>();
    for (String pattern : patterns) {
        try {
            blacklist.put(pattern, Pattern.compile(pattern));
        } catch (PatternSyntaxException e) {
            // hand the exception to the logger instead of dumping it on stderr,
            // so the stack trace ends up next to the warning it belongs to
            logger.warn("Invalid blacklistpattern " + pattern + " in file " + name
                    + ", it will be ignored and a version without this pattern will be saved", e);
        }
    }
    this.blacklistStore.updateBlacklist(this);
}

From source file:de.unibi.techfak.bibiserv.BiBiToolsTest.java

/**
 * Checks that {@code checkAndParseParam} translates the sample parameter
 * document into the expected command line fragments, first for
 * {@code parametertesttool_function_0} (primitive parameters, 10 entries) and
 * then for {@code parametertesttool_function_1} (enum parameters, 6 entries).
 *
 * @throws Exception if set-up or parameter parsing fails; any exception fails the test
 */
@Test
public void testCheckAndParseParam() throws Exception {
    LOG.info("+++++++++++++++++++\ntest checkAndParseParam\n+++++++++++++++++++\n");

    // initialize BiBiTools object
    InputStream is = new FileInputStream(TEST);
    BiBiTools bibitools = new BiBiTools(is);

    //  create new LogShed
    LogShed logshed = new LogShed();

    // load sample parameter
    InputStream checkAndParseParamInput = new FileInputStream(PARAM_TEST);

    List<Pair<String, String>> list_param = BiBiTools.paramdoc2List(db.parse(checkAndParseParamInput));

    // create parameter hash using checkAndParseParam ...
    Map<String, String> paramhash = bibitools.checkAndParseParam(list_param, "parametertesttool_function_0",
            logshed);
    // print out logshed messages
    LOG.info("logshed messages : \n" + logshed.toString());

    /*<param>
    <parametertesttool_param_boolean>true</parametertesttool_param_boolean>
    <parametertesttool_param_boolean2>false</parametertesttool_param_boolean2>
    <parametertesttool_param_int>42</parametertesttool_param_int>
    <!-- <parametertesttool_param_int_min_max_default>50</parametertesttool_param_int_min_max_default> -->
    <parametertesttool_param_float>0.5</parametertesttool_param_float>
    <!-- <parametertesttool_param_float_max_min_default>99.9</parametertesttool_param_float_max_min_default> -->
    <parametertesttool_param_string>test</parametertesttool_param_string>
    <parametertesttool_param_string_regexp>32278 Kirchlengern</parametertesttool_param_string_regexp>
    <parametertesttool_param_string_min_max>abcdefghijklmn</parametertesttool_param_string_min_max>
    <!-- <parametertesttool_param_string_regexp_default>ACGU</parametertesttool_param_string_regexp_default> -->
    ... */
    /*parametertesttool_param_boolean - true */
    if (paramhash.containsKey("parametertesttool_param_boolean")
            && paramhash.get("parametertesttool_param_boolean").trim().equals("-boolean")) {
        LOG.info("Found : \"parametertesttool_param_boolean\"-\"-boolean\"");
    } else {
        fail("Expected : \"parametertesttool_param_boolean\"-\"-boolean\"");
    }

    /*parametertesttool_param_boolean2 - false*/
    if (paramhash.containsKey("parametertesttool_param_boolean2")
            && paramhash.get("parametertesttool_param_boolean2").trim().equals("")) {
        LOG.info("Found : \"parametertesttool_param_boolean2\"-\"\"");
    } else {
        fail("Expected : \"parametertesttool_param_boolean2\"-\"\"");
    }

    /*parametertesttool_param_int - 42 */
    if (paramhash.containsKey("parametertesttool_param_int")
            && paramhash.get("parametertesttool_param_int").trim().equals("-int42")) {
        LOG.info("Found :\"parametertesttool_param_int\"-\"-int42\"");
    } else {
        fail("Expected :\"parametertesttool_param_int\"-\"-int42\"");
    }

    /* parametertesttool_param_int_min_max_default - default : 50 // commented out in source, must be inserted by the function because a default value is set */
    if (paramhash.containsKey("parametertesttool_param_int_min_max_default") && paramhash
            .get("parametertesttool_param_int_min_max_default").trim().equals("-int_max_min_default50")) {
        LOG.info("Found :\"parametertesttool_param_int_min_max_default\"-\"-int_max_min_default50\"");
    } else {
        fail("Expected :\"parametertesttool_param_int_min_max_default\"-\"-int_max_min_default50\"");
    }

    /* parametertesttool_param_float - 0.5 */
    if (paramhash.containsKey("parametertesttool_param_float")
            && paramhash.get("parametertesttool_param_float").trim().equals("-float0.5")) {
        LOG.info("Found :\"parametertesttool_param_float\"-\"-float0.5\"");
    } else {
        fail("Expected :\"parametertesttool_param_float\"-\"-float0.5\"");
    }

    /* parametertesttool_param_float_max_min_default - default : 99.9 // commented out in source, must be inserted by the function because a default value is set */
    if (paramhash.containsKey("parametertesttool_param_float_max_min_default") && paramhash
            .get("parametertesttool_param_float_max_min_default").trim().equals("-float_min_max_default99.9")) {
        LOG.info("Found :\"parametertesttool_param_float_max_min_default\"-\"-float_min_max_default99.9\"");
    } else {
        // the failure message used to name parametertesttool_param_float (copy-paste slip)
        fail("Expected :\"parametertesttool_param_float_max_min_default\"-\"-float_min_max_default99.9\"");
    }

    /* parametertesttool_param_string - test */
    if (paramhash.containsKey("parametertesttool_param_string")
            && paramhash.get("parametertesttool_param_string").trim().equals("-stringtest")) {
        LOG.info("Found :\"parametertesttool_param_string\"-\"-stringtest\"");
    } else {
        fail("Expected :\"parametertesttool_param_string\"-\"-stringtest\"");
    }

    /* parametertesttool_param_string_regexp - 32278 Kirchlengern */
    if (paramhash.containsKey("parametertesttool_param_string_regexp") && paramhash
            .get("parametertesttool_param_string_regexp").trim().equals("-string_regexp32278 Kirchlengern")) {
        LOG.info("Found :\"parametertesttool_param_string_regexp\"-\"-string_regexp32278 Kirchlengern\"");
    } else {
        fail("Expected :\"parametertesttool_param_string_regexp\"-\"-string_regexp32278 Kirchlengern\"");
    }

    /* parametertesttool_param_string_min_max - abcdefghijklmn // no option*/
    if (paramhash.containsKey("parametertesttool_param_string_min_max")
            && paramhash.get("parametertesttool_param_string_min_max").trim().equals("abcdefghijklmn")) {
        LOG.info("Found :\"parametertesttool_param_string_min_max\"-\"abcdefghijklmn\"");
    } else {
        fail("Expected :\"parametertesttool_param_string_min_max\"-\"abcdefghijklmn\"");
    }

    /* parametertesttool_param_string_regexp_default - default : ACGU // commented out in source, must be inserted by the function because a default value is set */
    if (paramhash.containsKey("parametertesttool_param_string_regexp_default") && paramhash
            .get("parametertesttool_param_string_regexp_default").trim().equals("-string_regexp_defaultACGU")) {
        LOG.info("Found :\"parametertesttool_param_string_regexp_default\"-\"-string_regexp_defaultACGU\"");
    } else {
        fail("Expected :\"parametertesttool_param_string_regexp_default\"-\"-string_regexp_defaultACGU\"");
    }

    if (paramhash.size() != 10) {
        fail("Expected 10 'normal' parameter  (key/value pairs), found " + paramhash.size());
    }

    /* test function_1 with enum parameter */

    /* ...
           <parametertesttool_enum_selectoneradio>DE</parametertesttool_enum_selectoneradio>
           <parametertesttool_enum_selectonelistbox>NL</parametertesttool_enum_selectonelistbox>
           <parametertesttool_enum_selectonemenu>US</parametertesttool_enum_selectonemenu>
           <parametertesttool_enum_selectmanycheckbox>DE NL US</parametertesttool_enum_selectmanycheckbox>
           <!-- <parametertesttool_enum_selectmanylistbox>DE US SE</parametertesttool_enum_selectmanylistbox> -->
           <parametertesttool_enum_selectmanymenu>DE SE</parametertesttool_enum_selectmanymenu>
           </param>*/
    logshed = new LogShed();
    // create parameter hash using checkAndParseParam ...
    // A PatternSyntaxException is no longer caught here: swallowing it left
    // paramhash pointing at the function_0 result, so the checks below failed
    // with misleading messages. Letting it propagate fails the test at the cause.
    paramhash = bibitools.checkAndParseParam(list_param, "parametertesttool_function_1", logshed);
    // print out logshed messages
    LOG.info("logshed messages : \n" + logshed.toString());

    /* parametertesttool_enum_selectoneradio - DE*/
    if (paramhash.containsKey("parametertesttool_enum_selectoneradio")
            && paramhash.get("parametertesttool_enum_selectoneradio").trim().equals("-lang_sor DE")) {
        LOG.info("Found :\"parametertesttool_enum_selectoneradio\"-\"-lang_sor DE\"");
    } else {
        fail("Expected :\"parametertesttool_enum_selectoneradio\"-\"-lang_sor DE\"");
    }

    /* parametertesttool_enum_selectonelistbox - NL // default US*/
    if (paramhash.containsKey("parametertesttool_enum_selectonelistbox")
            && paramhash.get("parametertesttool_enum_selectonelistbox").trim().equals("-lang_sol NL")) {
        LOG.info("Found :\"parametertesttool_enum_selectonelistbox\" - \"-lang_sol NL\"");
    } else {
        fail("Expected :\"parametertesttool_enum_selectonelistbox\" - \"-lang_sol NL\"");
    }

    /* parametertesttool_enum_selectonemenu - US */
    if (paramhash.containsKey("parametertesttool_enum_selectonemenu")
            && paramhash.get("parametertesttool_enum_selectonemenu").trim().equals("-lang_som US")) {
        LOG.info("Found :\"parametertesttool_enum_selectonemenu\" - \"-lang_som US\"");
    } else {
        fail("Expected :\"parametertesttool_enum_selectonemenu\" - \"-lang_som US\"");
    }

    /* parametertesttool_enum_selectmanycheckbox -DE NL US // maxoccurs 3 , separator ','*/
    if (paramhash.containsKey("parametertesttool_enum_selectmanycheckbox")
            && paramhash.get("parametertesttool_enum_selectmanycheckbox").trim().equals("-lang_smc DE,NL,US")) {
        LOG.info("Found :\"parametertesttool_enum_selectmanycheckbox\" - \"-lang_smc DE,NL,US\"");
    } else {
        fail("Expected :\"parametertesttool_enum_selectmanycheckbox\" - \"-lang_smc DE,NL,US\"");
    }
    /* parametertesttool_enum_selectmanylistbox - default DE US SE, maxOccurs 3, separator ',' */
    if (paramhash.containsKey("parametertesttool_enum_selectmanylistbox")
            && paramhash.get("parametertesttool_enum_selectmanylistbox").trim().equals("-lang_sml DE,US,SE")) {
        LOG.info("Found :\"parametertesttool_enum_selectmanylistbox\" - \"-lang_sml DE,US,SE\"");
    } else {
        fail("Expected :\"parametertesttool_enum_selectmanylistbox\" - \"-lang_sml DE,US,SE\"");
    }

    /* parametertesttool_enum_selectmanymenu - DE SE // maxoccurs 3, separator '+', prefix '[', suffix ']' */
    if (paramhash.containsKey("parametertesttool_enum_selectmanymenu")
            && paramhash.get("parametertesttool_enum_selectmanymenu").trim().equals("-lang_smm [DE+SE]")) {
        LOG.info("Found :\"parametertesttool_enum_selectmanymenu\" - \"-lang_smm [DE+SE]\"");
    } else {
        fail("Expected :\"parametertesttool_enum_selectmanymenu\" - \"-lang_smm [DE+SE]\"");
    }

    if (paramhash.size() != 6) {
        fail("Expected 6 'normal' parameter  (key/value pairs), found " + paramhash.size());
    }
}

From source file:org.epop.dataprovider.acmdigital.ACMDigitalLibrarySearch.java

/**
 * Extracts the bibliographic data of one search-result entry: title, page
 * link and ACM id, authors, publication context, citation count and year.
 * If the entry has a page link, its detail page is then loaded (after a fixed
 * pause) to read the abstract and the full-text link.
 *
 * @param element
 *            Element that contains the HTML element to analyze
 * @return Paper Object that contains the extracted parameters, or
 *         {@code null} if the id/citation patterns cannot be compiled or if
 *         no title or no authors were found
 */
private Literature extractPaper(Element element) {

    try {
        if (AUTHOR_ID_PATTERN == null)
            AUTHOR_ID_PATTERN = Pattern.compile(AUTHOR_ID_PATTERN_STRING);
        if (CITATION_COUNT_PATTERN == null)
            CITATION_COUNT_PATTERN = Pattern.compile(CITATION_COUNT_PATTERN_STRING);
    } catch (PatternSyntaxException e) {
        // TODO implement error handling
        e.printStackTrace();
        return null;
    }

    LiteratureBuilder builder = new LiteratureBuilder();

    /*
     * extract paperTitle: analyze all the elements with tag "a", attribute
     * class="medium-text" and extract the title
     */
    URI pageURI = null;
    for (Element a : element.getAllElements(HTMLElementName.A)) {
        Attribute classAttr = a.getStartTag().getAttributes().get("class");
        if (classAttr != null) {
            if (classAttr.getValue().equals("medium-text")) {
                Source htmlSource = new Source(a.getContent().toString());
                String title = StringUtils.formatInLineSingleSpace(htmlSource.getTextExtractor().toString());
                logger.debug(title);
                builder.setTitle(title);
                String href = a.getAttributeValue("href");
                if (builder.getWebsiteURLs() == null)
                    builder.setWebsiteURLs(new HashSet<Link>());
                try {
                    pageURI = new URI(DOMAIN + href + "&preflayout=flat");
                    builder.getWebsiteURLs().add(new Link("ACM", pageURI));
                } catch (URISyntaxException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
                try {
                    if (ID_PATTERN == null)
                        ID_PATTERN = Pattern.compile(ID_PATTERN_STRING);
                    Matcher idMatcher = ID_PATTERN.matcher(href);
                    if (idMatcher.find()) {
                        builder.setAcmID(idMatcher.group(1));
                    }
                    // else
                    // TODO error handling
                } catch (PatternSyntaxException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
    }

    /*
     * extract paperAuthors: analyze all the elements with tag "div",
     * attribute class="authors" and extract the authors
     */
    for (Element s : element.getAllElements(HTMLElementName.DIV)) {
        Attribute classAttr = s.getStartTag().getAttributes().get("class");
        if (classAttr != null) {
            if (classAttr.getValue().equals("authors")) {
                Set<Author> authors = new HashSet<>();
                for (Element q : s.getAllElements(HTMLElementName.A)) {
                    Source htmlSource = new Source(q.getContent().toString());
                    AuthorBuilder authBuilder = new AuthorBuilder();
                    String paperAuthor = StringUtils
                            .formatInLineSingleSpace(htmlSource.getTextExtractor().toString());
                    try {
                        Utils.setFirstMiddleLastNameFromNameString(authBuilder, paperAuthor);
                        Matcher matcher = AUTHOR_ID_PATTERN.matcher(q.getAttributeValue("href"));
                        if (matcher.find()) {
                            authBuilder.setAcmID(matcher.group(1));
                        } else {
                            // TODO error handling
                        }
                        authors.add(authBuilder.getObject());
                    } catch (PatternMismatchException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                }
                builder.setAuthors(authors);
                /*
                 * extract paperPlace: analyze all the elements with tag
                 * "div", attribute class="addinfo" and extract the place
                 */
            } else if (classAttr.getValue().equals("addinfo")) {
                for (Element g : s.getAllElements(HTMLElementName.DIV)) {
                    Source htmlSource = new Source(g.getContent().toString());
                    String paperPlace = StringUtils
                            .formatInLineSingleSpace(htmlSource.getTextExtractor().toString());
                    logger.debug(paperPlace);
                    builder.setPublicationContext(paperPlace);
                }
            }
        }
    }

    final String[] monthNames = { "January", "February", "March", "April", "May", "June", "July", "August",
            "September", "October", "November", "December" };
    for (Element y : element.getAllElements(HTMLElementName.TD)) {
        // render the cell once instead of once per keyword test
        String cellSource = y.toString();

        /*
         * extract citedInfo: a "td" that contains the string "Bibliometrics"
         * is matched against the citation count pattern
         */
        if (cellSource.contains("Bibliometrics")) {
            Matcher matcher = CITATION_COUNT_PATTERN.matcher(y.getContent());
            if (matcher.find()) {
                builder.setAcmNumCitations(Integer.parseInt(matcher.group(1)));
            } else {
                // TODO error handling
            }
        }

        /*
         * extract venueYear: a "td" that contains a month name is handed to
         * ExtractYear to extract the year
         */
        boolean mentionsMonth = false;
        for (String month : monthNames) {
            if (cellSource.contains(month)) {
                mentionsMonth = true;
                break;
            }
        }
        if (mentionsMonth) {
            String s = y.getContent().toString();
            String venueYear = ExtractYear(s);
            logger.debug(venueYear);
            if (!venueYear.isEmpty()) {
                builder.setYear(Integer.parseInt(venueYear));
            }

        }
    }

    // Without a page link there is nothing to fetch, so skip the pause and the
    // request. NOTE(review): how HTMLPage treats a null URI is not visible
    // here; this guard avoids depending on it.
    if (pageURI != null) {
        try {
            try {
                Thread.sleep(2861);
            } catch (InterruptedException e1) {
                e1.printStackTrace();
                // restore the interrupt flag so callers can still see the request
                Thread.currentThread().interrupt();
            }
            HTMLPage entryPage = new HTMLPage(pageURI);
            try {
                Node abstractTextNode = entryPage.getNodeByXPath("//*[@id='fback']/div[3]/div[1]");
                if (abstractTextNode != null)
                    builder.setAbstractText(abstractTextNode.getTextContent());
            } catch (XPathExpressionException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            try {
                Node fullTextLinkNode = entryPage.getNodeByXPath(
                        "//*[@id='divmain']/table[1]/tbody/tr/td[1]/table[1]/tbody/tr/td[2]/a[@name='FullTextPDF']/@href");
                if (fullTextLinkNode != null) {
                    String href = fullTextLinkNode.getTextContent(); // ft_gateway.cfm?id=1150304&ftid=371641&dwn=1&CFID=553795364&CFTOKEN=65402853
                    if (builder.getFulltextURLs() == null)
                        builder.setFulltextURLs(new HashSet<Link>());
                    URI fulltextURI = new URI(DOMAIN + href);
                    builder.getFulltextURLs().add(new Link("ACM", fulltextURI));
                }
            } catch (XPathExpressionException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            } catch (URISyntaxException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        } catch (IOException | ParserConfigurationException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    // return a new Paper only if both a title and authors were found
    if (builder.getTitle() != null && builder.getAuthors() != null) {
        return builder.getObject();
    } else {
        return null;
    }

}

From source file:edu.harvard.iq.dvn.core.web.subsetting.AnalysisPage.java

/**
 * Tells whether {@code regex} finds a match anywhere in {@code newVarName}.
 *
 * @param newVarName the candidate variable name
 * @param regex      the regular expression a valid name must contain a match for
 * @return {@code true} if the expression matches somewhere in the name;
 *         {@code false} if it does not, or if {@code regex} is not a valid
 *         regular expression (the stack trace is printed in that case)
 */
public boolean isVariableNameValid(String newVarName, String regex) {
    Pattern p;
    try {
        p = Pattern.compile(regex);
    } catch (PatternSyntaxException pex) {
        pex.printStackTrace();
        // Without a usable pattern nothing can be validated. Previously the
        // code went on and failed with a NullPointerException on the null pattern.
        return false;
    }
    return p.matcher(newVarName).find();
}

From source file:edu.harvard.iq.dvn.core.web.subsetting.AnalysisPage.java

/**
 * Returns true if an end-user's browser is Firefox, i.e. if the user-agent
 * string contains "Firefox" in any letter case.
 *
 * @param userAgent    value of the user-agent key in the request
 *                     header's map; may be {@code null} when the header is absent
 * @return    true if an end-user's browser is Firefox; false otherwise,
 *            including when {@code userAgent} is {@code null}
 */
public boolean isBrowserFirefox(String userAgent) {
    // A missing header used to end in a NullPointerException inside matcher().
    if (userAgent == null) {
        return false;
    }
    // The expression is a constant literal, so compiling it cannot raise a
    // PatternSyntaxException and no handler is needed.
    Pattern p = Pattern.compile("Firefox", Pattern.CASE_INSENSITIVE);
    return p.matcher(userAgent).find();
}