to.networld.scrawler.foaf.Person.java Source code

Java tutorial

Introduction

Here is the source code for to.networld.scrawler.foaf.Person.java

Source

/**
 * Semantic Crawler Library
 *
 * Copyright (C) 2010 by Networld Project
 * Written by Alex Oberhauser <oberhauseralex@networld.to>
 * All Rights Reserved
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by 
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this software.  If not, see <http://www.gnu.org/licenses/>
 */

package to.networld.scrawler.foaf;

import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.List;
import java.util.Vector;

import org.dom4j.Element;

import to.networld.scrawler.common.Ontologies;
import to.networld.scrawler.common.RDFParser;
import to.networld.scrawler.interfaces.IFOAFPerson;

/**
 * Handles a FOAF Person. The information are read out with the help of XPath queries.
 * 
 * @author Alex Oberhauser
 */
public final class Person extends RDFParser implements IFOAFPerson {

    /**
     * @param _url The URL that points to a valid FOAF file
     * @throws Exception Generic exception, doesn't matter what error occurs the agent could not be instantiated.
     */
    public Person(URL _url) throws Exception {
        super(_url);
        this.namespace.put("dive", Ontologies.diveURI);
        this.namespace.put("foaf", Ontologies.foafURI);
        this.namespace.put("geo", Ontologies.geoURI);

        this.setQueryPrefix();
    }

    /**
     * Set the query prefix that handles the node of the person that is described by the FOAF file.
     */
    private void setQueryPrefix() {
        List<Element> nameNodes = this.getLinkNodes("/rdf:RDF/foaf:PersonalProfileDocument/foaf:primaryTopic");
        if (nameNodes.size() > 0) {
            this.queryPrefix = "/rdf:RDF/foaf:Person[@*='" + nameNodes.get(0).valueOf("@rdf:resource") + "']";
            if (this.getLinkNodes(this.queryPrefix).size() > 0)
                return;
            this.queryPrefix = "/rdf:RDF/foaf:Person[@*='"
                    + nameNodes.get(0).valueOf("@rdf:resource").replace("#", "") + "']";
            if (this.getLinkNodes(this.queryPrefix).size() > 0)
                return;
        }
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getName()
     */
    @Override
    public String getName() {
        String name = this.getSingleNode("foaf:name");
        if (name == null || name.equals("")) {
            String firstname = this.getSingleNode("foaf:firstName");
            String surname = this.getSingleNode("foaf:surname");
            return firstname + " " + surname;
        }
        return name;
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getGender()
     */
    @Override
    public String getGender() {
        return this.getSingleNode("foaf:gender");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getImageURL()
     */
    @Override
    public String getImageURL() {
        String image = this.getSingleNodeResource("foaf:depiction", "rdf:resource");
        if (image == null || image.equals(""))
            image = this.getSingleNodeResource("foaf:img", "rdf:resource");
        return image;
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getDateOfBirth()
     */
    @Override
    public String getDateOfBirth() {
        return this.getSingleNode("foaf:dateOfBirth");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getWebsite()
     */
    @Override
    public String getWebsite() {
        return this.getSingleNodeResource("foaf:homepage", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getWeblog()
     */
    @Override
    public String getWeblog() {
        return this.getSingleNodeResource("foaf:weblog", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getSchoolHomepage()
     */
    @Override
    public String getSchoolHomepage() {
        return this.getSingleNodeResource("/foaf:schoolHomepage", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getWorkplaceHomepage()
     */
    @Override
    public String getWorkplaceHomepage() {
        return this.getSingleNodeResource("foaf:workplaceHomepage", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getWorkInfoHomepage()
     */
    @Override
    public String getWorkInfoHomepage() {
        return this.getSingleNodeResource("foaf:workInfoHomepage", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getOpenid()
     */
    @Override
    public String getOpenid() {
        return this.getSingleNodeResource("foaf:openid", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getLocation()
     */
    @Override
    public Vector<Double> getLocation() {
        double lat = -1.0;
        double lon = -1.0;
        Vector<Double> geo = new Vector<Double>();
        try {
            lat = Double.parseDouble(this.getSingleNode("/geo:lat"));
            lon = Double.parseDouble(this.getSingleNode("/geo:long"));
        } catch (Exception e) {
            return geo;
        }
        geo.add(0, lat);
        geo.add(1, lon);
        return geo;
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getKnownAgents()
     */
    @Override
    public Vector<String> getKnownAgents() {
        Vector<String> retValues = new Vector<String>();
        retValues.addAll(this.getNodesResource("foaf:knows", "rdf:resource"));
        retValues.addAll(this.getNodesResource("foaf:knows//rdfs:seeAlso", "rdf:resource"));
        return retValues;
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getPublications()
     */
    @Override
    public Vector<String> getPublications() {
        return this.getNodesResource("foaf:publications", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getInterests()
     */
    @Override
    public Vector<String> getInterests() {
        return this.getNodesResource("foaf:interest", "rdfs:label");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getEMails()
     */
    @Override
    public Vector<String> getEMails() {
        return this.getNodesResource("foaf:mbox", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getPhoneNumbers()
     */
    @Override
    public Vector<String> getPhoneNumbers() {
        return this.getNodesResource("foaf:phone", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getDiveCertificate()
     */
    @Override
    public String getDiveCertificate() {
        return this.getSingleNodeResource("dive:hasCertification", "rdf:resource");
    }

    private static String convertToHex(byte[] data) {
        StringBuffer buf = new StringBuffer();
        for (int i = 0; i < data.length; i++) {
            int halfbyte = (data[i] >>> 4) & 0x0F;
            int two_halfs = 0;
            do {
                if ((0 <= halfbyte) && (halfbyte <= 9))
                    buf.append((char) ('0' + halfbyte));
                else
                    buf.append((char) ('a' + (halfbyte - 10)));
                halfbyte = data[i] & 0x0F;
            } while (two_halfs++ < 1);
        }
        return buf.toString();
    }

    private static String computeSHA1(String _text) throws NoSuchAlgorithmException, UnsupportedEncodingException {
        MessageDigest md = MessageDigest.getInstance("SHA-1");
        byte[] sha1hash = new byte[40];
        md.update(_text.getBytes("iso-8859-1"), 0, _text.length());
        sha1hash = md.digest();
        return convertToHex(sha1hash);

    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#hasEMail(java.lang.String)
     */
    @Override
    public boolean hasEMail(String _email) {
        try {
            String sha1value = computeSHA1(_email);
            Vector<String> mboxValues = this.getNodes("foaf:mbox_sha1sum");
            for (String entry : mboxValues) {
                if (sha1value.equals(entry))
                    return true;
            }
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getAccounts()
     */
    @Override
    public Vector<Account> getAccounts() {
        List<Element> elements = this.getLinkNodes(this.queryPrefix + "/foaf:holdsAccount");
        Vector<Account> retVector = new Vector<Account>();
        for (Element entry : elements) {
            Account account = new Account();
            Element nameNode = (Element) entry.selectSingleNode(entry.getUniquePath() + "//foaf:accountName");
            if (nameNode != null)
                account.setName(nameNode.getTextTrim());

            Element serviceHomepageNode = (Element) entry
                    .selectSingleNode(entry.getUniquePath() + "//foaf:accountServiceHomepage");
            if (serviceHomepageNode != null)
                account.setServiceHomepage(serviceHomepageNode.valueOf("@rdf:resource"));

            Element profilePageNode = (Element) entry
                    .selectSingleNode(entry.getUniquePath() + "//foaf:accountProfilePage");
            if (profilePageNode != null)
                account.setProfilePage(profilePageNode.valueOf("@rdf:resource"));

            retVector.add(account);
        }
        return retVector;
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getCurrentProjects()
     */
    @Override
    public Vector<String> getCurrentProjects() {
        return this.getNodesResource("foaf:currentProject/rdf:Description/rdfs:seeAlso", "rdf:resource");
    }

    /**
     * @see to.networld.scrawler.interfaces.IFOAFPerson#getPastProjects()
     */
    @Override
    public Vector<String> getPastProjects() {
        return this.getNodesResource("foaf:pastProject/rdf:Description/rdfs:seeAlso", "rdf:resource");
    }

}