net.sf.jabref.logic.msbib.MSBibEntry.java Source code

Java tutorial

Introduction

Here is the source code for net.sf.jabref.logic.msbib.MSBibEntry.java

Source

/*  Copyright (C) 2003-2016 JabRef contributors.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
    
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
    
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package net.sf.jabref.logic.msbib;

import java.io.StringWriter;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import net.sf.jabref.importer.fileformat.ImportFormat;
import net.sf.jabref.logic.layout.LayoutFormatter;
import net.sf.jabref.logic.layout.format.RemoveBrackets;
import net.sf.jabref.logic.layout.format.XMLChars;
import net.sf.jabref.logic.mods.PageNumbers;
import net.sf.jabref.logic.mods.PersonName;
import net.sf.jabref.logic.util.strings.StringUtil;
import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.model.entry.BibtexEntryTypes;
import net.sf.jabref.model.entry.EntryType;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Date: May 15, 2007; May 03, 2007
 * <p>
 * History
 * May 03, 2007 - Added export functionality
 * May 15, 2007 - Added import functionality
 * May 16, 2007 - Changed all integer entries to strings,
 * except LCID which must be an integer.
 * To avoid exception during integer parsing
 * the exception is caught and LCID is set to zero.
 * Jan 06, 2012 - Changed the XML element ConferenceName to present
 * the Booktitle instead of the organization field content
 *
 * @author S M Mahbub Murshed (udvranto@yahoo.com)
 * @version 2.0.0
 * @see <a href="http://mahbub.wordpress.com/2007/03/24/details-of-microsoft-office-2007-bibliographic-format-compared-to-bibtex/">ms office 2007 bibliography format compared to bibtex</a>
 * @see <a href="http://mahbub.wordpress.com/2007/03/22/deciphering-microsoft-office-2007-bibliography-format/">deciphering ms office 2007 bibliography format</a>
 * See http://www.ecma-international.org/publications/standards/Ecma-376.htm
 */
class MSBibEntry {
    private static final Log LOGGER = LogFactory.getLog(MSBibEntry.class);

    // MS Office source type; stays "Misc" unless one of the populate methods overwrites it.
    private String sourceType = "Misc";
    // Original BibTeX entry type, remembered when several BibTeX types share one source type.
    private String bibTexEntry;

    private String tag;
    // Always null, and addField() skips null values, so no GUID element is ever written.
    private static final String GUID = null;
    // -1 means "not set"; the LCID element is only written for values >= 0.
    private int LCID = -1;

    // Contributor lists, one per role element found below the "Author" element.
    // A list stays null when the role is absent.
    private List<PersonName> authors;
    private List<PersonName> bookAuthors;
    private List<PersonName> editors;
    private List<PersonName> translators;
    private List<PersonName> producerNames;
    private List<PersonName> composers;
    private List<PersonName> conductors;
    private List<PersonName> performers;
    private List<PersonName> writers;
    private List<PersonName> directors;
    private List<PersonName> compilers;
    private List<PersonName> interviewers;
    private List<PersonName> interviewees;
    private List<PersonName> inventors;
    private List<PersonName> counsels;

    private String title;
    private String year;
    private String month;
    private String day;

    private String shortTitle;
    private String comments;

    private PageNumbers pages;
    private String volume;
    private String numberOfVolumes;
    private String edition;
    // Concatenation of the ISBN/ISSN/LCCN/MRN/DOI identifiers when built from BibTeX.
    private String standardNumber;
    private String publisher;

    // Single string; on export it is split into City/StateProvince/CountryRegion where possible.
    private String address;
    private String bookTitle;
    private String chapterNumber;
    private String journalName;
    private String issue;
    private String periodicalTitle;
    private String conferenceName;
    private String department;
    private String institution;
    private String thesisType;
    private String internetSiteTitle;
    private String dateAccessed;
    private String url;
    private String productionCompany;
    private String publicationTitle;
    private String medium;
    private String albumTitle;
    private String recordingNumber;
    private String theater;
    private String distributor;
    private String broadcastTitle;
    private String broadcaster;
    private String station;
    private String type;
    private String patentNumber;
    private String court;
    private String reporter;
    private String caseNumber;
    private String abbreviatedCaseNumber;
    // BibTeX-only fields, stored in XML elements whose names carry the BIBTEX prefix.
    private String bibTexSeries;
    private String bibTexAbstract;
    private String bibTexKeyWords;
    private String bibTexCrossRef;
    private String bibTex_HowPublished;
    private String bibTexAffiliation;
    private String bibTexContents;
    private String bibTexCopyright;
    private String bibTexPrice;
    private String bibTexSize;

    /* SM 2010.10 intype, paper support */
    private String bibTexInType;
    private String bibTexPaper;

    // Prefix of the XML element names used for BibTeX-only fields.
    private static final String BIBTEX = "BIBTEX_";
    // Prefix of the BibTeX field names used for MS Office-only fields.
    private static final String MSBIB = "msbib-";

    // Namespace prefix put in front of every element name created on export.
    private static final String B_COLON = "b:";

    // reduced subset, supports only "CITY , STATE, COUNTRY"
    // \b(\w+)\s*[,]?\s*(\w+)\s*[,]?\s*(\w+)\b
    // WORD SPACE , SPACE WORD SPACE , SPACE WORD
    // NOTE(review): both the commas and the whitespace are optional, so any run of
    // three or more word characters (e.g. a single city name) also matches in full.
    // tested using http://www.javaregex.com/test.html
    private static final Pattern ADDRESS_PATTERN = Pattern
            .compile("\\b(\\w+)\\s*[,]?\\s*(\\w+)\\s*[,]?\\s*(\\w+)\\b");

    // Allows 20.3-2007|||20/3-  2007 etc.
    // (\d{1,2})\s*[.,-/]\s*(\d{1,2})\s*[.,-/]\s*(\d{2,4})
    // 1-2 DIGITS SPACE SEPARATOR SPACE 1-2 DIGITS SPACE SEPARATOR SPACE 2-4 DIGITS
    // Inside the character class ",-/" is a range; it covers exactly , - . and /
    // tested using http://www.javaregex.com/test.html
    private static final Pattern DATE_PATTERN = Pattern
            .compile("(\\d{1,2})\\s*[.,-/]\\s*(\\d{1,2})\\s*[.,-/]\\s*(\\d{2,4})");

    /**
     * Creates an entry whose fields are taken from a BibTeX entry.
     *
     * @param bibtex the BibTeX entry to convert
     */
    public MSBibEntry(BibEntry bibtex) {
        this.populateFromBibtex(bibtex);
    }

    /**
     * Creates an entry whose fields are read from an XML element.
     *
     * @param entry the XML element holding the source description
     * @param bcol  prefix that is prepended to every element name looked up
     */
    public MSBibEntry(Element entry, String bcol) {
        this.populateFromXml(entry, bcol);
    }

    /**
     * Looks up the first element called {@code name} below {@code entry}.
     *
     * @param name  full element name, including any prefix
     * @param entry element whose descendants are searched
     * @return the text content of the first match, or {@code null} if there is none
     */
    private String getFromXml(String name, Element entry) {
        NodeList matches = entry.getElementsByTagName(name);
        if (matches.getLength() == 0) {
            return null;
        }
        return matches.item(0).getTextContent();
    }

    /**
     * Fills this entry from an XML element.
     * <p>
     * Every simple field is read with {@link #getFromXml(String, Element)} and is
     * therefore {@code null} when the corresponding element is missing. The
     * city/state/country elements are merged into {@code address}
     * ("City, State Country") and the three "accessed" elements into
     * {@code dateAccessed} ("Month Day, Year"). Missing parts are left out together
     * with their separator, so no dangling comma is produced.
     *
     * @param entry the XML element to read from
     * @param bcol  prefix that is prepended to every element name looked up
     */
    private void populateFromXml(Element entry, String bcol) {
        sourceType = getFromXml(bcol + "SourceType", entry);

        tag = getFromXml(bcol + "Tag", entry);

        String lcidText = getFromXml(bcol + "LCID", entry);
        if (lcidText != null) {
            try {
                LCID = Integer.parseInt(lcidText);
            } catch (NumberFormatException e) {
                // An unparsable LCID is treated like a missing one.
                LCID = -1;
            }
        }

        title = getFromXml(bcol + "Title", entry);
        year = getFromXml(bcol + "Year", entry);
        month = getFromXml(bcol + "Month", entry);
        day = getFromXml(bcol + "Day", entry);

        shortTitle = getFromXml(bcol + "ShortTitle", entry);
        comments = getFromXml(bcol + "Comments", entry);

        String pagesText = getFromXml(bcol + "Pages", entry);
        if (pagesText != null) {
            pages = new PageNumbers(pagesText);
        }

        volume = getFromXml(bcol + "Volume", entry);

        numberOfVolumes = getFromXml(bcol + "NumberVolumes", entry);

        edition = getFromXml(bcol + "Edition", entry);

        standardNumber = getFromXml(bcol + "StandardNumber", entry);

        publisher = getFromXml(bcol + "Publisher", entry);

        String city = getFromXml(bcol + "City", entry);
        String state = getFromXml(bcol + "StateProvince", entry);
        String country = getFromXml(bcol + "CountryRegion", entry);
        address = joinPresent(", ", city, joinPresent(" ", state, country));

        bookTitle = getFromXml(bcol + "BookTitle", entry);

        chapterNumber = getFromXml(bcol + "ChapterNumber", entry);

        journalName = getFromXml(bcol + "JournalName", entry);

        issue = getFromXml(bcol + "Issue", entry);

        periodicalTitle = getFromXml(bcol + "PeriodicalTitle", entry);

        conferenceName = getFromXml(bcol + "ConferenceName", entry);
        department = getFromXml(bcol + "Department", entry);
        institution = getFromXml(bcol + "Institution", entry);

        thesisType = getFromXml(bcol + "ThesisType", entry);
        internetSiteTitle = getFromXml(bcol + "InternetSiteTitle", entry);

        // Distinct local names: the fields month/day/year hold the publication date.
        String accessedMonth = getFromXml(bcol + "MonthAccessed", entry);
        String accessedDay = getFromXml(bcol + "DayAccessed", entry);
        String accessedYear = getFromXml(bcol + "YearAccessed", entry);
        // The comma belongs to the day: "May 5, 2007" but "May 2007".
        String yearSeparator = (joinPresent("", accessedDay) == null) ? " " : ", ";
        dateAccessed = joinPresent(yearSeparator, joinPresent(" ", accessedMonth, accessedDay), accessedYear);

        url = getFromXml(bcol + "URL", entry);
        productionCompany = getFromXml(bcol + "ProductionCompany", entry);

        publicationTitle = getFromXml(bcol + "PublicationTitle", entry);
        medium = getFromXml(bcol + "Medium", entry);
        albumTitle = getFromXml(bcol + "AlbumTitle", entry);
        recordingNumber = getFromXml(bcol + "RecordingNumber", entry);
        theater = getFromXml(bcol + "Theater", entry);
        distributor = getFromXml(bcol + "Distributor", entry);
        broadcastTitle = getFromXml(bcol + "BroadcastTitle", entry);
        broadcaster = getFromXml(bcol + "Broadcaster", entry);
        station = getFromXml(bcol + "Station", entry);
        type = getFromXml(bcol + "Type", entry);
        patentNumber = getFromXml(bcol + "PatentNumber", entry);
        court = getFromXml(bcol + "Court", entry);
        reporter = getFromXml(bcol + "Reporter", entry);
        caseNumber = getFromXml(bcol + "CaseNumber", entry);
        abbreviatedCaseNumber = getFromXml(bcol + "AbbreviatedCaseNumber", entry);
        bibTexSeries = getFromXml(bcol + BIBTEX + "Series", entry);
        bibTexAbstract = getFromXml(bcol + BIBTEX + "Abstract", entry);
        bibTexKeyWords = getFromXml(bcol + BIBTEX + "KeyWords", entry);
        bibTexCrossRef = getFromXml(bcol + BIBTEX + "CrossRef", entry);
        bibTex_HowPublished = getFromXml(bcol + BIBTEX + "HowPublished", entry);
        bibTexAffiliation = getFromXml(bcol + BIBTEX + "Affiliation", entry);
        bibTexContents = getFromXml(bcol + BIBTEX + "Contents", entry);
        bibTexCopyright = getFromXml(bcol + BIBTEX + "Copyright", entry);
        bibTexPrice = getFromXml(bcol + BIBTEX + "Price", entry);
        bibTexSize = getFromXml(bcol + BIBTEX + "Size", entry);

        // The first "Author" element is the wrapper that holds one child per role.
        NodeList authorWrappers = entry.getElementsByTagName(bcol + "Author");
        if (authorWrappers.getLength() > 0) {
            getAuthors((Element) authorWrappers.item(0), bcol);
        }
    }

    /**
     * Joins the non-blank parts with {@code separator}; each part is trimmed first.
     *
     * @return the joined text, or {@code null} when every part is null or blank
     */
    private static String joinPresent(String separator, String... parts) {
        StringBuilder joined = new StringBuilder();
        for (String part : parts) {
            if ((part == null) || part.trim().isEmpty()) {
                continue;
            }
            if (joined.length() > 0) {
                joined.append(separator);
            }
            joined.append(part.trim());
        }
        return (joined.length() == 0) ? null : joined.toString();
    }

    /**
     * Fills this entry from a BibTeX entry.
     * <p>
     * A field of this object is only assigned when the BibTeX entry has the
     * corresponding field; otherwise its current value is kept. MS Office-only
     * values are read from BibTeX fields carrying the {@code msbib-} prefix.
     *
     * @param bibtex the BibTeX entry to read from
     */
    private void populateFromBibtex(BibEntry bibtex) {
        sourceType = getMSBibSourceType(bibtex);

        if (bibtex.hasField(BibEntry.KEY_FIELD)) {
            tag = bibtex.getCiteKey();
        }
        if (bibtex.hasField("language")) {
            LCID = getLCID(bibtex.getField("language"));
        }

        // Core bibliographic data
        if (bibtex.hasField("title")) {
            // TODO: remove LaTex syntax
            title = new RemoveBrackets().format(bibtex.getField("title"));
        }
        year = fieldOr(bibtex, "year", year);
        month = fieldOr(bibtex, "month", month);
        day = fieldOr(bibtex, MSBIB + "day", day);
        shortTitle = fieldOr(bibtex, MSBIB + "shorttitle", shortTitle);
        comments = fieldOr(bibtex, "note", comments);
        if (bibtex.hasField("pages")) {
            pages = new PageNumbers(bibtex.getField("pages"));
        }
        volume = fieldOr(bibtex, "volume", volume);
        numberOfVolumes = fieldOr(bibtex, MSBIB + "numberofvolume", numberOfVolumes);
        edition = fieldOr(bibtex, "edition", edition);

        // All known identifiers are concatenated into the single StandardNumber value.
        /* SM: 2010.10: lower case field names, DOI support */
        String[][] identifierLabels = {
                {"isbn", " ISBN: "},
                {"issn", " ISSN: "},
                {"lccn", " LCCN: "},
                {"mrnumber", " MRN: "},
                {"doi", " DOI: "}};
        StringBuilder identifiers = new StringBuilder();
        for (String[] identifier : identifierLabels) {
            if (bibtex.hasField(identifier[0])) {
                identifiers.append(identifier[1]).append(bibtex.getField(identifier[0]));
            }
        }
        standardNumber = (identifiers.length() == 0) ? null : identifiers.toString();

        publisher = fieldOr(bibtex, "publisher", publisher);
        address = fieldOr(bibtex, "address", address);
        bookTitle = fieldOr(bibtex, "booktitle", bookTitle);
        chapterNumber = fieldOr(bibtex, "chapter", chapterNumber);
        journalName = fieldOr(bibtex, "journal", journalName);
        issue = fieldOr(bibtex, "number", issue);
        periodicalTitle = fieldOr(bibtex, MSBIB + "periodical", periodicalTitle);
        // The conference name is taken from the book title as well.
        conferenceName = fieldOr(bibtex, "booktitle", conferenceName);
        department = fieldOr(bibtex, "school", department);
        institution = fieldOr(bibtex, "institution", institution);

        /* SM: 2010.10 Modified for default source types */
        if (bibtex.hasField("type")) {
            thesisType = bibtex.getField("type");
        } else if ("techreport".equalsIgnoreCase(bibtex.getType())) {
            thesisType = "Tech. rep.";
        } else if ("mastersthesis".equalsIgnoreCase(bibtex.getType())) {
            thesisType = "Master's thesis";
        } else if ("phdthesis".equalsIgnoreCase(bibtex.getType())) {
            thesisType = "Ph.D. dissertation";
        } else if ("unpublished".equalsIgnoreCase(bibtex.getType())) {
            thesisType = "unpublished";
        }

        // Depending on the source type, the raw title is mirrored into a type-specific field.
        if (bibtex.hasField("title")) {
            if ("InternetSite".equals(sourceType) || "DocumentFromInternetSite".equals(sourceType)) {
                internetSiteTitle = bibtex.getField("title");
            }
            if ("ElectronicSource".equals(sourceType) || "Art".equals(sourceType) || "Misc".equals(sourceType)) {
                publicationTitle = bibtex.getField("title");
            }
            if ("SoundRecording".equals(sourceType)) {
                albumTitle = bibtex.getField("title");
            }
            if ("Interview".equals(sourceType)) {
                broadcastTitle = bibtex.getField("title");
            }
        }

        dateAccessed = fieldOr(bibtex, MSBIB + "accessed", dateAccessed);
        url = fieldOr(bibtex, "url", url); /* SM: 2010.10: lower case */
        productionCompany = fieldOr(bibtex, MSBIB + "productioncompany", productionCompany);
        medium = fieldOr(bibtex, MSBIB + "medium", medium);
        recordingNumber = fieldOr(bibtex, MSBIB + "recordingnumber", recordingNumber);
        theater = fieldOr(bibtex, MSBIB + "theater", theater);
        distributor = fieldOr(bibtex, MSBIB + "distributor", distributor);
        broadcaster = fieldOr(bibtex, MSBIB + "broadcaster", broadcaster);
        station = fieldOr(bibtex, MSBIB + "station", station);
        type = fieldOr(bibtex, MSBIB + "type", type);
        patentNumber = fieldOr(bibtex, MSBIB + "patentnumber", patentNumber);
        court = fieldOr(bibtex, MSBIB + "court", court);
        reporter = fieldOr(bibtex, MSBIB + "reporter", reporter);
        caseNumber = fieldOr(bibtex, MSBIB + "casenumber", caseNumber);
        abbreviatedCaseNumber = fieldOr(bibtex, MSBIB + "abbreviatedcasenumber", abbreviatedCaseNumber);

        // BibTeX-only fields
        bibTexSeries = fieldOr(bibtex, "series", bibTexSeries);
        bibTexAbstract = fieldOr(bibtex, "abstract", bibTexAbstract);
        bibTexKeyWords = fieldOr(bibtex, "keywords", bibTexKeyWords);
        bibTexCrossRef = fieldOr(bibtex, "crossref", bibTexCrossRef);
        bibTex_HowPublished = fieldOr(bibtex, "howpublished", bibTex_HowPublished);
        bibTexAffiliation = fieldOr(bibtex, "affiliation", bibTexAffiliation);
        bibTexContents = fieldOr(bibtex, "contents", bibTexContents);
        bibTexCopyright = fieldOr(bibtex, "copyright", bibTexCopyright);
        bibTexPrice = fieldOr(bibtex, "price", bibTexPrice);
        bibTexSize = fieldOr(bibtex, "size", bibTexSize);

        /* SM: 2010.10 intype, paper support */
        bibTexInType = fieldOr(bibtex, "intype", bibTexInType);
        bibTexPaper = fieldOr(bibtex, "paper", bibTexPaper);

        if (bibtex.hasField("author")) {
            authors = getAuthors(bibtex.getField("author"));
        }
        if (bibtex.hasField("editor")) {
            editors = getAuthors(bibtex.getField("editor"));
        }

        // XML escaping of title and abstract is switched off.
        boolean escapeForXml = false;
        if (escapeForXml) {
            title = format(title);
            bibTexAbstract = format(bibTexAbstract);
        }
    }

    /**
     * Returns the value of BibTeX field {@code name} if the entry has that field,
     * otherwise {@code fallback}.
     */
    private static String fieldOr(BibEntry bibtex, String name, String fallback) {
        return bibtex.hasField(name) ? bibtex.getField(name) : fallback;
    }

    /**
     * Runs {@code value} through the {@link XMLChars} layout formatter.
     *
     * @param value text to format, may be {@code null}
     * @return the formatted text, or {@code null} when {@code value} is {@code null}
     */
    private String format(String value) {
        if (value != null) {
            LayoutFormatter xmlChars = new XMLChars();
            return xmlChars.format(value);
        }
        return null;
    }

    /**
     * Maps a language name to an LCID. Placeholder: the argument is ignored and
     * the result is always 0.
     * See http://www.microsoft.com/globaldev/reference/lcid-all.mspx
     *
     * @param language language name (currently unused)
     * @return always 0
     */
    private int getLCID(String language) {
        // TODO: add language to LCID mapping
        final int defaultLcid = 0;
        return defaultLcid;
    }

    /**
     * Maps an LCID to a language name. Placeholder: the argument is ignored and
     * the result is always "english".
     * See http://www.microsoft.com/globaldev/reference/lcid-all.mspx
     *
     * @param LCID locale identifier (currently unused)
     * @return always "english"
     */
    private String getLanguage(int LCID) {
        // TODO: add language to LCID mapping
        final String defaultLanguage = "english";
        return defaultLanguage;
    }

    /**
     * Reads the persons listed for one contributor role.
     * <p>
     * The expected nesting below {@code authors} is
     * role element, then "NameList", then one "Person" per contributor. Only the
     * first role element and its first name list are considered.
     *
     * @param type    role element name without prefix, e.g. "Editor"
     * @param authors element that contains the role elements
     * @param bcol    prefix that is prepended to every element name looked up
     * @return the persons found, or {@code null} if any level of the nesting is missing
     */
    private List<PersonName> getSpecificAuthors(String type, Element authors, String bcol) {
        NodeList roleNodes = authors.getElementsByTagName(bcol + type);
        if (roleNodes.getLength() == 0) {
            return null;
        }
        Element role = (Element) roleNodes.item(0);

        NodeList nameLists = role.getElementsByTagName(bcol + "NameList");
        if (nameLists.getLength() == 0) {
            return null;
        }
        Element nameList = (Element) nameLists.item(0);

        NodeList persons = nameList.getElementsByTagName(bcol + "Person");
        int personCount = persons.getLength();
        if (personCount == 0) {
            return null;
        }

        List<PersonName> names = new LinkedList<>();
        for (int index = 0; index < personCount; index++) {
            Element person = (Element) persons.item(index);
            PersonName name = new PersonName();

            // Each name part is optional; only the first occurrence is used.
            NodeList first = person.getElementsByTagName(bcol + "First");
            if (first.getLength() > 0) {
                name.setFirstname(first.item(0).getTextContent());
            }
            NodeList middle = person.getElementsByTagName(bcol + "Middle");
            if (middle.getLength() > 0) {
                name.setMiddlename(middle.item(0).getTextContent());
            }
            NodeList last = person.getElementsByTagName(bcol + "Last");
            if (last.getLength() > 0) {
                name.setSurname(last.item(0).getTextContent());
            }

            names.add(name);
        }
        return names;
    }

    /**
     * Reads every supported contributor role from the given element and stores
     * the result in the matching field. A field becomes {@code null} when its
     * role is not present.
     *
     * @param authorsElement element that contains one child element per role
     * @param bcol           prefix that is prepended to every element name looked up
     */
    private void getAuthors(Element authorsElement, String bcol) {
        authors = getSpecificAuthors("Author", authorsElement, bcol);
        bookAuthors = getSpecificAuthors("BookAuthor", authorsElement, bcol);
        editors = getSpecificAuthors("Editor", authorsElement, bcol);
        translators = getSpecificAuthors("Translator", authorsElement, bcol);
        producerNames = getSpecificAuthors("ProducerName", authorsElement, bcol);
        composers = getSpecificAuthors("Composer", authorsElement, bcol);
        conductors = getSpecificAuthors("Conductor", authorsElement, bcol);
        performers = getSpecificAuthors("Performer", authorsElement, bcol);
        writers = getSpecificAuthors("Writer", authorsElement, bcol);
        directors = getSpecificAuthors("Director", authorsElement, bcol);
        compilers = getSpecificAuthors("Compiler", authorsElement, bcol);
        interviewers = getSpecificAuthors("Interviewer", authorsElement, bcol);
        interviewees = getSpecificAuthors("Interviewee", authorsElement, bcol);
        inventors = getSpecificAuthors("Inventor", authorsElement, bcol);
        counsels = getSpecificAuthors("Counsel", authorsElement, bcol);
    }

    /**
     * Splits a BibTeX name list at each {@code " and "} and wraps every piece in
     * a {@link PersonName}.
     *
     * @param authors the raw BibTeX author or editor field
     * @return one {@link PersonName} per piece, in the original order
     */
    private List<PersonName> getAuthors(String authors) {
        List<PersonName> parsed = new LinkedList<>();
        // split() hands back the whole string when the separator does not occur,
        // so a single name needs no separate branch.
        for (String singleName : authors.split(" and ")) {
            parsed.add(new PersonName(singleName));
        }
        return parsed;
    }

    /**
     * Determines the MS Office source type for a BibTeX entry.
     * <p>
     * The BibTeX type is compared case-insensitively. For types that share a
     * source type with others, {@code bibTexEntry} is set as a side effect so the
     * original BibTeX type is remembered. Unknown types map to "Misc".
     *
     * @param bibtex the BibTeX entry whose type is inspected
     * @return the source type name
     */
    private String getMSBibSourceType(BibEntry bibtex) {
        // Columns: BibTeX type, source type, value for bibTexEntry (null = leave untouched).
        /* SM 2010.10: generalized (booklet, conference, manual) */
        String[][] typeTable = {
                {"book", "Book", null},
                {"inbook", "BookSection", "inbook"},
                {"booklet", "BookSection", "booklet"},
                {"incollection", "BookSection", "incollection"},
                {"article", "JournalArticle", null},
                {"inproceedings", "ConferenceProceedings", "inproceedings"},
                {"conference", "ConferenceProceedings", "conference"},
                {"proceedings", "ConferenceProceedings", "proceedings"},
                {"collection", "ConferenceProceedings", "collection"},
                {"techreport", "Report", "techreport"},
                {"manual", "Report", "manual"},
                {"mastersthesis", "Report", "mastersthesis"},
                {"phdthesis", "Report", "phdthesis"},
                {"unpublished", "Report", "unpublished"},
                {"patent", "Patent", null},
                {"misc", "Misc", null},
                {"electronic", "Misc", "electronic"}};

        String bibtexType = bibtex.getType();
        for (String[] row : typeTable) {
            if (row[0].equalsIgnoreCase(bibtexType)) {
                if (row[2] != null) {
                    bibTexEntry = row[2];
                }
                return row[1];
            }
        }
        return "Misc";
    }

    /**
     * Builds the DOM representation of this entry inside a freshly created document.
     *
     * @return the created node, or {@code null} when no document builder could be
     *         created (the failure is logged as a warning)
     */
    private Node getDOMrepresentation() {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document freshDocument = builder.newDocument();
            return getDOMrepresentation(freshDocument);
        } catch (ParserConfigurationException e) {
            LOGGER.warn("Could not create DocumentBuilder", e);
            return null;
        }
    }

    /**
     * Appends a {@code b:}-prefixed text element to {@code parent}. Nothing is
     * added when {@code value} is {@code null}.
     *
     * @param document document used to create the nodes
     * @param parent   element that receives the new child
     * @param name     element name without the prefix
     * @param value    element text; passed through
     *                 {@code StringUtil.stripNonValidXMLCharacters} before use
     */
    private void addField(Document document, Element parent, String name, String value) {
        if (value != null) {
            String cleaned = StringUtil.stripNonValidXMLCharacters(value);
            Element field = document.createElement(B_COLON + name);
            field.appendChild(document.createTextNode(cleaned));
            parent.appendChild(field);
        }
    }

    /**
     * Appends one contributor role to {@code allAuthors}: a role element holding
     * a "NameList" with one "Person" (Last, Middle, First) per name. Nothing is
     * added when {@code authorsLst} is {@code null}.
     *
     * @param document   document used to create the nodes
     * @param allAuthors element that collects all roles
     * @param entryName  role element name without the prefix
     * @param authorsLst persons of this role, may be {@code null}
     */
    private void addAuthor(Document document, Element allAuthors, String entryName, List<PersonName> authorsLst) {
        if (authorsLst != null) {
            Element names = document.createElement(B_COLON + "NameList");
            for (PersonName contributor : authorsLst) {
                Element personElement = document.createElement(B_COLON + "Person");
                addField(document, personElement, "Last", contributor.getSurname());
                addField(document, personElement, "Middle", contributor.getMiddlename());
                addField(document, personElement, "First", contributor.getFirstname());
                names.appendChild(personElement);
            }

            Element role = document.createElement(B_COLON + entryName);
            role.appendChild(names);
            allAuthors.appendChild(role);
        }
    }

    /**
     * Exports the address. An address of the form "CITY, STATE, COUNTRY" (exactly
     * three non-empty, comma-separated parts) is written as City, StateProvince
     * and CountryRegion; anything else is written unchanged as City.
     * <p>
     * The previous implementation tested {@code matcher.groupCount() > 3}, which
     * can never hold for a pattern with three groups, so the address was never
     * split. ADDRESS_PATTERN is deliberately not used for the fix: its separators
     * are all optional, so a single word such as "Berlin" would match and be torn
     * into "Berl", "i" and "n".
     *
     * @param document document used to create the nodes
     * @param parent   element that receives the address fields
     * @param address  address text, may be {@code null} (then nothing is added)
     */
    private void addAddress(Document document, Element parent, String address) {
        if (address == null) {
            return;
        }

        // Limit -1 keeps trailing empty parts, so "City, State," is not mistaken for two parts.
        String[] parts = address.trim().split("\\s*,\\s*", -1);
        boolean isCityStateCountry = parts.length == 3;
        for (String part : parts) {
            isCityStateCountry = isCityStateCountry && !part.isEmpty();
        }

        if (isCityStateCountry) {
            addField(document, parent, "City", parts[0]);
            addField(document, parent, "StateProvince", parts[1]);
            addField(document, parent, "CountryRegion", parts[2]);
        } else {
            /* SM: 2010.10 generalized */
            addField(document, parent, "City", address);
        }
    }

    /**
     * Exports a date as three elements named "Month", "Day" and "Year", each
     * followed by {@code extra}. Dates that DATE_PATTERN does not match in full
     * are skipped.
     * <p>
     * The previous condition also required {@code matcher.groupCount() > 3}.
     * groupCount() does not count group zero, so it is always 3 for this pattern
     * and no date was ever written.
     * <p>
     * NOTE(review): the first number is exported as the month and the second as
     * the day, while the example next to DATE_PATTERN ("20.3-2007") looks
     * day-first. The mapping is kept as it was; confirm the intended order.
     *
     * @param document document used to create the nodes
     * @param parent   element that receives the date fields
     * @param date     date text, may be {@code null} (then nothing is added)
     * @param extra    suffix appended to each element name, e.g. "Accessed"
     */
    private void addDate(Document document, Element parent, String date, String extra) {
        if (date == null) {
            return;
        }

        Matcher matcher = DATE_PATTERN.matcher(date);
        if (matcher.matches()) {
            addField(document, parent, "Month" + extra, matcher.group(1));
            addField(document, parent, "Day" + extra, matcher.group(2));
            addField(document, parent, "Year" + extra, matcher.group(3));
        }
    }

    public Element getDOMrepresentation(Document document) {

        Element msbibEntry = document.createElement(B_COLON + "Source");

        addField(document, msbibEntry, "SourceType", sourceType);
        addField(document, msbibEntry, BIBTEX + "Entry", bibTexEntry);

        addField(document, msbibEntry, "Tag", tag);
        addField(document, msbibEntry, "GUID", GUID);
        if (LCID >= 0) {
            addField(document, msbibEntry, "LCID", Integer.toString(LCID));
        }
        addField(document, msbibEntry, "Title", title);
        addField(document, msbibEntry, "Year", year);
        addField(document, msbibEntry, "ShortTitle", shortTitle);
        addField(document, msbibEntry, "Comments", comments);

        Element allAuthors = document.createElement(B_COLON + "Author");

        addAuthor(document, allAuthors, "Author", authors);
        String bookAuthor = "BookAuthor";
        addAuthor(document, allAuthors, bookAuthor, bookAuthors);
        addAuthor(document, allAuthors, "Editor", editors);
        addAuthor(document, allAuthors, "Translator", translators);
        addAuthor(document, allAuthors, "ProducerName", producerNames);
        addAuthor(document, allAuthors, "Composer", composers);
        addAuthor(document, allAuthors, "Conductor", conductors);
        addAuthor(document, allAuthors, "Performer", performers);
        addAuthor(document, allAuthors, "Writer", writers);
        addAuthor(document, allAuthors, "Director", directors);
        addAuthor(document, allAuthors, "Compiler", compilers);
        addAuthor(document, allAuthors, "Interviewer", interviewers);
        addAuthor(document, allAuthors, "Interviewee", interviewees);
        addAuthor(document, allAuthors, "Inventor", inventors);
        addAuthor(document, allAuthors, "Counsel", counsels);

        msbibEntry.appendChild(allAuthors);

        if (pages != null) {
            addField(document, msbibEntry, "Pages", pages.toString("-"));
        }
        addField(document, msbibEntry, "Volume", volume);
        addField(document, msbibEntry, "NumberVolumes", numberOfVolumes);
        addField(document, msbibEntry, "Edition", edition);
        addField(document, msbibEntry, "StandardNumber", standardNumber);
        addField(document, msbibEntry, "Publisher", publisher);

        addAddress(document, msbibEntry, address);

        addField(document, msbibEntry, "BookTitle", bookTitle);
        addField(document, msbibEntry, "ChapterNumber", chapterNumber);

        addField(document, msbibEntry, "JournalName", journalName);
        addField(document, msbibEntry, "Issue", issue);
        addField(document, msbibEntry, "PeriodicalTitle", periodicalTitle);
        addField(document, msbibEntry, "ConferenceName", conferenceName);

        addField(document, msbibEntry, "Department", department);
        addField(document, msbibEntry, "Institution", institution);
        addField(document, msbibEntry, "ThesisType", thesisType);
        addField(document, msbibEntry, "InternetSiteTitle", internetSiteTitle);

        addDate(document, msbibEntry, dateAccessed, "Accessed");

        /* SM 2010.10 added month export */
        addField(document, msbibEntry, "Month", month);

        addField(document, msbibEntry, "URL", url);
        addField(document, msbibEntry, "ProductionCompany", productionCompany);
        addField(document, msbibEntry, "PublicationTitle", publicationTitle);
        addField(document, msbibEntry, "Medium", medium);
        addField(document, msbibEntry, "AlbumTitle", albumTitle);
        addField(document, msbibEntry, "RecordingNumber", recordingNumber);
        addField(document, msbibEntry, "Theater", theater);
        addField(document, msbibEntry, "Distributor", distributor);
        addField(document, msbibEntry, "BroadcastTitle", broadcastTitle);
        addField(document, msbibEntry, "Broadcaster", broadcaster);
        addField(document, msbibEntry, "Station", station);
        addField(document, msbibEntry, "Type", type);
        addField(document, msbibEntry, "PatentNumber", patentNumber);
        addField(document, msbibEntry, "Court", court);
        addField(document, msbibEntry, "Reporter", reporter);
        addField(document, msbibEntry, "CaseNumber", caseNumber);
        addField(document, msbibEntry, "AbbreviatedCaseNumber", abbreviatedCaseNumber);

        addField(document, msbibEntry, BIBTEX + "Series", bibTexSeries);
        addField(document, msbibEntry, BIBTEX + "Abstract", bibTexAbstract);
        addField(document, msbibEntry, BIBTEX + "KeyWords", bibTexKeyWords);
        addField(document, msbibEntry, BIBTEX + "CrossRef", bibTexCrossRef);
        addField(document, msbibEntry, BIBTEX + "HowPublished", bibTex_HowPublished);
        addField(document, msbibEntry, BIBTEX + "Affiliation", bibTexAffiliation);
        addField(document, msbibEntry, BIBTEX + "Contents", bibTexContents);
        addField(document, msbibEntry, BIBTEX + "Copyright", bibTexCopyright);
        addField(document, msbibEntry, BIBTEX + "Price", bibTexPrice);
        addField(document, msbibEntry, BIBTEX + "Size", bibTexSize);

        /* SM: 2010.10 end intype, paper support */
        addField(document, msbibEntry, BIBTEX + "InType", bibTexInType);
        addField(document, msbibEntry, BIBTEX + "Paper", bibTexPaper);

        return msbibEntry;
    }

    /**
     * Looks for one identifier of the form {@code :TYPE:value} inside a standard-number string and, when it
     * is present, stores the value in {@code map} under {@code bibtype}.
     * <p>
     * The captured value is one arbitrary character followed by every character up to (not including) the
     * next {@code ':'}. Nothing is stored when the label does not occur.
     *
     * @param type        identifier label as it appears in the string, e.g. {@code "ISBN"}
     * @param bibtype     key under which the extracted value is stored
     * @param standardNum string to search; must not be {@code null}
     * @param map         receives the {@code bibtype -> value} mapping when the label is found
     */
    private void parseSingleStandardNumber(String type, String bibtype, String standardNum,
            Map<String, String> map) {
        // Quote the label so it is always treated literally, whatever characters it contains.
        Pattern pattern = Pattern.compile(':' + Pattern.quote(type) + ":(.[^:]+)");
        Matcher matcher = pattern.matcher(standardNum);
        // find() instead of matches(): the string may carry several identifiers one after another
        // (":ISBN:...:ISSN:..."), and matches() would only succeed when the whole string is a single one.
        if (matcher.find()) {
            map.put(bibtype, matcher.group(1));
        }
    }

    /**
     * Extracts every supported identifier (ISBN, ISSN, LCCN, MRN, DOI) from a standard-number string and
     * stores each one found in {@code map} under its lower-case BibTeX field name.
     *
     * @param standardNum string to parse; {@code null} is accepted and leaves {@code map} untouched
     * @param map         receives one entry per identifier found
     */
    private void parseStandardNumber(String standardNum, Map<String, String> map) {
        // Guard the argument that is actually parsed below, not the instance field of similar name.
        if (standardNum == null) {
            return;
        }
        parseSingleStandardNumber("ISBN", "isbn", standardNum, map); /* SM: 2010.10: lower case */
        parseSingleStandardNumber("ISSN", "issn", standardNum, map); /* SM: 2010.10: lower case */
        parseSingleStandardNumber("LCCN", "lccn", standardNum, map); /* SM: 2010.10: lower case */
        parseSingleStandardNumber("MRN", "mrnumber", standardNum, map);
        /* SM: 2010.10 DOI support */
        parseSingleStandardNumber("DOI", "doi", standardNum, map);
    }

    /**
     * Stores a list of persons in {@code map} as a single string, with the full names joined by
     * {@code " and "}.
     *
     * @param map     target map
     * @param type    key under which the joined names are stored
     * @param authors persons to join; when {@code null} the map is left unchanged
     */
    private void addAuthor(Map<String, String> map, String type, List<PersonName> authors) {
        if (authors != null) {
            List<String> fullNames = new LinkedList<>();
            for (PersonName person : authors) {
                fullNames.add(person.getFullname());
            }
            map.put(type, String.join(" and ", fullNames));
        }
    }

    /**
     * Translates an MSBib source type name into the corresponding BibTeX entry type.
     *
     * @param msbib MSBib source type name; must not be {@code null}
     * @return the matching BibTeX entry type, or {@code BibtexEntryTypes.MISC} for every name without a
     *         dedicated mapping
     */
    private EntryType mapMSBibToBibtexType(String msbib) {
        switch (msbib) {
        case "Book":
            return BibtexEntryTypes.BOOK;
        case "BookSection":
            return BibtexEntryTypes.INBOOK;
        case "JournalArticle":
        case "ArticleInAPeriodical":
            return BibtexEntryTypes.ARTICLE;
        case "ConferenceProceedings":
            return BibtexEntryTypes.CONFERENCE;
        case "Report":
            return BibtexEntryTypes.TECHREPORT;
        default:
            // Also covers InternetSite, DocumentFromInternetSite, ElectronicSource, Art, SoundRecording,
            // Performance, Film, Interview, Patent and Case, none of which has a dedicated mapping.
            return BibtexEntryTypes.MISC;
        }
    }

    /**
     * Builds a BibTeX entry from the values held by this MSBib entry.
     * <p>
     * The entry id is {@code tag} when set, otherwise {@code ImportFormat.DEFAULT_BIBTEXENTRY_ID}; the entry
     * type is derived from {@code sourceType}. Every non-null value is copied to its BibTeX field; values
     * without a standard BibTeX field are stored under a key prefixed with {@code MSBIB}.
     *
     * @return a new {@link BibEntry} populated with the non-null fields of this entry
     */
    public BibEntry getBibtexRepresentation() {
        // Without a tag there is no key to reuse, so fall back to the importer's default entry id.
        String entryId = (tag == null) ? ImportFormat.DEFAULT_BIBTEXENTRY_ID : tag;
        BibEntry entry = new BibEntry(entryId, mapMSBibToBibtexType(sourceType).getName());

        // Todo: add check for BibTexEntry types

        Map<String, String> hm = new HashMap<>();

        putIfNotNull(hm, BibEntry.KEY_FIELD, tag);

        if (LCID >= 0) {
            hm.put("language", getLanguage(LCID));
        }
        putIfNotNull(hm, "title", title);
        putIfNotNull(hm, "year", year);
        putIfNotNull(hm, MSBIB + "shorttitle", shortTitle);
        putIfNotNull(hm, "note", comments);

        addAuthor(hm, "author", authors);
        addAuthor(hm, MSBIB + "bookauthor", bookAuthors);
        addAuthor(hm, "editor", editors);
        addAuthor(hm, MSBIB + "translator", translators);
        addAuthor(hm, MSBIB + "producername", producerNames);
        addAuthor(hm, MSBIB + "composer", composers);
        addAuthor(hm, MSBIB + "conductor", conductors);
        addAuthor(hm, MSBIB + "performer", performers);
        addAuthor(hm, MSBIB + "writer", writers);
        addAuthor(hm, MSBIB + "director", directors);
        addAuthor(hm, MSBIB + "compiler", compilers);
        addAuthor(hm, MSBIB + "interviewer", interviewers);
        addAuthor(hm, MSBIB + "interviewee", interviewees);
        addAuthor(hm, MSBIB + "inventor", inventors);
        addAuthor(hm, MSBIB + "counsel", counsels);

        if (pages != null) {
            hm.put("pages", pages.toString("--"));
        }
        putIfNotNull(hm, "volume", volume);
        putIfNotNull(hm, MSBIB + "numberofvolume", numberOfVolumes);
        putIfNotNull(hm, "edition", edition);
        parseStandardNumber(standardNumber, hm);

        putIfNotNull(hm, "publisher", publisher);
        putIfNotNull(hm, "address", address);
        putIfNotNull(hm, "booktitle", bookTitle);
        putIfNotNull(hm, "chapter", chapterNumber);
        putIfNotNull(hm, "journal", journalName);
        putIfNotNull(hm, "number", issue);
        putIfNotNull(hm, "month", month);
        // Both values share the "organization" key; conferenceName is written last and therefore wins
        // when both are set.
        putIfNotNull(hm, "organization", periodicalTitle);
        putIfNotNull(hm, "organization", conferenceName);
        putIfNotNull(hm, "school", department);
        putIfNotNull(hm, "institution", institution);

        putIfNotNull(hm, MSBIB + "accessed", dateAccessed);
        putIfNotNull(hm, "url", url);
        putIfNotNull(hm, MSBIB + "productioncompany", productionCompany);
        putIfNotNull(hm, MSBIB + "medium", medium);
        putIfNotNull(hm, MSBIB + "recordingnumber", recordingNumber);
        putIfNotNull(hm, MSBIB + "theater", theater);
        putIfNotNull(hm, MSBIB + "distributor", distributor);
        putIfNotNull(hm, MSBIB + "broadcaster", broadcaster);
        putIfNotNull(hm, MSBIB + "station", station);
        putIfNotNull(hm, MSBIB + "type", type);
        putIfNotNull(hm, MSBIB + "patentnumber", patentNumber);
        putIfNotNull(hm, MSBIB + "court", court);
        putIfNotNull(hm, MSBIB + "reporter", reporter);
        putIfNotNull(hm, MSBIB + "casenumber", caseNumber);
        putIfNotNull(hm, MSBIB + "abbreviatedcasenumber", abbreviatedCaseNumber);

        putIfNotNull(hm, "series", bibTexSeries);
        putIfNotNull(hm, "abstract", bibTexAbstract);
        putIfNotNull(hm, "keywords", bibTexKeyWords);
        putIfNotNull(hm, "crossref", bibTexCrossRef);
        putIfNotNull(hm, "howpublished", bibTex_HowPublished);
        putIfNotNull(hm, "affiliation", bibTexAffiliation);
        putIfNotNull(hm, "contents", bibTexContents);
        putIfNotNull(hm, "copyright", bibTexCopyright);
        putIfNotNull(hm, "price", bibTexPrice);
        putIfNotNull(hm, "size", bibTexSize);

        entry.setField(hm);
        return entry;
    }

    /**
     * Stores {@code value} under {@code key} in {@code map}, unless {@code value} is {@code null}.
     */
    private static void putIfNotNull(Map<String, String> map, String key, String value) {
        if (value != null) {
            map.put(key, value);
        }
    }

    /**
     * Renders this entry as XML by serializing its DOM representation with indentation enabled.
     * If the transformation fails, a warning is logged and whatever was written so far is returned.
     * <p>
     * TODO This is untested.
     *
     * @return the XML text produced by the transformer
     */
    @Override
    public String toString() {
        StringWriter xmlText = new StringWriter();
        try {
            DOMSource domInput = new DOMSource(getDOMrepresentation());
            StreamResult xmlOutput = new StreamResult(xmlText);

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.transform(domInput, xmlOutput);
        } catch (TransformerException e) {
            LOGGER.warn("Could not build XML representation", e);
        }
        return xmlText.toString();
    }

}