Java tutorial
/*
 * Copyright 2014 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.data.solr.showcase;

import java.io.File;
import java.io.IOException;
import java.util.List;

import javax.annotation.PostConstruct;

import org.apache.commons.lang.WordUtils;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.data.solr.showcase.config.SearchContext;
import org.springframework.data.solr.showcase.config.WebContext;

/**
 * Spring Boot entry point of the showcase. After dependency injection it
 * populates the injected {@link SolrServer} with one document per file found
 * in a fixed directory of article XML files.
 *
 * @author Panos Vlastaridis
 */
@Configuration
@ComponentScan
@EnableAutoConfiguration
@Import({ WebContext.class, SearchContext.class })
public class Application {

    /** Directory whose files are indexed at startup. */
    private static final String ARTICLES_DIRECTORY = "/home/panos/ppprojpopulatorfiles/indbarticles.O-Z";

    @Autowired SolrServer solr;

    /**
     * Starts the Spring Boot application.
     *
     * @param args command line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(Application.class, args);
    }

    /**
     * Indexes every file of {@link #ARTICLES_DIRECTORY} into Solr.
     * <p>
     * Each file becomes one document: the file name is the {@code id}, the text
     * extracted by {@link #readPMCFileTextBody(File)} is the {@code textbody},
     * and {@code title}, {@code journal} and {@code authors} are fixed
     * placeholder values. A failure to add one document is reported and the
     * remaining files are still processed. If the directory cannot be listed
     * the population step is skipped instead of failing application startup.
     */
    @PostConstruct
    public void initApplication() {
        File folder = new File(ARTICLES_DIRECTORY);

        // listFiles() yields null when the path is missing, is not a directory,
        // or cannot be read; iterating over that would throw a NullPointerException.
        File[] files = folder.listFiles();
        if (files == null) {
            System.err.println("Skipping Solr population, cannot list directory: " + folder);
            return;
        }

        boolean anyAdded = false;
        for (File file : files) {
            SolrInputDocument doc = new SolrInputDocument();
            doc.setField("id", file.getName());
            doc.setField("title", "title publication");
            doc.setField("journal", "sth");
            // NOTE(review): ".xml" is stripped from the body text, not from the id;
            // this looks like it was meant for the file name - confirm before changing.
            doc.setField("textbody", readPMCFileTextBody(file).replace(".xml", ""));
            doc.setField("authors", "smn");
            try {
                solr.add(doc);
                anyAdded = true;
            } catch (SolrServerException e) {
                System.err.println("Could not index " + file);
                e.printStackTrace();
            } catch (IOException e) {
                System.err.println("Could not index " + file);
                e.printStackTrace();
            }
        }

        if (!anyAdded) {
            return;
        }

        // One commit for the whole batch instead of one blocking commit per file.
        try {
            solr.commit(true, true);
        } catch (SolrServerException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the text of the {@code body} child of the root element of an
     * XML file.
     * <p>
     * The direct children of {@code body} are visited in document order; text is
     * collected from {@code p} elements and, recursively, from {@code sec}
     * elements. Validation and DTD loading are switched off on the parser.
     *
     * @param file the XML file to read
     * @return the concatenated text; empty when the root has no {@code body}
     *         child or when the file cannot be read or parsed (the exception
     *         stack trace is printed in that case)
     */
    public static String readPMCFileTextBody(File file) {
        StringBuilder bodytext = new StringBuilder();
        try {
            SAXBuilder saxBuilder = new SAXBuilder();
            saxBuilder.setFeature("http://xml.org/sax/features/validation", false);
            saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
            saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

            Document document = saxBuilder.build(file);
            Element body = document.getRootElement().getChild("body");
            if (body != null) {
                List<Element> bodyChildren = body.getChildren();
                for (Element child : bodyChildren) {
                    appendElementText(child, bodytext);
                }
            }
        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return bodytext.toString();
    }

    /**
     * Appends the text of an element to the given text.
     * <p>
     * For a {@code p} element (name compared case-insensitively) the element's
     * value is appended. For a {@code sec} element the text of its children is
     * appended recursively, in document order. Any other element contributes
     * nothing.
     *
     * @param e the element to read
     * @param textin the text collected so far
     * @return {@code textin} followed by the text found in {@code e};
     *         {@code textin} itself when {@code e} is neither {@code p} nor
     *         {@code sec}
     */
    public static String readElement(Element e, String textin) {
        String name = e.getName();
        if (!"p".equalsIgnoreCase(name) && !"sec".equalsIgnoreCase(name)) {
            return textin;
        }
        StringBuilder text = new StringBuilder(textin);
        appendElementText(e, text);
        return text.toString();
    }

    /** Recursively appends the text of {@code p} and {@code sec} elements to {@code out}. */
    private static void appendElementText(Element element, StringBuilder out) {
        String name = element.getName();
        if ("p".equalsIgnoreCase(name)) {
            out.append(element.getValue());
        } else if ("sec".equalsIgnoreCase(name)) {
            List<Element> children = element.getChildren();
            for (Element child : children) {
                appendElementText(child, out);
            }
        }
    }
}