Java tutorial
/******************************************************************************* * Copyright 2012, The Infinit.e Open Source Project. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. ******************************************************************************/ package com.ikanow.infinit.e.api.knowledge; import java.io.ByteArrayInputStream; import java.io.File; import java.util.Arrays; import java.util.zip.GZIPInputStream; import org.apache.log4j.Logger; import org.bson.types.ObjectId; import com.ikanow.infinit.e.api.utils.MimeUtils; import com.ikanow.infinit.e.api.utils.SocialUtils; import com.ikanow.infinit.e.data_model.api.ApiManager; import com.ikanow.infinit.e.data_model.api.ResponsePojo; import com.ikanow.infinit.e.data_model.api.ResponsePojo.ResponseObject; import com.ikanow.infinit.e.data_model.api.knowledge.DocumentPojoApiMap; import com.ikanow.infinit.e.data_model.store.DbManager; import com.ikanow.infinit.e.data_model.store.MongoDbManager; import com.ikanow.infinit.e.data_model.store.config.source.SourcePojo; import com.ikanow.infinit.e.data_model.store.document.CompressedFullTextPojo; import com.ikanow.infinit.e.data_model.store.document.DocumentPojo; import com.ikanow.infinit.e.harvest.extraction.document.file.FileHarvester; import com.mongodb.BasicDBObject; import com.mongodb.DBCollection; /** * This class is for all operations related to the retrieval, addition * or update of people within the system * * @author cmorgan * */ public class DocumentHandler { private static final Logger logger = Logger.getLogger(DocumentHandler.class); // Utility class used to pass binary/text info between doc handler and interface // (but it's changed into a doc object before being sent out the API) public static class DocumentFileInterface { public byte[] bytes; public String mediaType; } /** * Get information function that returns the user information in the form of a JSON String. * @param isAdmin * * @param key the key definition of the user ( example email@email.com ) * @return a JSON string representation of the person information on success */ public ResponsePojo getInfo(String userIdStr, String sourceKey, String idStrOrUrl, boolean bReturnFullText, boolean returnRawData, boolean isAdmin) { ResponsePojo rp = new ResponsePojo(); try { // Set up the query BasicDBObject query = new BasicDBObject(); ObjectId id = null; if (null == sourceKey) { id = new ObjectId(idStrOrUrl); query.put(DocumentPojo._id_, id); } else { query.put(DocumentPojo.sourceKey_, sourceKey); query.put(DocumentPojo.url_, idStrOrUrl); } if (!isAdmin) query.put(DocumentPojo.communityId_, new BasicDBObject(MongoDbManager.in_, SocialUtils.getUserCommunities(userIdStr))); // (use DBObject here because DocumentPojo is pretty big and this call could conceivably have perf implications) BasicDBObject fieldsQ = new BasicDBObject(); if (!bReturnFullText) { fieldsQ.put(DocumentPojo.fullText_, 0); // (XML/JSON have fullText as part of pojo) } BasicDBObject dbo = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query, fieldsQ); if ((null == dbo) || ((null != dbo.get(DocumentPojo.url_)) && dbo.getString(DocumentPojo.url_).startsWith("?DEL?"))) { if (null != id) { // this might be the update id... query = new BasicDBObject(DocumentPojo.updateId_, id); dbo = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query, fieldsQ); } } //TESTED (update case, normal case, and intermediate case where both update and original still exist) if (null == dbo) { rp.setResponse(new ResponseObject("Doc Info", true, "Document not found")); return rp; } DocumentPojo dp = DocumentPojo.fromDb(dbo, DocumentPojo.class); if (bReturnFullText) { if (null == dp.getFullText()) { // (Some things like database records might have this stored already) byte[] storageArray = new byte[200000]; DBCollection contentDB = DbManager.getDocument().getContent(); BasicDBObject contentQ = new BasicDBObject(CompressedFullTextPojo.url_, dp.getUrl()); contentQ.put(CompressedFullTextPojo.sourceKey_, new BasicDBObject(MongoDbManager.in_, Arrays.asList(null, dp.getSourceKey()))); BasicDBObject fields = new BasicDBObject(CompressedFullTextPojo.gzip_content_, 1); BasicDBObject dboContent = (BasicDBObject) contentDB.findOne(contentQ, fields); if (null != dboContent) { byte[] compressedData = ((byte[]) dboContent.get(CompressedFullTextPojo.gzip_content_)); ByteArrayInputStream in = new ByteArrayInputStream(compressedData); GZIPInputStream gzip = new GZIPInputStream(in); int nRead = 0; StringBuffer output = new StringBuffer(); while (nRead >= 0) { nRead = gzip.read(storageArray, 0, 200000); if (nRead > 0) { String s = new String(storageArray, 0, nRead, "UTF-8"); output.append(s); } } dp.setFullText(output.toString()); dp.makeFullTextNonTransient(); } } } else if (!returnRawData) { dp.setFullText(null); // (obviously will normally contain full text anyway) } else // if ( returnRawData ) { //check if the harvest type is file, return the file instead //if file is db return the json //get source SourcePojo source = getSourceFromKey(dp.getSourceKey()); if (source.getExtractType().equals("File")) { //get file from harvester String fileURL = dp.getUrl(); if (dp.getSourceUrl() != null) fileURL = dp.getSourceUrl(); byte[] bytes = FileHarvester.getFile(fileURL, source); if (bytes == null) { // Try returning JSON instead String json = ApiManager.mapToApi(dp, new DocumentPojoApiMap()); DocumentFileInterface dfp = new DocumentFileInterface(); dfp.bytes = json.getBytes(); dfp.mediaType = "application/json"; rp.setResponse( new ResponseObject("Doc Info", true, "Document bytes returned successfully")); rp.setData(dfp, null); return rp; } else { DocumentFileInterface dfp = new DocumentFileInterface(); dfp.bytes = bytes; dfp.mediaType = getMediaType(fileURL); rp.setResponse( new ResponseObject("Doc Info", true, "Document bytes returned successfully")); rp.setData(dfp, null); return rp; } } else { String json = ApiManager.mapToApi(dp, new DocumentPojoApiMap()); DocumentFileInterface dfp = new DocumentFileInterface(); dfp.bytes = json.getBytes(); dfp.mediaType = "application/json"; rp.setResponse(new ResponseObject("Doc Info", true, "Document bytes returned successfully")); rp.setData(dfp, null); return rp; } } rp.setData(dp, new DocumentPojoApiMap()); rp.setResponse(new ResponseObject("Doc Info", true, "Feed info returned successfully")); } //(end full text vs raw data) catch (Exception e) { // If an exception occurs log the error logger.error("Exception Message: " + e.getMessage(), e); rp.setResponse(new ResponseObject("Doc Info", false, "error returning feed: " + e.getMessage())); } // Return Json String representing the user return rp; } public ResponsePojo getFileContents(String userIdStr, String sourceKey, String relativePath, boolean isAdmin) { ResponsePojo rp = new ResponsePojo(); try { BasicDBObject query = new BasicDBObject(SourcePojo.key_, sourceKey); if (!isAdmin) query.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, SocialUtils.getUserCommunities(userIdStr))); BasicDBObject fields = new BasicDBObject(SourcePojo.url_, 1); fields.put(SourcePojo.extractType_, 1); fields.put(SourcePojo.file_, 1); fields.put(SourcePojo.isApproved_, 1); SourcePojo source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query, fields), SourcePojo.class); // TEST for security shenanigans String baseRelativePath = new File(".").getCanonicalPath(); String actualRelativePath = new File(relativePath).getCanonicalPath(); if (!actualRelativePath.startsWith(baseRelativePath)) { throw new RuntimeException("Access denied: " + relativePath); } //(end security shenanigans) if (null == source) { throw new RuntimeException("Document source not found: " + sourceKey); } if ((null != source.getExtractType()) && !source.getExtractType().equals("File")) { throw new RuntimeException( "Document source not a file: " + sourceKey + ", " + source.getExtractType()); } if (!source.isApproved()) { throw new RuntimeException("Document source not approved, access denied: " + sourceKey); } String fileURL = source.getUrl() + relativePath; byte[] bytes = FileHarvester.getFile(fileURL, source); if (bytes == null) { //fail rp.setResponse(new ResponseObject("Doc Info", false, "Could not find document: " + relativePath)); return rp; } else { DocumentFileInterface dfp = new DocumentFileInterface(); dfp.bytes = bytes; dfp.mediaType = getMediaType(fileURL); rp.setResponse(new ResponseObject("Doc Info", true, "Document bytes returned successfully")); rp.setData(dfp, null); return rp; } } catch (Exception e) { // If an exception occurs log the error logger.error("Exception Message: " + e.getMessage(), e); rp.setResponse(new ResponseObject("Doc Info", false, "error returning feed: " + e.getMessage())); } // Return Json String representing the user return rp; }//TESTED private SourcePojo getSourceFromKey(String sourceKey) { SourcePojo source = null; try { BasicDBObject query = new BasicDBObject(); query.put(SourcePojo.key_, sourceKey); source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query), SourcePojo.class); } catch (Exception e) { } return source; } private String getMediaType(String url) { String mediaType = null; int end = url.lastIndexOf("?"); if (end >= 0) { url = url.substring(0, end); } int mid = url.lastIndexOf("."); String extension = url.substring(mid + 1, url.length()); mediaType = MimeUtils.lookupMimeType(extension); if (null == mediaType) { mediaType = "text/plain"; } return mediaType; }//TESTED }