List of usage examples for org.apache.pdfbox.cos.COSName.getName()
public String getName()
From source file:net.timendum.pdf.Images2HTML.java
License:Open Source License
@Override protected void processOperator(PDFOperator operator, List arguments) throws IOException { String operation = operator.getOperation(); if (INVOKE_OPERATOR.equals(operation)) { COSName objectName = (COSName) arguments.get(0); Map<String, PDXObject> xobjects = getResources().getXObjects(); PDXObject xobject = xobjects.get(objectName.getName()); if (xobject instanceof PDXObjectImage) { PDXObjectImage image = (PDXObjectImage) xobject; PDPage page = getCurrentPage(); int imageWidth = image.getWidth(); int imageHeight = image.getHeight(); double pageHeight = page.getMediaBox().getHeight(); Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix(); float yScaling = ctmNew.getYScale(); float angle = (float) Math.acos(ctmNew.getValue(0, 0) / ctmNew.getXScale()); if (ctmNew.getValue(0, 1) < 0 && ctmNew.getValue(1, 0) > 0) { angle = (-1) * angle;/*w w w . ja v a 2 s. c o m*/ } ctmNew.setValue(2, 1, (float) (pageHeight - ctmNew.getYPosition() - Math.cos(angle) * yScaling)); ctmNew.setValue(2, 0, (float) (ctmNew.getXPosition() - Math.sin(angle) * yScaling)); // because of the moved 0,0-reference, we have to shear in the opposite direction ctmNew.setValue(0, 1, (-1) * ctmNew.getValue(0, 1)); ctmNew.setValue(1, 0, (-1) * ctmNew.getValue(1, 0)); AffineTransform ctmAT = ctmNew.createAffineTransform(); ctmAT.scale(1f / imageWidth, 1f / imageHeight); Image entry = new Image(); entry.x = ctmNew.getXPosition(); entry.image = image; entry.name = objectName.getName(); images.put(page, ctmNew.getYPosition(), entry); } else if (xobject instanceof PDXObjectForm) { // save the graphics state getGraphicsStack().push((PDGraphicsState) getGraphicsState().clone()); PDPage page = getCurrentPage(); PDXObjectForm form = (PDXObjectForm) xobject; COSStream invoke = (COSStream) form.getCOSObject(); PDResources pdResources = form.getResources(); if (pdResources == null) { pdResources = page.findResources(); } // if there is an optional form matrix, we have to // map the form space to 
the user space Matrix matrix = form.getMatrix(); if (matrix != null) { Matrix xobjectCTM = matrix.multiply(getGraphicsState().getCurrentTransformationMatrix()); getGraphicsState().setCurrentTransformationMatrix(xobjectCTM); } processSubStream(page, pdResources, invoke); // restore the graphics state setGraphicsState(getGraphicsStack().pop()); } } else { super.processOperator(operator, arguments); } }
From source file:org.apache.fop.render.pdf.pdfbox.FOPPDFSingleByteFont.java
License:Apache License
/**
 * Builds the character-code to glyph-name table for the given encoding.
 *
 * <p>For a {@code null} encoding an empty map is returned. When the encoding is not a
 * {@code BuiltInEncoding} and its COS object is a dictionary, the table is assembled from
 * the dictionary's base encoding (if any) and then overlaid with the entries of its
 * differences array: a number sets the current code, and each following name is assigned
 * to the current code, which is then incremented. In every other case the encoding's own
 * code-to-name map is returned directly.
 *
 * @param encoding the font encoding, may be {@code null}
 * @return the code-to-name table; either a fresh map or the map held by {@code encoding}
 */
private Map<Integer, String> getCodeToName(Encoding encoding) {
    Map<Integer, String> codeToName = new HashMap<Integer, String>();
    if (encoding == null) {
        return codeToName;
    }

    COSBase cos = null;
    if (!(encoding instanceof BuiltInEncoding)) {
        cos = encoding.getCOSObject();
    }
    if (!(cos instanceof COSDictionary)) {
        return encoding.getCodeToNameMap();
    }

    COSDictionary enc = (COSDictionary) cos;
    COSName baseEncodingName = (COSName) enc.getDictionaryObject(COSName.BASE_ENCODING);
    if (baseEncodingName != null) {
        Encoding baseEncoding = Encoding.getInstance(baseEncodingName);
        // NOTE(review): per the PDFBox API docs the lookup may yield null for a name that
        // is not a predefined encoding; skip the base table then instead of failing.
        if (baseEncoding != null) {
            codeToName.putAll(baseEncoding.getCodeToNameMap());
        }
    }

    COSArray differences = (COSArray) enc.getDictionaryObject(COSName.DIFFERENCES);
    int currentIndex = -1;
    for (int i = 0; differences != null && i < differences.size(); i++) {
        COSBase next = differences.getObject(i);
        if (next instanceof COSNumber) {
            currentIndex = ((COSNumber) next).intValue();
        } else if (next instanceof COSName) {
            COSName name = (COSName) next;
            codeToName.put(currentIndex++, name.getName());
        }
    }
    return codeToName;
}
From source file:org.apache.fop.render.pdf.pdfbox.MergeFontsPDFWriter.java
License:Apache License
/**
 * Writes the operands of one operator to the output buffer {@code s}.
 *
 * <p>A name operand is looked up in {@code fonts}. If font data exists and an internal
 * name is known (from {@code fontsToRemove}) or can be obtained via {@code getNewFont},
 * the internal name is written and {@code font}/{@code oldFont} are updated; otherwise
 * the name produced by {@code key.getName} is written and, for the {@code Tf} operator,
 * both font fields are cleared. A string operand is remapped through
 * {@code getMappedWord} while a font whose {@code size()} is not 1 is active, falling
 * back to the escaped original string. Everything else goes to {@code processArg}.
 *
 * @param op        the operator the operands belong to
 * @param arguments the operands to write
 * @throws IOException if one of the called helpers fails
 */
protected void readPDFArguments(Operator op, Collection<COSBase> arguments) throws IOException {
    for (COSBase c : arguments) {
        if (c instanceof COSName) {
            COSName cn = (COSName) c;
            COSDictionary fontData = (COSDictionary) fonts.getDictionaryObject(cn.getName());
            String internalName = fontsToRemove.get(cn);
            if (internalName == null && fontData != null) {
                internalName = getNewFont(fontData, fontInfo, fontsToRemove.values());
            }

            boolean replaceable = fontData != null && internalName != null;
            if (replaceable) {
                s.append("/" + internalName);
                fontsToRemove.put(cn, internalName);
                font = fontInfo.getUsedFonts().get(internalName);
                oldFont = getFont(fontData);
            } else {
                s.append("/" + key.getName(cn));
                if (op.getName().equals("Tf")) {
                    font = null;
                    oldFont = null;
                }
            }
            s.append(" ");
            continue;
        }

        boolean remapText = c instanceof COSString && font != null && ((FOPPDFFont) font).size() != 1;
        if (!remapText) {
            processArg(op, c);
            continue;
        }

        COSString text = (COSString) c;
        List<String> word = readCOSString(text, oldFont);
        String mapped = null;
        if (word != null) {
            mapped = getMappedWord(word, font, text.getBytes());
        }
        if (mapped == null) {
            // no mapping available: emit the original string, escaped
            s.append(PDFText.escapeString(getString(text)));
        } else {
            s.append(mapped);
        }
    }
}
From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java
License:Apache License
private void transferDict(COSDictionary orgDict, PDFStream targetDict, Set filter, boolean inclusive) throws IOException { Set<COSName> keys = orgDict.keySet(); for (COSName key : keys) { if (inclusive && !filter.contains(key.getName())) { continue; } else if (!inclusive && filter.contains(key.getName())) { continue; }//from w w w . ja v a 2 s .c o m targetDict.put(key.getName(), cloneForNewDocument(orgDict.getItem(key))); } }
From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java
License:Apache License
private void updateXObj(COSDictionary sourcePageResources, PDFDictionary pageResources) throws IOException { COSDictionary xobj = (COSDictionary) sourcePageResources.getDictionaryObject(COSName.XOBJECT); if (xobj != null && pdfDoc.isMergeFontsEnabled()) { PDFDictionary target = (PDFDictionary) pageResources.get("XObject"); for (COSName entry : xobj.keySet()) { if (newXObj.containsKey(entry)) { PDFStream s = (PDFStream) target.get(entry.getName()); s.setData(newXObj.get(entry).getBytes("ISO-8859-1")); PDFDictionary xobjr = (PDFDictionary) s.get("Resources"); xobjr.put("Font", pageResources.get("Font")); }//from w w w . j av a 2 s . co m } } }
From source file:org.apache.fop.render.pdf.pdfbox.StructureTreeMerger.java
License:Apache License
private void copyElemEntries(COSBase base, PDFStructElem elem) throws IOException { assert base instanceof COSObject; COSObject baseObj = (COSObject) base; COSDictionary baseDic = (COSDictionary) baseObj.getObject(); COSName[] names = { COSName.TYPE, COSName.S, COSName.PG, COSName.ALT, COSName.LANG, COSName.A, COSName.ACTUAL_TEXT, COSName.T, COSName.E, COSName.C }; for (COSName name : names) { if (baseDic.keySet().contains(name)) { if (name.equals(COSName.PG)) { elem.put(COSName.PG.getName(), targetPage.makeReference()); } else { elem.put(name.getName(), adapter.cloneForNewDocument(baseDic.getItem(name))); }// ww w .java 2s . co m } } adapter.cacheClonedObject(base, elem); }
From source file:org.apache.fop.render.pdf.pdfbox.UniqueName.java
License:Apache License
protected String getName(COSName cn) { if (resourceNames.contains(cn)) { return cn.getName() + key; }//from w w w .ja va2 s . com return cn.getName(); }
From source file:org.apache.pdflens.views.treeview.PDFTreeCellRenderer.java
License:Apache License
private Object convertToTreeObject(Object nodeValue) { if (nodeValue instanceof MapEntry) { MapEntry entry = (MapEntry) nodeValue; COSName key = (COSName) entry.getKey(); COSBase value = (COSBase) entry.getValue(); nodeValue = key.getName() + ":" + convertToTreeObject(value); } else if (nodeValue instanceof COSFloat) { nodeValue = "COSFloat:" + ((COSFloat) nodeValue).floatValue(); } else if (nodeValue instanceof COSInteger) { nodeValue = "COSInteger:" + ((COSInteger) nodeValue).intValue(); } else if (nodeValue instanceof COSString) { nodeValue = "COSString:" + ((COSString) nodeValue).getString(); } else if (nodeValue instanceof COSName) { nodeValue = "COSName:" + ((COSName) nodeValue).getName(); } else if (nodeValue instanceof ArrayEntry) { ArrayEntry entry = (ArrayEntry) nodeValue; nodeValue = "[" + entry.getIndex() + "]" + convertToTreeObject(entry.getValue()); } else if (nodeValue instanceof COSNull) { nodeValue = "COSNull:" + "null"; } else if (nodeValue instanceof COSDictionary) { COSDictionary dict = (COSDictionary) nodeValue; if (nodeValue instanceof COSStream) { nodeValue = "Stream"; } else {// ww w. j a v a2 s. c o m nodeValue = "Dictionary"; } COSName type = (COSName) dict.getDictionaryObject(COSName.TYPE); if (type != null) { nodeValue = nodeValue + "(" + type.getName(); COSName subType = (COSName) dict.getDictionaryObject(COSName.SUBTYPE); if (subType != null) { nodeValue = nodeValue + ":" + subType.getName(); } nodeValue = nodeValue + ")"; } } else if (nodeValue instanceof COSArray) { nodeValue = "COSArray"; } else if (nodeValue instanceof COSString) { nodeValue = "COSString:" + ((COSString) nodeValue).getString(); } return nodeValue; }
From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java
License:Apache License
private void extractImages(PDResources resources) throws SAXException, IOException { if (resources == null || config.getExtractInlineImages() == false) { return;/* w w w .j a va 2 s. co m*/ } Iterable<COSName> cosIterable = resources.getXObjectNames(); if (cosIterable == null) { return; } for (COSName name : cosIterable) { PDXObject object = resources.getXObject(name); if (object instanceof PDFormXObject) { extractImages(((PDFormXObject) object).getResources()); } else if (object instanceof PDImageXObject) { PDImageXObject image = (PDImageXObject) object; Metadata metadata = new Metadata(); String extension = ""; if ("jpg".equalsIgnoreCase(image.getSuffix())) { metadata.set(Metadata.CONTENT_TYPE, "image/jpeg"); extension = ".jpg"; } else if ("tiff".equalsIgnoreCase(image.getSuffix())) { metadata.set(Metadata.CONTENT_TYPE, "image/tiff"); extension = ".tif"; } else if ("jpx".equalsIgnoreCase(image.getSuffix())) { metadata.set(Metadata.CONTENT_TYPE, "image/jpx"); extension = ".jpx"; } else if ("png".equalsIgnoreCase(image.getSuffix())) { metadata.set(Metadata.CONTENT_TYPE, "image/png"); extension = ".png"; } Integer imageNumber = processedInlineImages.get(name.getName()); if (imageNumber == null) { imageNumber = inlineImageCounter++; } String fileName = "image" + imageNumber + extension; metadata.set(Metadata.RESOURCE_NAME_KEY, fileName); // Output the img tag AttributesImpl attr = new AttributesImpl(); attr.addAttribute("", "src", "src", "CDATA", "embedded:" + fileName); attr.addAttribute("", "alt", "alt", "CDATA", fileName); handler.startElement("img", attr); handler.endElement("img"); //Do we only want to process unique COSObject ids? //If so, have we already processed this one? 
if (config.getExtractUniqueInlineImagesOnly() == true) { String cosObjectId = name.getName(); if (processedInlineImages.containsKey(cosObjectId)) { continue; } processedInlineImages.put(cosObjectId, imageNumber); } metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.INLINE.toString()); EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor(); if (extractor.shouldParseEmbedded(metadata)) { try { extractor.parseEmbedded(image.getCOSStream().getFilteredStream(), new EmbeddedContentHandler(handler), metadata, false); } catch (IOException e) { // could not extract this image, so just skip it... } } } } }
From source file:org.apache.tika.parser.pdf.EnhancedPDFParser.java
License:Apache License
@SuppressWarnings("deprecation") private void extractMetadata(PDDocument document, Metadata metadata) throws TikaException { XMPMetadata xmp = null;//from w ww. j a va 2 s . com XMPSchemaDublinCore dcSchema = null; try { if (document.getDocumentCatalog().getMetadata() != null) { xmp = XMPMetadata.load(document.getDocumentCatalog().getMetadata().exportXMPMetadata()); } if (xmp != null) { dcSchema = xmp.getDublinCoreSchema(); } } catch (IOException e) { //swallow } PDDocumentInformation info = document.getDocumentInformation(); metadata.set(PagedText.N_PAGES, document.getNumberOfPages()); extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema); extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema); extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema); addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator()); addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords()); addMetadata(metadata, "producer", info.getProducer()); extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema); // TODO: Move to description in Tika 2.0 addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject()); addMetadata(metadata, "trapped", info.getTrapped()); // TODO Remove these in Tika 2.0 addMetadata(metadata, "created", info.getCreationDate()); addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate()); Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified); addMetadata(metadata, TikaCoreProperties.MODIFIED, modified); // All remaining metadata is custom // Copy this over as-is List<String> handledMetadata = Arrays.asList("Author", "Creator", "CreationDate", "ModDate", "Keywords", "Producer", "Subject", "Title", "Trapped"); for (COSName key : info.getDictionary().keySet()) { String name = key.getName(); if (!handledMetadata.contains(name)) { 
addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key)); } } //try to get the various versions //Caveats: // there is currently a fair amount of redundancy // TikaCoreProperties.FORMAT can be multivalued // There are also three potential pdf specific version keys: pdf:PDFVersion, pdfa:PDFVersion, pdf:PDFExtensionVersion metadata.set("pdf:PDFVersion", Float.toString(document.getDocument().getVersion())); metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=" + Float.toString(document.getDocument().getVersion())); try { if (xmp != null) { xmp.addXMLNSMapping(XMPSchemaPDFAId.NAMESPACE, XMPSchemaPDFAId.class); XMPSchemaPDFAId pdfaxmp = (XMPSchemaPDFAId) xmp.getSchemaByClass(XMPSchemaPDFAId.class); if (pdfaxmp != null) { metadata.set("pdfaid:part", Integer.toString(pdfaxmp.getPart())); if (pdfaxmp.getConformance() != null) { metadata.set("pdfaid:conformance", pdfaxmp.getConformance()); String version = "A-" + pdfaxmp.getPart() + pdfaxmp.getConformance().toLowerCase(Locale.ROOT); metadata.set("pdfa:PDFVersion", version); metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\"" + version + "\""); } } // TODO WARN if this XMP version is inconsistent with document header version? } } catch (IOException e) { metadata.set(TikaCoreProperties.TIKA_META_PREFIX + "pdf:metadata-xmp-parse-failed", "" + e); } //TODO: Let's try to move this into PDFBox. 
//Attempt to determine Adobe extension level, if present: COSDictionary root = document.getDocumentCatalog().getCOSObject(); COSDictionary extensions = (COSDictionary) root.getDictionaryObject(COSName.getPDFName("Extensions")); if (extensions != null) { for (COSName extName : extensions.keySet()) { // If it's an Adobe one, interpret it to determine the extension level: if (extName.equals(COSName.getPDFName("ADBE"))) { COSDictionary adobeExt = (COSDictionary) extensions.getDictionaryObject(extName); if (adobeExt != null) { String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion")); int el = adobeExt.getInt(COSName.getPDFName("ExtensionLevel")); //-1 is sentinel value that something went wrong in getInt if (el != -1) { metadata.set("pdf:PDFExtensionVersion", baseVersion + " Adobe Extension Level " + el); metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\"" + baseVersion + " Adobe Extension Level " + el + "\""); } } } else { // WARN that there is an Extension, but it's not Adobe's, and so is a 'new' format'. metadata.set("pdf:foundNonAdobeExtensionName", extName.getName()); } } } }