// Java tutorial
/** * Copyright (C) 2015 Bruno Candido Volpato da Cunha (brunocvcunha@gmail.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.brunocvcunha.digesteroids; import com.google.gson.reflect.TypeToken; import com.jayway.jsonpath.JsonPath; import java.beans.PropertyDescriptor; import java.io.InputStream; import java.lang.reflect.Field; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import org.apache.log4j.Logger; import org.brunocvcunha.digesteroids.annotation.DigesterEntity; import org.brunocvcunha.digesteroids.annotation.DigesterMapping; import org.brunocvcunha.digesteroids.cast.DigesteroidsCaster; import org.brunocvcunha.digesteroids.cast.DigesteroidsDefaultCaster; import org.brunocvcunha.inutils4j.MyStreamUtils; import org.jsoup.nodes.Element; import org.jsoup.nodes.TextNode; import org.jsoup.select.Elements; /** * Crawler Utils * * @author brunovolpato * */ public class Digesteroids { private static Logger log = Logger.getLogger(Digesteroids.class); private DigesteroidsCaster caster; /** * Non-Args Constructor for the Digesteroids */ public Digesteroids() { this.caster = DigesteroidsDefaultCaster.getInstance(); } /** * @param caster The instance of Caster to use */ public Digesteroids(DigesteroidsCaster caster) { 
super(); this.caster = caster; } /** * Convert given stream with HTML to targetType, using the mappings from the default source. * @param source Source name * @param is Stream with the HTML * @param targetType Target type * @return Converted object * @throws InstantiationException Can not create instance of target object * @throws IllegalAccessException Access violation * @param <T> Type to return */ public <T> T convertHTMLToType(String source, InputStream is, Type targetType) throws InstantiationException, IllegalAccessException { return convertObjectToType(source, MyStreamUtils.readContent(is), targetType); } /** * Convert given original object to targetType, using the mappings from the default source. * @param original Original data * @param targetType Target type * @return Converted object * @throws InstantiationException Can not create instance of target object * @throws IllegalAccessException Access violation * @param <T> Type to return */ public <T> T convertObjectToType(Object original, Type targetType) throws InstantiationException, IllegalAccessException { return convertObjectToType("", original, targetType); } /** * Convert given original object to targetType, using the mappings from the source parameter. 
* @param source Source name * @param original Original data * @param targetType Target type * @return Converted object * @throws InstantiationException Can not create instance of target object * @throws IllegalAccessException Access violation * @param <T> Type to return */ public <T> T convertObjectToType(String source, Object original, Type targetType) throws InstantiationException, IllegalAccessException { if (original == null) { return null; } TypeToken<T> typeToken = (TypeToken<T>) TypeToken.get(targetType); Class<T> targetClass = (Class<T>) typeToken.getRawType(); T target = targetClass.newInstance(); // at this point, we starting doing comparisons for (Field entryField : targetClass.getDeclaredFields()) { entryField.setAccessible(true); DigesterMapping reference = null; try { DigesterMapping[] references = entryField.getAnnotationsByType(DigesterMapping.class); for (DigesterMapping candidate : references) { if (candidate.source().equalsIgnoreCase(source)) { reference = candidate; break; } } } catch (Exception e) { // it's ok } if (reference != null) { log.debug("Found reference, using field: " + reference.value() + ". 
Field name: " + entryField.getName() + ", Base Class: " + targetClass + ", object: " + original.getClass()); PropertyDescriptor descriptor; Method writerMethod; try { descriptor = new PropertyDescriptor(entryField.getName(), targetClass); writerMethod = descriptor.getWriteMethod(); } catch (Exception e) { e.printStackTrace(); log.warn("Exception converting record: ", e); continue; } Type valueType = writerMethod.getGenericParameterTypes()[0]; log.debug("Reference for field " + entryField.getName() + " - " + reference); Object resolvedValue = resolveValue(original, reference, valueType); if (resolvedValue != null) { try { if (reference.trim() && resolvedValue instanceof String) { resolvedValue = ((String) resolvedValue).replaceAll("", " ") .replaceAll("[\\s\\t]", " ").trim(); } invokeSetter(target, writerMethod, resolvedValue); } catch (Exception e) { e.printStackTrace(); } } } } return target; } /** * @param originalData Original data to resolve * @param reference Reference annotation * @param valueType Value to return * @return Value resolved based on the annotation * @throws InstantiationException Can not create instance of target object * @throws IllegalAccessException Access violation */ public Object resolveValue(Object originalData, DigesterMapping reference, Type valueType) throws InstantiationException, IllegalAccessException { TypeToken<?> typeToken = TypeToken.get(valueType); Class<?> targetClass = typeToken.getRawType(); Object resolvedValue = null; if (reference.refType() == ReferenceTypeEnum.NORMAL) { resolvedValue = resolveValueNormal(reference.source(), originalData, reference.value(), valueType, targetClass); } else if (reference.refType() == ReferenceTypeEnum.PASS_THROUGH) { resolvedValue = resolveValuePassthrough(reference.source(), originalData, valueType, targetClass); } else if (reference.refType() == ReferenceTypeEnum.JSON_PATH) { resolvedValue = resolveValueJsonPath(originalData, reference.value()); } else if (reference.refType() == 
ReferenceTypeEnum.HTML_ID) { resolvedValue = resolveValueHTMLId(originalData, reference.value(), reference.htmlText(), reference.textNode(), reference.attribute()); } else if (reference.refType() == ReferenceTypeEnum.HTML_CSS) { resolvedValue = resolveValueHTMLCss(originalData, reference.value(), reference.htmlText(), reference.textNode(), reference.attribute()); } else if (reference.refType() == ReferenceTypeEnum.HTML_XPATH) { resolvedValue = resolveValueHTMLXPath(originalData, reference.value(), reference.htmlText(), reference.textNode(), reference.attribute()); } else if (reference.refType() == ReferenceTypeEnum.HARDCODE) { resolvedValue = reference.value(); } // make sure that it's the type Object returnValue = caster.cast(resolvedValue, valueType); if (reference.rule() != null && !reference.rule().isInterface()) { return reference.rule().newInstance().apply(returnValue); } return returnValue; } /** * @param source Source to consider the mappings * @param originalData Original data * @param valueType The type of the value * @param targetClass The target class to consider * @return Resolved value * @throws InstantiationException Failed to create instance * @throws IllegalAccessException Access violation */ protected Object resolveValuePassthrough(String source, Object originalData, Type valueType, Class<?> targetClass) throws InstantiationException, IllegalAccessException { Object resolvedValue; if (Collection.class.isAssignableFrom(targetClass)) { log.info("Annotated: " + valueType); List<Object> array = new ArrayList<>(); Object resolvedElement; if (valueType instanceof ParameterizedType) { resolvedElement = convertObjectToType(source, originalData, ((ParameterizedType) valueType).getActualTypeArguments()[0]); } else { resolvedElement = convertObjectToType(source, originalData, valueType); } array.add(resolvedElement); resolvedValue = array; } else { resolvedValue = convertObjectToType(source, originalData, valueType); } return resolvedValue; } /** * @param 
originalData Original data with the HTML * @param id The ID to look for * @param htmlText If should return HTML text or the whole object * @param textNode node to return * @return HTML Information */ protected Object resolveValueHTMLId(Object originalData, String id, boolean htmlText, int textNode, String attribute) { Element targetElement = caster.htmlElement(originalData); Element elementById = targetElement.getElementById(id); if (textNode >= 0) { return elementById.textNodes().get(textNode); } if (htmlText) { return elementById.text(); } if (!attribute.isEmpty()) { return elementById.attr(attribute); } return elementById; } /** * @param originalData Original data with the HTML * @param refValue The CSS selector to look for * @param htmlText If should return HTML text or the whole object * @param textNode node to return * @return HTML Information */ protected Object resolveValueHTMLCss(Object originalData, String refValue, boolean htmlText, int textNode, String attribute) { Element targetElement = caster.htmlElement(originalData); Elements elements = targetElement.select(refValue); if (!elements.isEmpty() && textNode >= 0) { List<TextNode> selectedNode = elements.first().textNodes().stream() .filter(node -> node.text() != null && !node.text().trim().isEmpty()) .collect(Collectors.toList()); if (selectedNode.size() > textNode) { return selectedNode.get(textNode).text(); } } if (htmlText) { return elements.text(); } if (!attribute.isEmpty()) { return elements.attr(attribute); } return elements; } /** * @param originalData Original data with the HTML * @param refValue The XPath selector to look for * @param htmlText If should return HTML text or the whole object * @param textNode node to return * @return HTML Information */ protected Object resolveValueHTMLXPath(Object originalData, String refValue, boolean htmlText, int textNode, String attribute) { Element targetElement = caster.htmlElement(originalData); Elements elements = targetElement.select(refValue); if 
(textNode >= 0) { return elements.first().textNodes().get(textNode); } if (htmlText) { return elements.text(); } if (!attribute.isEmpty()) { return elements.attr(attribute); } return elements; } /** * @param originalData The JSON data * @param path The JsonPath to return * @return Json Path information */ protected Object resolveValueJsonPath(Object originalData, String path) { Map<String, Object> targetMap = caster.map(originalData); return JsonPath.read(caster.json(targetMap), path); } /** * Resolve value using simple getter * @param source Source to get the annotations * @param originalData Original data to resolve * @param refValue Where the data is * @param valueType The value type * @param targetClass The target class * @return Resolved value * @throws InstantiationException Can not create instance of target object * @throws IllegalAccessException Access violation */ protected Object resolveValueNormal(String source, Object originalData, String refValue, Type valueType, Class<?> targetClass) throws InstantiationException, IllegalAccessException { Map<String, Object> targetMap = caster.map(originalData); Object resolvedValue = targetMap.get(refValue); if (resolvedValue == null) { resolvedValue = DigesteroidsReflectionUtils.getRecursive(targetMap, refValue); } if (targetClass.getAnnotation(DigesterEntity.class) != null) { resolvedValue = convertObjectToType(source, resolvedValue, valueType); } return resolvedValue; } /** * @param target Target to set * @param setter Setter method * @param resolvedValue Value to set * @throws IllegalArgumentException Invalid arguments sent * @throws InvocationTargetException Access violation in the method * @throws IllegalAccessException Access violation */ protected void invokeSetter(Object target, Method setter, Object resolvedValue) throws IllegalAccessException, IllegalArgumentException, InvocationTargetException { Class<?> valueType = setter.getParameterTypes()[0]; // if the value and the setter are compatible, so just // 
invoke it to set the new value if (valueType.isAssignableFrom(resolvedValue.getClass())) { setter.invoke(target, resolvedValue); } else { // if the value cannot be used for the setter argument (e.g. // String to a Date) // so we need to convert types. sometimes it happens because // the lack of support // of some types in JSON if (log.isDebugEnabled()) { log.debug("Need to convert " + resolvedValue.getClass() + " to " + valueType); } try { // cast to the type, and call the setter Object data = caster.cast(resolvedValue, valueType); Object[] objArray; if (data instanceof Object[]) { objArray = (Object[]) data; } else { objArray = new Object[] { data }; } setter.invoke(target, objArray); } catch (Exception e) { log.warn("Exception occurred while trying to convert data - " + resolvedValue + " - " + setter.getName(), e); } } } /** * @return the caster */ public DigesteroidsCaster getCaster() { return caster; } /** * @param caster the caster to set */ public void setCaster(DigesteroidsCaster caster) { this.caster = caster; } }