Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package hivemall.docs; import static hivemall.docs.utils.MarkdownUtils.asCodeBlock; import static hivemall.docs.utils.MarkdownUtils.asInlineCode; import static hivemall.docs.utils.MarkdownUtils.asListElement; import static hivemall.docs.utils.MarkdownUtils.indent; import static org.apache.commons.lang.StringEscapeUtils.escapeHtml; import hivemall.utils.lang.StringUtils; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.PrintWriter; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.annotation.Nonnull; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.maven.execution.MavenSession; import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.MojoExecutionException; import org.apache.maven.plugins.annotations.Mojo; import org.apache.maven.plugins.annotations.Parameter; import org.reflections.Reflections; /** * Generate a list of UDFs for documentation. * * @link https://hivemall.incubator.apache.org/userguide/misc/generic_funcs.html * @link https://hivemall.incubator.apache.org/userguide/misc/funcs.html */ @Mojo(name = "generate-funcs-list") public class FuncsListGenerator extends AbstractMojo { @Parameter(defaultValue = "${basedir}", readonly = true) private File basedir; @Parameter(defaultValue = "${session}", readonly = true) private MavenSession session; @Parameter(defaultValue = "docs/gitbook/misc/generic_funcs.md") private String pathToGenericFuncs; @Parameter(defaultValue = "docs/gitbook/misc/funcs.md") private String pathToFuncs; private static final Map<String, List<String>> genericFuncsHeaders = new LinkedHashMap<>(); static { genericFuncsHeaders.put("# Array", Arrays.asList("hivemall.tools.array", "hivemall.tools.list")); genericFuncsHeaders.put("# Map", Collections.singletonList("hivemall.tools.map")); genericFuncsHeaders.put("# Bitset", Collections.singletonList("hivemall.tools.bits")); genericFuncsHeaders.put("# Compression", Collections.singletonList("hivemall.tools.compress")); genericFuncsHeaders.put("# MapReduce", Collections.singletonList("hivemall.tools.mapred")); genericFuncsHeaders.put("# Math", Collections.singletonList("hivemall.tools.math")); genericFuncsHeaders.put("# Matrix", Collections.singletonList("hivemall.tools.matrix")); genericFuncsHeaders.put("# Text processing", Collections.singletonList("hivemall.tools.text")); genericFuncsHeaders.put("# Others", Collections.singletonList("hivemall.tools")); } private static final Map<String, List<String>> funcsHeaders = new LinkedHashMap<>(); static { funcsHeaders.put("# Regression", Collections.singletonList("hivemall.regression")); funcsHeaders.put("# Classification", Collections.<String>emptyList()); funcsHeaders.put("## Binary classification", Collections.singletonList("hivemall.classifier")); funcsHeaders.put("## Multiclass classification", Collections.singletonList("hivemall.classifier.multiclass")); funcsHeaders.put("# Matrix factorization", Collections.singletonList("hivemall.mf")); funcsHeaders.put("# Factorization machines", Collections.singletonList("hivemall.fm")); funcsHeaders.put("# Recommendation", Collections.singletonList("hivemall.recommend")); funcsHeaders.put("# Anomaly detection", Collections.singletonList("hivemall.anomaly")); funcsHeaders.put("# Topic modeling", Collections.singletonList("hivemall.topicmodel")); funcsHeaders.put("# Preprocessing", Collections.singletonList("hivemall.ftvec")); funcsHeaders.put("## Data amplification", Collections.singletonList("hivemall.ftvec.amplify")); funcsHeaders.put("## Feature binning", Collections.singletonList("hivemall.ftvec.binning")); funcsHeaders.put("## Feature format conversion", Collections.singletonList("hivemall.ftvec.conv")); funcsHeaders.put("## Feature hashing", Collections.singletonList("hivemall.ftvec.hashing")); funcsHeaders.put("## Feature paring", Collections.singletonList("hivemall.ftvec.pairing")); funcsHeaders.put("## Ranking", Collections.singletonList("hivemall.ftvec.ranking")); funcsHeaders.put("## Feature scaling", Collections.singletonList("hivemall.ftvec.scaling")); funcsHeaders.put("## Feature selection", Collections.singletonList("hivemall.ftvec.selection")); funcsHeaders.put("## Feature transformation and vectorization", Collections.singletonList("hivemall.ftvec.trans")); funcsHeaders.put("# Geospatial functions", Collections.singletonList("hivemall.geospatial")); funcsHeaders.put("# Distance measures", Collections.singletonList("hivemall.knn.distance")); funcsHeaders.put("# Locality-sensitive hashing", Collections.singletonList("hivemall.knn.lsh")); funcsHeaders.put("# Similarity measures", Collections.singletonList("hivemall.knn.similarity")); funcsHeaders.put("# Evaluation", Collections.singletonList("hivemall.evaluation")); funcsHeaders.put("# Sketching", Collections.singletonList("hivemall.sketch.hll")); funcsHeaders.put("# Ensemble learning", Collections.singletonList("hivemall.ensemble")); funcsHeaders.put("## Bagging", Collections.singletonList("hivemall.ensemble.bagging")); funcsHeaders.put("# Decision trees and RandomForest", Arrays.asList("hivemall.smile.classification", "hivemall.smile.regression", "hivemall.smile.tools")); funcsHeaders.put("# XGBoost", Arrays.asList("hivemall.xgboost.classification", "hivemall.xgboost.regression", "hivemall.xgboost.tools")); funcsHeaders.put("# Others", Arrays.asList("hivemall", "hivemall.dataset", "hivemall.ftvec.text")); } @Override public void execute() throws MojoExecutionException { if (!isReactorRootProject()) { // output only once across the projects return; } generate(new File(basedir, pathToGenericFuncs), "This page describes a list of useful Hivemall generic functions. See also a [list of machine-learning-related functions](./funcs.md).", genericFuncsHeaders); generate(new File(basedir, pathToFuncs), "This page describes a list of Hivemall functions. See also a [list of generic Hivemall functions](./generic_funcs.md) for more general-purpose functions such as array and map UDFs.", funcsHeaders); } private boolean isReactorRootProject() { return session.getExecutionRootDirectory().equalsIgnoreCase(basedir.toString()); } private void generate(@Nonnull File outputFile, @Nonnull String preface, @Nonnull Map<String, List<String>> headers) throws MojoExecutionException { Reflections reflections = new Reflections("hivemall"); Set<Class<?>> annotatedClasses = reflections.getTypesAnnotatedWith(Description.class); StringBuilder sb = new StringBuilder(); Map<String, Set<String>> packages = new HashMap<>(); Pattern func = Pattern.compile("_FUNC_(\\(.*?\\))(.*)", Pattern.DOTALL); for (Class<?> annotatedClass : annotatedClasses) { Deprecated deprecated = annotatedClass.getAnnotation(Deprecated.class); if (deprecated != null) { continue; } Description description = annotatedClass.getAnnotation(Description.class); String value = description.value().replaceAll("\n", " "); Matcher matcher = func.matcher(value); if (matcher.find()) { value = asInlineCode(description.name() + matcher.group(1)) + escapeHtml(matcher.group(2)); } sb.append(asListElement(value)); StringBuilder sbExtended = new StringBuilder(); if (!description.extended().isEmpty()) { sbExtended.append(description.extended()); sb.append("\n"); } String extended = sbExtended.toString(); if (extended.isEmpty()) { sb.append("\n"); } else { if (extended.toLowerCase().contains("select")) { // extended description contains SQL statements sb.append(indent(asCodeBlock(extended, "sql"))); } else { sb.append(indent(asCodeBlock(extended))); } } String packageName = annotatedClass.getPackage().getName(); if (!packages.containsKey(packageName)) { Set<String> set = new TreeSet<>(); packages.put(packageName, set); } Set<String> List = packages.get(packageName); List.add(sb.toString()); StringUtils.clear(sb); } try (PrintWriter writer = new PrintWriter(outputFile)) { // license header writer.println("<!--"); try { File licenseFile = new File(basedir, "resources/license-header.txt"); FileReader fileReader = new FileReader(licenseFile); try (BufferedReader bufferedReader = new BufferedReader(fileReader)) { String line; while ((line = bufferedReader.readLine()) != null) { writer.println(indent(line)); } } } catch (IOException e) { throw new MojoExecutionException("Failed to read license file"); } writer.println("-->\n"); writer.println(preface); writer.println("\n<!-- toc -->\n"); for (Map.Entry<String, List<String>> e : headers.entrySet()) { writer.println(e.getKey() + "\n"); List<String> packageNames = e.getValue(); for (String packageName : packageNames) { for (String desc : packages.get(packageName)) { writer.println(desc); } } } writer.flush(); } catch (FileNotFoundException e) { throw new MojoExecutionException("Output file is not found"); } } }