Example usage for java.lang.String.replaceAll

List of usage examples for java.lang.String.replaceAll

Introduction

On this page you can find example usages of java.lang.String.replaceAll.

Prototype

public String replaceAll(String regex, String replacement) 

Source Link

Document

Replaces each substring of this string that matches the given regular expression with the given replacement.

Usage

From source file:com.maxl.java.aips2xml.Aips2Xml.java

/**
 * Command-line entry point: optionally downloads the source data, then converts
 * every matching "fi" medical information entry into one html and one xml file,
 * writes a combined xml file (optionally zipped) and copies the stylesheet next
 * to the generated html files.
 *
 * <p>Output is only written for the languages "de" and "fr"; for any other
 * non-empty language the entries are still converted but nothing is saved.
 *
 * @param args command-line arguments, parsed by {@code commandLineParse}
 */
public static void main(String[] args) {
    Options options = new Options();
    addOption(options, "help", "print this message", false, false);
    addOption(options, "version", "print the version information and exit", false, false);
    addOption(options, "quiet", "be extra quiet", false, false);
    addOption(options, "verbose", "be extra verbose", false, false);
    addOption(options, "zip", "generate zip file", false, false);
    addOption(options, "lang", "use given language", true, false);
    addOption(options, "alpha", "only include titles which start with option value", true, false);
    addOption(options, "nodown", "no download, parse only", false, false);

    commandLineParse(options, args);

    // Download all files and save them in appropriate directories
    if (DOWNLOAD_ALL) {
        System.out.println("");
        allDown();
    }

    System.out.println("");
    if (!DB_LANGUAGE.isEmpty()) {
        extractPackageInfo();

        List<MedicalInformations.MedicalInformation> med_list = readAipsFile();

        if (SHOW_LOGS) {
            System.out.println("");
            System.out.println("- Generating xml and html files ... ");
        }
        long startTime = System.currentTimeMillis();
        int counter = 0;

        // Files are only written for these two languages; the paths differ solely
        // by the language code, so one code path serves both.
        boolean writeOutput = DB_LANGUAGE.equals("de") || DB_LANGUAGE.equals("fr");
        String htmlDir = "./fis/fi_" + DB_LANGUAGE + "_html/";
        String xmlDir = "./fis/fi_" + DB_LANGUAGE + "_xml/";

        // StringBuilder avoids re-copying the whole document for every entry.
        StringBuilder completeXml = new StringBuilder();
        for (MedicalInformations.MedicalInformation m : med_list) {
            if (!m.getLang().equals(DB_LANGUAGE) || !m.getType().equals("fi")) {
                continue;
            }
            if (!m.getTitle().startsWith(MED_TITLE)) {
                continue;
            }
            if (SHOW_LOGS)
                System.out.println(++counter + ": " + m.getTitle());
            String[] html_str = extractHtmlSection(m);
            // html_str[0] -> registration numbers
            // html_str[1] -> content string
            String xml_str = convertHtmlToXml(m.getTitle(), html_str[1], html_str[0]);
            if (writeOutput && !html_str[0].isEmpty()) {
                // Replace the special characters '/', '%' and ':' in the title
                // before using it as a file name
                String name = m.getTitle().replaceAll("[/%:]", "_");
                writeToFile(html_str[1], htmlDir, name + "_fi_" + DB_LANGUAGE + ".html");
                writeToFile(xml_str, xmlDir, name + "_fi_" + DB_LANGUAGE + ".xml");
                completeXml.append(xml_str).append("\n");
            }
        }

        // Add header to huge xml
        String fi_complete_xml = addHeaderToXml(completeXml.toString());
        // Dump to file
        if (writeOutput) {
            String completeName = "fi_" + DB_LANGUAGE + ".xml";
            writeToFile(fi_complete_xml, "./fis/", completeName);
            if (ZIP_XML)
                zipToFile("./fis/", completeName);
        }

        // Copy stylesheet file to the ./fis/ html folders that exist
        try {
            File src = new File("./css/amiko_stylesheet.css");
            File dst_de = new File("./fis/fi_de_html/");
            File dst_fr = new File("./fis/fi_fr_html/");
            if (src.exists()) {
                if (dst_de.exists())
                    FileUtils.copyFileToDirectory(src, dst_de);
                if (dst_fr.exists())
                    FileUtils.copyFileToDirectory(src, dst_fr);
            }
        } catch (IOException e) {
            // Best effort: the generated files are still usable without the
            // stylesheet, so report the failure instead of aborting.
            System.err.println("Could not copy stylesheet to ./fis/ html folders: " + e.getMessage());
        }

        if (SHOW_LOGS) {
            long stopTime = System.currentTimeMillis();
            System.out.println("- Generated " + counter + " xml and html files in "
                    + (stopTime - startTime) / 1000.0f + " sec");
        }
    }

    System.exit(0);
}

From source file:com.virtualparadigm.packman.processor.JPackageManagerBU.java

/**
 * Scratch driver with two independent parts: it prints the result of three
 * {@code String.replaceAll} experiments on placeholder strings, then merges a
 * Velocity template loaded from a fixed file path and prints the merged text.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Part 1: regex experiments on "${...}" / "#{...}" placeholders.
    try {
        final String dollarText = "this should ${foobar} ${foo:bar.0.1}";
        final String hashText = "this should ${foobar} #{foo:bar.0.1}";

        // "#{a:b}" -> "${a:b}"; the "\\$" in the replacement is a literal dollar sign.
        final String hashToDollar = hashText.replaceAll("(#)(\\{)([^\\}]*)(\\:)([^\\}]*)(\\})",
                "\\$$2$3$4$5$6");
        System.out.println(hashToDollar);

        // Greedy variant: drops the leading '$' of the match and wraps it in "--".
        final String wrapped = dollarText.replaceAll("(\\$)(\\{)(.*)(\\:)(.*)(\\})", "--$2$3$4$5$6--");
        System.out.println(wrapped);

        // Only rewrites "${word:word}" placeholders to "#{word:word}".
        final String wordOnly = dollarText.replaceAll("(\\$)(\\{\\w*\\:\\w*\\})", "#$2");
        System.out.println(wordOnly);
    } catch (Exception e) {
        e.printStackTrace();
    }

    System.out.println("");
    System.out.println("");

    // Part 2: render a template through Velocity's string resource loader.
    Properties engineProps = new Properties();
    engineProps.setProperty("resource.loader", "string");
    engineProps.setProperty("string.resource.loader.class",
            "org.apache.velocity.runtime.resource.loader.StringResourceLoader");
    VelocityEngine engine = new VelocityEngine();
    engine.init(engineProps);

    VelocityContext context = new VelocityContext();
    context.put("bo", "oba");
    context.put("foobar", "be replaced");

    try {
        File templateFile = new File("c:/dev/workbench/paradigm_workspace/jpackage-manager/template.xml");
        String templateText = FileUtils.readFileToString(templateFile);
        StringResourceLoader.getRepository().putStringResource("template", templateText);

        StringWriter rendered = new StringWriter();
        Template loaded = engine.getTemplate("template");
        loaded.merge(context, rendered);
        System.out.println(rendered.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:com.mycompany.myelasticsearch.MainClass.java

/**
 * Extracts the text of a fixed PDF contract, splits it into a table of
 * contents and a body at the first occurrence of "SCHEDULES", splits both
 * parts on the word "Article", indexes numbered articles and "Section x.yy"
 * ranges through the Elasticsearch client, and finally writes three text files
 * (table of contents, JSON of sections, key/value dump of sections).
 *
 * @param args the command line arguments (not read by this method)
 */
public static void main(String[] args) {

    // TODO code application logic here
    Tika tika = new Tika();
    // NOTE(review): input path is hard-coded.
    String fileEntry = "C:\\Contract\\Contract1.pdf";
    String filetype = tika.detect(fileEntry);
    System.out.println("FileType " + filetype);
    BodyContentHandler handler = new BodyContentHandler(-1);
    String text = "";
    Metadata metadata = new Metadata();

    FileInputStream inputstream = null;

    // NOTE(review): if the file is missing, the error is only logged and
    // inputstream stays null; it is still handed to the parser below.
    // The stream is never closed in this method.
    try {
        inputstream = new FileInputStream(fileEntry);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(MainClass.class.getName()).log(Level.SEVERE, null, ex);
    }
    ParseContext pcontext = new ParseContext();

    //parsing the document using PDF parser
    PDFParser pdfparser = new PDFParser();
    try {
        pdfparser.parse(inputstream, handler, metadata, pcontext);
    } catch (IOException ex) {

        Logger.getLogger(MainClass.class.getName()).log(Level.SEVERE, null, ex);
    } catch (SAXException ex) {
        Logger.getLogger(MainClass.class.getName()).log(Level.SEVERE, null, ex);
    } catch (TikaException ex) {
        Logger.getLogger(MainClass.class.getName()).log(Level.SEVERE, null, ex);
    }
    String docText = "";
    String outputArray[];
    String out[];
    //getting the content of the document
    // NOTE(review): in a Java regex the '/' characters are literals, so this only
    // removes a slash, one character outside [0-9a-zA-Z.], and another slash.
    // It looks like a JavaScript-style /.../ literal; confirm the intended pattern.
    docText = handler.toString().replaceAll("(/[^\\da-zA-Z.]/)", "");

    // PhraseDetection.getPhrases(docText);
    try {
        Node node = nodeBuilder().node();
        Client client = node.client();
        DocumentReader.parseString(docText, client);
        //"Borrowing should be replaced by the user input key"
        Elastic.getDefinedTerm(client, "definedterms", "term", "1", "Borrowing");
        node.close();
    } catch (FileNotFoundException ex) {
        Logger.getLogger(MainClass.class.getName()).log(Level.SEVERE, null, ex);
    }
    Stanford.getSentence(docText);

    // Everything before the first "SCHEDULES" is treated as the table of contents.
    // NOTE(review): indexOf returns -1 when the marker is absent, in which case
    // substring(0, -1) throws StringIndexOutOfBoundsException.
    int definedTermsEnd = docText.indexOf("SCHEDULES");
    String toc = docText.substring(0, definedTermsEnd);
    String c = docText.substring(definedTermsEnd);

    System.out.println("Table of content" + toc);
    System.out.println("--------------------------------");
    System.out.println("content" + c);

    out = toc.split("Article|article|ARTICLE");
    int count = 0;
    String outputArrayString = "";
    int s = 0;
    StringBuffer tocOutput = new StringBuffer();

    // Keep the table-of-contents pieces whose second character is the digit equal
    // to their position; pieces are joined with the "JigarAnkitNeeraj" separator.
    for (String o : out) {
        if (count != 0) {
            // NOTE(review): only one digit (the character at index 1) is read;
            // throws if the piece is shorter than two characters or that
            // character is not a digit.
            s = Integer.parseInt(String.valueOf(o.charAt(1)));
            if (s == count) {
                tocOutput.append(o);
                tocOutput.append("JigarAnkitNeeraj");
                System.out.println(s);
            }
        }
        outputArrayString += "Count" + count + o;
        count++;
        System.out.println();
    }
    System.out.println("---------------------------------------------------Content---------");
    count = 1;
    StringBuffer contentOutput = new StringBuffer();

    String splitContent[] = c.split("ARTICLE|Article");
    // NOTE(review): a second node is started here; no close() call for it is
    // visible in this method.
    Node node = nodeBuilder().node();
    Client client = node.client();
    for (String o : splitContent) {
        // Strip every character outside the listed whitelist, then flatten newlines.
        o = o.replaceAll("[^a-zA-Z0-9.,\\/#!$%\\^&\\*;:{}=\\-_`~()?\\s]+", "");
        o = o.replaceAll("\n", " ");
        // NOTE(review): charAt(1) throws if the cleaned piece has fewer than two characters.
        char input = o.charAt(1);
        if (input >= '0' && input <= '9') {
            s = Integer.parseInt(String.valueOf(o.charAt(1)));
            if (s == count) {
                //System.out.println(s);
                JSONObject articleJSONObject = new JSONObject();
                contentOutput.append(" \n MyArticlesSeparated \n ");
                articleJSONObject.put("Article" + count, o.toString());
                try {
                    try {
                        // NOTE(review): articleJSONObject1 is built but never used.
                        JSONObject articleJSONObject1 = new JSONObject();
                        articleJSONObject1.put("hi", "j");
                        client.prepareIndex("contract", "article", String.valueOf(count))
                                .setSource(articleJSONObject.toString()).execute().actionGet();
                    } catch (Exception e) {
                        System.out.println(e.getMessage());
                    }
                    //"Borrowing should be replaced by the user input key"

                } catch (Exception ex) {
                    Logger.getLogger(MainClass.class.getName()).log(Level.SEVERE, null, ex);
                }
                System.out.println(s);
                count++;
            }
            //outputArrayString += "Count" + count + o;

            contentOutput.append(o);
        }
    }
    Elastic.getDocument(client, "contract", "article", "1");
    Elastic.searchDocument(client, "contract", "article", "Lenders");
    Elastic.searchDocument(client, "contract", "article", "Negative Covenants");

    Elastic.searchDocument(client, "contract", "article", "Change in Law");
    String tableOfContent[];
    tableOfContent = tocOutput.toString().split("JigarAnkitNeeraj");

    String splitContectsAccordingToArticles[];
    splitContectsAccordingToArticles = contentOutput.toString().split("MyArticlesSeparated");
    int numberOfArticle = splitContectsAccordingToArticles.length;

    // toBeTruncated holds the current section number as a double (article.section),
    // starting at 1.01 and advanced in steps of 0.01.
    int countArticle = 1;
    Double toBeTruncated = new Double("" + countArticle + ".00");

    String section = "Section";
    toBeTruncated += 0.01;

    System.out.println(toBeTruncated);
    String sectionEnd;
    StringBuffer sectionOutput = new StringBuffer();
    int skipFirstArtcile = 0;
    JSONObject obj = new JSONObject();

    // First pass: for every article except the first array element, cut the text
    // between consecutive "Section x.yy" markers, record it in obj and index obj.
    for (String article : splitContectsAccordingToArticles) {
        if (skipFirstArtcile != 0) {
            DecimalFormat f = new DecimalFormat("##.00");
            String sectionStart = section + " " + f.format(toBeTruncated);
            int start = article.indexOf(sectionStart);
            toBeTruncated += 0.01;

            System.out.println();
            sectionEnd = section + " " + f.format(toBeTruncated);

            int end = article.indexOf(sectionEnd);
            while (end != -1) {
                sectionStart = section + " " + f.format(toBeTruncated - 0.01);
                sectionOutput.append(" \n Key:" + sectionStart);
                if (start < end) {
                    sectionOutput.append("\n Value:" + article.substring(start, end));
                    obj.put(sectionStart, article.substring(start, end).replaceAll("\\r\\n|\\r|\\n", " "));
                    try {
                        try {
                            // NOTE(review): articleJSONObject1 is built but never used.
                            JSONObject articleJSONObject1 = new JSONObject();
                            articleJSONObject1.put("hi", "j");
                            // NOTE(review): count is not changed inside this loop, so
                            // every section is indexed under the same id; confirm
                            // whether later calls are meant to replace earlier ones.
                            client.prepareIndex("contract", "section", String.valueOf(count))
                                    .setSource(obj.toString()).execute().actionGet();
                        } catch (Exception e) {
                            System.out.println(e.getMessage());
                        }
                        //"Borrowing should be replaced by the user input key"

                    } catch (Exception ex) {
                        Logger.getLogger(MainClass.class.getName()).log(Level.SEVERE, null, ex);
                    }

                }

                start = end;
                toBeTruncated += 0.01;
                sectionEnd = section + " " + f.format(toBeTruncated);
                System.out.println("SectionEnd " + sectionEnd);
                try {
                    end = article.indexOf(sectionEnd);
                } catch (Exception e) {
                    System.out.print(e.getMessage());
                }

                System.out.println("End section index " + end);
            }
            // Last section of the article: runs to the end of the article text.
            // NOTE(review): with end = length - 1 the final character is left out
            // of the substring below.
            end = article.length() - 1;
            sectionOutput.append(" \n Key:" + sectionStart);
            try {
                sectionOutput.append(" \n Value:" + article.substring(start, end));
                obj.put(sectionStart, article.substring(start, end).replaceAll("\\r\\n|\\r|\\n", " "));
            } catch (Exception e) {
                //What if Article has No Sections
                // NOTE(review): substring(0, 1) throws if the article contains no
                // digit at all, and that exception is not caught here.
                String numberOnly = article.replaceAll("[^0-9]", "").substring(0, 1);
                String sectionArticle = "ARTICLE " + numberOnly;
                sectionOutput.append(" \n Value:" + article);
                obj.put(sectionArticle, article);

                System.out.println(e.getMessage());
            }

            // Move on to section .01 of the next article number.
            DecimalFormat ff = new DecimalFormat("##");
            toBeTruncated = Double.valueOf(ff.format(toBeTruncated)) + 1.01;
        }
        skipFirstArtcile++;
    }

    // Second pass: same section walk, but each section is split into paragraphs
    // (on two or more newlines followed by "(") and stored as a nested JSON object.
    // NOTE(review): skipFirstArtcile and toBeTruncated are not reset after the
    // first pass, so the skip guard below no longer skips the first element and
    // the section numbering continues from where the first pass stopped; confirm
    // this is intended.
    for (String article : splitContectsAccordingToArticles) {
        if (skipFirstArtcile != 0) {
            DecimalFormat f = new DecimalFormat("##.00");
            String sectionStart = section + " " + f.format(toBeTruncated);
            int start = article.indexOf(sectionStart);
            toBeTruncated += 0.01;
            System.out.println();
            sectionEnd = section + " " + f.format(toBeTruncated);

            int end = article.indexOf(sectionEnd);
            while (end != -1) {
                sectionStart = section + " " + f.format(toBeTruncated - 0.01);
                sectionOutput.append(" \n Key:" + sectionStart);
                if (start < end) {
                    sectionOutput.append("\n Value:" + article.substring(start, end));
                    System.out.println(sectionOutput);
                    String patternStr = "\\n\\n+[(]";
                    String paragraphSubstringArray[] = article.substring(start, end).split(patternStr);

                    JSONObject paragraphObject = new JSONObject();
                    int counter = 0;
                    for (String paragraphSubstring : paragraphSubstringArray) {
                        counter++;
                        paragraphObject.put("Paragraph " + counter, paragraphSubstring);

                    }
                    obj.put(sectionStart, paragraphObject);

                }

                start = end;
                toBeTruncated += 0.01;
                sectionEnd = section + " " + f.format(toBeTruncated);
                System.out.println("SectionEnd " + sectionEnd);
                try {
                    end = article.indexOf(sectionEnd);
                } catch (Exception e) {
                    System.out.print(e.getMessage());
                }

                System.out.println("End section index " + end);
            }
            end = article.length() - 1;
            sectionOutput.append(" \n Key:" + sectionStart);
            try {
                sectionOutput.append(" \n Value:" + article.substring(start, end));
                obj.put(sectionStart, article.substring(start, end));
                // NOTE(review): called once per article with the whole document text.
                PhraseDetection.getPhrases(docText);
            } catch (Exception e) {
                //What if Article has No Sections
                // NOTE(review): sectionArticle is assigned but never used.
                String sectionArticle = "ARTICLE";
                System.out.println(e.getMessage());
            }
            DecimalFormat ff = new DecimalFormat("##");
            toBeTruncated = Double.valueOf(ff.format(toBeTruncated)) + 1.01;
        }
        skipFirstArtcile++;
    }

    Elastic.getDocument(client, "contract", "section", "1");
    Elastic.searchDocument(client, "contract", "section", "Lenders");
    Elastic.searchDocument(client, "contract", "section", "Negative Covenants");
    // Write the three result files into the working directory.
    // NOTE(review): each writer is closed only on the success path; an exception
    // from write() leaves it open.
    try {
        FileWriter file = new FileWriter("TableOfIndex.txt");
        file.write(tocOutput.toString());
        file.flush();
        file.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    try {
        FileWriter file = new FileWriter("Contract3_JSONFile.txt");
        file.write(obj.toString());
        file.flush();
        file.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    try {
        FileWriter file = new FileWriter("Contract1_KeyValueSections.txt");
        file.write(sectionOutput.toString());
        file.flush();
        file.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:markov.java

/**
 * @param args/*from w  w w.j a v a2  s. c o m*/
 */
public static void main(String[] args) {

    // hack: eclipse don't support IO redirection worth a shit
    // try {
    // System.setIn(new FileInputStream("./json"));
    // } catch (FileNotFoundException e1) {
    // // TODO Auto-generated catch block
    // e1.printStackTrace();
    // }

    boolean graphMode = false;
    boolean jsonMode = false;
    boolean jsonRecoverMode = false;
    boolean endNode = false;

    int count = -1;

    long n = 0;
    long sumOfSqr = 0;
    long sum = 0;

    for (String s : args) {

        if (!s.matches("^-[vegjJh]*(c[0-9]*)?$")) {
            System.out.println("invalid argument");
            return;
        }

        if (s.matches("^-.*h.*")) {
            System.out.println(HELP);
            return;
        }
        if (s.matches("^-.*v.*")) {
            verbose = true;
            log("verbose mode");
        }
        if (s.matches("^-.*g.*")) {
            graphMode = true;
            log("graph mode");
        }
        if (s.matches("^-.*j.*")) {
            jsonMode = true;
            log("json mode");
        }
        if (s.matches("^-.*J.*")) {
            jsonRecoverMode = true;
            log("json recover mode");
        }
        if (s.matches("^-.*e.*")) {
            endNode = true;
            log("include end node");
        }
        if (s.matches("^-.*c[0-9]*$")) {
            log("counted output mode");
            count = Integer.parseInt(s.replaceAll("^-.*c", ""));
        }

        boolean error = (graphMode == true && jsonMode == true);
        if (!error) {
            error = (count > -1) && (graphMode == true || jsonMode == true);
        }

        if (error) {
            System.err.println("[error] switches j, g and, c are mutualy exclusive.");
            return;
        }

    }

    StateTransitionDiagram<Character> std;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));

    try {
        if (!jsonRecoverMode) {
            Trainer<Character> trainer = new Trainer<Character>();
            String s = br.readLine();
            while (s != null) {
                trainer.train(string2List(s));
                n++;
                sumOfSqr += s.length() * s.length();
                sum += s.length();
                s = br.readLine();
            }
            if (n == 0) {
                System.err
                        .println("Invalid corpus: At least one sample is required, two to make it interesting");
                return;
            }
            std = trainer.getTransitionDiagram();
        } else {
            std = new StateTransitionDiagram<Character>();
            GsonStub gstub = new Gson().fromJson(br, GsonStub.class);
            n = gstub.meta.n;
            sum = gstub.meta.sum;
            sumOfSqr = gstub.meta.sumOfSqr;

            for (Entry<String, StateStub> entry : gstub.states.entrySet()) {
                State<Character> state;
                if (entry.getKey().equals("null")) {
                    state = std.getGuard();
                } else {
                    state = std.getState(Character.valueOf(entry.getKey().charAt(0)));
                }
                for (Entry<String, Integer> transitions : entry.getValue().transitions.entrySet()) {
                    State<Character> tranny;
                    if (transitions.getKey().equals("null")) {
                        tranny = std.getGuard();
                    } else {
                        tranny = std.getState(Character.valueOf(transitions.getKey().charAt(0)));
                    }

                    state.addTransition(tranny.getValue(), transitions.getValue());
                }
            }
        }
        if (graphMode) {
            if (endNode) {
                System.out.println(std.toString());
            } else {
                System.out.println(std.removeEndGuards().toString());
            }
            return;
        }
        if (jsonMode) {
            Gson gson = new GsonBuilder().excludeFieldsWithoutExposeAnnotation().create();

            String partialJson;
            if (endNode) {
                partialJson = gson.toJson(std);
            } else {
                partialJson = gson.toJson(std.removeEndGuards());
            }
            GsonStub gstub = new Gson().fromJson(partialJson, GsonStub.class);
            gstub.meta = new Meta();
            gstub.meta.n = n;
            gstub.meta.sum = sum;
            gstub.meta.sumOfSqr = sumOfSqr;

            System.out.println(gson.toJson(gstub));
            return;
        }

        Generator<Character> generator;
        if (endNode) {
            generator = new EndTagGenerator<Character>(std);
        } else {
            double sd = ((double) sumOfSqr - (double) (sum * sum) / (double) n) / (double) (n - 1);
            double mean = (double) sum / (double) n;
            log(String.format("mean: %.4f sd: %.4f", mean, sd));
            NormalDistributionImpl dist = new NormalDistributionImpl(mean, sd);
            generator = new NormalizedGenerator<Character>(std.removeEndGuards(), dist);
        }
        if (count >= 0) {
            for (int c = 0; c < count; c++) {
                output(generator);
            }
        } else {
            while (true) {
                output(generator);
            }
        }

    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:io.compgen.cgpipe.CGPipe.java

/**
 * Command-line entry point: parses the arguments, configures logging, loads the
 * pipeline script and submits the requested build targets to a job runner.
 *
 * <p>Argument handling, in order of precedence:
 * <ul>
 * <li>the first argument, if it names an existing file, is the pipeline file
 *     (and silences stderr logging);</li>
 * <li>the argument following {@code -f}, {@code -l} or {@code -o} is the pipeline
 *     file, log file or output file respectively;</li>
 * <li>{@code -h}/{@code -help}/{@code --help}, {@code -license}, {@code -s},
 *     {@code -nolog}, {@code -v}/{@code -vv}/{@code -vvv} and {@code -dr} are
 *     switches;</li>
 * <li>{@code --name value} sets a config value ('-' in the name becomes '_');
 *     repeating a name collects the values in a list; a {@code --name} with no
 *     value is set to true;</li>
 * <li>any other argument not starting with '-' is a build target.</li>
 * </ul>
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    String fname = null;
    String logFilename = null;
    String outputFilename = null;
    PrintStream outputStream = null;

    int verbosity = 0;
    boolean silent = false;
    boolean dryrun = false;
    boolean silenceStdErr = false;
    boolean showHelp = false;

    List<String> targets = new ArrayList<String>();
    Map<String, VarValue> confVals = new HashMap<String, VarValue>();

    // Name of a "--name" option that is still waiting for its value.
    String k = null;

    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if (i == 0) {
            if (new File(arg).exists()) {
                fname = arg;
                silenceStdErr = true;
                continue;
            }
        } else if (args[i - 1].equals("-f")) {
            fname = arg;
            continue;
        } else if (args[i - 1].equals("-l")) {
            logFilename = arg;
            continue;
        } else if (args[i - 1].equals("-o")) {
            outputFilename = arg;
            continue;
        }

        if (arg.equals("-license")) {
            license();
            System.exit(1);
        } else if (isBooleanSwitch(arg) || arg.startsWith("--")) {
            // A pending "--name" followed by another option had no value: it is a
            // boolean flag. Clear it so a later plain argument is not mistaken
            // for a second value of the same name.
            if (k != null) {
                confVals.put(normalizeConfigKey(k), VarBool.TRUE);
                k = null;
            }
            switch (arg) {
            case "-h":
            case "-help":
            case "--help":
                showHelp = true;
                break;
            case "-s":
                silent = true;
                break;
            case "-nolog":
                silenceStdErr = true;
                break;
            case "-v":
                verbosity++;
                break;
            case "-vv":
                verbosity += 2;
                break;
            case "-vvv":
                verbosity += 3;
                break;
            case "-dr":
                dryrun = true;
                break;
            default:
                // Any other "--name": remember it until its value arrives.
                k = arg.substring(2);
                break;
            }
        } else if (k != null) {
            // Value for the pending "--name".
            String key = normalizeConfigKey(k);
            if (confVals.containsKey(key)) {
                // Repeated name: collect all values in a list.
                try {
                    VarValue val = confVals.get(key);
                    if (val.getClass().equals(VarList.class)) {
                        ((VarList) val).add(VarValue.parseStringRaw(arg));
                    } else {
                        VarList list = new VarList();
                        list.add(val);
                        list.add(VarValue.parseStringRaw(arg));
                        confVals.put(key, list);
                    }
                } catch (VarTypeException e) {
                    System.err.println("Error setting variable: " + key + " => " + arg);
                    System.exit(1);
                }
            } else {
                confVals.put(key, VarValue.parseStringRaw(arg));
            }
            k = null;
        } else if (!arg.isEmpty() && arg.charAt(0) != '-') {
            targets.add(arg);
        }
    }
    // A trailing "--name" without a value is a boolean flag.
    if (k != null) {
        confVals.put(normalizeConfigKey(k), VarBool.TRUE);
    }

    confVals.put("cgpipe.loglevel", new VarInt(verbosity));

    if (fname == null) {
        usage();
        System.exit(1);
    }

    if (!showHelp) {
        switch (verbosity) {
        case 0:
            SimpleFileLoggerImpl.setLevel(Level.INFO);
            break;
        case 1:
            SimpleFileLoggerImpl.setLevel(Level.DEBUG);
            break;
        case 2:
            SimpleFileLoggerImpl.setLevel(Level.TRACE);
            break;
        case 3:
        default:
            SimpleFileLoggerImpl.setLevel(Level.ALL);
            break;
        }
    } else {
        SimpleFileLoggerImpl.setLevel(Level.FATAL);
    }

    SimpleFileLoggerImpl.setSilent(silenceStdErr || showHelp);

    Log log = LogFactory.getLog(CGPipe.class);
    log.info("Starting new run: " + fname);

    if (logFilename != null) {
        confVals.put("cgpipe.log", new VarString(logFilename));
    }

    // A non-empty CGPIPE_DRYRUN environment variable forces a dry run.
    if (System.getenv("CGPIPE_DRYRUN") != null && !System.getenv("CGPIPE_DRYRUN").equals("")) {
        dryrun = true;
    }

    JobRunner runner = null;
    try {
        // Load config values from global config.
        RootContext root = new RootContext();
        loadInitFiles(root);

        // Load settings from environment variables.
        root.loadEnvironment();

        // Set cmd-line arguments
        if (silent) {
            root.setOutputStream(null);
        }

        if (outputFilename != null) {
            outputStream = new PrintStream(new FileOutputStream(outputFilename));
            root.setOutputStream(outputStream);
        }

        for (String k1 : confVals.keySet()) {
            log.info("config: " + k1 + " => " + confVals.get(k1).toString());
        }

        root.update(confVals);
        root.set("cgpipe.procs", new VarInt(Runtime.getRuntime().availableProcessors()));

        // update the URL Source loader configs
        SourceLoader.updateRemoteHandlers(root.cloneString("cgpipe.remote"));

        // Now check for help, only after we've setup the remote handlers...
        if (showHelp) {
            try {
                Parser.showHelp(fname);
                System.exit(0);
            } catch (IOException e) {
                System.err.println("Unable to find pipeline: " + fname);
                System.exit(1);
            }
        }

        // Parse the AST and run it
        Parser.exec(fname, root);

        // Load the job runner *after* we execute the script to capture any config changes
        runner = JobRunner.load(root, dryrun);

        // find a build-target, and submit the job(s) to a runner
        if (targets.size() > 0) {
            for (String target : targets) {
                log.debug("building: " + target);

                BuildTarget initTarget = root.build(target);
                if (initTarget != null) {
                    runner.submitAll(initTarget, root);
                } else {
                    System.out.println("CGPIPE ERROR: Unable to find target: " + target);
                }
            }
        } else {
            // No explicit target: build the default one if the script defines it.
            // A missing default target is deliberately not an error, so scripts
            // without targets can be run (testing).
            BuildTarget initTarget = root.build();
            if (initTarget != null) {
                runner.submitAll(initTarget, root);
            }
        }
        runner.done();

        if (outputStream != null) {
            outputStream.close();
        }

    } catch (ASTParseException | ASTExecException | RunnerException | FileNotFoundException e) {
        if (outputStream != null) {
            outputStream.close();
        }
        if (runner != null) {
            runner.abort();
        }

        // An ExitException carries the exit code requested by the script.
        if (e.getClass().equals(ExitException.class)) {
            System.exit(((ExitException) e).getReturnCode());
        }

        System.out.println("CGPIPE ERROR " + e.getMessage());
        if (verbosity > 0) {
            e.printStackTrace();
        }
        System.exit(1);
    }
}

/** Returns true if {@code arg} is one of the value-less switches handled by {@code main}. */
private static boolean isBooleanSwitch(String arg) {
    return arg.equals("-h") || arg.equals("-help") || arg.equals("--help") || arg.equals("-s")
            || arg.equals("-nolog") || arg.equals("-v") || arg.equals("-vv") || arg.equals("-vvv")
            || arg.equals("-dr");
}

/** Converts a command-line option name into a config key by replacing every '-' with '_'. */
private static String normalizeConfigKey(String name) {
    return name.replace('-', '_');
}

From source file:com.bright.json.JSonRequestor.java

/**
 * Interactive driver: zips a locally selected input directory, copies the archive to a remote
 * head node, submits a job through the JSON endpoint at {@code https://<host>:8081/json}, and
 * mirrors the job's {@code .sum} monitoring file into a local {@code <basename>.sum.txt}.
 *
 * <p>Flow, in order:
 * <ol>
 * <li>Ask for the input directory and zip it next to the chooser's current directory.</li>
 * <li>Ask for host, user, password, remote working directory and output study name.</li>
 * <li>Copy the archive with {@code ScpTo}, log in, and send a {@code cmjob/submitJob} request.</li>
 * <li>Poll {@code cmjob/getJob} while the status is {@code RUNNING} or {@code COMPLETING},
 * appending newly read monitoring output on every round.</li>
 * <li>On status {@code COMPLETED} fetch {@code <basename>_out.zip} with {@code ScpFrom}; in every
 * case write the collected monitoring text to disk, log out and exit the JVM.</li>
 * </ol>
 *
 * <p>The method terminates the JVM itself: exit code 0 after a normal run or a user cancel,
 * exit code 1 when no input directory is available or the submit response holds no job number.
 *
 * @param args ignored; every input is collected through Swing dialogs
 */
public static void main(String[] args) {
    String fileBasename = null;
    String[] zipArgs = null;
    JFileChooser chooser = new JFileChooser("/Users/panos/STR_GRID");
    try {
        chooser.setCurrentDirectory(new java.io.File("."));
        chooser.setDialogTitle("Select the input directory");

        chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
        chooser.setAcceptAllFileFilterUsed(false);

        if (chooser.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
            System.out.println("getCurrentDirectory(): " + chooser.getCurrentDirectory());
            System.out.println("getSelectedFile() : " + chooser.getSelectedFile());

            // The base name is the last path component of the selected directory.
            String selectedPath = chooser.getSelectedFile().toString();
            fileBasename = selectedPath.substring(selectedPath.lastIndexOf(File.separator) + 1);
            System.out.println("Base name: " + fileBasename);

            zipArgs = new String[] { selectedPath,
                    chooser.getCurrentDirectory().toString() + File.separator + fileBasename + ".zip" };
            com.bright.utils.ZipFile.main(zipArgs);
        } else {
            System.out.println("No Selection ");
        }
    } catch (Exception e) {
        System.out.println(e.toString());
    }

    // Without a selection there is neither an archive path nor a base name; everything below
    // dereferences both, so stop here instead of failing with a NullPointerException.
    if (zipArgs == null || fileBasename == null) {
        System.out.println("No input directory available, aborting.");
        System.exit(1);
        return;
    }

    // Connection form, pre-filled with defaults.
    // NOTE(review): the default host, user and password are hard-coded in source - confirm that
    // shipping these defaults is acceptable.
    JTextField uiHost = new JTextField("ucs-head.brightcomputing.com");
    JTextField uiUser = new JTextField("nexus");
    JTextField uiPass = new JPasswordField("system");
    JTextField uiWdir = new JTextField("/home/nexus/pp1234");
    JTextField uiOut = new JTextField("foo");

    JPanel myPanel = new JPanel(new GridLayout(5, 1));
    myPanel.add(new JLabel("Bright HeadNode hostname:"));
    myPanel.add(uiHost);
    myPanel.add(new JLabel("Username:"));
    myPanel.add(uiUser);
    myPanel.add(new JLabel("Password:"));
    myPanel.add(uiPass);
    myPanel.add(new JLabel("Working Directory:"));
    myPanel.add(uiWdir);
    myPanel.add(new JLabel("Output Study Name ( -s ):"));
    myPanel.add(uiOut);

    int result = JOptionPane.showConfirmDialog(null, myPanel, "Please fill in all the fields.",
            JOptionPane.OK_CANCEL_OPTION);
    if (result != JOptionPane.OK_OPTION) {
        // Cancel or closing the dialog must not copy files or submit a job.
        // The archive created above is left in place.
        System.out.println("Cancelled by user.");
        System.exit(0);
        return;
    }
    System.out.println("Input received.");

    String rfile = uiWdir.getText();
    String rhost = uiHost.getText();
    String ruser = uiUser.getText();
    String rpass = uiPass.getText();
    String nexusOut = uiOut.getText();

    // Upload the archive to the remote working directory.
    String[] myarg = new String[] { zipArgs[1], ruser + "@" + rhost + ":" + rfile, nexusOut, fileBasename };
    com.bright.utils.ScpTo.main(myarg);

    String cmURL = "https://" + rhost + ":8081/json";
    List<Cookie> cookies = doLogin(ruser, rpass, cmURL);
    chkVersion(cmURL, cookies);

    // Build the submitJob request.
    jobSubmit myjob = new jobSubmit();
    jobSubmit.jobObject myjobObj = new jobSubmit.jobObject();

    myjob.setService("cmjob");
    myjob.setCall("submitJob");

    myjobObj.setQueue("defq");
    myjobObj.setJobname("myNexusJob");
    myjobObj.setAccount(ruser);
    myjobObj.setRundirectory(rfile);
    myjobObj.setUsername(ruser);
    myjobObj.setGroupname("cmsupport");
    myjobObj.setPriority("1");
    myjobObj.setStdinfile(rfile + "/stdin-mpi");
    myjobObj.setStdoutfile(rfile + "/stdout-mpi");
    myjobObj.setStderrfile(rfile + "/stderr-mpi");
    myjobObj.setResourceList(Arrays.asList(""));
    myjobObj.setDependencies(Arrays.asList(""));
    myjobObj.setMailNotify(false);
    myjobObj.setMailOptions("ALL");
    myjobObj.setMaxWallClock("00:10:00");
    myjobObj.setNumberOfProcesses(1);
    myjobObj.setNumberOfNodes(1);
    myjobObj.setNodes(Arrays.asList(""));
    myjobObj.setCommandLineInterpreter("/bin/bash");
    myjobObj.setUserdefined(Arrays.asList("cd " + rfile, "date", "pwd"));
    myjobObj.setExecutable("mpirun");
    myjobObj.setArguments("-env I_MPI_FABRICS shm:tcp " + Constants.NEXUSSIM_EXEC + " -mpi -c " + rfile + "/"
            + fileBasename + "/" + fileBasename + " -s " + rfile + "/" + fileBasename + "/" + nexusOut);
    myjobObj.setModules(Arrays.asList("shared", "nexus", "intel-mpi/64"));
    myjobObj.setDebug(false);
    myjobObj.setBaseType("Job");
    myjobObj.setIsSlurm(true);
    myjobObj.setUniqueKey(0);
    myjobObj.setModified(false);
    myjobObj.setToBeRemoved(false);
    myjobObj.setChildType("SlurmJob");
    myjobObj.setJobID("Nexus test");

    ArrayList<Object> mylist = new ArrayList<Object>();
    mylist.add("slurm");
    mylist.add(myjobObj);
    myjob.setArgs(mylist);

    GsonBuilder builder = new GsonBuilder();
    builder.enableComplexMapKeySerialization();
    Gson g = builder.create();

    String json2 = g.toJson(myjob);

    // To be used from a real console and not Eclipse
    Delete.main(zipArgs[1]);
    String message = JSonRequestor.doRequest(json2, cmURL, cookies);

    // The job number is taken to be the first run of digits in the response.
    Scanner resInt = new Scanner(message).useDelimiter("[^0-9]+");
    if (!resInt.hasNextInt()) {
        // nextInt() would throw here; report the raw response and leave cleanly instead.
        resInt.close();
        System.out.println("Could not find a job ID in response: " + message);
        doLogout(cmURL, cookies);
        System.exit(1);
        return;
    }
    int jobID = resInt.nextInt();
    resInt.close();
    System.out.println("Job ID: " + jobID);

    JOptionPane optionPane = new JOptionPane(message);
    JDialog myDialog = optionPane.createDialog(null, "CMDaemon response: ");
    myDialog.setModal(false);
    myDialog.setVisible(true);

    // Re-use the request object for the getJob status query.
    ArrayList<Object> mylist2 = new ArrayList<Object>();
    mylist2.add("slurm");
    String JobID = Integer.toString(jobID);
    mylist2.add(JobID);
    myjob.setArgs(mylist2);
    myjob.setService("cmjob");
    myjob.setCall("getJob");
    String json3 = g.toJson(myjob);
    System.out.println("JSON Request No. 4 " + json3);

    cmReadFile readfile = new cmReadFile();
    readfile.setService("cmmain");
    readfile.setCall("readFile");
    readfile.setUserName(ruser);

    // Read position appended to the remote path after "@+"; it is advanced by the result of
    // countLines(...) after every successful read.
    // NOTE(review): the name says "byte" but the increment looks like a line count - confirm.
    int fileByteIdx = 1;

    // Get the correct Line Separator for the OS (CRLF or LF)
    String nl = System.getProperty("line.separator");

    readfile.setPath(rfile + "/" + fileBasename + "/" + fileBasename + ".sum@+" + fileByteIdx);
    String json4 = g.toJson(readfile);

    // Strip one leading and one trailing double quote from the response.
    String monFile = JSonRequestor.doRequest(json4, cmURL, cookies).replaceAll("^\"|\"$", "");
    if (monFile.startsWith("Unable")) {
        monFile = "";
    } else {
        fileByteIdx += countLines(monFile, "\\\\n");
        System.out.println("");
    }

    // Local buffer only, so the unsynchronized builder is sufficient.
    StringBuilder output = new StringBuilder();
    String filename = chooser.getCurrentDirectory().toString() + File.separator + fileBasename + ".sum.txt";
    System.out.println("Local monitoring file: " + filename);

    // The regex \\n matches the two characters backslash + 'n' as they appear in the response.
    output.append(monFile.replaceAll("\\\\n", nl));

    String getJobJSON = JSonRequestor.doRequest(json3, cmURL, cookies);
    jobGet getJobObj = new Gson().fromJson(getJobJSON, jobGet.class);
    System.out.println("Job " + jobID + " status: " + getJobObj.getStatus().toString());

    while (getJobObj.getStatus().toString().equals("RUNNING")
            || getJobObj.getStatus().toString().equals("COMPLETING")) {
        try {
            getJobJSON = JSonRequestor.doRequest(json3, cmURL, cookies);
            getJobObj = new Gson().fromJson(getJobJSON, jobGet.class);
            System.out.println("Job " + jobID + " status: " + getJobObj.getStatus().toString());

            readfile.setPath(rfile + "/" + fileBasename + "/" + fileBasename + ".sum@+" + fileByteIdx);
            json4 = g.toJson(readfile);
            monFile = JSonRequestor.doRequest(json4, cmURL, cookies).replaceAll("^\"|\"$", "");
            if (monFile.startsWith("Unable")) {
                monFile = "";
            } else {
                output.append(monFile.replaceAll("\\\\n", nl));
                System.out.println("FILE INDEX:" + fileByteIdx);
                fileByteIdx += countLines(monFile, "\\\\n");
            }
            Thread.sleep(Constants.STATUS_CHECK_INTERVAL);
        } catch (InterruptedException ex) {
            // Restore the flag and stop polling: with the flag set every further sleep would
            // throw immediately and the loop would query the server without any delay.
            Thread.currentThread().interrupt();
            System.out.println("Monitoring interrupted.");
            break;
        }
    }

    Gson gson_nice = new GsonBuilder().setPrettyPrinting().create();
    String json_out = gson_nice.toJson(getJobJSON);
    System.out.println(json_out);
    System.out.println("JSON Request No. 5 " + json4);

    // One last read to pick up output written after the final poll.
    readfile.setPath(rfile + "/" + fileBasename + "/" + fileBasename + ".sum@+" + fileByteIdx);
    json4 = g.toJson(readfile);
    monFile = JSonRequestor.doRequest(json4, cmURL, cookies).replaceAll("^\"|\"$", "");
    if (monFile.startsWith("Unable")) {
        monFile = "";
    } else {
        output.append(monFile.replaceAll("\\\\n", nl));
        fileByteIdx += countLines(monFile, "\\\\n");
    }
    System.out.println("FILE INDEX:" + fileByteIdx);

    if (getJobObj.getStatus().toString().equals("COMPLETED")) {
        // Fetch the result archive produced on the remote side.
        String[] zipArgs_from = new String[] { chooser.getSelectedFile().toString(),
                chooser.getCurrentDirectory().toString() + File.separator + fileBasename + "_out.zip" };
        String[] myarg_from = new String[] {
                ruser + "@" + rhost + ":" + rfile + "/" + fileBasename + "_out.zip", zipArgs_from[1], rfile,
                fileBasename };
        com.bright.utils.ScpFrom.main(myarg_from);

        JOptionPane optionPaneS = new JOptionPane("Job execution completed without errors!");
        JDialog myDialogS = optionPaneS.createDialog(null, "Job status: ");
        myDialogS.setModal(false);
        myDialogS.setVisible(true);
    } else {
        // Any status other than COMPLETED is reported as a failure.
        JOptionPane optionPaneF = new JOptionPane("Job execution FAILED!");
        JDialog myDialogF = optionPaneF.createDialog(null, "Job status: ");
        myDialogF.setModal(false);
        myDialogF.setVisible(true);
    }

    try {
        System.out.println("Local monitoring file: " + filename);

        String outText = output.toString();
        // Literal replace of backslash + 'n'. The previous pattern was the regex-escaped form,
        // which String.replace treats literally and therefore never matched this sequence.
        String newString = outText.replace("\\n", nl);

        System.out.println("Text: " + outText);

        BufferedWriter out = new BufferedWriter(new FileWriter(filename));
        try {
            out.write(newString);
        } finally {
            // Close even when the write fails so the file handle is not leaked.
            out.close();
        }
        rmDuplicateLines.main(filename);
    } catch (IOException e) {
        e.printStackTrace();
    }
    doLogout(cmURL, cookies);
    System.exit(0);
}

From source file:ch.kostceco.tools.siardexcerpt.SIARDexcerpt.java

/** Die Eingabe besteht aus mind 3 Parameter: [0] Pfad zur SIARD-Datei oder Verzeichnis [1]
 * configfile [2] Modul/* www . j  a  va2s  . com*/
 * 
 * bersicht der Module: --init --search --extract sowie --finish
 * 
 * bei --search kommen danach noch die Suchtexte und bei --extract die Schlssel
 * 
 * @param args
 * @throws IOException */

public static void main(String[] args) throws IOException {
    ApplicationContext context = new ClassPathXmlApplicationContext("classpath:config/applicationContext.xml");

    /** SIARDexcerpt: Aufbau des Tools
     * 
     * 1) init: Config Kopieren und ggf SIARD-Datei ins Workverzeichnis entpacken
     * 
     * 2) search: gemss config die Tabelle mit Suchtext befragen und Ausgabe des Resultates
     * 
     * 3) extract: mit den Keys anhand der config einen Records herausziehen und anzeigen
     * 
     * 4) finish: Config-Kopie sowie Workverzeichnis lschen */

    /* TODO: siehe Bemerkung im applicationContext-services.xml bezglich Injection in der
     * Superklasse aller Impl-Klassen ValidationModuleImpl validationModuleImpl =
     * (ValidationModuleImpl) context.getBean("validationmoduleimpl"); */

    SIARDexcerpt siardexcerpt = (SIARDexcerpt) context.getBean("siardexcerpt");

    // Ist die Anzahl Parameter (mind 3) korrekt?
    if (args.length < 3) {
        System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_PARAMETER_USAGE));
        System.exit(1);
    }

    String module = new String(args[2]);
    File siardDatei = new File(args[0]);
    File configFile = new File(args[1]);

    /* arg 1 gibt den Pfad zur configdatei an. Da dieser in ConfigurationServiceImpl hartcodiert
     * ist, wird diese nach "configuration/SIARDexcerpt.conf.xml" kopiert. */
    File configFileHard = new File("configuration" + File.separator + "SIARDexcerpt.conf.xml");

    // excerpt ist der Standardwert wird aber anhand der config dann gesetzt
    File directoryOfOutput = new File("excerpt");

    // temp_SIARDexcerpt ist der Standardwert wird aber anhand der config dann gesetzt
    File tmpDir = new File("temp_SIARDexcerpt");

    boolean okA = false;
    boolean okB = false;
    boolean okC = false;

    // die Anwendung muss mindestens unter Java 6 laufen
    String javaRuntimeVersion = System.getProperty("java.vm.version");
    if (javaRuntimeVersion.compareTo("1.6.0") < 0) {
        System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_WRONG_JRE));
        System.exit(1);
    }

    if (module.equalsIgnoreCase("--init")) {

        /** 1) init: Config Kopieren und ggf SIARD-Datei ins Workverzeichnis entpacken
         * 
         * a) config muss existieren und SIARDexcerpt.conf.xml noch nicht
         * 
         * b) Excerptverzeichnis mit schreibrechte und ggf anlegen
         * 
         * c) Workverzeichnis muss leer sein und mit schreibrechte
         * 
         * d) SIARD-Datei entpacken
         * 
         * e) Struktur-Check SIARD-Verzeichnis
         * 
         * TODO: Erledigt */

        System.out.println("SIARDexcerpt: init");

        /** a) config muss existieren und SIARDexcerpt.conf.xml noch nicht */
        if (!configFile.exists()) {
            System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_CONFIGFILE_FILENOTEXISTING,
                    configFile.getAbsolutePath()));
            System.exit(1);
        }

        if (configFileHard.exists()) {
            System.out
                    .println(siardexcerpt.getTextResourceService().getText(ERROR_CONFIGFILEHARD_FILEEXISTING));
            System.exit(1);
        }
        Util.copyFile(configFile, configFileHard);

        /** b) Excerptverzeichnis mit schreibrechte und ggf anlegen */
        String pathToOutput = siardexcerpt.getConfigurationService().getPathToOutput();

        directoryOfOutput = new File(pathToOutput);

        if (!directoryOfOutput.exists()) {
            directoryOfOutput.mkdir();
        }

        // Im Logverzeichnis besteht kein Schreibrecht
        if (!directoryOfOutput.canWrite()) {
            System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_LOGDIRECTORY_NOTWRITABLE,
                    directoryOfOutput));
            // Lschen des configFileHard, falls eines angelegt wurde
            if (configFileHard.exists()) {
                Util.deleteDir(configFileHard);
            }
            System.exit(1);
        }

        if (!directoryOfOutput.isDirectory()) {
            System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_LOGDIRECTORY_NODIRECTORY));
            // Lschen des configFileHard, falls eines angelegt wurde
            if (configFileHard.exists()) {
                Util.deleteDir(configFileHard);
            }
            System.exit(1);
        }

        /** c) Workverzeichnis muss leer sein und mit schreibrechte */
        String pathToWorkDir = siardexcerpt.getConfigurationService().getPathToWorkDir();

        tmpDir = new File(pathToWorkDir);

        /* bestehendes Workverzeichnis Abbruch wenn nicht leer, da am Schluss das Workverzeichnis
         * gelscht wird und entsprechend bestehende Dateien gelscht werden knnen */
        if (tmpDir.exists()) {
            if (tmpDir.isDirectory()) {
                // Get list of file in the directory. When its length is not zero the folder is not empty.
                String[] files = tmpDir.list();
                if (files.length > 0) {
                    System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_WORKDIRECTORY_EXISTS,
                            pathToWorkDir));
                    // Lschen des configFileHard, falls eines angelegt wurde
                    if (configFileHard.exists()) {
                        Util.deleteDir(configFileHard);
                    }
                    System.exit(1);
                }
            }
        }
        if (!tmpDir.exists()) {
            tmpDir.mkdir();
        }

        // Im Workverzeichnis besteht kein Schreibrecht
        if (!tmpDir.canWrite()) {
            System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_WORKDIRECTORY_NOTWRITABLE,
                    pathToWorkDir));
            // Lschen des configFileHard, falls eines angelegt wurde
            if (configFileHard.exists()) {
                Util.deleteDir(configFileHard);
            }
            System.exit(1);
        }

        /** d) SIARD-Datei entpacken */
        if (!siardDatei.exists()) {
            // SIARD-Datei existiert nicht
            System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_SIARDFILE_FILENOTEXISTING,
                    siardDatei.getAbsolutePath()));
            // Lschen des configFileHard, falls eines angelegt wurde
            if (configFileHard.exists()) {
                Util.deleteDir(configFileHard);
            }
            System.exit(1);
        }

        if (!siardDatei.isDirectory()) {

            /* SIARD-Datei ist eine Datei
             * 
             * Die Datei muss ins Workverzeichnis extrahiert werden. Dies erfolgt im Modul A.
             * 
             * danach der Pfad zu SIARD-Datei dorthin zeigen lassen */

            Controllerexcerpt controllerexcerpt = (Controllerexcerpt) context.getBean("controllerexcerpt");
            File siardDateiNew = new File(pathToWorkDir + File.separator + siardDatei.getName());
            okA = controllerexcerpt.executeA(siardDatei, siardDateiNew, "");

            if (!okA) {
                // SIARD Datei konte nicht entpackt werden
                System.out.println(MESSAGE_XML_MODUL_A);
                System.out.println(ERROR_XML_A_CANNOTEXTRACTZIP);

                // Lschen des Arbeitsverzeichnisses und configFileHard, falls eines angelegt wurde
                if (tmpDir.exists()) {
                    Util.deleteDir(tmpDir);
                }
                if (configFileHard.exists()) {
                    Util.deleteDir(configFileHard);
                }
                // Fehler Extraktion --> invalide
                System.exit(2);
            } else {
                @SuppressWarnings("unused")
                File siardDateiOld = siardDatei;
                siardDatei = siardDateiNew;
            }

        } else {
            /* SIARD-Datei entpackt oder Datei war bereits ein Verzeichnis.
             * 
             * Gerade bei grsseren SIARD-Dateien ist es sinnvoll an einer Stelle das ausgepackte SIARD
             * zu haben, damit diese nicht immer noch extrahiert werden muss */
        }

        /** e) Struktur-Check SIARD-Verzeichnis */
        File content = new File(siardDatei.getAbsolutePath() + File.separator + "content");
        File header = new File(siardDatei.getAbsolutePath() + File.separator + "header");
        File xsd = new File(
                siardDatei.getAbsolutePath() + File.separator + "header" + File.separator + "metadata.xsd");
        File metadata = new File(
                siardDatei.getAbsolutePath() + File.separator + "header" + File.separator + "metadata.xml");

        if (!content.exists() || !header.exists() || !xsd.exists() || !metadata.exists()) {
            System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_XML_B_STRUCTURE));
            // Lschen des Arbeitsverzeichnisses und configFileHard, falls eines angelegt wurde
            if (tmpDir.exists()) {
                Util.deleteDir(tmpDir);
            }
            if (configFileHard.exists()) {
                Util.deleteDir(configFileHard);
            }
            // Fehler Extraktion --> invalide
            System.exit(2);
        } else {
            // Struktur sieht plausibel aus, extraktion kann starten
        }

    } // End init

    if (module.equalsIgnoreCase("--search")) {

        /** 2) search: gemss config die Tabelle mit Suchtext befragen und Ausgabe des Resultates
         * 
         * a) Ist die Anzahl Parameter (mind 4) korrekt? arg4 = Suchtext
         * 
         * b) Suchtext einlesen
         * 
         * c) search.xml vorbereiten (Header) und xsl in Output kopieren
         * 
         * d) grep ausfhren
         * 
         * e) Suchergebnis speichern und anzeigen (via GUI)
         * 
         * TODO: Noch offen */

        System.out.println("SIARDexcerpt: search");

        String pathToOutput = siardexcerpt.getConfigurationService().getPathToOutput();

        directoryOfOutput = new File(pathToOutput);

        if (!directoryOfOutput.exists()) {
            directoryOfOutput.mkdir();
        }

        /** a) Ist die Anzahl Parameter (mind 4) korrekt? arg4 = Suchtext */
        if (args.length < 4) {
            System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_PARAMETER_USAGE));
            System.exit(1);
        }

        if (!siardDatei.isDirectory()) {
            File siardDateiNew = new File(tmpDir.getAbsolutePath() + File.separator + siardDatei.getName());
            if (!siardDateiNew.exists()) {
                System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_NOINIT));
                System.exit(1);
            } else {
                siardDatei = siardDateiNew;
            }
        }

        /** b) Suchtext einlesen */
        String searchString = new String(args[3]);

        /** c) search.xml vorbereiten (Header) und xsl in Output kopieren */
        // Zeitstempel der Datenextraktion
        java.util.Date nowStartS = new java.util.Date();
        java.text.SimpleDateFormat sdfStartS = new java.text.SimpleDateFormat("dd.MM.yyyy HH:mm:ss");
        String ausgabeStartS = sdfStartS.format(nowStartS);

        /* Der SearchString kann zeichen enthalten, welche nicht im Dateinamen vorkommen drfen.
         * Entsprechend werden diese normalisiert */
        String searchStringFilename = searchString.replaceAll("/", "_");
        searchStringFilename = searchStringFilename.replaceAll(">", "_");
        searchStringFilename = searchStringFilename.replaceAll("<", "_");
        searchStringFilename = searchStringFilename.replace(".*", "_");
        searchStringFilename = searchStringFilename.replaceAll("___", "_");
        searchStringFilename = searchStringFilename.replaceAll("__", "_");

        String outDateiNameS = siardDatei.getName() + "_" + searchStringFilename + "_SIARDsearch.xml";
        outDateiNameS = outDateiNameS.replaceAll("__", "_");

        // Informationen zum Archiv holen
        String archiveS = siardexcerpt.getConfigurationService().getArchive();

        // Konfiguration des Outputs, ein File Logger wird zustzlich erstellt
        LogConfigurator logConfiguratorS = (LogConfigurator) context.getBean("logconfigurator");
        String outFileNameS = logConfiguratorS.configure(directoryOfOutput.getAbsolutePath(), outDateiNameS);
        File outFileSearch = new File(outFileNameS);
        // Ab hier kann ins Output geschrieben werden...

        // Informationen zum XSL holen
        String pathToXSLS = siardexcerpt.getConfigurationService().getPathToXSLsearch();

        File xslOrigS = new File(pathToXSLS);
        File xslCopyS = new File(directoryOfOutput.getAbsolutePath() + File.separator + xslOrigS.getName());
        if (!xslCopyS.exists()) {
            Util.copyFile(xslOrigS, xslCopyS);
        }

        LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_HEADER, xslCopyS.getName()));
        LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_START, ausgabeStartS));
        LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_ARCHIVE, archiveS));
        LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_INFO));

        /** d) search: dies ist in einem eigenen Modul realisiert */
        Controllerexcerpt controllerexcerptS = (Controllerexcerpt) context.getBean("controllerexcerpt");

        okB = controllerexcerptS.executeB(siardDatei, outFileSearch, searchString);

        /** e) Ausgabe und exitcode */
        if (!okB) {
            // Suche konnte nicht erfolgen
            LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_MODUL_B));
            LOGGER.logError(siardexcerpt.getTextResourceService().getText(ERROR_XML_B_CANNOTSEARCHRECORD));
            LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_LOGEND));
            System.out.println(MESSAGE_XML_MODUL_B);
            System.out.println(ERROR_XML_B_CANNOTSEARCHRECORD);
            System.out.println("");

            // Lschen des Arbeitsverzeichnisses und configFileHard erfolgt erst bei schritt 4 finish

            // Fehler Extraktion --> invalide
            System.exit(2);
        } else {
            // Suche konnte durchgefhrt werden
            LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_LOGEND));
            // Lschen des Arbeitsverzeichnisses und configFileHard erfolgt erst bei schritt 4 finish

            // Record konnte extrahiert werden
            System.exit(0);
        }

    } // End search

    if (module.equalsIgnoreCase("--excerpt")) {

        /** 3) extract: mit den Keys anhand der config einen Records herausziehen und anzeigen
         * 
         * a) Ist die Anzahl Parameter (mind 4) korrekt? arg4 = Suchtext
         * 
         * b) extract.xml vorbereiten (Header) und xsl in Output kopieren
         * 
         * c) extraktion: dies ist in einem eigenen Modul realisiert
         * 
         * d) Ausgabe und exitcode
         * 
         * TODO: Erledigt */

        System.out.println("SIARDexcerpt: extract");

        String pathToOutput = siardexcerpt.getConfigurationService().getPathToOutput();

        directoryOfOutput = new File(pathToOutput);

        if (!directoryOfOutput.exists()) {
            directoryOfOutput.mkdir();
        }

        /** a) Ist die Anzahl Parameter (mind 4) korrekt? arg4 = Suchtext */
        if (args.length < 4) {
            System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_PARAMETER_USAGE));
            System.exit(1);
        }

        if (!siardDatei.isDirectory()) {
            File siardDateiNew = new File(tmpDir.getAbsolutePath() + File.separator + siardDatei.getName());
            if (!siardDateiNew.exists()) {
                System.out.println(siardexcerpt.getTextResourceService().getText(ERROR_NOINIT));
                System.exit(1);
            } else {
                siardDatei = siardDateiNew;
            }
        }

        /** b) extract.xml vorbereiten (Header) und xsl in Output kopieren */
        // Zeitstempel der Datenextraktion
        java.util.Date nowStart = new java.util.Date();
        java.text.SimpleDateFormat sdfStart = new java.text.SimpleDateFormat("dd.MM.yyyy HH:mm:ss");
        String ausgabeStart = sdfStart.format(nowStart);

        String excerptString = new String(args[3]);
        String outDateiName = siardDatei.getName() + "_" + excerptString + "_SIARDexcerpt.xml";

        // Informationen zum Archiv holen
        String archive = siardexcerpt.getConfigurationService().getArchive();

        // Konfiguration des Outputs, ein File Logger wird zustzlich erstellt
        LogConfigurator logConfigurator = (LogConfigurator) context.getBean("logconfigurator");
        String outFileName = logConfigurator.configure(directoryOfOutput.getAbsolutePath(), outDateiName);
        File outFile = new File(outFileName);
        // Ab hier kann ins Output geschrieben werden...

        // Informationen zum XSL holen
        String pathToXSL = siardexcerpt.getConfigurationService().getPathToXSL();

        File xslOrig = new File(pathToXSL);
        File xslCopy = new File(directoryOfOutput.getAbsolutePath() + File.separator + xslOrig.getName());
        if (!xslCopy.exists()) {
            Util.copyFile(xslOrig, xslCopy);
        }

        LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_HEADER, xslCopy.getName()));
        LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_START, ausgabeStart));
        LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_ARCHIVE, archive));
        LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_INFO));

        /** c) extraktion: dies ist in einem eigenen Modul realisiert */
        Controllerexcerpt controllerexcerpt = (Controllerexcerpt) context.getBean("controllerexcerpt");

        okC = controllerexcerpt.executeC(siardDatei, outFile, excerptString);

        /** d) Ausgabe und exitcode */
        if (!okC) {
            // Record konnte nicht extrahiert werden
            LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_MODUL_C));
            LOGGER.logError(siardexcerpt.getTextResourceService().getText(ERROR_XML_C_CANNOTEXTRACTRECORD));
            LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_LOGEND));
            System.out.println(MESSAGE_XML_MODUL_C);
            System.out.println(ERROR_XML_C_CANNOTEXTRACTRECORD);
            System.out.println("");

            // Lschen des Arbeitsverzeichnisses und configFileHard erfolgt erst bei schritt 4 finish

            // Fehler Extraktion --> invalide
            System.exit(2);
        } else {
            // Record konnte extrahiert werden
            LOGGER.logError(siardexcerpt.getTextResourceService().getText(MESSAGE_XML_LOGEND));
            // Lschen des Arbeitsverzeichnisses und configFileHard erfolgt erst bei schritt 4 finish

            // Record konnte extrahiert werden
            System.exit(0);

        }

    } // End extract

    if (module.equalsIgnoreCase("--finish")) {

        /** 4) finish: Config-Kopie sowie Workverzeichnis lschen
         * 
         * TODO: Erledigt */

        System.out.println("SIARDexcerpt: finish");

        // Lschen des Arbeitsverzeichnisses und confiFileHard, falls eines angelegt wurde
        if (tmpDir.exists()) {
            Util.deleteDir(tmpDir);
        }
        if (configFileHard.exists()) {
            Util.deleteDir(configFileHard);
        }

    } // End finish

}

From source file:org.fcrepo.client.test.PerformanceTests.java

/**
 * Runs the complete performance suite and appends one result row per report
 * section to the output file.
 *
 * <p>Arguments, in order: host, port, username, password, iterations, threads,
 * output file, test name and, optionally, the application context (defaults to
 * {@code Constants.FEDORA_DEFAULT_APP_CONTEXT} when absent or empty).
 *
 * <p>The report consists of six sections separated by blank lines. For a new
 * file the section headings are written; for an existing file the previous
 * content of every section is copied over and the new row is appended to it.
 * All tests are executed and all rows are formatted <em>before</em> the output
 * file is rewritten, so a failing run leaves earlier results untouched.
 *
 * @param args command-line arguments as described above
 * @throws Exception if a test fails or the report cannot be read
 */
public static void main(String[] args) throws Exception {

    // NOTE(review): usage() is expected to terminate the JVM - confirm.
    if (args.length < 8 || args.length > 9) {
        usage();
    }

    String host = args[0];
    String port = args[1];
    String username = args[2];
    String password = args[3];
    String itr = args[4];
    String thrds = args[5];
    String output = args[6];
    String name = args[7];
    String context = Constants.FEDORA_DEFAULT_APP_CONTEXT;

    if (args.length == 9 && !args[8].equals("")) {
        context = args[8];
    }

    // A leading '$' indicates a build property that was never substituted.
    if (host == null || host.startsWith("$") || port == null || port.startsWith("$") || username == null
            || username.startsWith("$") || password == null || password.startsWith("$") || itr == null
            || itr.startsWith("$") || thrds == null || thrds.startsWith("$") || output == null
            || output.startsWith("$") || name == null || name.startsWith("$")) {
        usage();
    }
    // Rows are comma separated, so the test name must not contain commas.
    name = name.replaceAll(",", ";");
    iterations = Integer.parseInt(itr);
    threads = Integer.parseInt(thrds);

    PerformanceTests tests = new PerformanceTests();
    tests.init(host, port, context, username, password);
    System.out.println("Running Ingest Round-Trip Test...");
    long ingestResults = tests.runIngestTest();
    System.out.println("Running AddDatastream Round-Trip Test...");
    long addDsResults = tests.runAddDatastreamTest();
    System.out.println("Running ModifyDatastreamByReference Round-Trip Test...");
    long modifyRefResults = tests.runModifyDatastreamByRefTest();
    System.out.println("Running ModifyDatastreamByValue Round-Trip Test...");
    long modifyValResults = tests.runModifyDatastreamByValueTest();
    System.out.println("Running PurgeDatastream Round-Trip Test...");
    long purgeDsResults = tests.runPurgeDatastreamTest();
    System.out.println("Running PurgeObject Round-Trip Test...");
    long purgeObjectResults = tests.runPurgeObjectTest();
    System.out.println("Running GetDatastream Round-Trip Test...");
    long getDatastreamResults = tests.runGetDatastreamTest();
    System.out.println("Running GetDatastreamREST Round-Trip Test...");
    long getDatastreamRestResults = tests.runGetDatastreamRestTest();
    System.out.println("Running Throughput Tests...");
    long[] tpResults = tests.runThroughputTests();
    System.out.println("Running Threaded Throughput Tests...");
    long[] tptResults = tests.runThreadedThroughputTests();

    // Format every row up front: any arithmetic failure (e.g. iterations == 0)
    // then happens before the existing report is truncated.
    String isolatedTimes = name + ", " + ingestResults + ", " + addDsResults + ", " + modifyRefResults + ", "
            + modifyValResults + ", " + purgeDsResults + ", " + purgeObjectResults + ", "
            + getDatastreamResults / iterations + ", " + getDatastreamRestResults / iterations;

    String isolatedRates = name + ", " + round(1000 / (double) ingestResults) + ", "
            + round(1000 / (double) addDsResults) + ", " + round(1000 / (double) modifyRefResults) + ", "
            + round(1000 / (double) modifyValResults) + ", " + round(1000 / (double) purgeDsResults) + ", "
            + round(1000 / (double) purgeObjectResults) + ", "
            + round(1000 / ((double) getDatastreamResults / iterations)) + ", "
            + round(1000 / ((double) getDatastreamRestResults / iterations));

    String backToBackTimes = name + ", " + tpResults[0] + ", " + tpResults[1] + ", " + tpResults[2] + ", "
            + tpResults[3] + ", " + tpResults[4] + ", " + tpResults[5] + ", " + getDatastreamResults + ", "
            + getDatastreamRestResults;

    // Multiply in floating point: iterations * 1000 overflows int for large counts.
    double iterationMillis = iterations * 1000.0;

    String backToBackRates = name + ", " + round(iterationMillis / tpResults[0]) + ", "
            + round(iterationMillis / tpResults[1]) + ", " + round(iterationMillis / tpResults[2]) + ", "
            + round(iterationMillis / tpResults[3]) + ", " + round(iterationMillis / tpResults[4]) + ", "
            + round(iterationMillis / tpResults[5]) + ", " + round(iterationMillis / getDatastreamResults)
            + ", " + round(iterationMillis / getDatastreamRestResults);

    String threadedTimes = name + ", " + tptResults[0] + ", " + tptResults[1] + ", " + tptResults[2] + ", "
            + tptResults[3] + ", " + tptResults[4] + ", " + tptResults[5] + ", " + tptResults[6] + ", "
            + tptResults[7];

    String threadedRates = name + ", " + round(iterationMillis / tptResults[0]) + ", "
            + round(iterationMillis / tptResults[1]) + ", " + round(iterationMillis / tptResults[2]) + ", "
            + round(iterationMillis / tptResults[3]) + ", " + round(iterationMillis / tptResults[4]) + ", "
            + round(iterationMillis / tptResults[5]) + ", " + round(iterationMillis / tptResults[6]) + ", "
            + round(iterationMillis / tptResults[7]);

    File outputFile = new File(output);
    boolean newFile = !outputFile.exists();

    File tempFile = null;
    BufferedReader reader = null;
    PrintStream out = null;
    try {
        if (!newFile) {
            // The output file is rewritten in place, so the old content is read from a copy.
            tempFile = copyToTempFile(outputFile);
            reader = new BufferedReader(new FileReader(tempFile));
        }
        out = new PrintStream(outputFile);

        if (newFile) {
            out.println(
                    "--------------------------------------------------------------" + " Performance Test Results "
                            + "--------------------------------------------------------------");
        }

        writeSection(out, reader,
                "1. Test performing each operation in isolation. Time (in ms) is the average required to perform each operation.",
                isolatedTimes);
        out.println();
        writeSection(out, reader, "2. Operations-Per-Second based on results listed in item 1.", isolatedRates);
        out.println();
        writeSection(out, reader,
                "3. Test performing operations back-to-back. Time (in ms) is that required to perform all iterations.",
                backToBackTimes);
        out.println();
        writeSection(out, reader, "4. Operations-Per-Second based on results listed in item 3.",
                backToBackRates);
        out.println();
        writeSection(out, reader,
                "5. Test performing operations using a thread pool. Time (in ms) is that required to perform all iterations.",
                threadedTimes);
        out.println();
        writeSection(out, reader, "6. Operations-Per-Second based on results listed in item 5.", threadedRates);
    } finally {
        if (out != null) {
            out.close();
        }
        if (reader != null) {
            reader.close();
        }
    }

    // Only reached on success; after a failure the copy is kept as a backup of the old report.
    if (tempFile != null) {
        tempFile.delete();
    }

    System.out.println("Performance Tests Complete.");
}

/** Copies {@code source} line by line into a fresh temporary file and returns that file. */
private static File copyToTempFile(File source) throws java.io.IOException {
    File copy = File.createTempFile("performance-test", "tmp");
    boolean copied = false;
    try (BufferedReader input = new BufferedReader(new FileReader(source));
            PrintStream tempOut = new PrintStream(copy)) {
        String line;
        while ((line = input.readLine()) != null) {
            tempOut.println(line);
        }
        copied = true;
    } finally {
        if (!copied) {
            copy.delete();
        }
    }
    return copy;
}

/**
 * Writes one report section: its heading (new report, {@code previous == null}) or the
 * section's earlier content copied from {@code previous}, followed by the new result row.
 */
private static void writeSection(PrintStream out, BufferedReader previous, String title, String row)
        throws java.io.IOException {
    if (previous == null) {
        out.println(title);
        out.println(
                "test name, ingest, addDatastream, modifyDatastreamByReference, modifyDatastreamByValue, purgeDatastream, purgeObject, getDatastream, getDatastreamREST");
    } else {
        // A section ends at the first line of at most two characters (the blank
        // separator), which is consumed but not copied.
        String line = previous.readLine();
        while (line != null && line.length() > 2) {
            out.println(line);
            line = previous.readLine();
        }
    }
    out.println(row);
}

From source file:eu.fbk.dkm.sectionextractor.pantheon.WikipediaGoodTextExtractor.java

/**
 * Command-line entry point of the extractor.
 *
 * <p>Registers the supported options, parses {@code args}, loads the optional
 * id/category, redirect and filter files, makes sure the output directory
 * exists and finally starts a {@code WikipediaGoodTextExtractor} on the dump.
 * The JVM exits with status 1 when the command line cannot be parsed or the
 * logger cannot be configured.
 *
 * @param args command-line arguments
 * @throws IOException if an auxiliary file cannot be read or the output
 *         directory cannot be created
 */
public static void main(String args[]) throws IOException {

    CommandLineWithLogger cli = new CommandLineWithLogger();

    // Mandatory input dump and output directory.
    Option dumpOption = OptionBuilder.withArgName("file").hasArg().withDescription("wikipedia xml dump file")
            .isRequired().withLongOpt("wikipedia-dump").create("d");
    cli.addOption(dumpOption);
    Option outputDirOption = OptionBuilder.withArgName("dir").hasArg()
            .withDescription("output directory in which to store output files").isRequired()
            .withLongOpt("output-dir").create("o");
    cli.addOption(outputDirOption);

    // Output format switches.
    Option nafOption = OptionBuilder.withDescription("use NAF format").withLongOpt("naf").create("n");
    cli.addOption(nafOption);
    Option stanfordOption = OptionBuilder.withDescription("tokenize and ssplit with Stanford")
            .withLongOpt("stanford").create("s");
    cli.addOption(stanfordOption);

    // Optional auxiliary files.
    Option filterOption = OptionBuilder.withArgName("file").hasArg().withDescription("Filter file")
            .withLongOpt("filter").create("f");
    cli.addOption(filterOption);
    Option idcatOption = OptionBuilder.withArgName("file").hasArg().withDescription("ID and category file")
            .withLongOpt("idcat").create("i");
    cli.addOption(idcatOption);
    Option redirectOption = OptionBuilder.withArgName("file").hasArg().withDescription("Redirect file")
            .withLongOpt("redirect").create("r");
    cli.addOption(redirectOption);

    // Numeric tuning parameters.
    Option threadsOption = OptionBuilder.withArgName("int").hasArg()
            .withDescription(
                    "number of threads (default " + AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER + ")")
            .withLongOpt("num-threads").create("t");
    cli.addOption(threadsOption);
    Option pagesOption = OptionBuilder.withArgName("int").hasArg()
            .withDescription("number of pages to process (default all)").withLongOpt("num-pages").create("p");
    cli.addOption(pagesOption);
    Option notificationOption = OptionBuilder.withArgName("int").hasArg()
            .withDescription("receive notification every n pages (default "
                    + AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT + ")")
            .withLongOpt("notification-point").create("b");
    cli.addOption(notificationOption);

    // Registered a second time under the same short name, exactly as before.
    cli.addOption(new Option("n", "NAF format"));

    CommandLine parsed = null;
    try {
        parsed = cli.getCommandLine(args);
        PropertyConfigurator.configure(cli.getLoggerProps());
    } catch (Exception e) {
        System.exit(1);
    }

    String defaultThreads = Integer.toString(AbstractWikipediaXmlDumpParser.DEFAULT_THREADS_NUMBER);
    int numThreads = Integer.parseInt(parsed.getOptionValue("num-threads", defaultThreads));
    String defaultPages = Integer.toString(AbstractWikipediaExtractor.DEFAULT_NUM_PAGES);
    int numPages = Integer.parseInt(parsed.getOptionValue("num-pages", defaultPages));
    String defaultNotification = Integer.toString(AbstractWikipediaExtractor.DEFAULT_NOTIFICATION_POINT);
    int notificationPoint = Integer.parseInt(parsed.getOptionValue("notification-point", defaultNotification));

    boolean nafFormat = parsed.hasOption("n");
    boolean useStanford = parsed.hasOption("s");

    // Page id -> category, taken from the 2nd and 3rd whitespace-separated field of each line.
    HashMap<Integer, String> idCategory = new HashMap<>();
    String idcatPath = parsed.getOptionValue("idcat");
    if (idcatPath != null) {
        logger.info("Loading categories");
        File idcatFile = new File(idcatPath);
        if (idcatFile.exists()) {
            for (String rawLine : Files.readLines(idcatFile, Charsets.UTF_8)) {
                String entry = rawLine.trim();
                if (entry.isEmpty()) {
                    continue;
                }
                String[] fields = entry.split("\\s+");
                if (fields.length >= 3) {
                    idCategory.put(Integer.parseInt(fields[1]), fields[2]);
                }
            }
        }
    }

    // First tab-separated field -> second field of each redirect line.
    HashMap<String, String> redirects = new HashMap<>();
    String redirectPath = parsed.getOptionValue("redirect");
    if (redirectPath != null) {
        logger.info("Loading redirects");
        File redirectFile = new File(redirectPath);
        if (redirectFile.exists()) {
            for (String rawLine : Files.readLines(redirectFile, Charsets.UTF_8)) {
                String entry = rawLine.trim();
                if (entry.isEmpty()) {
                    continue;
                }
                String[] fields = entry.split("\\t+");
                if (fields.length >= 2) {
                    redirects.put(fields[0], fields[1]);
                }
            }
        }
    }

    // Stays null (passed to the extractor as such) unless an existing filter file is given.
    HashSet<String> pagesToConsider = null;
    String filterPath = parsed.getOptionValue("filter");
    if (filterPath != null) {
        logger.info("Loading file list");
        File filterFile = new File(filterPath);
        if (filterFile.exists()) {
            pagesToConsider = new HashSet<>();
            for (String rawLine : Files.readLines(filterFile, Charsets.UTF_8)) {
                String title = rawLine.trim();
                if (title.isEmpty()) {
                    continue;
                }
                // Whitespace runs in a title become single underscores.
                title = title.replaceAll("\\s+", "_");
                pagesToConsider.add(title);
                addRedirects(pagesToConsider, redirects, title, 0);
            }
        }
    }

    String dumpPath = parsed.getOptionValue("wikipedia-dump");
    ExtractorParameters extractorParameters = new ExtractorParameters(dumpPath,
            parsed.getOptionValue("output-dir"));

    File outputFolder = new File(parsed.getOptionValue("output-dir"));
    if (!outputFolder.exists() && !outputFolder.mkdirs()) {
        throw new IOException("Unable to create folder " + outputFolder.getAbsolutePath());
    }

    WikipediaExtractor extractor = new WikipediaGoodTextExtractor(numThreads, numPages,
            extractorParameters.getLocale(), outputFolder, nafFormat, pagesToConsider, useStanford, idCategory);
    extractor.setNotificationPoint(notificationPoint);
    extractor.start(extractorParameters);

    logger.info("extraction ended " + new Date());

}

From source file:enrichment.Disambiguate.java

/**
 * Disambiguates link files: for every link target (the third field of a line)
 * whose linking resources all share exactly one distinct creator, the links
 * are written to {@code <input>_disambiguated.nt}.
 *
 * <p>Prerequisites (lines with the same third field must be adjacent):
 * <pre>
 * cd silk_2.5.3/*_links/
 * cat *.nt|sort  -t' ' -k3   > $filename
 * </pre>
 *
 * <p>If the first argument is a directory, every file in it accepted by
 * {@code OnlyExtFilenameFilter("nt")} is processed; otherwise every argument
 * is treated as an input file. A failure while processing one file is printed
 * to standard output and does not stop the remaining files.
 *
 * @param args $filename, or a directory containing the input files
 */
public static void main(String[] args) {
    if (args.length == 0) {
        System.out.print("Usage: Disambiguate <sorted link file | directory of link files>\n");
        return;
    }

    File file = new File(args[0]);
    boolean directoryMode = file.isDirectory();
    if (directoryMode) {
        args = file.list(new OnlyExtFilenameFilter("nt"));
        if (args == null) {
            // File.list returns null when the directory cannot be read.
            System.out.print("Cannot list directory " + file.getPath() + "\n");
            return;
        }
    }

    for (int q = 0; q < args.length; q++) {
        String filename = directoryMode ? file.getPath() + File.separator + args[q] : args[q];
        try {
            disambiguateFile(filename);
        } catch (Exception e) {
            System.out.print("Exception while working on " + filename + ": \n");
            e.printStackTrace(System.out);
        }
    }
}

/** Reads one sorted link file and writes its disambiguated links next to it. */
private static void disambiguateFile(String filename) throws Exception {
    String prefix = "@prefix rdrel: <http://rdvocab.info/RDARelationshipsWEMI/> .\n"
            + "@prefix dbpedia:    <http://de.dbpedia.org/resource/> .\n"
            + "@prefix frbr:   <http://purl.org/vocab/frbr/core#> .\n"
            + "@prefix lobid: <http://lobid.org/resource/> .\n"
            + "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n"
            + "@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n"
            + "@prefix mo: <http://purl.org/ontology/mo/> .\n"
            + "@prefix wikipedia: <https://de.wikipedia.org/wiki/> .";

    // The input is opened first so that a missing input does not leave an output file
    // behind; both streams are closed even when processing fails.
    try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
            FileWriter output = new FileWriter(filename + "_disambiguated.nt")) {
        output.append(prefix + "\n\n");

        // object URI -> (subject URI -> creators of that subject)
        HashMap<String, HashMap<String, ArrayList<String>>> hm = new HashMap<>();
        HashMap<String, ArrayList<String>> hmLobid = new HashMap<>();
        String currentObject = null;
        String s;
        while ((s = in.readLine()) != null) {
            String[] triples = s.split(" ");
            // Strip the enclosing '<' and '>'.
            String object = triples[2].substring(1, triples[2].length() - 1);
            if (currentObject != null && !currentObject.equals(object)) {
                // A new object starts a new group (the input is sorted by object).
                hmLobid = new HashMap<>();
            }
            currentObject = object;
            String subject = triples[0].substring(1, triples[0].length() - 1);
            System.out.print("\nSubject=" + object);
            System.out.print("\ntriples[2]=" + triples[2]);
            hmLobid.put(subject, getAllCreators(new URI(subject)));
            hm.put(object, hmLobid);
        }

        // get all dbpedia resources
        for (String key_one : hm.keySet()) {
            System.out.print("\n==============\n==== " + key_one + "\n===============");
            HashMap<String, ArrayList<String>> resources = hm.get(key_one);
            // Fully qualified so that no additional import is required.
            java.util.Set<String> distinctCreators = new java.util.HashSet<>();
            // get all lobid_resources subsumed under the dbpedia resource
            for (String subject_uri : resources.keySet()) {
                System.out.print("\n     subject_uri=" + subject_uri);
                // get all creators of the lobid resource
                for (String creator : resources.get(subject_uri)) {
                    System.out.print("\n          " + creator);
                    distinctCreators.add(creator);
                }
            }
            if (distinctCreators.size() == 1) {
                System.out.println("\n" + "Every resource pointing to " + key_one + " has the same creator!");
                for (String key_two : resources.keySet()) {
                    output.append("<" + key_two + "> rdrel:workManifested <" + key_one + "> .\n");
                    output.append("<" + key_two + ">  mo:wikipedia <"
                            + key_one.replace("dbpedia.org/resource", "wikipedia.org/wiki") + "> .\n");
                }
            }
        }
    }
}