com.grantingersoll.opengrok.analysis.python.TestPythonSymbolTokenizer.java Source code

Java tutorial

Introduction

Here is the source code for com.grantingersoll.opengrok.analysis.python.TestPythonSymbolTokenizer.java

Source

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

package com.grantingersoll.opengrok.analysis.python;

import com.grantingersoll.opengrok.analysis.SymbolTokenizer;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.junit.Test;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

public class TestPythonSymbolTokenizer extends BaseTokenStreamTestCase {
    private Analyzer analyzer;

    @Override
    public void setUp() throws Exception {
        super.setUp();
        analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new PythonSymbolTokenizer(newAttributeFactory());
                return new TokenStreamComponents(tokenizer);
            }
        };
    }

    @Override
    public void tearDown() throws Exception {
        analyzer.close();
        super.tearDown();
    }

    @Test
    public void testHexLiteral() throws Exception {
        String input = "0xFFFF";
        String[] output = new String[] {}; // zero output tokens
        assertAnalyzesTo(analyzer, input, output);
    }

    @Test
    public void test() throws Exception {
        String input;
        try (InputStream stream = TestPythonSymbolTokenizer.class.getResourceAsStream("main.py");
                Reader in = new InputStreamReader(stream, StandardCharsets.UTF_8)) {
            input = IOUtils.toString(in);
        }
        String[] output = new String[] {
                //// #!/usr/bin/env python
                //// #
                ////
                //// # testing comment
                ////
                "getopt", //// import getopt
                "os", //// import os
                //// #import stat
                "string", //// import string
                "sys", //// import sys
                ////
                "datetime", //// import datetime
                "time", //// import time
                "math", //// import math
                "random", //// import random
                ////
                "SCRIPTHOME", "os", "path", "dirname", "sys", "argv", //// SCRIPTHOME=os.path.dirname(sys.argv[0])
                ////
                "version_string", //// version_string = "0.4"
                ////
                "banner", //// def banner():
                ////     print """This is a multi
                ////  string with mu
                ////  ltiple lines
                //// """,
                ////
                ////
                "version", //// def version():
                "version_string", ////     print "version %s " % version_string,
                ////
                "MyClass", //// class MyClass:
                ////     """A simple example class"""
                "i", ////     i = 12345
                "x", ////     x = 0xdeadbeef
                "l", "L", /* TODO: support all numeric formats! */ ////     l= 456L
                "z", ////     z=14*8
                "f", ////     f=12.38
                "i", "e", "j", /* TODO: support all numeric formats! */ ////     i=3.14e-10j
                "ef", "e", /* TODO: support all numeric formats! */ ////     ef=14e-13
                "f", "self", ////     def f(self):
                ////         return 'hello world'
                ////
                "main", //// def main():
                ////
                ////     # supress RuntimeWarning for tempnam being insecure
                ////     #    warnings.filterwarnings( "ignore" )
                ////
                ////     # go ahead
                "x", "MyClass", ////     x = MyClass()
                ////     print "hello world",
                ////
                "version", ////     version()
                ////
                //// # -----------------------------------------------------
                "__name__", //// if  __name__ == "__main__":
                "main", ////     main()
                        ////
                        ////
                        ////
        };
        assertAnalyzesTo(analyzer, input, output);
    }

    @Test
    public void testMimeType() {
        SymbolTokenizer tokenizer = new PythonSymbolTokenizer(newAttributeFactory());
        assertEquals("text/x-python", tokenizer.getMimeType());
    }

    @Test
    public void testSourceCodeLanguage() {
        SymbolTokenizer tokenizer = new PythonSymbolTokenizer(newAttributeFactory());
        assertEquals("Python", tokenizer.getSourceCodeLanguage());
    }
}