ExtractorContentTest.java Source code

Java tutorial

Introduction

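Here is the source code for ExtractorContentTest.java, a JUnit test class that collects Wikipedia "Comparison of ..." pages and computes statistics about their tables.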

Source

import static org.junit.Assert.assertNotNull;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.lang.ProcessBuilder.Redirect;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import org.xtext.example.mydsl.fML.FMFormat;

import z3.TseitinTransformationDisjunctive;
import fr.unice.polytech.modalis.familiar.experimental.KSynthesisConfiguration;
import fr.unice.polytech.modalis.familiar.operations.AggregatorFM;
import fr.unice.polytech.modalis.familiar.operations.ExpressionUtility;
import fr.unice.polytech.modalis.familiar.operations.FMLMergerBDD;
import fr.unice.polytech.modalis.familiar.operations.FMLMergerDisjunctiveSAT;
import fr.unice.polytech.modalis.familiar.operations.featureide.SATFMLFormula;
import fr.unice.polytech.modalis.familiar.parser.FMBuilder;
import fr.unice.polytech.modalis.familiar.parser.MyExpressionParser;
import fr.unice.polytech.modalis.familiar.test.FMLTest;
import fr.unice.polytech.modalis.familiar.variable.FeatureModelVariable;
import fr.unice.polytech.modalis.familiar.variable.featureide.FeatureModelVariableSATFormula;
import gsd.graph.ImplicationGraph;
import gsd.graph.SimpleEdge;
import gsd.graph.TransitiveReduction;
import gsd.synthesis.Expression;
import gsd.synthesis.Formula;

public class ExtractorContentTest extends FMLTest {

    /**
     * Relative directory path for the Wikipedia comparison-table FML output.
     * NOTE(review): not referenced in the visible part of this file — presumably used further down; confirm.
     */
    public static String OUTPUT_DIRECTORY = "outputFML/wikipedia-comparison-tables/";

    /** Scheme and host prepended to the site-relative Wikipedia links ("/wiki/...", "/w/index.php?...") handled by this class. */
    public static String URL_BASE_NAME = "http://en.wikipedia.org";

    /**
     * Wikipedia page names (the part of the link after "/wiki/", still URL-encoded)
     * that {@code testStatistics} and {@code testQuantitativeStats} skip.
     * The inline comments record why each entry or group of entries is excluded.
     */
    Set<String> excludePCMs = new HashSet<String>(Arrays.asList(new String[] {
            // not relevant (and these pages do not parse anyway)
            "Comparison_between_Esperanto_and_Ido", "Comparison_between_Esperanto_and_Interlingua",
            "Comparison_between_Esperanto_and_Novial", "Comparison_between_Ido_and_Interlingua",
            "Comparison_between_Ido_and_Novial", "Comparison_between_U.S._states_and_countries_by_GDP_(PPP)",
            "Comparison_of_ALGOL_68_and_C%2B%2B", "Comparison_of_Afrikaans_and_Dutch",
            "Comparison_of_Asian_national_space_programs", "Comparison_of_Axis_%26_Allies_games",

            "Comparison_of_C_Sharp_and_Visual_Basic_.NET", // no table
            "Comparison_of_Chernobyl_and_other_radioactivity_releases", // no table

            "Comparison_of_Home_Owners%27_and_Civic_Associations", // no table

            "Comparison_of_IOC,_FIFA,_and_ISO_3166_country_codes", // limited interest

            "Comparison_of_Java_and_C%2B%2B", // limited interest (rather a qualitative comparison, based on natural language)

            "Comparison_of_MD_and_DO_in_the_United_States", // no table and no interest

            "Comparison_of_Norwegian_Bokm%C3%A5l_and_Standard_Danish", // limited interest

            "Comparison_of_Portuguese_and_Spanish", // limited interest (pattern: comparison of natural languages such as Esperanto, Norwegian, Spanish, etc.)

            "Comparison_of_privilege_authorization_features", // no table (rather a qualitative comparison, based on natural language)

            "Comparison_of_the_Hare_and_Droop_quotas", // limited interest

            "Comparison_of_the_imperial_and_US_customary_measurement_systems", // out of scope

            // excluded because of the current state of the parser
            // FIXME
            "Comparison_of_Android_e-book_reader_software", "Comparison_of_Exchange_ActiveSync_clients",
            "Comparison_of_Linux_distributions", "Comparison_of_Symbian_devices",
            "Comparison_of_browser_synchronizers", "Comparison_of_business_integration_software",
            "Comparison_of_consumer_brain%E2%80%93computer_interfaces", "Comparison_of_domestic_robots",
            "Comparison_of_e-book_formats", "Comparison_of_e-book_readers", "Comparison_of_file_hosting_services",
            "Comparison_of_layout_engines_(Cascading_Style_Sheets)", "Comparison_of_layout_engines_(MathML)",
            "Comparison_of_machine_translation_applications", "Comparison_of_mobile_operating_systems",
            "Comparison_of_network_diagram_software", "Comparison_of_numerical_analysis_software",
            "Comparison_of_statistics_journals", "Comparison_of_text_editors", "Comparison_of_web_server_software",
            "Comparison_of_TeX_editors",
            // limited interest (author's opinion)
            "Comparison_of_United_States_presidential_candidates,_2008", "Comparison_of_World_War_I_tanks",
            "Comparison_of_programming_languages_(object-oriented_programming)",
            "Comparison_of_programming_languages_(string_functions)",

    }));

    /**
     * Collects the links to all Wikipedia pages whose title starts with "Comparison"
     * (via the Special:PrefixIndex listing), prints how many were found, and assembles
     * a "Title ; URL" CSV listing in memory. Writing the CSV to disk is currently disabled.
     *
     * @throws IOException propagated from the page-collecting helper
     */
    @Test
    public void collectAllComparisonOf() throws IOException {
        final String prefixIndexQuery =
                "/w/index.php?title=Special%3APrefixIndex&prefix=Comparison&namespace=0&hideredirects=1";

        List<Element> links = new ArrayList<Element>();
        _collectAllComparisonOf(prefixIndexQuery, links);
        System.err.println("#hrefs=" + links.size());

        // CSV: one header line, then one "title ; absolute URL" line per collected link
        StringBuilder csv = new StringBuilder();
        csv.append("Title ; URL\n");
        for (Element link : links) {
            String title = link.attr("title");
            String target = link.attr("href");
            csv.append(title).append(" ; ").append(URL_BASE_NAME).append(target).append("\n");
        }

        // Disabled on purpose:
        //FileUtils.writeStringToFile(new File ("comparisonsData.csv"), csv.toString());
    }

    /**
     * Fetches the listing page at {@code URL_BASE_NAME + url} and every following
     * "Next page" listing, adding to {@code hrefs} each anchor whose title contains
     * "Comparison" and whose target starts with "/wiki/".
     *
     * @param url   site-relative URL of the first listing page
     * @param hrefs output list; matching anchor elements are appended in page order
     * @throws IOException if a listing page cannot be fetched
     */
    private void _collectAllComparisonOf(String url, List<Element> hrefs) throws IOException {
        String pageUrl = url;
        while (true) {
            Document page = Jsoup.connect("" + URL_BASE_NAME + pageUrl).get();

            Element nextPageLink = null;
            for (Element anchor : page.select("a[href]")) {
                Element withHref = anchor.getElementsByAttribute("href").first();
                String title = withHref.attr("title");
                String target = withHref.attr("href");

                boolean isComparisonArticle = title.contains("Comparison") && target.startsWith("/wiki/");
                if (isComparisonArticle) {
                    hrefs.add(anchor);
                }

                // when several pagination links match, the last one on the page wins
                String label = anchor.text();
                if (label.contains("Next page") && target.startsWith("/w/index.php?")) {
                    nextPageLink = anchor;
                }
            }

            if (nextPageLink == null) {
                return; // no further listing page
            }
            pageUrl = nextPageLink.attr("href");
        }
    }

    /**
     * Walks every collected "Comparison" page that is not listed in {@code excludePCMs},
     * computes its table statistics and prints, per page, the number of tables and, per
     * table, the number of headers and products. Finally prints how many pages had at
     * least one table.
     *
     * @throws Exception propagated from page collection or statistic computation
     */
    @Test
    public void testStatistics() throws Exception {
        List<Element> pageLinks = new ArrayList<Element>();
        _collectAllComparisonOf(
                "/w/index.php?title=Special%3APrefixIndex&prefix=Comparison&namespace=0&hideredirects=1",
                pageLinks);

        final int wikiPrefixLength = "/wiki/".length();
        int pageIndex = 0; // counts every page, including ignored ones
        int relevantPages = 0;

        for (Element pageLink : pageLinks) {
            String pageName = pageLink.attr("href").substring(wikiPrefixLength);
            System.err.println("(" + pageIndex + ") " + pageName);
            pageIndex++;

            if (excludePCMs.contains(pageName)) {
                System.err.println("Ignoring");
                continue;
            }

            PCMStatistic pageStat = computeStatistic(pageName);

            int tableCount = pageStat.getNumbersOfTables();
            System.err.println("numbers of tables:" + tableCount);
            if (tableCount > 0) {
                relevantPages++;
            }

            // report each table, numbered from 1
            int tableNumber = 0;
            for (CatalogStat tableStat : pageStat.getCatalogStats()) {
                tableNumber++;
                System.err.println("table(" + tableNumber + ")");
                System.err.println("#headers=" + tableStat.getNumbersOfHeaders());
                System.err.println("#products=" + tableStat.getNumbersOfProduct());
            }
            System.err.println("\n\n\n");
        }

        System.err.println("number of relevant PCMs: " + relevantPages);
    }

    /**
     * Accumulates value-pattern counts over all tables of all collected "Comparison"
     * pages that are not listed in {@code excludePCMs}. After each processed page the
     * running totals (not per-page values) are printed to stderr; at the end the number
     * of pages with at least one table is printed.
     *
     * @throws Exception propagated from page collection or statistic computation
     */
    @Test
    public void testQuantitativeStats() throws Exception {
        List<Element> pageLinks = new ArrayList<Element>();
        _collectAllComparisonOf(
                "/w/index.php?title=Special%3APrefixIndex&prefix=Comparison&namespace=0&hideredirects=1",
                pageLinks);

        final int wikiPrefixLength = "/wiki/".length();
        int pageIndex = 0; // counts every page, including ignored ones
        int relevantPages = 0;

        // running totals over every table of every page processed so far
        int totalHeaders = 0;
        int totalProducts = 0;
        int totalBooleans = 0;
        int totalEmpty = 0;
        int totalCells = 0;
        int totalMulti = 0;
        int totalSingle = 0;
        int totalUnknown = 0;
        int totalConstrained = 0;

        for (Element pageLink : pageLinks) {
            String pageName = pageLink.attr("href").substring(wikiPrefixLength);
            System.err.println("(" + pageIndex + ") " + pageName);
            pageIndex++;

            if (excludePCMs.contains(pageName)) {
                System.err.println("Ignoring");
                continue;
            }

            PCMStatistic pageStat = computeStatistic(pageName);
            int tableCount = pageStat.getNumbersOfTables();
            System.err.println("numbers of tables:" + tableCount);
            if (tableCount > 0) {
                relevantPages++;
            }

            for (CatalogStat table : pageStat.getCatalogStats()) {
                int headers = table.getNumbersOfHeaders();
                int products = table.getNumbersOfProduct();
                // header and product-name cells are labels, not effective values
                int labelCells = headers + products;

                totalHeaders += headers;
                totalProducts += products;
                totalBooleans += table.getnBooleans();
                totalEmpty += table.getnEmpty();
                totalMulti += table.getnMultiValues();
                totalSingle += table.getnSingleV() - labelCells;
                totalUnknown += table.getnUnknowns();
                totalConstrained += table.getnConstrained();
                totalCells += (headers * products) - labelCells;
            }

            // the numbers in parentheses are the pattern ids used in computeStatistic
            System.err.println("#headers=" + totalHeaders);
            System.err.println("#products=" + totalProducts);
            System.err.println("#nBooleanValues(1)=" + totalBooleans);
            System.err.println("#nSingleValues(3)=" + totalSingle);
            System.err.println("#nMultiValues(4)=" + totalMulti);
            System.err.println("#nEmpty(6)=" + totalEmpty);
            System.err.println("#nUnknowns(5)=" + totalUnknown);
            System.err.println("#nConstrains(2)=" + totalConstrained);
            System.err.println("\n\n\n");
            System.err.println("#nTotalValues=" + totalCells);

            int sumOfPatterns = totalBooleans + totalSingle + totalMulti + totalEmpty + totalUnknown
                    + totalConstrained;
            System.err.println("#nTotalValues (bis)=" + sumOfPatterns);
        }

        System.err.println("number of relevant PCMs: " + relevantPages);
    }

    /**
     * Sums the per-table counters of {@code stat} over all of its tables and prints
     * the totals to stderr. The total number of values is the sum of
     * headers * products over the tables.
     *
     * @param stat statistics of one page
     */
    private void analyzeStat(PCMStatistic stat) {
        int totalHeaders = 0;
        int totalProducts = 0;
        int totalBooleans = 0;
        int totalEmpty = 0;
        int totalCells = 0;
        int totalMulti = 0;
        int totalSingle = 0;
        int totalUnknown = 0;
        int totalConstrained = 0;

        for (CatalogStat table : stat.getCatalogStats()) {
            int headers = table.getNumbersOfHeaders();
            int products = table.getNumbersOfProduct();

            totalHeaders += headers;
            totalProducts += products;
            totalBooleans += table.getnBooleans();
            totalEmpty += table.getnEmpty();
            totalMulti += table.getnMultiValues();
            totalSingle += table.getnSingleV();
            totalUnknown += table.getnUnknowns();
            totalConstrained += table.getnConstrained();
            totalCells += headers * products;
        }

        // the numbers in parentheses are the pattern ids used in computeStatistic
        System.err.println("#headers=" + totalHeaders);
        System.err.println("#products=" + totalProducts);
        System.err.println("#nBooleanValues(1)=" + totalBooleans);
        System.err.println("#nSingleValues(3)=" + totalSingle);
        System.err.println("#nMultiValues(4)=" + totalMulti);
        System.err.println("#nEmpty(6)=" + totalEmpty);
        System.err.println("#nUnknowns(5)=" + totalUnknown);
        System.err.println("#nConstrains(2)=" + totalConstrained);
        System.err.println("\n\n\n");
        System.err.println("#nTotalValues=" + totalCells);
    }

    /**
     * Computes and prints the table statistics of the single page
     * "Comparison_of_Adobe_Flex_charts".
     *
     * @throws Exception propagated from the statistic computation
     */
    @Test
    public void testAdobe() throws Exception {
        final String pageName = "Comparison_of_Adobe_Flex_charts";
        analyzeStat(computeStatistic(pageName));
    }

    /**
     * Extracts the content of a Wikipedia page, converts it to HTML, turns every
     * {@code <table>} element into catalogs and counts, per catalog, how many product
     * values fall into each value pattern.
     * <p>
     * Side effects: writes the raw content to {@code output/<page>.wikipedia} and the
     * parsed HTML to {@code output/<page>.html}. Both files are written as UTF-8 so the
     * result does not depend on the platform default charset.
     *
     * @param wikiPageName page name as it appears after "/wiki/" in the page URL
     * @return the number of catalogs found together with one {@code CatalogStat} per catalog
     * @throws Exception if extraction, conversion or file writing fails; an
     *         {@code AssertionError} is raised when the extracted content or the HTML is null
     */
    private PCMStatistic computeStatistic(String wikiPageName) throws Exception {
        WikiPageContentExtractor wikipediaExtractor = new WikiPageContentExtractor();
        String content = wikipediaExtractor.getContent(wikiPageName);
        assertNotNull(content);
        // explicit encoding: Wikipedia text regularly contains non-ASCII characters
        FileUtils.writeStringToFile(new File("output/" + wikiPageName + ".wikipedia"), content, "UTF-8");

        WikiTabularExtractor wikiTabExtractor = new WikiTabularExtractor();
        String htmlContent = wikiTabExtractor.run(content, "" + wikiPageName);
        assertNotNull(htmlContent);

        Document doc = Jsoup.parse(htmlContent);
        FileUtils.writeStringToFile(new File("output/" + wikiPageName + ".html"), doc.toString(), "UTF-8");

        // FIXME what about pages without sections? Tables are currently taken from the whole document.
        List<Catalog> catalogs = new ArrayList<Catalog>();
        for (Element table : doc.select("table")) {
            treatTable(table, catalogs);
        }

        Collection<CatalogStat> catalogStats = new ArrayList<CatalogStat>();
        for (Catalog catalog : catalogs) {
            catalogStats.add(toCatalogStat(catalog));
        }

        return new PCMStatistic(catalogs.size(), catalogStats);
    }

    /**
     * Builds the statistics of one catalog: header and product counts plus the number
     * of product values matching each value pattern. A value is counted once, for the
     * first pattern that matches in the order checked below.
     */
    private CatalogStat toCatalogStat(Catalog catalog) {
        int nUncertain = 0;
        int nBoolean = 0;
        int nEmpty = 0;
        int nMulti = 0;
        int nSingleV = 0;
        int nUnknowns = 0;
        int nConstrained = 0;

        for (Product product : catalog) {
            for (String val : product.getAllValues()) {
                if (VariabilityPatternsUtils.isUncertain(val)) {
                    nUncertain++;
                } else if (VariabilityPatternsUtils.isYes(val) || VariabilityPatternsUtils.isNot(val)) { // pattern #1
                    nBoolean++;
                } else if (VariabilityPatternsUtils.isBlanked(val)) { // pattern #6
                    nEmpty++;
                } else if (VariabilityPatternsUtils.isMultiValues(val)) { // pattern #4
                    nMulti++;
                } else if (VariabilityPatternsUtils.isUnknowns(val)) { // pattern #5
                    nUnknowns++;
                } else if (VariabilityPatternsUtils.isConstrained(val)) { // pattern #2
                    nConstrained++;
                } else { // pattern #3: anything else counts as a single value
                    nSingleV++;
                }
            }
        }

        CatalogStat catalogStat = new CatalogStat();
        catalogStat.setNHeaders(catalog.getHeaders().size());
        catalogStat.setNProduct(catalog.size());
        catalogStat.setnConstrained(nConstrained);
        catalogStat.setnUnknowns(nUnknowns);
        catalogStat.setnSingleV(nSingleV);
        catalogStat.setnMultiValues(nMulti);
        catalogStat.setnEmpty(nEmpty);
        catalogStat.setnBooleans(nBoolean);
        catalogStat.setnUncertains(nUncertain);
        return catalogStat;
    }

    @Test
    public void test() throws Exception {

        final String[] _EMPTY = new String[] {};

        String wikiPageName = "Comparison_of_Java_virtual_machines";
        //"Comparison_of_free_web_hosting_services"; 
        //"Comparison_of_free_and_open-source_software_licenses" ;  
        //"Comparison_of_file_systems" ;
        //"Comparison_of_Subversion_clients"; 
        //"Comparison_of_Prolog_implementations" ;    
        //"Comparison_of_BitTorrent_clients" ; 
        //"Comparison_of_FTP_client_software" ; 
        //"Comparison_of_hardware_random_number_generators" ; //"Comparison_of_image_formats" ; //"Comparison_of_video_editing_software" ; // "Comparison_of_video_codecs" ; //"Comparison_of_container_formats" ; 
        //"Comparison_of_video_converters" ;

        /*
         * Scoping directives here
         */

        _shell.setVerbose(false);

        /****
         * DONE 
         */

        String[] excludeColumnNames = { "Latest supported Java version", "Other", "Status", "Latest release date",
                "Latest stable version", "First public release", "Creator", "Name" }; // {} ; 
        String[] excludeProductNames = { "IKVM.NET" };
        postTreatFM(
                executeWikipediaToFML("Comparison_of_Java_virtual_machines", excludeColumnNames,
                        excludeProductNames, new String[] {}),
                new String[] { "Under development", "Preliminary ARMv5 support", "On Jailbroken iPhone", "Port",
                        "With third-party patches", "Java true.6" });

        postTreatFM(executeWikipediaToFML("Comparison_of_SSH_clients",
                new String[] { "Name", "iPhone,{{Noteiphone}} iPod Touch, iPad", "Based on", "Latest release",
                        "Status", "First release", "Developer", "Compromised by the NSA <ref>...</ref>",
                        "Port forwarding", "SOCKS{{NoteSOCKS}}", "VPN{{NoteVPN}}", "Port forwarding",
                        "SOCKS{{NoteSOCKS}}", "VPN{{NoteVPN}}", "Session multiplexing{{Notemux}}", "Kerberos",
                        "IPv6", "Terminal", "SFTP/SCP", "Proxy client{{NoteconnectViaProxy}}" },
                new String[] {}, new String[] { "Features" }), new String[] { "Port forwarding" });

        postTreatFM(executeWikipediaToFML("Comparison_of_audio_synthesis_environments",
                new String[] { "Primary Purpose(s)", "Most recent update", "First release date", "Cost", "Creator",
                        "Most recent version", "Name", "Other technical features",
                        "Programming (plugin) API language(s)" // due to problem with multi-features
                }, _EMPTY, new String[] { "Data interface methods" }), _EMPTY,
                new String[] { "Programming language features", "General", "Technical" });

        postTreatFM(executeWikipediaToFML("Comparison_of_HTML_editors",

                new String[] { "Website", "Editor", "Creator", "Version", "Cost (USD)", "XHTML" },
                new String[] { "Maqetta", "Brackets", }, new String[] { "HTML/XHTML specification support" }),
                _EMPTY);

        postTreatFM(executeWikipediaToFML("Comparison_of_photo_gallery_software", new String[] { "Name" }, _EMPTY,
                new String[] {

                        "Desktop applications" }),
                _EMPTY);

        postTreatFM(executeWikipediaToFML("Comparison_of_Internet_Relay_Chat_clients",
                new String[] { "Primary developers", "Client"

                }, _EMPTY, new String[] { "Release history" }), _EMPTY);

        postTreatFM(executeWikipediaToFML("Comparison_of_YouTube_downloaders", _EMPTY, _EMPTY, _EMPTY), _EMPTY);

        // 8. 
        postTreatFM(
                executeWikipediaToFML("Comparison_of_file_comparison_tools",
                        new String[] { "Name", "Other platforms", "Creator", "Cost", "First public release date",
                                "Year of latest stable version", "Max Supported File Size" },
                        _EMPTY, _EMPTY),
                new String[] { "both", "Both" });

        postTreatFM(executeWikipediaToFML("Comparison_of_iOS_e-book_reader_software",
                new String[] { "Product", "total # of formats" }, _EMPTY, new String[] { "Special features" }),
                new String[] { "as of v2.0", "Unlimited" });

        postTreatFM(
                executeWikipediaToFML("Comparison_of_iPod_managers", new String[] { "Manager", "Creator(s)",
                        "First public release date", "Latest stable version" }, _EMPTY, _EMPTY),
                new String[] { "", "" });

        postTreatFM(
                executeWikipediaToFML("Comparison_of_image_viewers", new String[] { "Name", "other / special",
                        "Program", "View functions", "Other functions 3", "Price", "Comic book" }, _EMPTY, _EMPTY),
                new String[] { "", "" });

        // 12.
        postTreatFM(executeWikipediaToFML("Comparison_of_mobile_Internet_Relay_Chat_clients",
                new String[] { "Client", "Primary developers", "Website" }, _EMPTY,
                new String[] { "Release history" }), new String[] { "", "" });

        // 13.
        Map<String, String> renamings = new HashMap<String, String>();
        renamings.put("See also", "Comparison_of_help_desk_issue_tracking_software");
        postTreatFM(
                executeWikipediaToFML("Comparison_of_help_desk_issue_tracking_software",
                        new String[] { "System", "Creator", "Launch Date" }, _EMPTY, _EMPTY, renamings),
                new String[] { "" }, new String[] { "License", "Back end", "Implementation language(s)" });

        postTreatFM(
                executeWikipediaToFML("Comparison_of_relational_database_management_systems",
                        new String[] { "Maintainer", "First public release date", "Latest stable version",
                                "Latest release date", },
                        _EMPTY, new String[] { "Limits", "Data types" }),
                new String[] { "" });

        // 15.
        postTreatFM(
                executeWikipediaToFML("Comparison_of_project_management_software", new String[] { "Software" },
                        _EMPTY, _EMPTY),
                new String[] { "unk", "Programming Language" }, new String[] { "License", "Programming language" });

        // 18.

        postTreatFM(executeWikipediaToFML("Comparison_of_open-source_operating_systems",
                new String[] { "Name", "Kernel type", "Oldest non-EOL version{{Notea1}}", "Kernel thread support",
                        "Forks", "other special file system features", "Others", "other" }

                , _EMPTY, _EMPTY), _EMPTY);

        // 19.      
        postTreatFM(executeWikipediaToFML(
                "Comparison_of_remote_desktop_software", new String[] { "Software", "Creator",
                        "First public release date", "Latest stable version", "Maximum simultaneous connections", },
                _EMPTY, _EMPTY), _EMPTY);

        // 20. 
        postTreatFM(
                executeWikipediaToFML("Comparison_of_video_converters",
                        new String[] { "Developer", "Video converter", "Website", "Input" }, _EMPTY, _EMPTY),
                _EMPTY);

        // 21.
        postTreatFM(executeWikipediaToFML("Comparison_of_Subversion_clients",
                new String[] { "Current version", "Last release date", "Name" }, _EMPTY,
                new String[] { "Standalone Subversion clients comparison table" }), _EMPTY);

        // 22. (not very satisfied with the quality)
        postTreatFM(
                executeWikipediaToFML("Comparison_of_audio_formats",
                        new String[] { "Codec", "Audio compression format", "Creator", "First public release date",
                                "Latest stable version", "Sample Rate", "Bit Rate", "Bit rate", "Latency",
                                "Bits per sample", "Algorithm" },
                        _EMPTY, new String[] { "Technical details" }),
                _EMPTY, _EMPTY)
        // new String[] {"General information"})
        ;

        // 23
        postTreatFM(
                executeWikipediaToFML("Comparison_of_documentation_generators",
                        new String[] { "Name", "", "Creator", "First public release date", "Latest stable version",
                                "Other features" },
                        _EMPTY, _EMPTY),
                new String[] { "with Plugin2" }, new String[] { "Software license" });

        // 24. 
        postTreatFM(
                executeWikipediaToFML("Comparison_of_desktop_publishing_software",
                        new String[] { "Desktop publishing software", "Developer(s)", "Latest stable version",
                                "Initial release", "Other" },
                        _EMPTY, new String[] { "Output format" }),
                new String[] { "supported versions<11", "supported versions<7", "supported versions<8" });

        // 17.

        postTreatFM(executeWikipediaToFML("Comparison_of_reference_management_software",
                new String[] { "Software", "Developer", "First public release", "Latest stable version",
                        "Cost (USD)", "Notes", "Other", "RTF scan<ref ...>...</ref>", },
                _EMPTY, new String[] { "Import file formats" }), _EMPTY);

        // 26
        postTreatFM(executeWikipediaToFML("Comparison_of_enterprise_bookmarking_platforms",
                new String[] { "Notes", "Latest stable release", "Developed by", "Software" }, _EMPTY, _EMPTY),
                _EMPTY);

        // 27
        postTreatFM(executeWikipediaToFML("Comparison_of_file_managers",
                new String[] { "First public version (date)", "Latest stable version (date, number)",
                        "Content dependent <ref ...>...</ref>", "File manager", "File Manager", "Creator"

                }, _EMPTY, _EMPTY),
                new String[] { "Plugin", "With helper apps", "Needs [[POSIX]]-compliant platform POSIX",
                        "Needs [[KDE]] KDE", "Needs [[X Window System|X]]X" });

        // 28
        postTreatFM(executeWikipediaToFML("Comparison_of_disk_encryption_software",
                new String[] { "Name", "Encryption", "Developer", "First released",

                }, _EMPTY, _EMPTY), new String[] { "Last update to web site 2009-07-02" });

        // 16.
        postTreatFM(executeWikipediaToFML("Comparison_of_mail_servers", new String[] { "Other" }, _EMPTY, _EMPTY),
                new String[] { "zzzzzUsers", "zzzzzFeatures", "zzzzzStorage", "zzzzzServer OS support", "V 5.0",
                        "with patch" });

        // 25

        postTreatFM(executeWikipediaToFML("Comparison_of_genealogy_software",
                new String[] { "", "Software", "Name", "Latest version", "Latest release" }, _EMPTY, _EMPTY),
                new String[] { "Winebbb", "Javaaaa", "(v4.x/5.x)", "(v4.x)", "(v5.x)", " public<br />test" });

        // 29.       
        postTreatFM(executeWikipediaToFML("Comparison_of_Internet_forum_software",
                new String[] { "Creator", "Latest release date", "Current stable version", "" }, _EMPTY, _EMPTY),
                new String[] { "Planned (Version 8.0)", "Planned", "Full", "Session", "Plugin" });

        // 30.
        postTreatFM(executeWikipediaToFML("Comparison_of_SSH_servers",
                new String[] { "Name", "Last release date", "iOS: iPhone,{{Noteiphone}} iPod Touch",
                        "Official web page", "Developer", "Last release", "First release date" },
                _EMPTY, _EMPTY), _EMPTY);

        // have to FIX it 
        //executeWikipediaToFML("Comparison_of_Linux_distributions", _EMPTY, _EMPTY, _EMPTY);

        // have to fix it
        /*
        executeWikipediaToFML("Comparison_of_operating_systems", new String[] {
              "Latest stable version", "Latest release date", "First public release"
        }, 
                  
              _EMPTY,
                  
              _EMPTY);*/

        /****
         * 
         * END (done)
         * 
         */

        /**
         * TODO
         */

        /* should be easy
        postTreatFM(
        executeWikipediaToFML("Comparison_of_disc_authoring_software", _EMPTY, _EMPTY, _EMPTY), 
        _EMPTY); */

        /*   problem with sections   
        postTreatFM( executeWikipediaToFML("Comparison_of_file_archivers", new String[] {
        "Creator(s)", "First public release date", "Latest stable version", "", 
        "Unicode file / directory names{{unicode-names5}}"
        }, _EMPTY, new String[] {"Writing"}), 
        new String[]{"unk", "some formats", "Separate"}); */

        // TODO interesting have to hack here
        /*
        postTreatFM(
        executeWikipediaToFML("Comparison_of_instant_messaging_clients", new String[] {"Author, creator", "Latest stable version", "First public release"}, new String[] {
              "XMPP-related features", "Features",  "Features", "General information", "Protocol support"
        }, _EMPTY),
        _EMPTY)
        ;*/
        // TODO FIXME (scoping) 
        /*
        postTreatFM( 
        executeWikipediaToFML("Comparison_of_download_managers", new String[] {"Latest stable release",
              "Adware, Malware & Spyware<ref ...>...</ref>"
                  
                  
        }, _EMPTY, _EMPTY),
        _EMPTY)
        ;*/

        /* may be fixed
        postTreatFM( 
        executeWikipediaToFML("Comparison_of_email_clients", new String[] {"Client", "Creator",
        "TLS?{{NoteIMAPPOP}}", "TLS?{{NoteIMAPPOP}}", "TLS?{{NoteIMAPPOP}}", "forced recode {{Noterecode}}", 
        "MD5 APOP?{{NoteAPOP}}", "OCSP?{{NoteOCSP}}", "" 
            
        }, _EMPTY, new String[] {
           "Release history"   
        }), _EMPTY);*/
        /*
        postTreatFM( 
        executeWikipediaToFML("Comparison_of_document_markup_languages", new String[] {"Language", "Creator", "First public release date", "Editor"}, 
                  
              _EMPTY, _EMPTY), 
        _EMPTY
        );*/

        /* 3 for free 
        postTreatFM(
        executeWikipediaToFML("Comparison_of_desktop_application_launchers", new String[]{"Creator",
        "Latest stable version", "Latest release date", ""      
        }, _EMPTY, new String[] {"Linux"}),
        _EMPTY);*/

        /**
         * 
         * end (TODO)
         * 
         */

        /**
         * 
         * EXOTIC
         * 
         */

        // too poor
        /*
        postTreatFM( 
        executeWikipediaToFML("Comparison_of_geographic_information_systems_software", new String[] {"GIS software"}, _EMPTY, 
              new String[]{"Mobile clients", "Pure web client", "Pure server"}
              ), _EMPTY);*/

        // hack due to parsers :( 
        /*postTreatFM(
         executeWikipediaToFML("Comparison_of_behavioral_experiment_software", new String[] {"Name"}, _EMPTY, _EMPTY), 
         _EMPTY);*/

        // exotic
        /*
        postTreatFM(
        executeWikipediaToFML("Comparison_of_birth_control_methods", _EMPTY, _EMPTY, _EMPTY),
        _EMPTY);*/

        // exotic and poorly structured
        /*
        postTreatFM(      
        executeWikipediaToFML("Comparison of accounting software", _EMPTY, _EMPTY, new String[] {
              "Proprietary software", "Latest stable version", "Latest release date", "Stable release date"
        }),
        new String[] {""}
        );*/

        /* difficult to hack the sections
        postTreatFM(
        executeWikipediaToFML("Comparison_of_open-source_software_hosting_facilities", new String[] {"Users", "Established",
        "Projects", "Prominent projects", "Name"
        }, _EMPTY, new String[] {"Popularity"}),
        _EMPTY)
        ;
        */

        /* rather poor
        postTreatFM(         
        executeWikipediaToFML("Comparison_of_webmail_providers", 
        new String[]{
        //"Product", "Service name", "Owner", "Release", "Attachment limit",
        //"Language support", "URL"
        }, 
        new String[]{
        //"Alternative Fuse"
        }, 
            
        new String[] {"General information", "Language support", "Unique features"}), _EMPTY);
            
        */

        /* BENCHMARK
        postTreatFM(
        executeWikipediaToFML("Comparison_of_Android_devices", new String[] {
        "Android version", "Name", "Maker", "GPU", "chipset", "Capacities", "Camera(s)", "Special?features"
        }, _EMPTY, new String[] {
        //"Unofficial and community ports", 
        //"Officially released",
        //"Future", 
            
        }), _EMPTY);*/

        /* poor and structure hard to parse
        postTreatFM(
        executeWikipediaToFML("Comparison_of_BSD_operating_systems", new String[] {
        "First public release", 
        "Version", 
        "First release date"
        }, _EMPTY, _EMPTY),
        _EMPTY);*/

        /**
         *  END (exotic)
         * 
         */

        /*
         * OK
         * Poorly structured (Linux, Windows, ..., editors...) 
         * -- note that there are ordered (implicit hierarchy in the order IMO) 
        executeWikipediaToFML("Comparison_of_XML_editors", 
        new String[]{"Name", "Version", "Price for commercial version (exc. VAT)"}, 
        _EMPTY, _EMPTY);
        */

        // have to fix it
        // executeWikipediaToFML("Comparison of audio player software", _EMPTY, _EMPTY, _EMPTY);

        // executeWikipediaToFML("Comparison_of_boot_loaders", _EMPTY, _EMPTY, _EMPTY);

        // executeWikipediaToFML("Comparison_of_brainwave_entrainment_software", _EMPTY, _EMPTY, _EMPTY);

        // FIXIT
        //executeWikipediaToFML("Comparison of business integration software ", _EMPTY, _EMPTY, _EMPTY);

        // scope and some directives (OR / XOR)
        // executeWikipediaToFML("Comparison_of_BPEL_engines", _EMPTY, _EMPTY, _EMPTY);

        //executeWikipediaToFML("Comparison of chess video games", _EMPTY, _EMPTY, _EMPTY);

        //executeWikipediaToFML("Comparison_of_code_generation_tools", _EMPTY, _EMPTY, _EMPTY);

        // executeWikipediaToFML("Comparison_of_command_shells", _EMPTY, _EMPTY, _EMPTY);

        //executeWikipediaToFML("Comparison_of_continuous_integration_software", _EMPTY, _EMPTY, _EMPTY);

        // executeWikipediaToFML("Comparison_of_data_modeling_tools", _EMPTY, _EMPTY, _EMPTY);

        // have to fix executeWikipediaToFML("Comparison of database tools", _EMPTY, _EMPTY, _EMPTY);

        //executeWikipediaToFML("Comparison of debuggers", _EMPTY, _EMPTY, _EMPTY);

        //executeWikipediaToFML("Comparison_of_defragmentation_software", _EMPTY, _EMPTY, _EMPTY);

        // executeWikipediaToFML("Comparison_of_dental_practice_management_software", _EMPTY, _EMPTY, _EMPTY);

        //executeWikipediaToFML("Comparison_of_development_estimation_software", _EMPTY, _EMPTY, _EMPTY);

        //executeWikipediaToFML("Comparison_of_digital_audio_editors", _EMPTY, _EMPTY, _EMPTY);

        //executeWikipediaToFML("Comparison_of_file_verification_software", new String[] {"Developer", "First public release", "Latest stable date (version)"}, _EMPTY, _EMPTY);

        // FIXME executeWikipediaToFML("Comparison of free and open-source software licenses ", _EMPTY, _EMPTY, _EMPTY);

        // exotic actually: I would give up
        /*
        executeWikipediaToFML("Comparison_of_container_formats", _EMPTY, 
        _EMPTY, 
        new String[] {
        "Caption (Subtitle) formats supported"
        });*/

        /*FIXME
         * executeWikipediaToFML("Comparison of hex editors ", new String[] {
        "Cost", "Latest version", "Latest release date"
        }, _EMPTY, _EMPTY);*/

        // does not scale
        /*
        executeWikipediaToFML("Comparison_of_issue-tracking_systems", 
        new String[] {"Launch Date", "Refs", "Creator"}, // "System", 
        _EMPTY, _EMPTY);*/

        // TODO http://en.wikipedia.org/wiki/Comparison_of_revision_control_software

        // Comparison of regular expression engines : exotic

        // http://en.wikipedia.org/wiki/Comparison_of_mobile_operating_systems exotic
        // Comparison of mobile phone standards technical, poor, exotic
        // Comparison of movie cameras  bench style
        // Comparison of online backup services limited and bench style
        // Comparison of online music stores flattened 
        // Comparison of massively multiplayer online role-playing games  benchmark like
        // Comparison of memory cards bench like
        // Comparison of metadata editors (exotic) 
        // Comparison of management accounting and financial accounting : plain text
        // Comparison of lightweight web servers erroneous
        // Comparison of macro recorder software very poor
        // Comparison of karate styles  plain text
        // Comparison of layout engines (Cascading Style Sheets) (bench style)
        // Comparison of layout engines (Document Object Model) same as above
        // http://en.wikipedia.org/wiki/Comparison_of_integrated_development_environments exotic
        // Comparison of instant messaging protocols : poor content/structure
        // Comparison of iSCSI targets (too poor)
        // Comparison of high definition optical disc formats : poor
        // Comparison of hub gears bench style
        // Comparison of hardware random number generators  pure text and poor
        // Comparison of graphics file formats poor and poorly structured
        // Comparison of free web hosting services very bof
        // Comparison of file sharing applications  very pooor
        // Comparison of file synchronization software very poor

        // Comparison of free credit report websites : very poor
        // Comparison of free software eCommerce web application frameworks  : very very poor
        // Comparison of firewalls very pooor
        // very poor Comparison of file hosting services 

        // Comparison of feed aggregators (seems that the data are really "in progress" lots of blanked cells)

        // Comparison of executable file formats (very poor)
        // Comparison of facial image datasets (too poor)

        // Comparison of early HTML editors very poor
        // Comparison of eDonkey software (exotic structure)

        // Comparison of e-book readers (benchmark)
        // Comparison of e-book formats (plain text)
        // Comparison of dosimeters benchmark style 
        // Comparison of domestic robots exotic and very poor
        // Comparison of data serialization formats : too poor
        // Comparison of database access : too poor

        // Comparison of crowd funding services very pooor
        // Comparison of cryptographic hash functions too technical with numerics values
        // Comparison of dance video games too poor

        // Comparison of cognitive architectures : very incomplete

        // can be fixed: Comparison of cluster software but rather poor

        // Comparison of X Window System desktop environments : exotic
        // Comparison of antivirus software : exotic but can be adapted 
        // Comparison of archive formats : poor
        // Comparison of browser synchronizers : exotic, poor

        // ridiculous: http://en.wikipedia.org/wiki/Comparison_of_free_software_eCommerce_web_application_frameworks

        // Comparison of TeX editors exotic structure + poor content

        // Comparison of VMware Fusion and Parallels Desktop  : interesting but poorly structured

        // Comparison of JavaScript frameworks : poorly structured and exotic structure
        // Comparison of MIDI standards : nothing
        // Comparison of Macintosh models : close to very technical description (numbers)
        // Comparison of Nikon DSLR cameras : same as above 
        // http://en.wikipedia.org/wiki/Comparison_of_MySQL_database_engines : too poor
        // bof poorly structured 
        // executeWikipediaToFML("Comparison_of_distributed_file_systems", _EMPTY, _EMPTY, _EMPTY);
        // very poor: http://en.wikipedia.org/wiki/Comparison_of_Skype_recorders
        // bof http://en.wikipedia.org/wiki/Comparison_of_PSA_systems
        // poorly structured http://en.wikipedia.org/wiki/Comparison_of_defragmentation_software
        // poorly structured http://en.wikipedia.org/wiki/Comparison_of_DEX_software

        // poor and numerics all around: 
        //http://en.wikipedia.org/wiki/Comparison_of_Toyota_hybrids

        /* poorly structured
        executeWikipediaToFML("Comparison_of_antivirus_software", 
        _EMPTY, 
        _EMPTY, _EMPTY);
        */

        // no interest: http://en.wikipedia.org/wiki/Comparison_of_AMD_CPU_microarchitectures
        // very poorly structured: http://en.wikipedia.org/wiki/Comparison_of_application_virtual_machines

        //executeWikipediaToFML("Comparison_of_file_archivers", _EMPTY, _EMPTY, _EMPTY);

    }

    /**
     * Post-processes a merged feature model, prints implication-graph statistics on
     * {@code System.err} and serializes the model under {@link #OUTPUT_DIRECTORY}.
     *
     * <p>Two files are written, both named after {@code fmMerged.getIdentifier()}:
     * {@code <id>.fmlbdd} (the {@code FMFormat.FMLBDD} conversion) and {@code <id>.fml}
     * (the model's string form). The {@code .fmlbdd} file is then parsed back and the
     * result is asserted to be non-null.
     *
     * @param fmMerged    the merged feature model; it is modified in place
     * @param negatedFts  features to force to false: each one is conjoined (negated) onto the
     *                    formula, removed from the model and added as a negated constraint
     * @param positiveFts features to force to true: each one is conjoined onto the formula,
     *                    marked mandatory and added as a constraint
     * @throws Exception if any of the underlying model, file or parsing operations fails
     */
    private void postTreatFM(FeatureModelVariable fmMerged, String[] negatedFts, String[] positiveFts)
            throws Exception {

        // 1. negated features (irrelevant values of the cell)
        for (String negatedFt : negatedFts) {
            fmMerged.getFormula()
                    .andWith(new Formula<String>(_builder.nget(negatedFt), Arrays.asList(negatedFt), _builder));
            fmMerged.removeFeature(negatedFt);
            fmMerged.addConstraint(new Expression<String>(negatedFt).not());
        }

        // 2. positive features (force them to be mandatory)
        for (String positiveFt : positiveFts) {
            fmMerged.getFormula()
                    .andWith(new Formula<String>(_builder.get(positiveFt), Arrays.asList(positiveFt), _builder));
            fmMerged.setMandatory(fmMerged.getFeature(positiveFt));
            fmMerged.addConstraint(new Expression<String>(positiveFt));
        }

        System.err.println("cliques: " + fmMerged.cliques().names());

        int nFts = fmMerged.features().size();
        System.err.println("#fts " + nFts);

        // 3. statistics: total number of outgoing implication edges over all vertices
        ImplicationGraph<String> big = fmMerged.computeImplicationGraph();
        System.err.println("#IG (edges) " + big.edges().size());
        int totalOutgoing = 0;
        for (String ft : big.vertices()) {
            totalOutgoing += big.outgoingEdges(ft).size();
        }

        // Integer average as before; a model without features would otherwise
        // abort the whole run with an ArithmeticException (division by zero).
        int average = (nFts == 0) ? 0 : totalOutgoing / nFts;
        System.err.println("(average) " + average);
        System.err.println("(rfm) " + fmMerged);

        // 4. serialization (BDD-based format and plain FML)
        String bddContent = fmMerged.convert(FMFormat.FMLBDD);

        String wikiPageName = fmMerged.getIdentifier();
        String bddPath = OUTPUT_DIRECTORY + wikiPageName + ".fmlbdd";
        FileUtils.writeStringToFile(new File(bddPath), bddContent);
        FileUtils.writeStringToFile(new File(OUTPUT_DIRECTORY + wikiPageName + ".fml"), fmMerged + "");

        // 5. round trip: the file just written must be loadable again
        _shell.reset();
        FeatureModelVariable fmv1 = FMBuilder.parseFMLBDD(bddPath, _builder);
        assertNotNull(fmv1);

        _shell.reset();

    }

    /**
     * Builds a small hand-written feature model (the "WikiMatrix" example) and converts it
     * with {@code toGeneralizedNotationWithoutOR()}.
     *
     * <p>The converted model is printed on {@code System.err} for manual inspection and is
     * asserted to be non-null, so the test no longer passes when the conversion yields nothing.
     *
     * @throws Exception if the model cannot be built or converted
     */
    @Test
    public void testGeneralizedNotation() throws Exception {

        // hierarchy first, then the cross-tree constraints
        String fmSpecification = "FM ("
                + "WikiMatrix: General ; "
                + "General: (LicenseCostFee|Unicode)+ [Storage] [Language] License RSS ;"
                + "LicenseCostFee: (DifferentLicences|US10|Community)? ;"
                + "Language: (Java|Python|PHP|Perl) ; "
                + "License: (Commercial|GPL|GPL2|Nolimit) ;"
                + "Storage: (Files|Database|FileRCS) ;"
                + "(Java -> Database);"
                + "(Nolimit -> !Unicode);"
                + "(Nolimit -> LicenseCostFee); "
                + "(GPL2 -> Storage);"
                + "(DifferentLicences -> GPL2);"
                + "(GPL2 -> PHP);"
                + "(DifferentLicences -> Database);"
                + "(GPL -> Unicode);"
                + "(Community -> GPL);"
                + "(Storage <-> Unicode);"
                + "(Python -> GPL);"
                + "(Files -> !LicenseCostFee);"
                + "(Community <-> FileRCS);"
                + "(Commercial <-> US10);"
                + "(Python -> Files);"
                + "(FileRCS <-> Perl); "
                + "(Unicode <-> Language);"
                + "(US10 <-> Java);"
                + ")";

        FeatureModelVariable fmv1 = new FeatureModelVariable("", FMBuilder.getInternalFM(fmSpecification));

        FeatureModelVariable fmv1bis = fmv1.toGeneralizedNotationWithoutOR();
        assertNotNull(fmv1bis);
        System.err.println("fmv1bis=" + fmv1bis);
    }

    /**
     * Prints implication-graph statistics for every serialized feature model found in a
     * hard-coded directory.
     *
     * <p>Every file whose name contains {@code "fmlbdd"} is parsed; for each model the number
     * of features, the number of implication-graph edges and the (integer) average number of
     * outgoing edges per feature are printed on {@code System.err}. Finally the overall ratio
     * "outgoing edges / features" over all models is printed as a {@code double}.
     *
     * <p>NOTE(review): the directory is an absolute path on the original author's machine;
     * on any other machine the test now fails with an explicit message instead of a
     * {@link NullPointerException}.
     *
     * @throws Exception if a model cannot be parsed or analysed
     */
    @Test
    public void testStatisticsBIG() throws Exception {

        final String OUTPUT_DIR = "/Users/macher1/Documents/RESEARCH/INPROGRESS/ICSE2014-KSynthesis/PCMs/";

        File dir = new File(OUTPUT_DIR);
        File[] fileFMs = dir.listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.getName().contains("fmlbdd");
            }
        });
        // listFiles returns null (not an empty array) when the path is not a readable directory
        assertNotNull("Cannot list feature models, not a readable directory: " + OUTPUT_DIR, fileFMs);

        _shell.setVerbose(false);
        int i = 1;
        int totalNft = 0;
        int totalEdges = 0;
        for (File fileFM : fileFMs) {
            _shell.reset();
            FeatureModelVariable fmv1 = FMBuilder.parseFMLBDD(fileFM.getAbsolutePath(), _builder);
            assertNotNull(fmv1);

            System.err.println("====== " + i++ + " ===========");

            int nFts = fmv1.features().size();
            System.err.println("#fts " + nFts);

            ImplicationGraph<String> big = fmv1.computeImplicationGraph();

            System.err.println("#IG (edges) " + big.edges().size());
            // sum of the out-degrees of all vertices of the implication graph
            int t = 0;
            for (String ft : big.vertices()) {
                t += big.outgoingEdges(ft).size();
            }

            // a model without features must not abort the whole run (division by zero)
            int nAverage = (nFts == 0) ? 0 : t / nFts;
            System.err.println("(average) " + nAverage);

            _shell.reset();

            totalNft += nFts;
            totalEdges += t;

        }

        // overall ratio; floating-point division, so this prints NaN when no feature was seen
        System.err.println("" + (double) ((double) totalEdges / (double) totalNft));

    }

    /**
     * Extracts the comparison tables of a Wikipedia page and synthesizes one merged feature
     * model out of them.
     *
     * <p>Steps, in order:
     * <ol>
     * <li>obtain the page content ({@code WikiPageContentExtractor}) and turn it into HTML
     *     ({@code WikiTabularExtractor}); the HTML is also dumped to
     *     {@code output/<wikiPageName>.html};</li>
     * <li>build one {@code Catalog} per {@code <table>} element (see {@code treatTable});</li>
     * <li>drop the excluded columns from every catalog;</li>
     * <li>turn every product of every non-excluded catalog into a feature model, apply the
     *     renamings and skip excluded products;</li>
     * <li>when there is more than one catalog, aggregate the feature models sharing the same
     *     identifier;</li>
     * <li>write the per-product models to
     *     {@code OUTPUT_DIRECTORY + wikiPageName + "_FMLMergingScript.fml"} (debug aid);</li>
     * <li>merge everything with {@code FMLMergerBDD.union} and make the features named after
     *     the non-excluded catalogs mandatory.</li>
     * </ol>
     *
     * @param wikiPageName        name of the Wikipedia page; also used for the output file names
     *                            and as identifier of the returned model
     * @param excludeColumnNames  header names of the columns to remove from each catalog
     * @param excludeProductNames identifiers of the product feature models to ignore
     * @param excludeSectionNames names of the catalogs to ignore
     * @param renamings           feature renamings (old name -> new name) applied to every
     *                            product feature model
     * @return the merged feature model, with its identifier set to {@code wikiPageName}
     *         (the experimental branch guarded by {@code _SAT_EVALUATION_2} would return
     *         {@code null}, but that flag is hard-coded to {@code false})
     * @throws Exception if extraction, file output or merging fails
     */
    private FeatureModelVariable executeWikipediaToFML(String wikiPageName, String[] excludeColumnNames,
            String[] excludeProductNames, String[] excludeSectionNames, Map<String, String> renamings)
            throws Exception {

        WikiPageContentExtractor wikipediaExtractor = new WikiPageContentExtractor();

        String content = wikipediaExtractor.getContent(wikiPageName);

        assertNotNull(content);
        //System.err.println("content = " + content);

        WikiTabularExtractor wikiTabExtractor = new WikiTabularExtractor();

        //content = "'''Video converters''' are [[computer program]]s" ; 
        // NOTE(review): the meaning of the constant "video" argument is not visible from here -- confirm
        String htmlContent = wikiTabExtractor.run(content, "video");

        assertNotNull(htmlContent);

        //Document doc = Jsoup.connect("http://en.wikipedia.org/w/index.php?title=" + wikiPageName).get();
        Document doc = Jsoup.parse(htmlContent);
        // keep a copy of the parsed HTML next to the other outputs
        FileUtils.writeStringToFile(new File("output/" + wikiPageName + ".html"), doc.toString());

        //Element docContentEntryPoint = doc ; // doc.getElementsByClass("article-content").first(); 
        //Elements sections = docContentEntryPoint.getElementsByClass("section") ; 
        // FIXME what about no section ?
        //treatSection(doc.body());

        // one catalog per <table> element of the document
        Elements tabs = doc.select("table");

        List<Catalog> catalogs = new ArrayList<Catalog>();
        for (Element section : tabs) {
            treatTable(section, catalogs);

        }
        /*for (Element section : sections) {
           treatSection (section, catalogs);
               
        }*/

        // set the "ID" / names
        // clean up

        // FIXME here it is specific 

        // remove the excluded columns; a catalog that does not have the column is left untouched
        for (Catalog catalog : catalogs) {
            for (String columnName : excludeColumnNames) {
                if (!catalog.hasHeader(columnName))
                    continue;
                if (!catalog.removeColumn(columnName)) {
                    System.err.println("Unable to remove the column " + columnName);
                }
            }

        }

        Set<String> excludeProductIDs = new HashSet<String>(Arrays.asList(excludeProductNames));
        Set<String> excludeSections = new HashSet<String>(Arrays.asList(excludeSectionNames));

        // one feature model per product of every catalog that is not excluded
        List<FeatureModelVariable> fmvs = new ArrayList<FeatureModelVariable>();
        for (Catalog catalog : catalogs) {
            String catalogName = catalog.getName();
            if (excludeSections.contains(catalogName))
                continue;
            System.err.println("***" + catalogName + "****");
            /*
            if (!catalog.getName().equals("General information"))
               continue ; */
            for (Product product : catalog) {
                FeatureModelVariable fmv = product.toFeatureDiagram();
                /*
                 * POST
                 */

                // renaming

                Set<String> oFts = renamings.keySet(); // features to rename
                for (String oFt : oFts) {
                    fmv.renameFeature(oFt, renamings.get(oFt));
                }

                // the exclusion test is done on the identifier obtained after the renamings
                String id = fmv.getIdentifier();
                if (!excludeProductIDs.contains(id))
                    fmvs.add(fmv);

            }
            //System.err.println("\n\nfmvs=" + fmvs);
        }

        List<FeatureModelVariable> fmvsToMerge = new ArrayList<FeatureModelVariable>();

        // a single catalog: nothing to aggregate, merge the product models directly
        if (catalogs.size() == 1) {
            fmvsToMerge = fmvs;
        }
        // aggregate feature models with same identifiers when there are numerous catalogs (dimensions)
        else {
            Set<String> idsDone = new HashSet<String>();
            for (FeatureModelVariable fmv : fmvs) {
                String id1 = fmv.getIdentifier();
                if (idsDone.contains(id1))
                    continue;
                //System.err.println("Aggregating..." + id1) ; // + " = " + fmv);

                List<FeatureModelVariable> toAggreagte = new ArrayList<FeatureModelVariable>();

                // the scan covers the whole list, fmv included, so fmv itself is expected to be collected
                for (FeatureModelVariable fmv2 : fmvs) {
                    String id2 = fmv2.getIdentifier();
                    if (id1.equals(id2)) {
                        toAggreagte.add(fmv2);
                    }
                }
                if (!toAggreagte.isEmpty()) {
                    // NOTE(review): the third argument (page name without '-') presumably names
                    // the aggregated model -- confirm against AggregatorFM.build
                    fmvsToMerge.add(new AggregatorFM().build(toAggreagte, new HashSet<Expression<String>>(),
                            _interop(wikiPageName)));
                } else {
                    System.err.println("Didn't find another for " + id1);
                    continue;
                }

                idsDone.add(id1);
            }
        }

        // serialize product by product (for debug)
        StringBuffer sb = new StringBuffer();
        int i = 0;
        for (FeatureModelVariable fmv : fmvsToMerge) {
            sb.append("fmProduct" + i++ + " = FM (" + fmv + "\n)\n\n");
        }
        File f = new File(OUTPUT_DIRECTORY + wikiPageName + "_FMLMergingScript" + ".fml");
        FileUtils.writeStringToFile(f, sb.toString());

        FMLMergerBDD fmlMerger = new FMLMergerBDD(fmvsToMerge, _builder); // 

        FeatureModelVariable fmMerged = null;

        _shell.setVerbose(true);
        // experimental SAT-based merging; disabled because the flag is hard-coded to false
        boolean _SAT_EVALUATION = false;
        if (_SAT_EVALUATION) {
            fmMerged = new FMLMergerDisjunctiveSAT(fmvsToMerge).union();
            fmMerged.setIdentifier(wikiPageName);
            return fmMerged;
        }

        // second experimental variant (Tseitin transformation); disabled as well, and it would return null
        boolean _SAT_EVALUATION_2 = false;
        if (_SAT_EVALUATION_2) {
            Collection<Expression<String>> exprs = new TseitinTransformationDisjunctive(
                    fmvsToMerge.toArray(new FeatureModelVariable[] {})).compute(); //new TseitinTransformation(_z3, b12).compute();
            //System.err.println("exprs:" + exprs);
            // SMT bridges
            System.err.println("" + new FeatureModelVariableSATFormula("",
                    new SATFMLFormula(ExpressionUtility.mkConjunction(exprs))).computeImplicationGraph());
            return null;
        }

        //Formula<String> flaMerged = fmlMerger.calculateFormula(Mode.StrictUnion);
        //System.err.println("#fla=" + flaMerged.getDomain().size());

        // BDD-based union; the configuration disables cross-tree constraints and or-group support
        fmMerged = fmlMerger.union(new KSynthesisConfiguration() {

            @Override
            public boolean isAddingCrossTreeConstraints() {
                return false; //false;
            }

            @Override
            public boolean hasOrGroupSupport() {
                return false;
            }

        });

        // post-process: mandatory status for the features named after the non-excluded catalogs
        for (Catalog catalog : catalogs) {
            String catalogName = catalog.getName();
            if (excludeSections.contains(catalogName))
                continue;
            if (fmMerged.features().names().contains(catalogName)) {
                fmMerged.setMandatory(fmMerged.getFeature(catalogName));
                //  fmMerged.addConstraint(new Expression<String>(catalogName));
                // also conjoin the catalog feature onto the formula of the merged model
                fmMerged.getFormula()
                        .andWith(new Formula<String>(_builder.mkExpression(new Expression<String>(catalogName)),
                                Arrays.asList(catalogName), _builder));
            }
        }

        fmMerged.setIdentifier(wikiPageName);
        return fmMerged;
    }

    /**
     * Convenience overload of {@code postTreatFM(FeatureModelVariable, String[], String[])}
     * for callers that only have features to negate: no feature is forced to be positive.
     *
     * @param fmMerged   the merged feature model to post-process
     * @param negatedFts features to negate
     * @throws Exception propagated from the three-argument overload
     */
    private void postTreatFM(FeatureModelVariable fmMerged, String[] negatedFts) throws Exception {
        final String[] noPositiveFeatures = new String[0];
        postTreatFM(fmMerged, negatedFts, noPositiveFeatures);
    }

    /**
     * Convenience overload of the five-argument {@code executeWikipediaToFML} that applies
     * no feature renaming (an empty, freshly created renaming map is passed along).
     *
     * @param wikiPageName        name of the Wikipedia page to process
     * @param excludeColumnNames  header names of the columns to ignore
     * @param excludeProductNames identifiers of the products to ignore
     * @param excludeSectionNames names of the sections (catalogs) to ignore
     * @return whatever the five-argument overload returns for these arguments
     * @throws Exception propagated from the five-argument overload
     */
    private FeatureModelVariable executeWikipediaToFML(String wikiPageName, String[] excludeColumnNames,
            String[] excludeProductNames, String[] excludeSectionNames) throws Exception {
        Map<String, String> noRenamings = new HashMap<String, String>();
        return executeWikipediaToFML(wikiPageName, excludeColumnNames, excludeProductNames, excludeSectionNames,
                noRenamings);
    }

    /**
     * Returns the given string with every hyphen ({@code '-'}) removed.
     *
     * @param hS the string to clean; must not be {@code null}
     * @return {@code hS} without any {@code '-'} character
     */
    private String _interop(String hS) {
        final String hyphen = "-";
        final String nothing = "";
        return hS.replace(hyphen, nothing);
    }

    /**
     * Turns one HTML table into a {@code Catalog}, appends it to {@code catalogs} and then
     * cleans up <em>all</em> catalogs collected so far.
     *
     * <p>The structural information attached to the catalog is built from the first
     * {@code h2}, {@code h3} and {@code p} (only if its text starts with {@code ';'}) selected
     * from the table's ancestors, plus the table's {@code caption}. Headers come from the
     * whole table, or from its {@code thead} when the table is sortable and has one.
     *
     * <p>Clean-up: every product is named after its value for the first header, and products
     * whose non-null values are all contained in the corresponding header names (i.e. rows
     * that merely repeat the headers) are removed.
     *
     * @param table    the {@code table} element to process
     * @param catalogs the catalogs collected so far; the new catalog is appended to it
     */
    private void treatTable(Element table, List<Catalog> catalogs) {
        // 1. structural information: section / subsection / definition term / caption
        Elements ancestors = table.parents();

        Elements h2s = ancestors.select("h2");
        String sectionTitle = h2s.isEmpty() ? null : h2s.first().text(); // FIXME what about more than 1 ?

        Elements h3s = ancestors.select("h3");
        String subsectionTitle = h3s.isEmpty() ? null : h3s.first().text();

        String definitionTerm = null;
        Elements paragraphs = ancestors.select("p");
        if (!paragraphs.isEmpty()) {
            String firstParagraph = paragraphs.first().text();
            if (firstParagraph.startsWith(";")) {
                definitionTerm = firstParagraph.replaceAll(";", "");
            }
        }

        Elements captions = table.select("caption");
        String captionName = captions.isEmpty() ? null : captions.first().text();

        // FIXME other forms of structural information

        // 2. headers
        List<Header> tableHeaders = collectHeaders(table);

        boolean sortable = !(table.select("[class=sortable wikitable]").isEmpty()
                && table.select("[class=wikitable sortable]").isEmpty());

        // FIXME: other cases
        Elements theads = table.select("thead");
        if (sortable && !theads.isEmpty()) {
            tableHeaders = collectHeaders(theads.first());
        }

        System.err.println("SORTABLE:" + sortable + " rHeaders=" + tableHeaders);

        // 3. rows: a sortable table keeps its products in the tbody
        Tree<String> structuralInformation = mkStructuralInformation(sectionTitle, subsectionTitle,
                definitionTerm, captionName);
        Element rowContainer = sortable ? table.select("tbody").first() : table;
        catalogs.add(treatRows(rowContainer, structuralInformation, tableHeaders, sortable));

        // 4. naming and clean-up over every catalog collected so far
        for (Catalog catalog : catalogs) {
            List<Product> headerLikeProducts = new ArrayList<Product>();
            for (Product candidate : catalog) {
                // the value of the first column acts as the product name
                String primaryColumn = candidate.getHeaders().get(0).getName();
                candidate.setName(candidate.getValue(primaryColumn));

                // some products are headers (each value equals to header name)
                boolean repeatsHeaders = true;
                for (Header header : candidate.getHeaders()) {
                    String headerName = header.getName();
                    String cellValue = candidate.getValue(headerName);
                    if (cellValue != null && !headerName.contains(cellValue)) {
                        repeatsHeaders = false;
                    }
                }
                if (repeatsHeaders) {
                    headerLikeProducts.add(candidate);
                }
            }
            if (!(headerLikeProducts.isEmpty() || catalog.isEmpty())) {
                catalog.removeAll(headerLikeProducts);
            }
        }

    }

    /**
     * Turns every table of a section into a {@code Catalog}, appends them to {@code catalogs}
     * and finally names the products of all catalogs collected so far.
     *
     * <p>The structural information shared by the tables of the section is built from the
     * section's first {@code h2}, first {@code h3} and first {@code p} (the latter only if its
     * text starts with {@code ';'}); each table additionally contributes its {@code caption}.
     *
     * @param section  the element containing the headings and the tables
     * @param catalogs the catalogs collected so far; one catalog per table is appended
     */
    private void treatSection(Element section, List<Catalog> catalogs) {

        // 1. section-level structural information
        // FIXME what if it does not exist? / can be "h3" / can be a subsection
        Elements h2s = section.getElementsByTag("h2");
        String sectionTitle = h2s.isEmpty() ? null : h2s.first().text(); // FIXME what about more than 1 ?

        Elements h3s = section.getElementsByTag("h3");
        String subsectionTitle = h3s.isEmpty() ? null : h3s.first().text();

        String definitionTerm = null;
        Elements paragraphs = section.getElementsByTag("p");
        if (!paragraphs.isEmpty()) {
            String firstParagraph = paragraphs.first().text();
            if (firstParagraph.startsWith(";")) {
                definitionTerm = firstParagraph.replaceAll(";", "");
            }
        }

        // 2. one catalog per table of the section
        for (Element table : section.getElementsByTag("table")) {

            // the caption, when present, acts as the name of the sub-view
            Elements captions = table.select("caption");
            String captionName = captions.isEmpty() ? null : captions.first().text();

            List<Header> tableHeaders = collectHeaders(table);

            boolean sortable = !(table.select("[class=sortable wikitable]").isEmpty()
                    && table.select("[class=wikitable sortable]").isEmpty());

            // FIXME: other cases
            Elements theads = table.select("thead");
            if (sortable && !theads.isEmpty()) {
                tableHeaders = collectHeaders(theads.first());
            }

            // a sortable table keeps its products in the tbody
            Tree<String> structuralInformation = mkStructuralInformation(sectionTitle, subsectionTitle,
                    definitionTerm, captionName);
            Element rowContainer = sortable ? table.select("tbody").first() : table;
            catalogs.add(treatRows(rowContainer, structuralInformation, tableHeaders, sortable));
        }

        // 3. the value of the first column acts as the product name
        for (Catalog catalog : catalogs) {
            for (Product p : catalog) {
                String primaryColumn = p.getHeaders().get(0).getName();
                p.setName(p.getValue(primaryColumn));
            }
        }

    }

    // Catalog aka list of product
    // at this step, cell (I, J) corresponds to value of the J-th header of I-th product 
    /**
     * Builds a catalog out of the {@code tr} rows found under the given element.
     *
     * <p>For every row a product named {@code "product_<n>"} is filled with the text of the
     * row's cells, in order ({@code td} cells only; for a sortable table the {@code th} cells
     * come first, followed by the {@code td} cells). Rows without any such cell are ignored
     * and do not consume a product number. In a sortable table the first non-empty row is
     * the header row and is not added to the catalog.
     *
     * @param table                 the element whose rows are read
     * @param structuralInformation structural information attached to the catalog and its products
     * @param rHeaders              headers attached to the catalog and its products
     * @param sortable              whether the table is a sortable one
     * @return the catalog holding one product per retained row
     */
    private Catalog treatRows(Element table, Tree<String> structuralInformation, List<Header> rHeaders,
            boolean sortable) {
        Catalog catalog = new Catalog(structuralInformation, rHeaders);
        int rowNumber = 0;

        for (Element row : table.select("tr")) {

            // first entry is a header (th) in a sortable table
            Elements cells = row.select(sortable ? "th" : "td");
            if (sortable) {
                cells.addAll(row.select("td"));
            }

            Product rowProduct = new Product("product_" + rowNumber, structuralInformation, rHeaders);
            for (int column = 0; column < cells.size(); column++) {
                rowProduct.add(column, cells.get(column).text());
            }

            // only a tr with at least one cell counts as a row
            if (cells.isEmpty()) {
                continue;
            }

            // header (first entry of a sortable table) is not a product
            boolean headerRow = sortable && rowNumber == 0;
            if (!headerRow) {
                catalog.add(rowProduct);
            }
            rowNumber++;
        }

        return catalog;
    }

    /**
     * Collects the column headers of a table, flattening one level of nested headers.
     * <p>
     * The first row with a non-empty {@code th} provides the top-level headers; a top-level
     * header carrying a positive {@code colspan} is recorded as owning that many nested headers.
     * The next such row provides the nested headers. In the returned list every spanning header
     * is replaced by its own slice of the nested headers
     * ({@code nested[lastIndex .. lastIndex + colspan)}), each nested header being linked to its
     * parent through {@code addNestedHeader} / {@code addParentHeader}; other headers are kept
     * as they are.
     * <p>
     * A malformed or non-positive {@code colspan} is ignored and the header is treated as a
     * plain (non-spanning) header.
     *
     * @param table element whose {@code tr} descendants are scanned for {@code th} cells
     * @return the flattened list of headers, in column order
     */
    private List<Header> collectHeaders(Element table) {
        List<Header> headers = new ArrayList<Header>();
        List<Header> headersWithNestedHeaders = new ArrayList<Header>();
        List<List<Header>> nestedHeaders = new ArrayList<List<Header>>();

        int levelHeader = 0; // FIXME nested header > 1

        for (Element row : table.select("tr")) {

            // sometimes the first row, especially in sortable table, is empty
            // (the second row is relevant for headers)
            if (isEmpty(row)) {
                continue;
            }

            if (levelHeader == 0) {
                for (Element header : row.select("th")) {
                    Header headerV = new Header(header.text());
                    Elements colspan = header.getElementsByAttribute("colspan");
                    if (!colspan.isEmpty()) {
                        int span;
                        try {
                            span = Integer.parseInt(colspan.first().attr("colspan").trim());
                        } catch (NumberFormatException ignored) {
                            // malformed colspan: fall back to a plain, non-spanning header
                            span = 0;
                        }
                        if (span > 0) {
                            headersWithNestedHeaders.add(headerV);
                            headerV.setNumbersOfNestedHeaders(span);
                        }
                    }
                    headers.add(headerV);
                }
                levelHeader++;
            } else if (levelHeader == 1) {
                // nested header
                List<Header> nHeaders = new ArrayList<Header>();
                for (Element header : row.select("th")) {
                    nHeaders.add(new Header(header.text()));
                }
                nestedHeaders.add(nHeaders);
                levelHeader++;
            }

        }

        // FIXME table.select("thead");

        // FIXME assign a "number" of appearance for headers
        // especially important for nested headers (colspan="3")
        List<Header> rHeaders = new ArrayList<Header>();
        List<Header> nHeaders = new ArrayList<Header>();
        if (!nestedHeaders.isEmpty()) {
            nHeaders = nestedHeaders.get(0); // FIXME 0 at the moment but normally it can be refined
        }

        int lastIndex = 0; // index of the first nested header not yet claimed by a parent
        for (Header header : headers) {
            if (!headersWithNestedHeaders.contains(header)) {
                rHeaders.add(header);
                continue;
            }

            // header has nested headers: it owns nHeaders[lastIndex .. lastIndex + nNestedHeaders)
            int nNestedHeaders = Math.max(header.getNumbersOfNestedHeaders(), 0);
            int end = Math.min(lastIndex + nNestedHeaders, nHeaders.size());
            for (int i = lastIndex; i < end; i++) {
                Header nH = nHeaders.get(i);
                // only the headers of this slice are emitted, so a following spanning
                // header does not see (and re-add) them
                rHeaders.add(nH);
                header.addNestedHeader(nH);
                nH.addParentHeader(header);
            }
            lastIndex += nNestedHeaders;
        }
        //System.err.println("rHeaders=" + rHeaders);
        return rHeaders;
    }

    /**
     * Tells whether a row carries no header text.
     *
     * @param row the row to inspect
     * @return {@code false} as soon as one {@code th} under the row has non-empty text;
     *         {@code true} otherwise (including when the row has no {@code th} at all)
     */
    private boolean isEmpty(Element row) {
        Elements headerCells = row.select("th");
        int index = 0;
        while (index < headerCells.size()) {
            String text = headerCells.get(index).text();
            if (text.length() > 0) {
                return false;
            }
            index++;
        }
        // no header cell with text was found
        return true;
    }

    /**
     * Chains the given names into a tree.
     * <p>
     * {@code null} and empty names are skipped. The first remaining name is used to create the
     * tree; every later name is registered with {@code addLeaf(previous, name)}, where
     * {@code previous} is the last name that was kept.
     *
     * @param fts candidate names, in order
     * @return the resulting tree, or {@code null} when no name was kept
     */
    private Tree<String> mkStructuralInformation(String... fts) {
        Tree<String> tree = null;
        String previous = null;
        for (String name : fts) {
            boolean usable = (name != null) && !name.isEmpty();
            if (!usable) {
                continue;
            }
            if (tree == null) {
                // first kept name: create the tree from it
                tree = new Tree<String>(name);
            } else {
                tree.addLeaf(previous, name);
            }
            previous = name;
        }

        return tree;
    }

}