Java tutorial: IEX2LevAMAZON, an IEX-2Lev index construction with Hadoop MapReduce and Amazon S3
/**
 * Copyright (C) 2016 Tarik Moataz
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.crypto.sse;

import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import org.apache.commons.net.util.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;

import javax.crypto.NoSuchPaddingException;
import java.io.*;
import java.security.InvalidAlgorithmParameterException;
import java.security.InvalidKeyException;
import java.security.NoSuchAlgorithmException;
import java.security.NoSuchProviderException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;

public class IEX2LevAMAZON {

	public static class FileNameKeyInputFormat extends FileInputFormat<Text, Text> {

		public FileNameKeyInputFormat() {
			super();
		}

		@Override
		protected boolean isSplitable(JobContext context, Path filename) {
			return false;
		}

		@Override
		public RecordReader<Text, Text> createRecordReader(InputSplit inputSplit,
				TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
			taskAttemptContext.setStatus(inputSplit.toString());
			return new FileNameKeyRecordReader(taskAttemptContext.getConfiguration(), (FileSplit) inputSplit);
		}
	}

	/**
	 * Makes the filename the key to the map function; the value is the line.
	 */
	public static class FileNameKeyRecordReader extends KeyValueLineRecordReader {

		private final LineRecordReader lineRecordReader = new LineRecordReader();
		private Text key = new Text();
		private Text value = new Text();
		private String fileName = new String();

		public FileNameKeyRecordReader(Configuration conf, FileSplit split) throws IOException {
			super(conf);
			fileName = split.getPath().getName();
		}
		public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
			this.lineRecordReader.initialize(genericSplit, context);
		}

		public synchronized boolean nextKeyValue() throws IOException {
			if (this.lineRecordReader.nextKeyValue()) {
				this.key = new Text(fileName); // Set the key to the file name
				this.value = this.lineRecordReader.getCurrentValue(); // Value is the line of the file
				return true;
			}
			return false;
		}

		public Text getCurrentKey() {
			return this.key;
		}

		public Text getCurrentValue() {
			return this.value;
		}
	}

	public static class ArrayListWritable implements WritableComparable {

		private ArrayList<Text> values;

		public ArrayListWritable() {
			values = new ArrayList<Text>();
		}

		public ArrayListWritable(ArrayList<Text> values) {
			this.values = values;
		}

		public ArrayList<Text> get() {
			return values;
		}

		public void add(Text val) {
			this.values.add(val);
		}

		@Override
		public int compareTo(Object o) {
			if (!(o instanceof ArrayListWritable)) {
				throw new ClassCastException();
			}
			// Compare the wrapped lists element by element
			ArrayList<Text> newValues = ((ArrayListWritable) o).get();
			int compareToVal = 0;
			for (int i = 0; i < Math.min(this.values.size(), newValues.size()); ++i) {
				compareToVal = newValues.get(i).compareTo(this.values.get(i));
				if (compareToVal != 0) {
					return compareToVal;
				}
			}
			return compareToVal;
		}

		public String toString() {
			String outp = "";
			for (Text text : this.values) {
				outp += text + "\t";
			}
			return outp;
		}

		@Override
		public void write(DataOutput dataOutput) throws IOException {
			dataOutput.writeInt(this.values.size());
			for (Text text : values) {
				text.write(dataOutput);
			}
		}

		@Override
		public void readFields(DataInput dataInput) throws IOException {
			// Read back the element count written by write(), then each Text value
			this.values = new ArrayList<Text>();
			int size = dataInput.readInt();
			for (int i = 0; i < size; ++i) {
				Text value = new Text();
				value.readFields(dataInput);
				this.values.add(value);
			}
		}
	}

	/*
	 * Create a multi-map that maps filenames to document contents.
	 * Stored in the form of a file.
	 */
	public static class MLK1 extends Mapper<Text, Text, Text, Text> {

		// Tracks (filename, word) pairs already emitted so that duplicates
		// within a document are written only once
		private static ConcurrentHashMap<String, IntWritable> mapTable = new ConcurrentHashMap<String, IntWritable>();
		private Text fileName = new Text();
		private Text word = new Text();

		public void map(Text key, Text value, Context context) throws IOException, InterruptedException {

			String line = value.toString();
			CharArraySet noise = EnglishAnalyzer.getDefaultStopSet();

			// We use a standard tokenizer that removes stop words; a stemming
			// tokenizer such as Porter could be used instead. The set of English
			// noise keywords eliminates words such as "the", "a", etc.
			Analyzer analyzer = new StandardAnalyzer(noise);
			List<String> token = Tokenizer.tokenizeString(analyzer, line);
			Iterator<String> it = token.iterator();

			while (it.hasNext()) {
				word.set(it.next());
				fileName.set(key);
				if (!mapTable.containsKey(fileName.toString() + word.toString())) {
					context.write(fileName, word);
					mapTable.put(fileName.toString() + word.toString(), new IntWritable(1));
				}
			}
		}
	}
	public static class RLK1 extends Reducer<Text, Text, Text, ArrayListWritable> {

		public void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			ArrayListWritable keywords = new ArrayListWritable(new ArrayList<Text>());
			for (Text val : values) {
				Text tmp = new Text(val);
				keywords.add(tmp);
			}
			context.write(key, keywords);
		}
	}

	/*
	 * Create a multi-map that maps keywords to their filenames.
	 * Stored in the form of a file.
	 */
	public static class MLK2 extends Mapper<Text, Text, Text, Text> {

		static ConcurrentHashMap<String, IntWritable> mapTable = new ConcurrentHashMap<String, IntWritable>();
		private Text fileName = new Text();
		private Text word = new Text();

		public void map(Text key, Text value, Context context) throws IOException, InterruptedException {

			String line = value.toString();
			CharArraySet noise = EnglishAnalyzer.getDefaultStopSet();

			// We use a standard tokenizer that removes stop words; a stemming
			// tokenizer such as Porter could be used instead. The set of English
			// noise keywords eliminates words such as "the", "a", etc.
			Analyzer analyzer = new StandardAnalyzer(noise);
			List<String> token = Tokenizer.tokenizeString(analyzer, line);
			Iterator<String> it = token.iterator();

			while (it.hasNext()) {
				word.set(it.next());
				fileName.set(key);
				if (!mapTable.containsKey(fileName.toString() + word.toString())) {
					context.write(word, fileName);
					mapTable.put(fileName.toString() + word.toString(), new IntWritable(1));
				}
			}
		}
	}

	public static class RLK2 extends Reducer<Text, Text, Text, ArrayListWritable> {

		public void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			ArrayListWritable fileNames = new ArrayListWritable(new ArrayList<Text>());
			for (Text val : values) {
				Text tmp = new Text(val);
				fileNames.add(tmp);
			}
			context.write(key, fileNames);
		}
	}

	/*
	 * Creation of the local multi-map
	 */
	public static class LocalMM extends Mapper<LongWritable, Text, Text, Text> {

		private Text fileName = new Text();
		private Text word = new Text();

		Configuration conf;
		String inst1;
		String inst2;
		String inst3;

		final int bigBlock = 100;
		final int smallBlock = 10;
		final int dataSize = 1000;

		public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

			conf = context.getConfiguration();
			inst1 = conf.get("lookup");
			inst2 = conf.get("lookup2");
			inst3 = conf.get("setKeys");

			// The two plaintext multi-maps and the key list are passed through
			// the job configuration as Base64-encoded serialized objects
			Multimap<String, String> lookup = null;
			try {
				lookup = (Multimap<String, String>) Serializer.deserialize(Base64.decodeBase64(inst1.getBytes()));
			} catch (ClassNotFoundException e1) {
				e1.printStackTrace();
			}

			Multimap<String, String> lookup2 = null;
			try {
				lookup2 = (Multimap<String, String>) Serializer.deserialize(Base64.decodeBase64(inst2.getBytes()));
			} catch (ClassNotFoundException e1) {
				e1.printStackTrace();
			}

			List<byte[]> listSK = null;
			try {
				listSK = (List<byte[]>) Serializer.deserialize(Base64.decodeBase64(inst3.getBytes()));
			} catch (ClassNotFoundException e1) {
				e1.printStackTrace();
			}
			String line = value.toString();
			String[] token = line.split("\\s+");
			int counter = 0;

			// First compute V_w: determine the document identifiers
			Set<String> VW = new TreeSet<String>();
			for (String idDoc : lookup.get(token[0])) {
				VW.addAll(lookup2.get(idDoc));
				System.out.println("Document is: " + idDoc);
			}

			Multimap<String, String> secondaryLookup = ArrayListMultimap.create();

			// Here we are only interested in documents in the intersection
			// between "keyword" and "word"
			for (String word : VW) {
				Collection<String> l1 = new ArrayList<String>(lookup.get(word));
				Collection<String> l2 = new ArrayList<String>(lookup.get(token[0]));
				l1.retainAll(l2);
				secondaryLookup.putAll(word, l1);
			}
			// End of V_w construction

			MMGlobal.counter = 0;
			Multimap<String, byte[]> obj = null;
			try {
				if (secondaryLookup.size() > 0) {
					obj = MMGlobal.setupSI(CryptoPrimitives.generateCmac(listSK.get(0), token[0]),
							secondaryLookup.keySet().toArray(new String[0]), secondaryLookup, bigBlock, smallBlock,
							dataSize);
				}
			} catch (InvalidKeyException | InvalidAlgorithmParameterException | NoSuchAlgorithmException
					| NoSuchProviderException | NoSuchPaddingException e) {
				e.printStackTrace();
			}

			word.set(Integer.toString(counter));
			fileName.set(Serializer.serialize(obj));
			context.write(word, fileName);
			counter++;
		}
	}

	/**
	 * @param args
	 * @throws Exception
	 */
	@SuppressWarnings("null")
	public static void main(String[] args) throws Exception {

		// First job: build the multi-map from filenames to keywords
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "IEX-2Lev");

		job.setJarByClass(IEX2LevAMAZON.class);
		job.setMapperClass(MLK1.class);
		job.setReducerClass(RLK1.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setNumReduceTasks(1);
		job.setOutputValueClass(ArrayListWritable.class);
		job.setInputFormatClass(FileNameKeyInputFormat.class);

		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Second job: build the multi-map from keywords to filenames
		Configuration conf2 = new Configuration();
		Job job2 = Job.getInstance(conf2, "IEX-2Lev");

		job2.setJarByClass(IEX2LevAMAZON.class);
		job2.setMapperClass(MLK2.class);
		job2.setReducerClass(RLK2.class);
		job2.setNumReduceTasks(1);
		job2.setMapOutputKeyClass(Text.class);
		job2.setMapOutputValueClass(Text.class);
		job2.setOutputKeyClass(Text.class);
		job2.setOutputValueClass(ArrayListWritable.class);
		job2.setInputFormatClass(FileNameKeyInputFormat.class);

		FileInputFormat.addInputPath(job2, new Path(args[0]));
		FileOutputFormat.setOutputPath(job2, new Path(args[2]));

		job.waitForCompletion(true);
		job2.waitForCompletion(true);

		// Add your Amazon credentials here
		AWSCredentials credentials = new BasicAWSCredentials("XXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXX");

		// Create a client connection based on the credentials
		AmazonS3 s3client = new AmazonS3Client(credentials);

		// Bucket name (must be unique across all S3 users)
		String bucketName = "iexmaptest";

		// Load the inverted index that associates keywords to file identifiers
		S3Object s3object = s3client.getObject(new GetObjectRequest(bucketName, args[4]));
		System.out.println(s3object.getObjectMetadata().getContentType());
		System.out.println(s3object.getObjectMetadata().getContentLength());

		List<String> lines = new ArrayList<String>();
		String folderName = "2";
		BufferedReader reader = new BufferedReader(new InputStreamReader(s3object.getObjectContent()));
		String line;
		int counter = 0;

		while ((line = reader.readLine()) != null) {
			// The content can also be copied locally using a buffered writer
			lines.add(line);
			System.out.println(line);

			// Upload each line as its own object in the folder
			String fileName = folderName + "/" + Integer.toString(counter);
			ByteArrayInputStream input = new ByteArrayInputStream(line.getBytes());
			s3client.putObject(bucketName, fileName, input, new ObjectMetadata());
			counter++;
		}

		Multimap<String, String> lookup = ArrayListMultimap.create();
		for (int i = 0; i < lines.size(); i++) {
			String[] tokens = lines.get(i).split("\\s+");
			for (int j = 1; j < tokens.length; j++) {
				lookup.put(tokens[0], tokens[j]);
			}
		}

		// Load the inverted index that associates file identifiers to keywords
		lines = new ArrayList<String>();
		s3object = s3client.getObject(new GetObjectRequest(bucketName, args[5]));
		System.out.println(s3object.getObjectMetadata().getContentType());
		System.out.println(s3object.getObjectMetadata().getContentLength());

		reader = new BufferedReader(new InputStreamReader(s3object.getObjectContent()));
		while ((line = reader.readLine()) != null) {
			lines.add(line);
		}

		Multimap<String, String> lookup2 = ArrayListMultimap.create();
		for (int i = 0; i < lines.size(); i++) {
			String[] tokens = lines.get(i).split("\\s+");
			for (int j = 1; j < tokens.length; j++) {
				lookup2.put(tokens[0], tokens[j]);
			}
		}

		// Delete the file
		try {
			s3client.deleteObject(new DeleteObjectRequest(bucketName, args[4]));
		} catch (AmazonServiceException ase) {
			System.out.println("Caught an AmazonServiceException.");
			System.out.println("Error Message:    " + ase.getMessage());
			System.out.println("HTTP Status Code: " + ase.getStatusCode());
			System.out.println("AWS Error Code:   " + ase.getErrorCode());
			System.out.println("Error Type:       " + ase.getErrorType());
			System.out.println("Request ID:       " + ase.getRequestId());
		} catch (AmazonClientException ace) {
			System.out.println("Caught an AmazonClientException.");
			System.out.println("Error Message: " + ace.getMessage());
		}

		/*
		 * Start of IEX-2Lev construction
		 */

		// Generation of keys for IEX-2Lev
		BufferedReader keyRead = new BufferedReader(new InputStreamReader(System.in));
		System.out.println("Enter your password :");
		String pass = keyRead.readLine();

		// You can change the size of the key; here we set it to 128 bits
		List<byte[]> listSK = IEX2Lev.keyGen(128, pass, "salt/salt", 100);

		// Generation of local multi-maps with a map-only job (no reducer)
		Configuration conf3 = new Configuration();

		String testSerialization1 = new String(Base64.encodeBase64(Serializer.serialize(lookup)));
		String testSerialization2 = new String(Base64.encodeBase64(Serializer.serialize(lookup2)));
		String testSerialization3 = new String(Base64.encodeBase64(Serializer.serialize(listSK)));

		conf3.set("lookup", testSerialization1);
		conf3.set("lookup2", testSerialization2);
		conf3.set("setKeys", testSerialization3);

		Job job3 = Job.getInstance(conf3, "Local MM");

		job3.setJarByClass(IEX2LevAMAZON.class);
		job3.setMapperClass(LocalMM.class);
		job3.setNumReduceTasks(0);

		FileInputFormat.addInputPath(job3, new Path(args[2]));
		FileOutputFormat.setOutputPath(job3, new Path(args[3]));

		job3.waitForCompletion(true);
	}
}
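
How the pieces fit together: main expects six arguments. args[0] is the input corpus read by the first two jobs, args[1] and args[2] are the output directories of the filename-to-keyword and keyword-to-filename jobs, args[3] is the output directory of the local multi-map job, and args[4] and args[5] are the S3 object keys of the two plaintext inverted indexes fetched from the iexmaptest bucket.

The plaintext multi-maps and the secret keys reach the LocalMM mapper through the Hadoop Configuration as Base64-encoded serialized objects (conf3.set(...) in main, conf.get(...) plus Serializer.deserialize(...) in LocalMM.map). The snippet below is a minimal sketch of that round trip in isolation, which can be handy for testing outside a cluster. It assumes the Serializer helper from the org.crypto.sse package used above is on the classpath; the class name ConfigRoundTripSketch is made up for illustration and is not part of the code above.

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import org.apache.commons.net.util.Base64;
import org.apache.hadoop.conf.Configuration;
import org.crypto.sse.Serializer;

public class ConfigRoundTripSketch {

	@SuppressWarnings("unchecked")
	public static void main(String[] args) throws Exception {
		// A small plaintext multi-map, standing in for "lookup" in main()
		Multimap<String, String> lookup = ArrayListMultimap.create();
		lookup.put("keyword1", "doc1");
		lookup.put("keyword1", "doc2");

		// Sender side: serialize, Base64-encode, and store in the Configuration
		Configuration conf = new Configuration();
		conf.set("lookup", new String(Base64.encodeBase64(Serializer.serialize(lookup))));

		// Receiver side (what LocalMM.map does): fetch, decode, deserialize
		Multimap<String, String> restored = (Multimap<String, String>) Serializer
				.deserialize(Base64.decodeBase64(conf.get("lookup").getBytes()));

		System.out.println(restored.get("keyword1")); // prints [doc1, doc2]
	}
}

One design note: configuration values are copied to every task, so passing the multi-maps this way is only practical while their serialized form stays small; for larger corpora one would typically ship them through the distributed cache or HDFS instead.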