List of usage examples for the size() method of org.apache.mahout.math.Vector
/**
 * Returns the size of this vector.
 *
 * NOTE(review): the usage examples on this page treat the result as the vector's cardinality
 * (the number of addressable components, zero-valued ones included) - confirm against the
 * Mahout Vector Javadoc.
 */
int size();
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
public static String dumpVector(Vector v) { StringBuffer result = new StringBuffer(); result.append(String.format("Vector '%s': ", "<unknown>")); int baseSize = v.size(); for (int i = 0; i < baseSize; i++) { double component = v.getQuick(i); if (component != 0.0) { result.append(String.format("%d => %f, ", i, component)); }//w ww . j av a 2 s . c om } return result.toString(); }
From source file:com.scaleunlimited.classify.vectors.VectorUtilsTest.java
License:Apache License
/**
 * Calls {@code VectorUtils.makeExtraVector} with the unique terms [a, b] and the document term
 * counts {a=1, c=5}, and expects a vector of size 1 whose only component truncates to 5.
 * NOTE(review): presumably only terms missing from the unique-term list ("c" here) end up in
 * the extra vector - confirm against VectorUtils.
 */
@Test
public void testMakeExtraVector() {
    List<String> uniqueTerms = new ArrayList<String>(2);
    uniqueTerms.add("a");
    uniqueTerms.add("b");

    Map<String, Integer> docTerms = new HashMap<String, Integer>();
    docTerms.put("a", 1);
    docTerms.put("c", 5);

    Vector v = VectorUtils.makeExtraVector(uniqueTerms, docTerms);

    Assert.assertEquals(1, v.size());
    // A primitive cast truncates exactly like Double.intValue() without the deprecated
    // new Double(...) allocation.
    Assert.assertEquals(5, (int) v.get(0));
}
From source file:com.scaleunlimited.classify.vectors.VectorUtilsTest.java
License:Apache License
@Test public void testAppend() { Vector v1 = new RandomAccessSparseVector(2); v1.setQuick(0, 0);//from w ww. j ava2s. c o m v1.setQuick(1, 1); Vector v2 = new RandomAccessSparseVector(3); v2.setQuick(0, 2); v2.setQuick(1, 3); v2.setQuick(2, 4); Vector v3 = VectorUtils.appendVectors(v1, v2); Assert.assertEquals(5, v3.size()); for (int i = 0; i < 5; i++) { Assert.assertEquals(i, new Double(v3.getQuick(i)).intValue()); } }
From source file:com.scaleunlimited.classify.vectors.WritableComparableVectorTest.java
License:Apache License
/**
 * Asserts that two vectors have the same size and exactly equal components at every index.
 *
 * @param vector1 the expected vector
 * @param vector2 the actual vector
 */
private void compareVectors(Vector vector1, Vector vector2) {
    Assert.assertEquals(vector1.size(), vector2.size());

    int size = vector1.size();
    for (int i = 0; i < size; i++) {
        // Use the delta overload: the two-argument double form is deprecated in JUnit 4 and
        // fails unconditionally there. A delta of 0.0 keeps the comparison exact.
        Assert.assertEquals("component " + i, vector1.getQuick(i), vector2.getQuick(i), 0.0);
    }
}
From source file:com.tamingtext.mahout.VectorExamplesTest.java
License:Apache License
@Test public void testProgrammatic() throws Exception { //<start id="vec.examples.programmatic"/> double[] vals = new double[] { 0.3, 1.8, 200.228 }; Vector dense = new DenseVector(vals);//<co id="vec.exam.dense"/> assertTrue(dense.size() == 3); Vector sparseSame = new SequentialAccessSparseVector(3);//<co id="vec.exam.sparse.same"/> Vector sparse = new SequentialAccessSparseVector(3000);//<co id="vec.exam.sparse"/> for (int i = 0; i < vals.length; i++) {//<co id="vec.exam.assign.sparse"/> sparseSame.set(i, vals[i]);//from w w w. j av a 2 s. com sparse.set(i, vals[i]); } assertFalse(dense.equals(sparse));//<co id="vec.exam.notequals.d.s"/> assertEquals(dense, sparseSame);//<co id="vec.exam.equals.d.s"/> assertFalse(sparse.equals(sparseSame)); /* <calloutlist> <callout arearefs="vec.exam.dense"><para>Create a <classname>DenseVector</classname> with a label of "my-dense" and 3 values. The cardinality of this vector is 3 </para></callout> <callout arearefs="vec.exam.sparse.same"><para>Create a <classname>SparseVector</classname> with a label of my-sparse-same that has cardinality of 3</para></callout> <callout arearefs="vec.exam.sparse"><para>Create a <classname>SparseVector</classname> with a label of my-sparse and a cardinality of 3000.</para></callout> <callout arearefs="vec.exam.assign.sparse"><para>Set the values to the first 3 items in the sparse vectors.</para></callout> <callout arearefs="vec.exam.notequals.d.s"><para>The dense and the sparse <classname>Vector</classname>s are not equal because they have different cardinality.</para></callout> <callout arearefs="vec.exam.equals.d.s"><para>The dense and sparseSame <classname>Vector</classname>s are equal because they have the same values and cardinality</para></callout> </calloutlist> */ //<end id="vec.examples.programmatic"/> //<start id="vec.examples.seq.file"/> File tmpDir = new File(System.getProperty("java.io.tmpdir")); File tmpLoc = new File(tmpDir, "sfvwt"); tmpLoc.mkdirs(); File tmpFile = 
File.createTempFile("sfvwt", ".dat", tmpLoc); Path path = new Path(tmpFile.getAbsolutePath()); Configuration conf = new Configuration();//<co id="vec.examples.seq.conf"/> FileSystem fs = FileSystem.get(conf); SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class, VectorWritable.class);//<co id="vec.examples.seq.writer"/> VectorWriter vecWriter = new SequenceFileVectorWriter(seqWriter);//<co id="vec.examples.seq.vecwriter"/> List<Vector> vectors = new ArrayList<Vector>(); vectors.add(sparse); vectors.add(sparseSame); vecWriter.write(vectors);//<co id="vec.examples.seq.write"/> vecWriter.close(); /* <calloutlist> <callout arearefs="vec.examples.seq.conf"><para>Create a <classname>Configuration</classname> for Hadoop</para></callout> <callout arearefs="vec.examples.seq.writer"><para>Create a Hadoop <classname>SequenceFile.Writer</classname> to handle the job of physically writing out the vectors to a file in HDFS</para></callout> <callout arearefs="vec.examples.seq.vecwriter"><para>A <classname>VectorWriter</classname> processes the <classname>Vector</classname>s and invokes the underlying write methods on the <classname>SequenceFile.Writer</classname></para></callout> <callout arearefs="vec.examples.seq.write"><para>Do the work of writing out the files</para></callout> </calloutlist> */ //<end id="vec.examples.seq.file"/> }
From source file:de.isabeldrostfromm.sof.util.Vectors.java
License:Open Source License
/** * Appends two vectors directly after one another, leaving all non set elements zero. * *///from w w w . j a v a 2s . c o m public static Vector append(Vector... vectors) { int totalSize = 0; for (Vector vec : vectors) { totalSize += vec.size(); } Vector result = new SequentialAccessSparseVector(totalSize); result.assign(0); int lastIndex = 0; for (Vector vector : vectors) { for (Element elem : vector) { result.setQuick(lastIndex + elem.index(), elem.get()); } lastIndex += vector.size(); } return result; }
From source file:de.rwth.i9.palm.analytics.algorithm.lda.CustomVectorDumper.java
License:Apache License
@Override public int run(String[] args) throws Exception { /**//from w w w. j a v a 2 s . c o m * Option seqOpt = * obuilder.withLongName("seqFile").withRequired(false).withArgument( * abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()). * withDescription( * "The Sequence File containing the Vectors").withShortName * ("s").create(); Option dirOpt = * obuilder.withLongName("seqDirectory"). * withRequired(false).withArgument( * abuilder.withName("seqDirectory").withMinimum * (1).withMaximum(1).create()) .withDescription( * "The directory containing Sequence File of Vectors") * .withShortName("d").create(); */ addInputOption(); addOutputOption(); addOption("useKey", "u", "If the Key is a vector than dump that instead"); addOption("printKey", "p", "Print out the key as well, delimited by tab (or the value if useKey is true"); addOption("dictionary", "d", "The dictionary file.", false); addOption("dictionaryType", "dt", "The dictionary file type (text|seqfile)", false); addOption("csv", "c", "Output the Vector as CSV. Otherwise it substitutes in the terms for vector cell entries"); addOption("namesAsComments", "n", "If using CSV output, optionally add a comment line for each NamedVector " + "(if the vector is one) printing out the name"); addOption("nameOnly", "N", "Use the name as the value for each NamedVector (skip other vectors)"); addOption("sortVectors", "sort", "Sort output key/value pairs of the vector entries in abs magnitude " + "descending order"); addOption("quiet", "q", "Print only file contents"); addOption("sizeOnly", "sz", "Dump only the size of the vector"); addOption("numItems", "ni", "Output at most <n> vecors", false); addOption("vectorSize", "vs", "Truncate vectors to <vs> length when dumping (most useful when in" + " conjunction with -sort", false); addOption(buildOption("filter", "fi", "Only dump out those vectors whose name matches the filter." 
+ " Multiple items may be specified by repeating the argument.", true, 1, Integer.MAX_VALUE, false, null)); if (parseArguments(args, false, true) == null) { return -1; } Path[] pathArr; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path input = getInputPath(); FileStatus fileStatus = fs.getFileStatus(input); if (fileStatus.isDir()) { pathArr = FileUtil.stat2Paths(fs.listStatus(input, new OutputFilesFilter())); } else { FileStatus[] inputPaths = fs.globStatus(input); pathArr = new Path[inputPaths.length]; int i = 0; for (FileStatus fstatus : inputPaths) { pathArr[i++] = fstatus.getPath(); } } String dictionaryType = getOption("dictionaryType", "text"); boolean sortVectors = hasOption("sortVectors"); boolean quiet = hasOption("quiet"); if (!quiet) { log.info("Sort? {}", sortVectors); } String[] dictionary = null; if (hasOption("dictionary")) { String dictFile = getOption("dictionary"); if ("text".equals(dictionaryType)) { dictionary = VectorHelper.loadTermDictionary(new File(dictFile)); } else if ("sequencefile".equals(dictionaryType)) { dictionary = VectorHelper.loadTermDictionary(conf, dictFile); } else { // TODO: support Lucene's FST as a dictionary type throw new IOException("Invalid dictionary type: " + dictionaryType); } } Set<String> filters; if (hasOption("filter")) { filters = Sets.newHashSet(getOptions("filter")); } else { filters = null; } boolean useCSV = hasOption("csv"); boolean sizeOnly = hasOption("sizeOnly"); boolean nameOnly = hasOption("nameOnly"); boolean namesAsComments = hasOption("namesAsComments"); boolean transposeKeyValue = hasOption("vectorAsKey"); Writer writer; boolean shouldClose; File output = getOutputFile(); if (output != null) { shouldClose = true; log.info("Output file: {}", output); Files.createParentDirs(output); writer = Files.newWriter(output, Charsets.UTF_8); } else { shouldClose = false; writer = new OutputStreamWriter(System.out, Charsets.UTF_8); } try { boolean printKey = 
hasOption("printKey"); if (useCSV && dictionary != null) { writer.write("#"); for (int j = 0; j < dictionary.length; j++) { writer.write(dictionary[j]); if (j < dictionary.length - 1) { writer.write(','); } } writer.write('\n'); } Long numItems = null; if (hasOption("numItems")) { numItems = Long.parseLong(getOption("numItems")); if (quiet) { writer.append("#Max Items to dump: ").append(String.valueOf(numItems)).append('\n'); } } int maxIndexesPerVector = hasOption("vectorSize") ? Integer.parseInt(getOption("vectorSize")) : Integer.MAX_VALUE; long itemCount = 0; int fileCount = 0; for (Path path : pathArr) { if (numItems != null && numItems <= itemCount) { break; } if (quiet) { log.info("Processing file '{}' ({}/{})", path, ++fileCount, pathArr.length); } SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<Writable, Writable>( path, true, conf); Iterator<Pair<Writable, Writable>> iterator = iterable.iterator(); long i = 0; while (iterator.hasNext() && (numItems == null || itemCount < numItems)) { Pair<Writable, Writable> record = iterator.next(); Writable keyWritable = record.getFirst(); Writable valueWritable = record.getSecond(); if (printKey) { Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable; writer.write(notTheVectorWritable.toString()); writer.write('\t'); } Vector vector; try { vector = ((VectorWritable) (transposeKeyValue ? keyWritable : valueWritable)).get(); } catch (ClassCastException e) { if ((transposeKeyValue ? keyWritable : valueWritable) instanceof WeightedPropertyVectorWritable) { vector = ((WeightedPropertyVectorWritable) (transposeKeyValue ? 
keyWritable : valueWritable)).getVector(); } else { throw e; } } if (filters != null && vector instanceof NamedVector && !filters.contains(((NamedVector) vector).getName())) { // we are filtering out this item, skip continue; } if (sizeOnly) { if (vector instanceof NamedVector) { writer.write(((NamedVector) vector).getName()); writer.write(":"); } else { writer.write(String.valueOf(i++)); writer.write(":"); } writer.write(String.valueOf(vector.size())); writer.write('\n'); } else if (nameOnly) { if (vector instanceof NamedVector) { writer.write(((NamedVector) vector).getName()); writer.write('\n'); } } else { String fmtStr; if (useCSV) { fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments); } else { fmtStr = VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector, sortVectors); } writer.write(fmtStr); writer.write('\n'); } itemCount++; } } writer.flush(); } finally { if (shouldClose) { Closeables.close(writer, false); } } return 0; }
From source file:de.tuberlin.dima.cuttlefish.TrainingDataReader.java
License:Open Source License
public static void main(String[] args) { //----------------------------------------------------------------------------- String documentVectorsFile = "/home/ssc/Desktop/cuttlefish/output/vectors/documentVectors.seq"; //----------------------------------------------------------------------------- Configuration conf = new Configuration(); int n = 0;/*ww w. ja va 2 s . c om*/ for (Pair<IDAndCodes, VectorWritable> labeledArticle : new SequenceFileIterable<IDAndCodes, VectorWritable>( new Path(documentVectorsFile), conf)) { System.out.println("ID: " + labeledArticle.getFirst().id()); Vector features = labeledArticle.getSecond().get(); System.out.println("Features: " + features.getNumNondefaultElements() + " of " + features.size()); Multimap<String, String> codes = labeledArticle.getFirst().codes(); for (Map.Entry<String, String> codeEntry : codes.entries()) { System.out.println("\t" + codeEntry.getKey() + "=" + codeEntry.getValue()); } if (n++ == 10) { break; } } }
From source file:edu.indiana.d2i.htrc.vecproj.TestRandomProjection.java
License:Apache License
@Test public void testProject() { long t0 = System.nanoTime(); random = Functions.random();/*from w w w . j ava2s .c o m*/ for (int i = 0; i < 100; i++) { // test 100 times DenseVector vector = new DenseVector(originalDim); vector.assign(random); Vector projected = projector.project(vector); Assert.assertEquals(reducedDim, projected.size()); } long t1 = System.nanoTime(); System.out.println("elapsed " + ((double) (t1 - t0)) / 1e9); }
From source file:edu.rosehulman.mahout.math.VectorWritable.java
License:Apache License
public static void writeVector(DataOutput out, Vector vector, boolean laxPrecision) throws IOException { boolean dense = vector.isDense(); boolean sequential = vector.isSequentialAccess(); boolean named = vector instanceof NamedVector; out.writeByte((dense ? FLAG_DENSE : 0) | (sequential ? FLAG_SEQUENTIAL : 0) | (named ? FLAG_NAMED : 0) | (laxPrecision ? FLAG_LAX_PRECISION : 0)); Varint.writeUnsignedVarInt(vector.size(), out); if (dense) {// ww w.j av a2 s . c om for (Vector.Element element : vector.all()) { if (laxPrecision) { out.writeFloat((float) element.get()); } else { out.writeDouble(element.get()); } } } else { Varint.writeUnsignedVarInt(vector.getNumNonZeroElements(), out); Iterator<Element> iter = vector.nonZeroes().iterator(); if (sequential) { int lastIndex = 0; while (iter.hasNext()) { Vector.Element element = iter.next(); if (element.get() == 0) { continue; } int thisIndex = element.index(); // Delta-code indices: Varint.writeUnsignedVarInt(thisIndex - lastIndex, out); lastIndex = thisIndex; if (laxPrecision) { out.writeFloat((float) element.get()); } else { out.writeDouble(element.get()); } } } else { while (iter.hasNext()) { Vector.Element element = iter.next(); if (element.get() == 0) { // TODO(robinanil): Fix the damn iterator for the zero element. continue; } Varint.writeUnsignedVarInt(element.index(), out); if (laxPrecision) { out.writeFloat((float) element.get()); } else { out.writeDouble(element.get()); } } } } if (named) { String name = ((NamedVector) vector).getName(); out.writeUTF(name == null ? "" : name); } }