List of usage examples for org.apache.hadoop.io.Text.readString
public static String readString(DataInput in) throws IOException
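Text.readString reads a vInt-encoded length followed by that many bytes of UTF-8 data, which is exactly the framing produced by Text.writeString. A minimal round-trip sketch before the examples below (the class and buffer setup here are illustrative, not taken from any of the source files):

import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class ReadStringRoundTrip {
    public static void main(String[] args) throws IOException {
        // Write a string with Text.writeString: vInt length + UTF-8 bytes.
        DataOutputBuffer out = new DataOutputBuffer();
        Text.writeString(out, "hello, hadoop");

        // Point a DataInput at the same bytes.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());

        // readString decodes the same framing back into a String.
        String s = Text.readString(in);
        System.out.println(s); // prints: hello, hadoop
    }
}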
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java
License:Apache License
/**
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(GridHadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf());
         FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {
        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    }
    catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java
License:Apache License
/** {@inheritDoc} */
@Override public Collection<HadoopInputSplit> input() throws IgniteCheckedException {
    Thread.currentThread().setContextClassLoader(jobConf.getClassLoader());

    try {
        String jobDirPath = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (jobDirPath == null) {
            // Probably the job was not submitted by the hadoop client.
            // Assume that we have the needed classes and try to generate input splits ourselves.
            if (jobConf.getUseNewMapper())
                return HadoopV2Splitter.splitJob(jobCtx);
            else
                return HadoopV1Splitter.splitJob(jobConf);
        }

        Path jobDir = new Path(jobDirPath);

        try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf)) {
            JobSplit.TaskSplitMetaInfo[] metaInfos = SplitMetaInfoReader.readSplitMetaInfo(hadoopJobID, fs, jobConf, jobDir);

            if (F.isEmpty(metaInfos))
                throw new IgniteCheckedException("No input splits found.");

            Path splitsFile = JobSubmissionFiles.getJobSplitFile(jobDir);

            try (FSDataInputStream in = fs.open(splitsFile)) {
                Collection<HadoopInputSplit> res = new ArrayList<>(metaInfos.length);

                for (JobSplit.TaskSplitMetaInfo metaInfo : metaInfos) {
                    long off = metaInfo.getStartOffset();

                    String[] hosts = metaInfo.getLocations();

                    in.seek(off);

                    String clsName = Text.readString(in);

                    HadoopFileBlock block = HadoopV1Splitter.readFileBlock(clsName, in, hosts);

                    if (block == null)
                        block = HadoopV2Splitter.readFileBlock(clsName, in, hosts);

                    res.add(block != null ? block : new HadoopExternalSplit(hosts, off));
                }

                return res;
            }
        }
        catch (Throwable e) {
            throw transformException(e);
        }
    }
    finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java
License:Apache License
/**
 * @param split External split.
 * @return Native input split.
 * @throws IgniteCheckedException If failed.
 */
@SuppressWarnings("unchecked")
private Object readExternalSplit(HadoopExternalSplit split) throws IgniteCheckedException {
    Path jobDir = new Path(jobConf().get(MRJobConfig.MAPREDUCE_JOB_DIR));

    try (FileSystem fs = FileSystem.get(jobDir.toUri(), jobConf());
         FSDataInputStream in = fs.open(JobSubmissionFiles.getJobSplitFile(jobDir))) {
        in.seek(split.offset());

        String clsName = Text.readString(in);

        Class<?> cls = jobConf().getClassByName(clsName);

        assert cls != null;

        Serialization serialization = new SerializationFactory(jobConf()).getSerialization(cls);

        Deserializer deserializer = serialization.getDeserializer(cls);

        deserializer.open(in);

        Object res = deserializer.deserialize(null);

        deserializer.close();

        assert res != null;

        return res;
    }
    catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    }
}
From source file:org.apache.kylin.source.kafka.hadoop.KafkaInputSplit.java
License:Apache License
public void readFields(DataInput in) throws IOException {
    brokers = Text.readString(in);
    topic = Text.readString(in);
    partition = in.readInt();
    offsetStart = in.readLong();
    offsetEnd = in.readLong();
}
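The Writable contract requires a write(DataOutput) that mirrors this field order. KafkaInputSplit's actual write method is not shown on this page; a plausible counterpart using Text.writeString would look like this:

// Hypothetical write counterpart mirroring the readFields order above;
// the real KafkaInputSplit.write may differ in detail.
public void write(DataOutput out) throws IOException {
    Text.writeString(out, brokers);
    Text.writeString(out, topic);
    out.writeInt(partition);
    out.writeLong(offsetStart);
    out.writeLong(offsetEnd);
}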
From source file:org.apache.mrql.MR_string.java
License:Apache License
final public static MR_string read(DataInput in) throws IOException { return new MR_string(Text.readString(in)); }
From source file:org.apache.mrql.MR_string.java
License:Apache License
public void readFields(DataInput in) throws IOException { value = Text.readString(in); }
From source file:org.apache.nutch.crawl.Inlink.java
License:Apache License
public void readFields(DataInput in) throws IOException { fromUrl = Text.readString(in); anchor = Text.readString(in); }
From source file:org.apache.nutch.crawl.MapWritable.java
License:Apache License
public void readFields(DataInput in) throws IOException {
    clear();
    fSize = in.readInt();
    if (fSize > 0) {
        // read class-id map
        fIdCount = in.readByte();
        byte id;
        Class clazz;
        for (int i = 0; i < fIdCount; i++) {
            try {
                id = in.readByte();
                clazz = Class.forName(Text.readString(in));
                addIdEntry(id, clazz);
            } catch (Exception e) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Unable to load internal map entry" + e.toString());
                }
                fIdCount--;
            }
        }
        KeyValueEntry entry;
        for (int i = 0; i < fSize; i++) {
            try {
                entry = getKeyValueEntry(in.readByte(), in.readByte());
                entry.fKey.readFields(in);
                entry.fValue.readFields(in);
                if (fFirst == null) {
                    fFirst = fLast = entry;
                } else {
                    fLast = fLast.fNextEntry = entry;
                }
            } catch (IOException e) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Unable to load meta data entry, ignoring.. : " + e.toString());
                }
                fSize--;
            }
        }
    }
}
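The class-id map deserialized above pairs each byte id with a class restored via Class.forName(Text.readString(in)), so the write side has to store each class by name. A hedged sketch of that fragment (not Nutch's actual MapWritable.write):

// Hypothetical helper mirroring the id-map framing read above: each entry
// is a byte id followed by the class name written with Text.writeString.
private static void writeIdEntry(DataOutput out, byte id, Class<?> clazz) throws IOException {
    out.writeByte(id);
    Text.writeString(out, clazz.getName());
}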
From source file:org.apache.nutch.fetch.data.FetchEntry.java
License:Apache License
@Override public void readFields(DataInput in) throws IOException { key = Text.readString(in); page = IOUtils.deserialize(getConf(), in, null, WebPage.class); }
From source file:org.apache.nutch.hostdb.HostDatum.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
    score = in.readFloat();
    lastCheck = new Date(in.readLong());
    homepageUrl = Text.readString(in);
    dnsFailures = in.readLong();
    connectionFailures = in.readLong();
    unfetched = in.readLong();
    fetched = in.readLong();
    notModified = in.readLong();
    redirTemp = in.readLong();
    redirPerm = in.readLong();
    gone = in.readLong();
    metaData = new org.apache.hadoop.io.MapWritable();
    metaData.readFields(in);
}