Hadoop小文件操作之SequenceFile

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWrite {

public static void main(String[] args) throws IOException {

String src = "E:\\test\\spring3_MVC.docx";

InputStream in = new BufferedInputStream(new FileInputStream(src));

String uri = "hdfs://localhost:9000/home/hdfs/spring.seq";

Configuration conf = new Configuration();

FileSystem fs = FileSystem.get(URI.create(uri), conf);

Path path = new Path(uri);

Text key = new Text();  

Text value = new Text();

SequenceFile.Writer writer = null;   

try {

//返回一个SequenceFile.Writer实例 需要数据流和path对象 将数据写入了path对象

writer = SequenceFile.createWriter(fs, conf, path,key.getClass(), value.getClass()); 

int len = 0;

byte[] buff = new byte[1024];

key.set("spring.docx");

while ((len = in.read(buff))!= -1) {

value.set(buff,0,len);

writer.append(key, value);//将每条记录追加到SequenceFile.Writer实例的末尾   

value.clear();

}

} finally {

IOUtils.closeStream(writer);

IOUtils.closeStream(in);

}

}

}

读取文件:

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.OutputStream;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IOUtils;

import org.apache.hadoop.io.SequenceFile;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.Writable;

import org.apache.hadoop.util.ReflectionUtils;

public class SequenceFileReader {

public static void main(String[] args) throws IOException {  

String uri = "hdfs://localhost:9000/home/hdfs/spring.seq";  

Configuration conf = new Configuration();  

FileSystem fs = FileSystem.get(URI.create(uri), conf);  

Path path = new Path(uri);   

SequenceFile.Reader reader = null;  

String dst = "e:\\test\\spring.docx";   

OutputStream out = null;

try {  

reader = new SequenceFile.Reader(fs, path, conf);

//返回 SequenceFile.Reader 对象       getKeyClass()获得Sequence中使用的类型  

Writable key = (Writable)  ReflectionUtils.newInstance(reader.getKeyClass(), conf);

out =new  FileOutputStream(dst);

Text  value = new Text();

while (reader.next(key, value)) { //next()方法迭代读取记录 直到读完返回false  

System.out.println(key);

out.write(value.getBytes(),0,value.getLength());//这个长度一定要添加,否则不兼容office2007

value.clear();  //记着清除一下,不然可能会出现多余的输出     

}  

out.flush();

} finally {  

IOUtils.closeStream(reader);  

IOUtils.closeStream(out);

}  

}

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:http://www.heiqu.com/0e365ddcd90ceb5d1bacf806f0298484.html