- 论坛徽章:
- 0
|
示例看看吧
1. 读取 HDFS 文件,处理后写入到 SequoiaDB 中去:
public class HdfsSequoiadbMR {
static class MobileMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
private static final IntWritable ONE=new IntWritable(1);
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String valueStr=value.toString();
String mobile_prefix=valueStr.split(",")[3].substring(0,3);
context.write(new Text(mobile_prefix), ONE);
}
}
static class MobileReducer extends Reducer<Text, IntWritable, NullWritable, BSONWritable>{
@Override
protected void reduce(Text key, Iterable<IntWritable> values,Context context)
throws IOException, InterruptedException {
Iterator<IntWritable> iterator=values.iterator();
long sum=0;
while(iterator.hasNext()){
sum+=iterator.next().get();
}
BSONObject bson=new BasicBSONObject();
bson.put("prefix", key.toString());
bson.put("count", sum);
context.write(null,new BSONWritable(bson));
}
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
if(args.length<1){
System.out.print("please set input path ");
System.exit(1);
}
Configuration conf=new Configuration();
conf.addResource("sequoiadb-hadoop.xml"); //加载配置文件
Job job=Job.getInstance(conf);
job.setJarByClass(HdfsSequoiadbMR.class);
job.setJobName("HdfsSequoiadbMR");
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(SequoiadbOutputFormat.class); //reduce 输出写入到 SequoiaDB 中
TextInputFormat.setInputPaths(job, new Path(args[0]));
job.setMapperClass(MobileMapper.class);
job.setReducerClass(MobileReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(BSONWritable.class);
job.waitForCompletion(true);
}
}
2. 读取 SequoiaDB 中数据处理后写入到 HDFS 中。
/**
 * MapReduce job that reads BSON records through {@code SequoiadbInputFormat}, counts
 * the records per {@code province_code}, and writes the counts as text to HDFS.
 *
 * <p>Usage: the first command-line argument is the HDFS output directory; results are
 * written to its {@code result} subdirectory.
 *
 * @author gaoshengjie
 */
public class SequoiadbHdfsMR {

    /** Counter group used to report records that could not be processed. */
    private static final String COUNTER_GROUP = "SequoiadbHdfsMR";

    /** Emits {@code (province_code, 1)} for every input record. */
    static class ProvinceMapper extends Mapper<Object, BSONObject, IntWritable, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        /** Output key instance reused across calls instead of allocating one per record. */
        private final IntWritable province = new IntWritable();

        /**
         * Reads the {@code province_code} field of one record and emits it with a count
         * of one.
         *
         * <p>Records whose {@code province_code} is missing or is not an {@code Integer}
         * are skipped and counted under the {@code INVALID_PROVINCE_CODE} counter rather
         * than failing the task with a NullPointerException or ClassCastException.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one input record
         * @param context task context used to emit output and update counters
         */
        @Override
        protected void map(Object key, BSONObject value, Context context)
                throws IOException, InterruptedException {
            Object code = value.get("province_code");
            if (!(code instanceof Integer)) {
                context.getCounter(COUNTER_GROUP, "INVALID_PROVINCE_CODE").increment(1);
                return;
            }
            province.set((Integer) code);
            context.write(province, ONE);
        }
    }

    /** Sums the counts of each province code. */
    static class ProvinceReducer
            extends Reducer<IntWritable, IntWritable, IntWritable, LongWritable> {

        /**
         * @param key     the province code
         * @param values  the partial counts emitted for this province code
         * @param context task context used to emit the total
         */
        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            context.write(key, new LongWritable(sum));
        }
    }

    /**
     * Configures and runs the job, then exits with status 0 when the job succeeded
     * and 1 otherwise.
     *
     * @param args {@code args[0]} is the HDFS output directory
     */
    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        if (args.length < 1) {
            System.out.print("please set output path ");
            System.exit(1);
        }
        Configuration conf = new Configuration();
        conf.addResource("sequoiadb-hadoop.xml");
        Job job = Job.getInstance(conf);
        job.setJarByClass(SequoiadbHdfsMR.class);
        job.setJobName("SequoiadbHdfsMR");
        job.setInputFormatClass(SequoiadbInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[0] + "/result"));
        job.setMapperClass(ProvinceMapper.class);
        job.setReducerClass(ProvinceReducer.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(LongWritable.class);
        // Propagate job failure to the caller through the process exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
配置信息:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Hadoop-style configuration listing for the SequoiaDB example jobs.
     NOTE(review): both example jobs call conf.addResource("sequoiadb-hadoop.xml"),
     so this is presumably the content of that file - confirm.
     The property meanings below are inferred from their names; verify them against
     the SequoiaDB Hadoop connector documentation. -->
<configuration>
<!-- presumably the host:port of the SequoiaDB node that input is read from - TODO confirm -->
<property>
<name>sequoiadb.input.url</name>
<value>localhost:11810</value>
</property>
<!-- presumably the host:port of the SequoiaDB node that output is written to - TODO confirm -->
<property>
<name>sequoiadb.output.url</name>
<value>localhost:11810</value>
</property>
<!-- presumably the collection space holding the input collection - TODO confirm -->
<property>
<name>sequoiadb.in.collectionspace</name>
<value>default</value>
</property>
<!-- presumably the name of the input collection - TODO confirm -->
<property>
<name>sequoiadb.in.collect</name>
<value>student</value>
</property>
<!-- presumably the collection space holding the output collection - TODO confirm -->
<property>
<name>sequoiadb.out.collectionspace</name>
<value>default</value>
</property>
<!-- presumably the name of the output collection - TODO confirm -->
<property>
<name>sequoiadb.out.collect</name>
<value>result</value>
</property>
<!-- presumably the number of records sent per bulk insert - TODO confirm -->
<property>
<name>sequoiadb.out.bulknum</name>
<value>10</value>
</property>
</configuration> |
|