如何用Hadoop计算平均值

如何用Hadoop计算平均值

数据 data.txt
a 2
a 3
a 4
b 5
b 6
b 7

代码
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;public class Average { public static class TokenizerMapper extends
Mapper<Object, Text, Text, Text> { private final static IntWritable one = new IntWritable（1）;
private Text word = new Text（）; public void map（Object key, Text value, Context context）
throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer（value.toString（））;
while （itr.hasMoreTokens（）） {
word.set（itr.nextToken（））;
if （itr.hasMoreTokens（）） {
context.write（word, new Text（itr.nextToken（） + ",1"））;
}
}
}
} static class AverageCombine extends Reducer<Text, Text, Text, Text> {
public void reduce（Text key, Iterable<Text> values, Context context）
throws IOException, InterruptedException {
int sum = 0, cnt = 0;
for （Text val : values） {
String[] s1 = val.toString（）.split（","）;
sum += Integer.parseInt（s1[0]）;
cnt += Integer.parseInt（s1[1]）;
}
String s;
System.out.println（"Combine" + （s = new String（sum + "," + cnt）））;
context.write（key, new Text（new String（sum + "," + cnt）））;
}
} static class AverageReducer extends
Reducer<Text, Text, Text, DoubleWritable> {
public void reduce（Text key, Iterable<Text> values, Context context）
throws IOException, InterruptedException {
int sum = 0, cnt = 0;
for （Text val : values） {
String[] s = val.toString（）.split（","）;
sum += Integer.parseInt（s[0]）;
cnt += Integer.parseInt（s[1]）;
}
String s;
System.out.println（"reduce"
+ （s = new String（key + "," + （sum * 1.0 / cnt））））;
context.write（key, new DoubleWritable（sum * 1.0 / cnt））;
}
} public static void main（String[] args） throws Exception {
Configuration conf = new Configuration（）;
String[] otherArgs = args;
if （otherArgs.length ！= 2） {
System.err.println（"Usage:Data Average <in> <out>"）;
System.exit（2）;
}
Job job = new Job（conf, "Data Average"）;
job.setJarByClass（Average.class）;
job.setMapperClass（TokenizerMapper.class）;
job.setCombinerClass（AverageCombine.class）;
job.setReducerClass（AverageReducer.class）;
job.setOutputKeyClass（Text.class）;
job.setOutputValueClass（Text.class）;
FileInputFormat.addInputPath（job, new Path（otherArgs[0]））;
FileOutputFormat.setOutputPath（job, new Path（otherArgs[1]））;
System.exit（job.waitForCompletion（true）？ 0 : 1）;
}
}执行bin/hadoop jar Average.jar Average data.txt out结果a 3.0
b 6.0

更多Hadoop相关信息见Hadoop 专题页面 http://www.linuxidc.com/topicnews.aspx?tid=13