public class SecondarySort {

    /**
     * Composite key holding two strings, {@code first} and {@code second}.
     *
     * <p>A custom key class should implement {@code WritableComparable}. Despite the
     * class name, both components are stored and compared as strings.
     */
    public static class IntPair implements WritableComparable<IntPair> {
        String first;
        String second;

        /**
         * Set the left and right values.
         *
         * @param left  value stored as {@code first}
         * @param right value stored as {@code second}
         */
        public void set(String left, String right) {
            first = left;
            second = right;
        }

        /** @return the first (left) component of the key */
        public String getFirst() {
            return first;
        }

        /** @return the second (right) component of the key */
        public String getSecond() {
            return second;
        }

        /**
         * Deserialization: rebuilds this pair from the binary stream, reading
         * {@code first} and then {@code second} in the order {@link #write} emits them.
         *
         * @param in stream to read from
         * @throws IOException if reading from the stream fails
         */
        public void readFields(DataInput in) throws IOException {
            first = in.readUTF();
            second = in.readUTF();
        }

        /**
         * Serialization: writes {@code first} and then {@code second} to the binary stream.
         *
         * <p>Both fields must be non-null. {@code DataOutput.writeUTF} also rejects
         * strings whose encoded form exceeds 65535 bytes.
         *
         * @param out stream to write to
         * @throws IOException if writing to the stream fails
         */
        public void write(DataOutput out) throws IOException {
            out.writeUTF(first);
            out.writeUTF(second);
        }

        /**
         * Compares composite keys: by {@code first}, then by {@code second} when the
         * first components are equal. This is the default ordering of the key; the
         * secondary sort within a group relies on it implicitly.
         *
         * @param o the key to compare against
         * @return negative, zero, or positive following {@link String#compareTo}
         */
        public int compareTo(IntPair o) {
            // String.compareTo returns 0 exactly when the strings are equal, so a
            // non-zero result on `first` decides the ordering on its own.
            int byFirst = first.compareTo(o.first);
            return byFirst != 0 ? byFirst : second.compareTo(o.second);
        }

        // The two methods below should be overridden by any newly defined key class.

        /**
         * The hashCode() method is used by the HashPartitioner (the default
         * partitioner in MapReduce).
         */
        @Override
        public int hashCode() {
            return first.hashCode() * 157 + second.hashCode();
        }

        /**
         * Two pairs are equal when both components are equal.
         *
         * @param right object to compare with; {@code null} or a non-{@code IntPair} yields {@code false}
         */
        @Override
        public boolean equals(Object right) {
            if (this == right) {
                return true;
            }
            // instanceof is false for null, so no separate null check is needed.
            if (!(right instanceof IntPair)) {
                return false;
            }
            IntPair r = (IntPair) right;
            return r.first.equals(first) && r.second.equals(second);
        }
    }

    /**
     * Partitioner class. The partition is determined by {@code first} only, so every
     * key sharing the same first component lands in the same partition.
     */
    public static class FirstPartitioner extends Partitioner<IntPair, Text> {

        /**
         * @param key           composite key; only {@code getFirst()} is consulted
         * @param value         unused
         * @param numPartitions number of partitions
         * @return a partition index in {@code [0, numPartitions)}
         */
        public int getPartition(IntPair key, Text value, int numPartitions) {
            int scaledHash = key.getFirst().hashCode() * 127;
            // Math.abs(Integer.MIN_VALUE) is still Integer.MIN_VALUE, which would make
            // the remainder negative (an illegal partition). Map that single value to
            // 0; every other hash keeps the partition it was assigned before.
            int nonNegative = (scaledHash == Integer.MIN_VALUE) ? 0 : Math.abs(scaledHash);
            return nonNegative % numPartitions;
        }
    }
/** * 分组函数类。只要first相同就属于同一个组。 */ /*//第一种方法,实现接口RawComparator public static class GroupingComparator implements RawComparator<IntPair> { public int compare(IntPair o1, IntPair o2) { int l = o1.getFirst(); int r = o2.getFirst(); return l == r ? 0 : (l < r ? -1 : 1); } //一个字节一个字节的比,直到找到一个不相同的字节,然后比这个字节的大小作为两个字节流的大小比较结果。 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2){ return WritableComparator.compareBytes(b1, s1, Integer.SIZE/8, b2, s2, Integer.SIZE/8); } }*/ //第二种方法,继承WritableComparator public static class GroupingComparator extends WritableComparator { protected GroupingComparator() { super(IntPair.class, true); } //Compare two WritableComparables. // 重载 compare:对组合键按第一个自然键排序分组 public int compare(WritableComparable w1, WritableComparable w2) { IntPair ip1 = (IntPair) w1; IntPair ip2 = (IntPair) w2; String l = ip1.getFirst(); String r = ip2.getFirst(); return l.compareTo(r); } }
// 自定义map public static class Map extends Mapper<LongWritable, Text, IntPair, Text> { private final IntPair keyPair = new IntPair(); String[] lineArr = null; public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); lineArr = line.split(" ", -1); keyPair.set(lineArr[0], lineArr[1]); context.write(keyPair, value); } } // 自定义reduce // public static class Reduce extends Reducer<IntPair, Text, Text, Text> { private static final Text SEPARATOR = new Text("------------------------------------------------");
public void reduce(IntPair key, Iterable<Text> values,Context context) throws IOException, InterruptedException { context.write(SEPARATOR, null); for (Text val : values) { context.write(null, val); } } }