今天尝试用C语言在Hadoop上编写统计单词的程序,具体过程如下:一、编写map和reduce程序mapper.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SIZE 2048

/*
 * Hadoop Streaming mapper for word count.
 * Reads text lines from stdin, splits each line into whitespace-separated
 * words, and emits one "word 1" record per word on stdout.
 */
int main(int argc, char *argv[])
{
    char buffer[BUF_SIZE];

    while (fgets(buffer, sizeof buffer, stdin)) {
        size_t len = strlen(buffer);

        /* Strip the trailing newline fgets keeps. Compare against the
         * character '\n' (the original compared against a string literal,
         * which compares pointers and is never the intended test). Guard
         * len > 0 so an empty read cannot index buffer[-1]. */
        if (len > 0 && buffer[len - 1] == '\n')
            buffer[len - 1] = '\0';

        /* Tokenize on spaces/tabs; '\r' included so CRLF input does not
         * leave a stray carriage return glued to the last word. */
        for (char *word = strtok(buffer, " \t\r"); word != NULL;
             word = strtok(NULL, " \t\r")) {
            printf("%s 1\n", word);
        }
    }
    return 0;
}
reducer.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUFFER_SIZE 1024
#define DELIM " \n"

/*
 * Hadoop Streaming reducer for word count.
 * Input lines arrive on stdin sorted by key, formatted "key count".
 * Consecutive lines with the same key have their counts summed; a
 * "key total" line is printed each time the key changes, and once
 * more at EOF for the final key.
 */
int main(int argc, char *argv[])
{
    char str_last_key[BUFFER_SIZE];
    char str_line[BUFFER_SIZE];
    int count = 0;
    int have_key = 0;   /* explicit "seen first key" flag replaces the
                         * broken ' ' sentinel of the original, which
                         * assigned a string literal to a char. */

    while (fgets(str_line, sizeof str_line, stdin)) {
        char *cur_key = strtok(str_line, DELIM);
        /* strtok returns NULL on blank lines; guard before the second
         * call and before strcmp/strcpy, which are UB on NULL. */
        char *cur_num = (cur_key != NULL) ? strtok(NULL, DELIM) : NULL;

        if (cur_key == NULL || cur_num == NULL)
            continue; /* skip blank or malformed records */

        if (!have_key) {
            /* First record: start the running total for this key. */
            strcpy(str_last_key, cur_key);
            count = atoi(cur_num);
            have_key = 1;
        } else if (strcmp(cur_key, str_last_key) != 0) {
            /* Key changed: flush the previous key's total, restart. */
            printf("%s %d\n", str_last_key, count);
            strcpy(str_last_key, cur_key);
            count = atoi(cur_num);
        } else {
            /* Same key: accumulate. */
            count += atoi(cur_num);
        }
    }

    /* Flush the last key only if at least one record was read; the
     * original printed garbage from an uninitialized buffer on empty
     * input. */
    if (have_key)
        printf("%s %d\n", str_last_key, count);
    return 0;
}
二、编译：gcc mapper.c -o mapper；gcc reducer.c -o reducer。三、运行：(一)启动hadoop后将待统计单词的输入文件放到 input文件夹中：bin/hadoop fs -put 待统计文件 input(二)使用contrib/streaming/下的jar工具调用上面的mapper和reducer:bin/hadoop jar /home/huangkq/Desktop/hadoop/contrib/streaming/hadoop-streaming-0.20.203.0.jar -mapper /home/huangkq/Desktop/hadoop2/mapper -reducer /home/huangkq/Desktop/hadoop2/reducer -input input -output c_output -jobconf mapred.reduce.tasks=2
说明:hadoop-streaming-0.20.203.0.jar是一个管道工具(三)查看结果:bin/hadoop fs -cat c_output/*
更多Hadoop相关信息见Hadoop 专题页面 http://www.linuxidc.com/topicnews.aspx?tid=13