MapReduce: Friend Recommendation
Create the dataset on the cluster with vim file.txt, and be careful not to leave any blank lines.
The first name on each line is the person; the remaining names are that person's friends.
tom hello hadoop cat
world hadoop hello hive
cat tom hive
mr hive hello
hive cat hadoop world hello mr
hadoop tom hive world
hello tom world hive mr
Approach (take people A B C D E F):
1. Find the number of common friends between A and each of B, C, D, E, F:
   a) A-B NUM1, A-C NUM2, A-D NUM3, A-E NUM4, A-F NUM5
2. Sort B, C, D, E, F by their common-friend counts:
   a) A-E NUM4, A-D NUM3, A-C NUM2, A-F NUM5, A-B NUM1
The code in this post implements step 1 (counting common friends); step 2 would be a second, sorting job.
For the first input line the mapper emits key-value pairs such as <tom_hello,R> and <hello_hadoop,G>, and the reducer then counts how many values arrive for each key. The same pair can appear in either order (hello-hadoop from one line, hadoop-hello from another), so the getNames() method in the mapper below compares the two names with compareTo() and rewrites every pair into a single canonical key. The marker R tags a direct friendship (a pair involving the line's owner); G tags a friend-of-friend candidate.
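For example, the first line "tom hello hadoop cat" produces these six pairs (keys already normalized by getNames()):
tom_hello R
tom_hadoop R
tom_cat R
hello_hadoop G
hello_cat G
hadoop_cat G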
Mapper
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class FOFMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // e.g. {"tom", "hello", "hadoop", "cat"}: the owner of the line followed by his friends
        String[] names = line.split(" ");
        for (int j = 0; j < names.length; j++) {
            for (int i = j + 1; i < names.length; i++) {
                if (j == 0) {
                    // Pair involves the line's owner: a direct friendship, marked "R"
                    context.write(new Text(getNames(names[j], names[i])), new Text("R"));
                } else {
                    // Pair of two of the owner's friends: a friend-of-friend candidate, marked "G"
                    context.write(new Text(getNames(names[j], names[i])), new Text("G"));
                }
            }
        }
    }

    // Normalize the pair so that <hello-hadoop> and <hadoop-hello> map to the same key for counting
    private String getNames(String namea, String nameb) {
        int result = namea.compareTo(nameb);
        if (result > 0) {
            return namea + "_" + nameb;
        }
        return nameb + "_" + namea;
    }
}
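A quick standalone check of the normalization (a minimal sketch; the GetNamesDemo class is hypothetical and only duplicates the mapper's helper so it can run without Hadoop):
public class GetNamesDemo {
    // Same logic as FOFMapper.getNames(), copied here for a local test
    static String getNames(String namea, String nameb) {
        return namea.compareTo(nameb) > 0 ? namea + "_" + nameb : nameb + "_" + namea;
    }

    public static void main(String[] args) {
        System.out.println(getNames("hello", "hadoop")); // hello_hadoop
        System.out.println(getNames("hadoop", "hello")); // hello_hadoop -- same key either way
    }
}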
Reducer
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class FOFReducer extends Reducer<Text, Text, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (Text value : values) {
            String val = value.toString();
            // An "R" means the pair are already direct friends; never recommend them, so skip the key entirely
            if ("R".equals(val)) {
                return;
            }
            sum++;
        }
        // sum is the number of common friends shared by this pair
        context.write(key, new IntWritable(sum));
    }
}
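For the sample file.txt above, the job should produce one line per non-friend pair with its common-friend count (key and count tab-separated; with a single reducer the keys come out sorted):
hadoop_cat	2
hello_cat	2
hello_hadoop	3
mr_cat	1
mr_hadoop	1
tom_hive	3
tom_mr	1
world_cat	1
world_mr	2
world_tom	2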
MainClass
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MainClass {

    public static void main(String[] args) throws Exception {
        if (args == null || args.length != 2) {
            System.out.println("Usage: yarn jar myfof.jar xpu.com.mr.MainClass <inputPath> <outputPath>");
            System.exit(1);
        }

        Configuration conf = new Configuration(true);
        // To force local execution for testing, set this BEFORE Job.getInstance(conf);
        // the Job copies the configuration, so setting it afterwards has no effect:
        // conf.set("mapreduce.framework.name", "local");
        Job job = Job.getInstance(conf);
        job.setJobName("friend-recommendation-common-friends");
        job.setJarByClass(MainClass.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(FOFMapper.class);
        job.setReducerClass(FOFReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.waitForCompletion(true);
    }
}
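To run the job (a sketch; the HDFS paths /data/fof/input and /data/fof/output are hypothetical, and the output directory must not already exist):
hdfs dfs -mkdir -p /data/fof/input
hdfs dfs -put file.txt /data/fof/input
yarn jar myfof.jar xpu.com.mr.MainClass /data/fof/input /data/fof/output
hdfs dfs -cat /data/fof/output/part-r-00000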