Problem: for each US state, find the county with the highest confirmed case count
1. Approach: this problem needs both a custom sort and a custom grouping comparator (a classic MapReduce secondary sort). Each input record is emitted as a composite key; the sort comparator orders keys by state, and within the same state by confirmed cases in descending order. The grouping comparator then groups keys by state only, so each reduce call sees one state's records with the highest-case county first; writing just the incoming key yields the top-1 county per state.
2. Dataset:
Link: https://pan.baidu.com/s/1vfY_sTi0iV6ZrYI5MYtkvw
Extraction code: vvpy
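The mapper below assumes the CSV follows the NYT us-counties layout, i.e. date,county,state,fips,cases,deaths (so b[1] is the county, b[2] the state, and b[4] the case count). A couple of illustrative rows under that assumption (the dates and counts here are made up):

2020-03-01,Los Angeles,California,06037,20,0
2020-03-01,King,Washington,53033,30,1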
3. The code:
package uscovid;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class us {
    public static class MyMapper extends Mapper<LongWritable, Text, TopBean, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, TopBean, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // Expected columns: date,county,state,fips,cases,deaths
            String[] b = value.toString().split(",");
            // Skip the header line and any malformed rows
            if (b.length < 5 || !b[4].matches("\\d+")) {
                return;
            }
            // Composite key (state, county, cases); the value carries nothing
            context.write(new TopBean(b[2], b[1], Integer.parseInt(b[4])), NullWritable.get());
        }
    }
    public static class MyReduce extends Reducer<TopBean, NullWritable, TopBean, NullWritable> {
        @Override
        protected void reduce(TopBean k2, Iterable<NullWritable> values,
                Reducer<TopBean, NullWritable, TopBean, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // One call per state (thanks to the grouping comparator); k2 arrives as that
            // state's highest-case record, so writing it once gives the top-1 county
            context.write(k2, NullWritable.get());
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Job configuration
        Configuration conf = new Configuration();
        // Create the job
        Job job = Job.getInstance(conf, us.class.getSimpleName());
        // Locate the jar by class
        job.setJarByClass(us.class);
        // Input path
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Mapper class and its output key/value types
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(TopBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Reducer class and the job's output key/value types
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(TopBean.class);
        job.setOutputValueClass(NullWritable.class);
        // Grouping comparator: group map output keys by state only
        job.setGroupingComparatorClass(GroupComparator.class);
        // Output path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
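To run the job, package the classes into a jar and submit it with input and output paths; the jar name and HDFS paths below are placeholders, so substitute your own:

hadoop jar uscovid.jar uscovid.us /input/us-counties.csv /output/top1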
package uscovid;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
// Grouping comparator: keys that share a state fall into the same reduce group
public class GroupComparator extends WritableComparator {
    protected GroupComparator() {
        // true: let WritableComparator instantiate TopBean objects for deserialization
        super(TopBean.class, true);
    }
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TopBean abean = (TopBean) a;
        TopBean bbean = (TopBean) b;
        return abean.getState().compareTo(bbean.getState());
    }
}
package uscovid;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class TopBean implements WritableComparable<TopBean> {
    private String state;
    private String county;
    private int cases;
    public String getState() {
        return state;
    }
    public void setState(String state) {
        this.state = state;
    }
    public String getCounty() {
        return county;
    }
    public void setCounty(String county) {
        this.county = county;
    }
    public int getCases() {
        return cases;
    }
    public void setCases(int cases) {
        this.cases = cases;
    }
    public TopBean() {
    }
    public TopBean(String state, String county, int cases) {
        this.state = state;
        this.county = county;
        this.cases = cases;
    }
    @Override
    public void readFields(DataInput in) throws IOException {
        this.state = in.readUTF();
        this.county = in.readUTF();
        this.cases = in.readInt();
    }
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(state);
        out.writeUTF(county);
        out.writeInt(cases);
    }
    @Override
    public int compareTo(TopBean o) {
        // Same state: order by case count descending; different states: order by state
        // ascending. After grouping by state, the first record per group is the top-1 county.
        if (this.state.compareTo(o.state) == 0) {
            return Integer.compare(o.cases, this.cases);
        }
        return this.state.compareTo(o.state);
    }
    @Override
    public String toString() {
        return this.state + "\t" + this.county + "\t" + this.cases;
    }
}
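As a quick sanity check of the ordering outside Hadoop, sorting a small array of TopBean instances shows states ascending and, within a state, cases descending; the counties and counts below are made up for illustration:

package uscovid;
import java.util.Arrays;
public class TopBeanOrderCheck {
    public static void main(String[] args) {
        TopBean[] rows = {
            new TopBean("Washington", "King", 100),        // hypothetical values
            new TopBean("California", "Los Angeles", 300),
            new TopBean("Washington", "Snohomish", 250),
        };
        Arrays.sort(rows); // uses TopBean.compareTo
        for (TopBean b : rows) {
            System.out.println(b); // the first line printed per state is its top-1 county
        }
    }
}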
Key point in the code: the grouping comparator must be registered on the job:
job.setGroupingComparatorClass(GroupComparator.class);
Without it, Hadoop groups on the full composite key, so every (state, county, cases) key gets its own reduce call and every county is emitted. With it, keys that compare equal on state fold into a single reduce call, and since the sort comparator already placed the highest-case county first, writing k2 once yields exactly the top-1 county per state.
4. Results
Each output line has the form state\tcounty\tcases (see TopBean.toString), one line per state.
If you are not sure how to run the job, see the previous MapReduce post.