微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

<mapreduce三>

题目:统计美国各州确诊病例数top1的县

一、思路:这个题目需要排序和分组,并且需要自定义排序规则(在 Bean 的 compareTo 中实现)和分组比较器。先对所有记录排序:如果州相同,则按确诊病例数倒序排列;如果州不同,则按州名排序。然后按州分组,每组只输出第一条记录,即各州确诊病例数 top1 的县。

二:数据集:

链接:https://pan.baidu.com/s/1vfY_sTi0iV6ZrYI5MYtkvw 
提取码:vvpy 
三:上代码

package uscovid;

import java.io.IOException;
import java.util.Comparator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;



public class us {
	public static class Mymaper  extends Mapper<LongWritable , Text, TopBean, NullWritable>{
		protected void map(LongWritable key, Text value,
				Mapper<LongWritable, Text, TopBean, NullWritable>.Context context)
				throws IOException, InterruptedException {
			String b[]  = value.toString().split(",");
			context.write(new TopBean(b[2].toString(),b[1].toString(),Integer.parseInt(b[4])), NullWritable.get());
		}
	}
	public static class MyReduce extends Reducer<TopBean, NullWritable, TopBean, NullWritable>{
		protected void reduce(TopBean k2, Iterable<NullWritable> arg1,
				Reducer<TopBean, NullWritable, TopBean, NullWritable>.Context context)
				throws IOException, InterruptedException {
			
			context.write(k2, NullWritable.get());
		}
	}
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		//设置参数
		Configuration  conf =new Configuration();
		//创建任务
		Job job =Job.getInstance(conf,us.class.getSimpleName() );
		//指定jar文件
		job.setJarByClass(us.class);
		//指定输入路径
		FileInputFormat.addInputPath(job,new Path(args[0]));
		//指定map类以及key和value的输出类型
		job.setMapperClass(Mymaper.class);
		job.setMapOutputKeyClass(TopBean.class);
		job.setMapOutputValueClass(NullWritable.class);
		//指定reduce类以及key和value的输出类型
		job.setReducerClass(MyReduce.class);
		job.setoutputKeyClass(TopBean.class);
		job.setoutputValueClass(NullWritable.class);
		//配置分组类
		job.setGroupingComparatorClass(comparator.class);
		//输出路径
		FileOutputFormat.setoutputPath(job, new Path(args[1]));
		job.waitForCompletion(true);		
}
	
}
package uscovid;

import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.WritableComparable;

public class comparator  extends WritableComparator {
	protected comparator() {
        super(TopBean.class, true);
    }
	public int compare(WritableComparable a, WritableComparable b) {
        TopBean abean = (TopBean)a;
        TopBean bbean = (TopBean)b;
        return abean.getState().compareto(bbean.getState());

    }

	
	
}
package uscovid;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class TopBean implements WritableComparable<TopBean> {

	private String state;
    private String country;
    private int cases;
    
	public String getState() {
		return state;
	}


	public void setState(String state) {
		this.state = state;
	}


	public String getCountry() {
		return country;
	}


	public void setCountry(String country) {
		this.country = country;
	}


	public int getCases() {
		return cases;
	}


	public void setCases(int cases) {
		this.cases = cases;
	}
    public TopBean() {
    	
    }
    public TopBean(String state,String country,int cases) {
    	this.state=state;
    	this.country=country;
    	this.cases=cases;	
    }
	public void readFields(DataInput in) throws IOException {
		this.state=in.readUTF();
		this.country=in.readUTF();
		this.cases=in.readInt();
		
	}
	public void write(DataOutput out) throws IOException {
		out.writeUTF(state);
		out.writeUTF(country);
		out.writeInt(cases);
		
	}
	public int compareto(TopBean o) {
		// 如果州相等,将县病例数倒序排序,如果州不相等,州倒序排序。后面只要将州进行分组,输出一个即可。
		if (o.state.compareto(this.state)==0) {
			return o.cases-this.cases;
		}
		return -(o.state.compareto(this.state));
	}
	public String toString() {
		return " "+ this.state + "\t" + this.country +"\t" + this.cases ;
	}

}

代码注意事项:必须在驱动类中配置分组类(按州分组),否则 reduce 端不会把同一个州的记录归为一组:
        job.setGroupingComparatorClass(comparator.class);

四:

运行结果

不清楚怎么运行的可以看看上篇mapreduce二哈。

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。

相关推荐