Problem: for each US state, find the county with the highest confirmed case count
1. Approach: this problem needs both a custom sort and a custom grouping comparator (a classic MapReduce secondary sort). Each input record is emitted as a composite key; the sort comparator orders keys by state, and within the same state by confirmed cases in descending order. The grouping comparator then groups keys by state only, so each reduce call sees one state's records with the highest-case county first; writing just the incoming key yields the top-1 county per state.
2. Dataset:
Link: https://pan.baidu.com/s/1vfY_sTi0iV6ZrYI5MYtkvw
Extraction code: vvpy
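The mapper below assumes the CSV follows the NYT us-counties layout, i.e. date,county,state,fips,cases,deaths (so b[1] is the county, b[2] the state, and b[4] the case count). A couple of illustrative rows under that assumption (the dates and counts here are made up):

2020-03-01,Los Angeles,California,06037,20,0
2020-03-01,King,Washington,53033,30,1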
3. The code:
package uscovid;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class us {
    public static class MyMapper extends Mapper<LongWritable, Text, TopBean, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, TopBean, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // Expected columns: date,county,state,fips,cases,deaths
            String[] b = value.toString().split(",");
            // Skip the header line and any malformed rows
            if (b.length < 5 || !b[4].matches("\\d+")) {
                return;
            }
            // Composite key (state, county, cases); the value carries nothing
            context.write(new TopBean(b[2], b[1], Integer.parseInt(b[4])), NullWritable.get());
        }
    }
    public static class MyReduce extends Reducer<TopBean, NullWritable, TopBean, NullWritable> {
        @Override
        protected void reduce(TopBean k2, Iterable<NullWritable> values,
                Reducer<TopBean, NullWritable, TopBean, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // One call per state (thanks to the grouping comparator); k2 arrives as that
            // state's highest-case record, so writing it once gives the top-1 county
            context.write(k2, NullWritable.get());
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Job configuration
        Configuration conf = new Configuration();
        // Create the job
        Job job = Job.getInstance(conf, us.class.getSimpleName());
        // Locate the jar by class
        job.setJarByClass(us.class);
        // Input path
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Mapper class and its output key/value types
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(TopBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Reducer class and the job's output key/value types
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(TopBean.class);
        job.setOutputValueClass(NullWritable.class);
        // Grouping comparator: group map output keys by state only
        job.setGroupingComparatorClass(GroupComparator.class);
        // Output path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
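To run the job, package the classes into a jar and submit it with input and output paths; the jar name and HDFS paths below are placeholders, so substitute your own:

hadoop jar uscovid.jar uscovid.us /input/us-counties.csv /output/top1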
package uscovid;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
// Grouping comparator: keys that share a state fall into the same reduce group
public class GroupComparator extends WritableComparator {
    protected GroupComparator() {
        // true: let WritableComparator instantiate TopBean objects for deserialization
        super(TopBean.class, true);
    }
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TopBean abean = (TopBean) a;
        TopBean bbean = (TopBean) b;
        return abean.getState().compareTo(bbean.getState());
    }
}
package uscovid;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class TopBean implements WritableComparable<TopBean> {
    private String state;
    private String county;
    private int cases;
    public String getState() {
        return state;
    }
    public void setState(String state) {
        this.state = state;
    }
    public String getCounty() {
        return county;
    }
    public void setCounty(String county) {
        this.county = county;
    }
    public int getCases() {
        return cases;
    }
    public void setCases(int cases) {
        this.cases = cases;
    }
    public TopBean() {
    }
    public TopBean(String state, String county, int cases) {
        this.state = state;
        this.county = county;
        this.cases = cases;
    }
    @Override
    public void readFields(DataInput in) throws IOException {
        this.state = in.readUTF();
        this.county = in.readUTF();
        this.cases = in.readInt();
    }
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(state);
        out.writeUTF(county);
        out.writeInt(cases);
    }
    @Override
    public int compareTo(TopBean o) {
        // Same state: order by case count descending; different states: order by state
        // ascending. After grouping by state, the first record per group is the top-1 county.
        if (this.state.compareTo(o.state) == 0) {
            return Integer.compare(o.cases, this.cases);
        }
        return this.state.compareTo(o.state);
    }
    @Override
    public String toString() {
        return this.state + "\t" + this.county + "\t" + this.cases;
    }
}
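As a quick sanity check of the ordering outside Hadoop, sorting a small array of TopBean instances shows states ascending and, within a state, cases descending; the counties and counts below are made up for illustration:

package uscovid;
import java.util.Arrays;
public class TopBeanOrderCheck {
    public static void main(String[] args) {
        TopBean[] rows = {
            new TopBean("Washington", "King", 100),        // hypothetical values
            new TopBean("California", "Los Angeles", 300),
            new TopBean("Washington", "Snohomish", 250),
        };
        Arrays.sort(rows); // uses TopBean.compareTo
        for (TopBean b : rows) {
            System.out.println(b); // the first line printed per state is its top-1 county
        }
    }
}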
Key point in the code: the grouping comparator must be registered on the job:
job.setGroupingComparatorClass(GroupComparator.class);
Without it, Hadoop groups on the full composite key, so every (state, county, cases) key gets its own reduce call and every county is emitted. With it, keys that compare equal on state fold into a single reduce call, and since the sort comparator already placed the highest-case county first, writing k2 once yields exactly the top-1 county per state.
4. Results
Each output line has the form state\tcounty\tcases (see TopBean.toString), one line per state.
If you are not sure how to run the job, see the previous MapReduce post.