一:配置pom
<dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-core</artifactId> <version>1.0.1</version> </dependency>
二:测试代码
package com.jachs.hadoop;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Word-count job written against the classic {@code org.apache.hadoop.mapred} API.
 *
 * <p>The mapper splits every input line into whitespace-separated tokens and emits
 * {@code <word, 1>} for each token; the reducer sums the ones per word.
 *
 * @author zhanchaohan
 */
public class WordCount {

    /*
     * Input file location. Sample content used for testing (original line breaks
     * were not preserved in this listing):
     *
     *   Hello Workd Bye World Hello Hadoop Goodbye Hadoop
     *   Hello Workd Bye World And Hello Hadoop Goodbye Hadoop
     */
    private static final String INPUT_FILE = "/usr/jachs/hadoop/A";

    /** Output directory the job writes its result into. */
    private static final String OUTPUT_DIR = "/usr/jachs/hadoop/B";

    /** Driver class; only {@link #main(String[])} is meant to be used. */
    private WordCount() {
    }

    /**
     * Splits each line of the input into words and emits {@code <word, 1>} for every
     * word, leaving the aggregation to {@link Reduce}.
     */
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {

        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key, overwritten for every token. */
        private final Text word = new Text();

        /**
         * Tokenizes one input line and emits a count of one per token.
         *
         * @param key      key supplied by the input format (not used here)
         * @param value    the line of text to tokenize
         * @param output   collector receiving {@code <word, 1>} pairs
         * @param reporter progress reporter (not used here)
         * @throws IOException if the collector fails to accept a pair
         */
        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                // Emit the individual word, not the whole line, so the reducer
                // aggregates per word.
                output.collect(word, ONE);
            }
        }
    }

    /** Sums all counts received for a word and emits {@code <word, total>}. */
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Adds up every value received for {@code key}.
         *
         * @param key      the word being aggregated
         * @param values   the partial counts for that word
         * @param output   collector receiving the {@code <word, total>} pair
         * @param reporter progress reporter (not used here)
         * @throws IOException if the collector fails to accept the pair
         */
        @Override
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    /**
     * Configures the word-count job and submits it via {@link JobClient#runJob(JobConf)}.
     *
     * @param args command-line arguments (ignored; paths are fixed constants)
     * @throws IOException if job submission or execution fails
     */
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordCount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(INPUT_FILE));
        FileOutputFormat.setOutputPath(conf, new Path(OUTPUT_DIR));

        JobClient.runJob(conf);
    }
}
三:执行。在 Windows 下执行会报异常,需要修改源码。返回结果如下图,打印每行单词个数。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。