隨著Hadoop 0.21.0的釋出,你可以更方便地測試你的MapReduce程式,因為它包含了一套由Cloudera所貢獻,用來測試MapReduce程式的Library - MRUnit。不過目前除了官方所提供的overview.html檔之外,其它的相關文件卻相當稀少(補充中:Add MRUnit documentation),而本文純粹記錄一下簡單的WordCount測試程式(New API)來介紹MRUnit的使用方式:
TokenizerMapper
import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(Object key, Text value, Context context) throws IOException, InterruptedException { StringTokenizer itr = new StringTokenizer(value.toString()); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); context.write(word, one); } } }
WordCountReducer
import java.io.IOException; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private IntWritable result = new IntWritable(); public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable val : values) { sum += val.get(); } result.set(sum); context.write(key, result); } }
MRUnitTest
import junit.framework.TestCase; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; public class MRUnitTest extends TestCase { private Mapper<Object, Text, Text, IntWritable> mapper; private Reducer<Text, IntWritable, Text, IntWritable> reducer; private MapReduceDriver<Object, Text, Text, IntWritable,Text, IntWritable> driver; @Before public void setUp() { mapper = new TokenizerMapper(); reducer = new WordCountReducer(); driver = new MapReduceDriver<Object, Text, Text, IntWritable,Text, IntWritable>(mapper, reducer); } @Test public void testIdentityMapper() { Pair<Object, Text> p1 = new Pair<Object, Text>(new Object(), new Text("bar")); Pair<Object, Text> p2 = new Pair<Object, Text>(new Object(), new Text("foo")); Pair<Object, Text> p3 = new Pair<Object, Text>(new Object(), new Text("bar")); Pair<Object, Text> p4 = new Pair<Object, Text>(new Object(), new Text("bar")); driver.withInput(p1); driver.withInput(p2); driver.withInput(p3); driver.withInput(p4); driver.withOutput(new Text("bar"), new IntWritable(3)); driver.withOutput(new Text("foo"), new IntWritable(1)); driver.runTest(); } }
由於上述範例主要測試MapReduce整個流程,所以透過「MapReduceDriver」物件來驅動整個測試,當然你也可以只測試Map流程,那就透過「MapDriver」即可,另外MRUnit也支援Counters,所以也可以透過它來驗證程式。
相關資源