隨著Hadoop 0.21.0的釋出,你可以更方便地測試你的MapReduce程式,因為它包含了一套由Cloudera所貢獻、用來測試MapReduce程式的Library - MRUnit。不過目前除了官方所提供的overview.html檔之外,其它的相關文件相當稀少(補充中:Add MRUnit documentation),因此本文以一個簡單的WordCount測試程式(New API)為例,介紹MRUnit的使用方式:
TokenizerMapper
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper for the WordCount example: splits each input value into tokens and
 * emits one {@code (token, 1)} pair per token.
 */
public class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>
{
    /** Constant count of 1 written for every token. */
    private final static IntWritable one = new IntWritable(1);

    /** Output key holder; the same instance is reused for every token. */
    private Text word = new Text();

    /**
     * Tokenizes {@code value} with {@link StringTokenizer}'s default delimiters
     * and writes each token to {@code context} paired with {@link #one}.
     *
     * @param key     input key; not read by this method
     * @param value   text to split into tokens
     * @param context sink that receives one {@code (word, one)} pair per token
     * @throws IOException          declared by the signature; not thrown directly by this method
     * @throws InterruptedException declared by the signature; not thrown directly by this method
     */
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException
    {
        for (StringTokenizer tokens = new StringTokenizer(value.toString()); tokens.hasMoreTokens(); )
        {
            word.set(tokens.nextToken());
            context.write(word, one);
        }
    }
}
WordCountReducer
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer for the WordCount example: adds up all counts received for a key
 * and emits a single {@code (key, total)} pair.
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>
{
    /** Output value holder; the same instance is reused on every call. */
    private IntWritable result = new IntWritable();

    /**
     * Sums every element of {@code values} and writes the total for {@code key}.
     *
     * @param key     the key whose counts are being combined
     * @param values  the counts to add together
     * @param context sink that receives the {@code (key, total)} pair
     * @throws IOException          declared by the signature; not thrown directly by this method
     * @throws InterruptedException declared by the signature; not thrown directly by this method
     */
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
    {
        int total = 0;
        for (IntWritable count : values)
        {
            total = total + count.get();
        }

        result.set(total);
        context.write(key, result);
    }
}
MRUnitTest
import junit.framework.TestCase;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.junit.Before;
import org.junit.Test;
/**
 * MRUnit test that wires {@code TokenizerMapper} and {@code WordCountReducer}
 * into a single {@link MapReduceDriver} and checks the resulting word counts.
 *
 * NOTE(review): this class extends the JUnit 3 {@code TestCase} while also
 * using the JUnit 4 {@code @Before}/{@code @Test} annotations. Presumably the
 * JUnit 3 naming conventions ({@code setUp}, {@code test*}) are what actually
 * drive execution here; confirm and settle on one style.
 */
public class MRUnitTest extends TestCase
{
    private Mapper<Object, Text, Text, IntWritable> mapper;
    private Reducer<Text, IntWritable, Text, IntWritable> reducer;
    private MapReduceDriver<Object, Text, Text, IntWritable, Text, IntWritable> driver;

    /** Creates a fresh mapper, reducer and driver before the test runs. */
    @Before
    public void setUp()
    {
        mapper = new TokenizerMapper();
        reducer = new WordCountReducer();
        driver = new MapReduceDriver<Object, Text, Text, IntWritable, Text, IntWritable>(mapper, reducer);
    }

    /**
     * Feeds four single-word records ("bar" three times, "foo" once) through
     * the driver and expects the counts bar=3 and foo=1.
     *
     * NOTE(review): the method name mentions an identity mapper, but the body
     * exercises the word-count mapper/reducer pair; consider renaming.
     */
    @Test
    public void testIdentityMapper()
    {
        String[] records = {"bar", "foo", "bar", "bar"};
        for (String record : records)
        {
            // Each record gets its own placeholder key object.
            driver.withInput(new Pair<Object, Text>(new Object(), new Text(record)));
        }

        // Expected (word, count) pairs.
        driver.withOutput(new Text("bar"), new IntWritable(3));
        driver.withOutput(new Text("foo"), new IntWritable(1));

        driver.runTest();
    }
}
由於上述範例主要測試MapReduce的整個流程,所以透過「MapReduceDriver」物件來驅動整個測試。當然你也可以只測試Map流程,此時改用「MapDriver」即可。另外MRUnit也支援Counters,所以也可以透過Counters來驗證程式。
相關資源
