分两部份:
<span>/***
* @author YangXin
* @date 2016/2/21
* @info 主要功能是用mahout实现解析Wikipedia链接文件的Mapper接口
*/
package unitSix;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VarLongWritable;
public class WikipediaToItemPrefsMapper extends Mapper<LongWritable, Text, VarLongWritable, VarLongWritable>{
private static final Pattern NUMBERS = Pattern.compile("(\d+)");
/**
 * Emits one (userID, itemID) pair for every item ID found on an input line.
 *
 * <p>The first run of digits on the line is parsed as the user ID; every following run of
 * digits is parsed as an item ID and written to the context paired with that user ID.
 * A line that contains no digits at all (for example a blank line) is skipped, because
 * calling {@code Matcher.group()} after an unsuccessful {@code find()} throws
 * {@code IllegalStateException}.
 *
 * @param key     input key; not read by this method
 * @param value   one line of input text
 * @param context receives the (userID, itemID) pairs via {@code context.write}
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    Matcher m = NUMBERS.matcher(value.toString());
    // Locate the user ID; a line without any number has nothing to emit.
    if (!m.find()) {
        return;
    }
    VarLongWritable userID = new VarLongWritable(Long.parseLong(m.group()));
    // A single writable is reused; its value is overwritten before each write.
    VarLongWritable itemID = new VarLongWritable();
    while (m.find()) {
        itemID.set(Long.parseLong(m.group()));
        // Emit one user-item pair for each item ID.
        context.write(userID, itemID);
    }
}
}</span><span>/***
* @author YangXin
* @date 2016