Hadoop - MapReduce Old API - Passing Command Line Argument to Map
I am coding a MapReduce job to find the occurrences of a search string (passed as a command-line argument) in an input file stored in HDFS, using the old API.
Below is the driver class -
public class StringSearchDriver {
    public static void main(String[] args) throws IOException {
        JobConf jc = new JobConf(StringSearchDriver.class);
        jc.set("searchWord", args[2]);
        jc.setJobName("String Search");
        FileInputFormat.addInputPath(jc, new Path(args[0]));
        FileOutputFormat.setOutputPath(jc, new Path(args[1]));
        jc.setMapperClass(StringSearchMap.class);
        jc.setReducerClass(StringSearchReduce.class);
        jc.setOutputKeyClass(Text.class);
        jc.setOutputValueClass(IntWritable.class);
        JobClient.runJob(jc);
    }
}
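For context, I submit the job with the search word as the third argument, along these lines (the jar name and HDFS paths here are placeholders, not the actual ones I used):

hadoop jar stringsearch.jar StringSearchDriver /user/me/input /user/me/output hi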
Below is the mapper class -
public class StringSearchMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
    String searchWord;

    public void configure(JobConf jc) {
        searchWord = jc.get("searchWord");
    }

    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> out, Reporter reporter)
            throws IOException {
        String[] input = value.toString().split(" ");
        for (String word : input) {
            if (word.equalsIgnoreCase(searchWord))
                out.collect(new Text(word), new IntWritable(1));
        }
    }
}
On running the job (with the command-line string "hi" passed), I am getting the error below -
14/09/21 22:35:41 INFO mapred.JobClient: Task Id : attempt_201409212134_0005_m_000001_2, Status : FAILED
java.lang.ClassCastException: interface javax.xml.soap.Text
    at java.lang.Class.asSubclass(Class.java:3129)
    at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:795)
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:964)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:422)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
please suggest.
You auto-imported the wrong import: instead of import org.apache.hadoop.io.Text you have import javax.xml.soap.Text.
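The two imports side by side - the first is what the IDE pulled in, the second is what the Hadoop mapper needs:

// Wrong: auto-imported by the IDE, causes the ClassCastException above
import javax.xml.soap.Text;

// Correct: the Hadoop Writable text type
import org.apache.hadoop.io.Text;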
You can find a sample of this same wrong import in a blog post.
One more point: you would be better off adopting the new API.
EDIT:

I used the new API:
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author Unmesha Sreeveni
 * @date 23 Sep 2014
 */
public class StringSearchDriver extends Configured implements Tool {

    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Read the search word that the driver stored in the job configuration
            Configuration conf = context.getConfiguration();
            String line = value.toString();
            String searchString = conf.get("word");

            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                if (token.equals(searchString)) {
                    word.set(token);
                    context.write(word, one);
                }
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int res = ToolRunner.run(conf, new StringSearchDriver(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            System.out.printf("Usage: SearchString <input dir> <output dir> <search word>\n");
            System.exit(-1);
        }

        String source = args[0];
        String dest = args[1];
        String searchWord = args[2];

        // Set the search word before creating the Job, since Job copies the Configuration
        Configuration conf = new Configuration();
        conf.set("word", searchWord);

        Job job = new Job(conf, "Search String");
        job.setJarByClass(StringSearchDriver.class);

        FileSystem fs = FileSystem.get(conf);
        Path in = new Path(source);
        Path out = new Path(dest);
        // Delete the output directory if it already exists
        if (fs.exists(out)) {
            fs.delete(out, true);
        }

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        boolean success = job.waitForCompletion(true);
        return (success ? 0 : 1);
    }
}
this works.
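To verify the result, the output can be read back from HDFS, for example (assuming the output path passed as the second argument; part-r-00000 is the default name of the single reducer's output file):

hadoop fs -cat /user/me/output/part-r-00000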