hadoop - MapReduce Old API - Passing Command Line Argument to Map


I am coding a MapReduce job that finds the occurrences of a search string (passed through a command line argument) in an input file stored in HDFS, using the old API.

Below is the driver class -

public class StringSearchDriver {
    public static void main(String[] args) throws IOException {
        JobConf jc = new JobConf(StringSearchDriver.class);
        jc.set("SearchWord", args[2]);
        jc.setJobName("String Search");
        FileInputFormat.addInputPath(jc, new Path(args[0]));
        FileOutputFormat.setOutputPath(jc, new Path(args[1]));
        jc.setMapperClass(StringSearchMap.class);
        jc.setReducerClass(StringSearchReduce.class);
        jc.setOutputKeyClass(Text.class);
        jc.setOutputValueClass(IntWritable.class);
        JobClient.runJob(jc);
    }
}

Below is the mapper class -

public class StringSearchMap extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, IntWritable> {

    String searchWord;

    public void configure(JobConf jc) {
        searchWord = jc.get("SearchWord");
    }

    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> out, Reporter reporter)
            throws IOException {
        String[] input = value.toString().split(" ");

        for (String word : input) {
            if (word.equalsIgnoreCase(searchWord))
                out.collect(new Text(word), new IntWritable(1));
        }
    }
}

On running the job (with the command line search string "hi"), I get the error below -

14/09/21 22:35:41 INFO mapred.JobClient: Task Id : attempt_201409212134_0005_m_000001_2, Status : FAILED
java.lang.ClassCastException: interface javax.xml.soap.Text
    at java.lang.Class.asSubclass(Class.java:3129)
    at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:795)
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:964)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:422)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)

Please suggest.

You auto-imported the wrong class: instead of org.apache.hadoop.io.Text, you imported javax.xml.soap.Text, which is why the framework throws the ClassCastException.
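To make the fix explicit (the class names below simply match your posted code), the Text import in both the driver and the mapper should come from Hadoop's io package, not the SOAP API:

    // Wrong - pulled in by the IDE's auto-import suggestion:
    // import javax.xml.soap.Text;

    // Correct - Hadoop's Writable Text type used for the map output key:
    import org.apache.hadoop.io.Text;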

You can find a sample of this wrong-import problem described in the blog.

One more point: you are better off adopting the new API.

Edit:

I used the new API:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author Unmesha Sreeveni
 * @date 23 Sep 2014
 */
public class StringSearchDriver extends Configured implements Tool {

    public static class Map extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            String line = value.toString();
            String searchString = conf.get("word");
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                if (token.equals(searchString)) {
                    word.set(token);
                    context.write(word, one);
                }
            }
        }
    }

    public static class Reduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int res = ToolRunner.run(conf, new StringSearchDriver(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            System.out
                .printf("Usage: SearchString <input dir> <output dir> <search word>\n");
            System.exit(-1);
        }

        String source = args[0];
        String dest = args[1];
        String searchWord = args[2];
        Configuration conf = new Configuration();
        conf.set("word", searchWord);
        Job job = new Job(conf, "Search String");
        job.setJarByClass(StringSearchDriver.class);
        FileSystem fs = FileSystem.get(conf);

        Path in = new Path(source);
        Path out = new Path(dest);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        boolean success = job.waitForCompletion(true);
        return (success ? 0 : 1);
    }
}

This works.
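For reference (the jar name and HDFS paths here are only placeholders), the driver takes the input directory, output directory and search word as its three arguments, so the job can be launched with something like:

    hadoop jar stringsearch.jar StringSearchDriver /user/hadoop/input /user/hadoop/output hi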

