line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Hadoop::Streaming::Mapper; |
2
|
|
|
|
|
|
|
{ |
3
|
|
|
|
|
|
|
$Hadoop::Streaming::Mapper::VERSION = '0.122420'; |
4
|
|
|
|
|
|
|
} |
5
|
1
|
|
|
1
|
|
20754
|
use Any::Moose qw(Role); |
|
1
|
|
|
|
|
33461
|
|
|
1
|
|
|
|
|
8
|
|
6
|
1
|
|
|
1
|
|
3198
|
use IO::Handle; |
|
1
|
|
|
|
|
6868
|
|
|
1
|
|
|
|
|
130
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
with 'Hadoop::Streaming::Role::Emitter'; |
9
|
|
|
|
|
|
|
#requires qw(emit counter status); #from Hadoop::Streaming::Role::Emitter |
10
|
|
|
|
|
|
|
requires qw(map); # from consumer |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# ABSTRACT: Simplify writing Hadoop Streaming Mapper jobs. Write a map() function and let this role handle the Stream interface. |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# run
#
# Class-method entry point for a streaming mapper:
#
#     Some::Mapper->run;
#
# Builds an instance with $class->new (no arguments), then reads STDIN one
# line at a time until end of input. Each line has its trailing newline
# removed with chomp and is passed to the instance's map() method as the
# only argument.
#
# The return value is not meaningful; the method is called for its effect.
sub run
{
    my $class = shift;
    my $self  = $class->new;

    # Perl only adds its implicit defined() test for "<FH>" / readline in a
    # while condition, not for a method call such as getline. Without the
    # explicit defined(), a final input line consisting of just "0" with no
    # trailing newline is false and would end the loop without being mapped.
    while ( defined( my $line = STDIN->getline ) )
    {
        chomp $line;
        $self->map($line);
    }
}
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
1; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
__END__ |
32
|
|
|
|
|
|
|
=pod |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 NAME |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Hadoop::Streaming::Mapper - Simplify writing Hadoop Streaming Mapper jobs. Write a map() function and let this role handle the Stream interface. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 VERSION |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
version 0.122420 |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 SYNOPSIS |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
#!/usr/bin/env perl |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
package Wordcount::Mapper; |
47
|
|
|
|
|
|
|
use Any::Moose; |
48
|
|
|
|
|
|
|
with 'Hadoop::Streaming::Mapper'; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub map |
51
|
|
|
|
|
|
|
{ |
52
|
|
|
|
|
|
|
my ( $self, $line ) = @_; |
53
|
|
|
|
|
|
|
$self->emit( $_ => 1 ) for ( split /\s+/, $line ); |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
package main; |
57
|
|
|
|
|
|
|
Wordcount::Mapper->run; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Your mapper class must implement map($line) and your reducer must |
60
|
|
|
|
|
|
|
implement reduce($key,$value). Your classes will have emit(), counter(), |
61
|
|
|
|
|
|
|
status() and run() methods added via a role. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 METHODS |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head2 run |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Package->run(); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
This method starts the Hadoop::Streaming::Mapper instance. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
After creating a new object instance, it reads from STDIN and calls |
72
|
|
|
|
|
|
|
$object->map() on each line of input. Classes consuming this role need only implement map() |
73
|
|
|
|
|
|
|
to produce a complete Hadoop Streaming compatible mapper. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 INTERFACE DETAILS |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
The default inputformat for streaming jobs is TextInputFormat, which returns lines without keys in the streaming context. Because of this, map() is not given a key/value pair; instead it receives only the value (the input line, with its trailing newline removed). |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
If you change your jar options to use a different JavaClassName as inputformat, you may need to deal with key and value. TBD. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
quoting from: http://hadoop.apache.org/common/docs/r0.20.2/streaming.html#Specifying+Other+Plugins+for+Jobs |
82
|
|
|
|
|
|
|
=over 4 |
83
|
|
|
|
|
|
|
Specifying Other Plugins for Jobs |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Just as with a normal Map/Reduce job, you can specify other plugins for a streaming job: |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
-inputformat JavaClassName |
88
|
|
|
|
|
|
|
-outputformat JavaClassName |
89
|
|
|
|
|
|
|
-partitioner JavaClassName |
90
|
|
|
|
|
|
|
-combiner JavaClassName |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
The class you supply for the input format should return key/value pairs of Text class. If you do not specify an input format class, the TextInputFormat is used as the default. Since the TextInputFormat returns keys of LongWritable class, which are actually not part of the input data, the keys will be discarded; only the values will be piped to the streaming mapper. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
The class you supply for the output format is expected to take key/value pairs of Text class. If you do not specify an output format class, the TextOutputFormat is used as the default. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=back |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head1 AUTHORS |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=over 4 |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item * |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
andrew grangaard <spazm@cpan.org> |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item * |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Naoya Ito <naoya@hatena.ne.jp> |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=back |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
This software is copyright (c) 2012 by Naoya Ito <naoya@hatena.ne.jp>. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
117
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=cut |
120
|
|
|
|
|
|
|
|