File Coverage

blib/lib/WWW/Google/News/TW.pm
Criterion Covered Total %
statement 58 118 49.1
branch 6 22 27.2
condition n/a
subroutine 10 11 90.9
pod 2 3 66.6
total 76 154 49.3


line stmt bran cond sub pod time code
1             package WWW::Google::News::TW;
2              
3 3     3   108891 use utf8;
  3         25  
  3         19  
4 3     3   97 use strict;
  3         7  
  3         105  
5 3     3   18 use warnings;
  3         10  
  3         310  
6              
7             require Exporter;
8              
9             our @ISA = qw(Exporter);
10             our @EXPORT_OK = qw(get_news get_news_for_topic get_news_for_category);
11             our $VERSION = '0.12';
12              
13 3     3   16 use Carp;
  3         6  
  3         304  
14 3     3   2710 use LWP;
  3         355206  
  3         146  
15 3     3   47 use URI::Escape;
  3         115  
  3         9846  
16 3     3   13900 use Encode;
  3         49212  
  3         5302  
17              
18             sub get_news {
19             # Web version: http://news.google.com.tw/news?ned=tw
20             # plain text version : http://news.google.com.tw/news?ned=ttw
21 1     1 1 9 my $url = 'http://news.google.com.tw/news?ned=ttw';
22 1         4777 my $ua = LWP::UserAgent->new;
23 1         4049 $ua->agent('Mozilla/5.0');
24 1         82 my $response = $ua->get($url);
25 1         658921 my $results = {};
26 1 50       6 return unless $response->is_success;
27              
28 1         14 my $re1 = ' (.*?)';
29 1         4 my $re2 = '([^<]*)
'.
30             '([^<]*)'.
31             '\s?([^<]*)
'.
32             '([^<]*)...';
33              
34 1         11 my $content = $response->decoded_content;
35 1 50       21660 $content = $response->content if (not defined $content);
36 1         2588 my @sections = split /($re1)/m,$content;
37 1         6 my $current_section = '';
38 1         5 foreach my $section (@sections) {
39 1 50       137 if ($section =~ m/$re1/m) {
40 0         0 $current_section = $1;
41 0         0 $current_section =~ s/ //g; # or put this  (.*?)(?: )? in re1
42             } else {
43 1     1   2073 my @stories = split /($re2)/mi,$section;
  1         14  
  1         3  
  1         21  
44 1         103020 foreach my $story (@stories) {
45 1 50       34847 if ($story =~ m/$re2/mi) {
46 0 0       0 if (!(exists($results->{$current_section}))) {
47 0         0 $results->{$current_section} = [];
48             }
49 0         0 my $story_h = {};
50 0         0 my( $url, $headline, $source, $update_time, $summary ) = ( $1, $2, $3, $4, $5 );
51 0         0 $story_h->{url} = $url;
52 0         0 $story_h->{headline} = $headline;
53 0         0 $story_h->{source} = $source;
54 0         0 $story_h->{source} =~ s/ -//g;
55 0         0 $story_h->{update_time} = $update_time;
56 0         0 $story_h->{summary} = $summary;
57 0         0 push(@{$results->{$current_section}},$story_h);
  0         0  
58             }
59             }
60             }
61             }
62 1         321 return $results;
63             }
64              
65             sub get_news_for_topic {
66              
67 1     1 1 16 my $topic = uri_escape( $_[0] );
68              
69 1         78 my @results = ();
70 1         5 my $url = "http://news.google.com.tw/news?hl=zh-TW&ned=ttw&q=$topic";
71 1         13 my $ua = LWP::UserAgent->new();
72 1         3646 $ua->agent('Mozilla/5.0');
73              
74 1         75 my $response = $ua->get($url);
75 1 50       635650 return unless $response->is_success;
76              
77 1         15 my $re1 = ' (.*?)©\d{4} Google';
78 1         4 my $re2 = '(.*?)
'.
79             '([^<]*)'.
80             '\s?([^<]*)
'.
81             '(.*?)...';
82              
83 1         11 my $content = $response->decoded_content;
84 1 50       12182 $content = $response->content if (not defined $content);
85 1         135 my( $section ) = ( $content =~ m/$re1/s );
86 1         158 $section =~ s/\n//g;
87 1         120 my @stories = split /($re2)/mi,$section;
88              
89 1         4 foreach my $story (@stories) {
90 0 0       0 if ($story =~ m/$re2/i) {
91 0         0 my $story_h = {};
92            
93 0         0 my( $url, $headline, $source, $update_time, $summary ) = ( $1, $2, $3, $4, $5 );
94 0         0 $source =~ s/ / /g;
95 0         0 $source =~ s/\s+/ /g;
96 0         0 $update_time =~ s/ / /g;
97 0         0 $update_time =~ s/\s+/ /g;
98 0         0 $update_time =~ s/-//g;
99 0         0 $headline =~ s#<.+?>##gi;
100 0         0 $summary =~ s#<.+?>##gi;
101              
102 0         0 $story_h->{url} = $url;
103 0         0 $story_h->{headline} = $headline;
104 0         0 $story_h->{source} = $source;
105 0         0 $story_h->{update_time} = $update_time;
106 0         0 $story_h->{summary} = $summary;
107              
108 0         0 push(@results,$story_h);
109              
110             }
111             }
112              
113 1         178 return \@results;
114              
115             }
116              
117             sub get_news_for_category {
118             # Web version: http://news.google.com.tw/news?ned=tw
119             # plain text version : http://news.google.com.tw/news?ned=ttw
120 0     0 0 0 my $topic = $_[0];
121 0         0 my $url = 'http://news.google.com.tw/news?ned=ttw&topic='.$topic;
122 0         0 my $ua = LWP::UserAgent->new;
123 0         0 $ua->agent('Mozilla/5.0');
124 0         0 my $response = $ua->get($url);
125 0         0 my $results = [];
126 0 0       0 return unless $response->is_success;
127              
128 0         0 my $re1 = '
(.*?)
';
129 0         0 my $re2 = '([^<]*)
'.
130             '([^<]*)'.
131             '\s?([^<]*)
'.
132             '([^<]*)....*?'.
133             ']*)>([^<]*)';
134 0         0 my @sections = split /($re1)/s,$response->content;
135 0         0 my $current_section = '';
136 0         0 foreach my $section (@sections) {
137 0 0       0 if ($section =~ m/$re1/s) {
138 0         0 $current_section = $1;
139 0         0 my @stories = split /($re2)/si,$current_section;
140 0         0 foreach my $story (@stories) {
141 0 0       0 if ($story =~ m/$re2/si) {
142 0         0 my $story_h = {};
143 0         0 my( $url, $headline, $source, $update_time, $summary, $related_url, $related_news) =
144             ( $1, $2, $3, $4, $5, $6, $7 );
145 0         0 $story_h->{url} = $url;
146 0         0 $story_h->{headline} = $headline;
147 0         0 $story_h->{source} = $source;
148 0         0 $story_h->{source} =~ s/ -//g;
149 0         0 $story_h->{update_time} = $update_time;
150 0         0 $story_h->{summary} = $summary;
151 0         0 $story_h->{related_url} = $related_url;
152 0         0 $story_h->{related_news} = $related_news;
153 0         0 push(@{$results},$story_h);
  0         0  
154             }
155             }
156             }
157             }
158 0         0 return $results;
159             }
160             1;
161              
162             __END__