blib/lib/WWW/Google/News/TW.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 58 | 118 | 49.1 |
branch | 6 | 22 | 27.2 |
condition | n/a | ||
subroutine | 10 | 11 | 90.9 |
pod | 2 | 3 | 66.6 |
total | 76 | 154 | 49.3 |
line | stmt | bran | cond | sub | pod | time | code | |
---|---|---|---|---|---|---|---|---|
1 | package WWW::Google::News::TW; | |||||||
2 | ||||||||
3 | 3 | 3 | 108891 | use utf8; | ||||
3 | 25 | |||||||
3 | 19 | |||||||
4 | 3 | 3 | 97 | use strict; | ||||
3 | 7 | |||||||
3 | 105 | |||||||
5 | 3 | 3 | 18 | use warnings; | ||||
3 | 10 | |||||||
3 | 310 | |||||||
6 | ||||||||
7 | require Exporter; | |||||||
8 | ||||||||
9 | our @ISA = qw(Exporter); | |||||||
10 | our @EXPORT_OK = qw(get_news get_news_for_topic get_news_for_category); | |||||||
11 | our $VERSION = '0.12'; | |||||||
12 | ||||||||
13 | 3 | 3 | 16 | use Carp; | ||||
3 | 6 | |||||||
3 | 304 | |||||||
14 | 3 | 3 | 2710 | use LWP; | ||||
3 | 355206 | |||||||
3 | 146 | |||||||
15 | 3 | 3 | 47 | use URI::Escape; | ||||
3 | 115 | |||||||
3 | 9846 | |||||||
16 | 3 | 3 | 13900 | use Encode; | ||||
3 | 49212 | |||||||
3 | 5302 | |||||||
17 | ||||||||
18 | sub get_news { | |||||||
19 | # Web version: http://news.google.com.tw/news?ned=tw | |||||||
20 | # plain text version : http://news.google.com.tw/news?ned=ttw | |||||||
21 | 1 | 1 | 1 | 9 | my $url = 'http://news.google.com.tw/news?ned=ttw'; | |||
22 | 1 | 4777 | my $ua = LWP::UserAgent->new; | |||||
23 | 1 | 4049 | $ua->agent('Mozilla/5.0'); | |||||
24 | 1 | 82 | my $response = $ua->get($url); | |||||
25 | 1 | 658921 | my $results = {}; | |||||
26 | 1 | 50 | 6 | return unless $response->is_success; | ||||
27 | ||||||||
28 | 1 | 14 | my $re1 = ' | (.*?)'; | ||||
29 | 1 | 4 | my $re2 = '([^<]*) '. |
|||||
30 | '([^<]*)'. | |||||||
31 | '\s? '. |
|||||||
32 | '([^<]*)...'; | |||||||
33 | ||||||||
34 | 1 | 11 | my $content = $response->decoded_content; | |||||
35 | 1 | 50 | 21660 | $content = $response->content if (not defined $content); | ||||
36 | 1 | 2588 | my @sections = split /($re1)/m,$content; | |||||
37 | 1 | 6 | my $current_section = ''; | |||||
38 | 1 | 5 | foreach my $section (@sections) { | |||||
39 | 1 | 50 | 137 | if ($section =~ m/$re1/m) { | ||||
40 | 0 | 0 | $current_section = $1; | |||||
41 | 0 | 0 | $current_section =~ s/&nbsp;//g; # or put this (.*?)(?:&nbsp;)? in re1 | |||||
42 | } else { | |||||||
43 | 1 | 1 | 2073 | my @stories = split /($re2)/mi,$section; | ||||
1 | 14 | |||||||
1 | 3 | |||||||
1 | 21 | |||||||
44 | 1 | 103020 | foreach my $story (@stories) { | |||||
45 | 1 | 50 | 34847 | if ($story =~ m/$re2/mi) { | ||||
46 | 0 | 0 | 0 | if (!(exists($results->{$current_section}))) { | ||||
47 | 0 | 0 | $results->{$current_section} = []; | |||||
48 | } | |||||||
49 | 0 | 0 | my $story_h = {}; | |||||
50 | 0 | 0 | my( $url, $headline, $source, $update_time, $summary ) = ( $1, $2, $3, $4, $5 ); | |||||
51 | 0 | 0 | $story_h->{url} = $url; | |||||
52 | 0 | 0 | $story_h->{headline} = $headline; | |||||
53 | 0 | 0 | $story_h->{source} = $source; | |||||
54 | 0 | 0 | $story_h->{source} =~ s/&nbsp;-//g; | |||||
55 | 0 | 0 | $story_h->{update_time} = $update_time; | |||||
56 | 0 | 0 | $story_h->{summary} = $summary; | |||||
57 | 0 | 0 | push(@{$results->{$current_section}},$story_h); | |||||
0 | 0 | |||||||
58 | } | |||||||
59 | } | |||||||
60 | } | |||||||
61 | } | |||||||
62 | 1 | 321 | return $results; | |||||
63 | } | |||||||
64 | ||||||||
65 | sub get_news_for_topic { | |||||||
66 | ||||||||
67 | 1 | 1 | 1 | 16 | my $topic = uri_escape( $_[0] ); | |||
68 | ||||||||
69 | 1 | 78 | my @results = (); | |||||
70 | 1 | 5 | my $url = "http://news.google.com.tw/news?hl=zh-TW&ned=ttw&q=$topic"; | |||||
71 | 1 | 13 | my $ua = LWP::UserAgent->new(); | |||||
72 | 1 | 3646 | $ua->agent('Mozilla/5.0'); | |||||
73 | ||||||||
74 | 1 | 75 | my $response = $ua->get($url); | |||||
75 | 1 | 50 | 635650 | return unless $response->is_success; | ||||
76 | ||||||||
77 | 1 | 15 | my $re1 = ' | (.*?)©\d{4} Google'; | ||||
78 | 1 | 4 | my $re2 = '(.*?) '. |
|||||
79 | '([^<]*)'. | |||||||
80 | '\s? '. |
|||||||
81 | '(.*?)...'; | |||||||
82 | ||||||||
83 | 1 | 11 | my $content = $response->decoded_content; | |||||
84 | 1 | 50 | 12182 | $content = $response->content if (not defined $content); | ||||
85 | 1 | 135 | my( $section ) = ( $content =~ m/$re1/s ); | |||||
86 | 1 | 158 | $section =~ s/\n//g; | |||||
87 | 1 | 120 | my @stories = split /($re2)/mi,$section; | |||||
88 | ||||||||
89 | 1 | 4 | foreach my $story (@stories) { | |||||
90 | 0 | 0 | 0 | if ($story =~ m/$re2/i) { | ||||
91 | 0 | 0 | my $story_h = {}; | |||||
92 | ||||||||
93 | 0 | 0 | my( $url, $headline, $source, $update_time, $summary ) = ( $1, $2, $3, $4, $5 ); | |||||
94 | 0 | 0 | $source =~ s/&nbsp;/ /g; | |||||
95 | 0 | 0 | $source =~ s/\s+/ /g; | |||||
96 | 0 | 0 | $update_time =~ s/&nbsp;/ /g; | |||||
97 | 0 | 0 | $update_time =~ s/\s+/ /g; | |||||
98 | 0 | 0 | $update_time =~ s/-//g; | |||||
99 | 0 | 0 | $headline =~ s#<.+?>##gi; | |||||
100 | 0 | 0 | $summary =~ s#<.+?>##gi; | |||||
101 | ||||||||
102 | 0 | 0 | $story_h->{url} = $url; | |||||
103 | 0 | 0 | $story_h->{headline} = $headline; | |||||
104 | 0 | 0 | $story_h->{source} = $source; | |||||
105 | 0 | 0 | $story_h->{update_time} = $update_time; | |||||
106 | 0 | 0 | $story_h->{summary} = $summary; | |||||
107 | ||||||||
108 | 0 | 0 | push(@results,$story_h); | |||||
109 | ||||||||
110 | } | |||||||
111 | } | |||||||
112 | ||||||||
113 | 1 | 178 | return \@results; | |||||
114 | ||||||||
115 | } | |||||||
116 | ||||||||
117 | sub get_news_for_category { | |||||||
118 | # Web version: http://news.google.com.tw/news?ned=tw | |||||||
119 | # plain text version : http://news.google.com.tw/news?ned=ttw | |||||||
120 | 0 | 0 | 0 | 0 | my $topic = $_[0]; | |||
121 | 0 | 0 | my $url = 'http://news.google.com.tw/news?ned=ttw&topic='.$topic; | |||||
122 | 0 | 0 | my $ua = LWP::UserAgent->new; | |||||
123 | 0 | 0 | $ua->agent('Mozilla/5.0'); | |||||
124 | 0 | 0 | my $response = $ua->get($url); | |||||
125 | 0 | 0 | my $results = []; | |||||
126 | 0 | 0 | 0 | return unless $response->is_success; | ||||
127 | ||||||||
128 | 0 | 0 | my $re1 = '
|
|||||
129 | 0 | 0 | my $re2 = '([^<]*) '. |
|||||
130 | '([^<]*)'. | |||||||
131 | '\s? '. |
|||||||
132 | '([^<]*)....*?'. | |||||||
133 | ']*)> |
|||||||
134 | 0 | 0 | my @sections = split /($re1)/s,$response->content; | |||||
135 | 0 | 0 | my $current_section = ''; | |||||
136 | 0 | 0 | foreach my $section (@sections) { | |||||
137 | 0 | 0 | 0 | if ($section =~ m/$re1/s) { | ||||
138 | 0 | 0 | $current_section = $1; | |||||
139 | 0 | 0 | my @stories = split /($re2)/si,$current_section; | |||||
140 | 0 | 0 | foreach my $story (@stories) { | |||||
141 | 0 | 0 | 0 | if ($story =~ m/$re2/si) { | ||||
142 | 0 | 0 | my $story_h = {}; | |||||
143 | 0 | 0 | my( $url, $headline, $source, $update_time, $summary, $related_url, $related_news) = | |||||
144 | ( $1, $2, $3, $4, $5, $6, $7 ); | |||||||
145 | 0 | 0 | $story_h->{url} = $url; | |||||
146 | 0 | 0 | $story_h->{headline} = $headline; | |||||
147 | 0 | 0 | $story_h->{source} = $source; | |||||
148 | 0 | 0 | $story_h->{source} =~ s/&nbsp;-//g; | |||||
149 | 0 | 0 | $story_h->{update_time} = $update_time; | |||||
150 | 0 | 0 | $story_h->{summary} = $summary; | |||||
151 | 0 | 0 | $story_h->{related_url} = $related_url; | |||||
152 | 0 | 0 | $story_h->{related_news} = $related_news; | |||||
153 | 0 | 0 | push(@{$results},$story_h); | |||||
0 | 0 | |||||||
154 | } | |||||||
155 | } | |||||||
156 | } | |||||||
157 | } | |||||||
158 | 0 | 0 | return $results; | |||||
159 | } | |||||||
160 | 1; | |||||||
161 | ||||||||
162 | __END__ |