File Coverage

blib/lib/WWW/Eksisozluk.pm

Criterion	Covered	Total	%
statement	18	117	15.3
branch	0	40	0.0
condition	0	9	0.0
subroutine	6	11	54.5
pod	0	5	0.0
total	24	182	13.1

line	stmt	bran	cond	sub	pod	time	code
1							package WWW::Eksisozluk;
2
3	1			1		15696	use strict;
	1					1
	1					32
4	1			1		3	use warnings;
	1					1
	1					22
5	1			1		859	use DateTime;
	1					99599
	1					34
6	1			1		706	use LWP::UserAgent;
	1					39645
	1					78
7	1			1		472	use experimental 'smartmatch';
	1					2844
	1					4
8	1			1		475	use utf8::all;
	1					39216
	1					7
9
10							require Exporter;
11							our @ISA = qw(Exporter);
12							our %EXPORT_TAGS = ( 'all' => [ qw(
13							new
14							) ] );
15
16							#our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
17							our @EXPORT_OK = ( 'new' );
18							our @EXPORT = qw();
19							our $VERSION = '0.09';
20
21							#Variables to be used throughout the program.
22							my $date_now = DateTime->now->set_time_zone('Europe/Istanbul');
23							my $date_search = DateTime->now->subtract(days=>1)->ymd;
24							#my $date_file = DateTime->now->subtract(days=>1)->dmy;
25							#my $date_today = DateTime->now->dmy;
26							my $link_debe="https://eksisozluk.com/istatistik/dunun-en-begenilen-entryleri";
27							my $link_author="https://eksisozluk.com/biri/";
28							my $link_entry="https://eksisozluk.com/entry/";
29							my $link_topic="https://eksisozluk.com/";
30							my $link_search = "?a=search&searchform.when.from=$date_search";
31							my $link_popular="https://eksisozluk.com/basliklar/populer?p=";
32
33							sub new
34							{
35	0			0	0		my $class = shift;
36	0						my $self = {};
37	0						bless $self, $class;
38	0						return $self;
39							}
40
41							sub entry{
42
43							#Get id from arguments.
44	0			0	0		my $class = shift;
45	0						my $id = shift;
46
47							#Test if satisfies id number format.
48	0	0	0				if($id !~ /^\d{1,9}$/ \|\| $id==0){
49	0						die "Argument passed to the get_entry_by_id is of wrong format. It might be possible that you didn't call this method from an object. See readme file.";
50							}
51
52	0						my %entry = (
53							'id' => $id,
54							'id_link' => "$link_entry$id",
55							'id_ref' => 0,
56							'date_accessed' => $date_now,
57
58							'is_found' => 0,
59
60							'topic' => "",
61							'topic_link' => "",
62							'number_in_topic' => 0,
63
64							'date_published' => 0,
65							'is_modified' => 0,
66							'date_modified' => 0,
67							'date_print' => 0,
68
69							'author' => "",
70							'author_link' => "",
71							'body_raw' => "",
72							'body' => "",
73							'fav_count' => 0
74							);
75
76							#Get the entry file.
77	0						my $ua = LWP::UserAgent->new;
78	0						$ua->timeout(10);
79	0						$ua->env_proxy;
80	0						my $response = $ua->get("$link_entry$id");
81	0						my $downloaded_entry_file;
82
83	0	0					if($response->is_success){
84	0						$entry{'is_found'}=1;
85	0						$downloaded_entry_file = $response->decoded_content;
86							}else{
87							#return with is_found=0
88	0						return %entry;
89							#Another possible way of handling could have been:
90							#die "Error on downloading entry. Response: ".$response->status_line;
91							#TODO ask user which way he/she wishes, ie. take parameters to handle this issue.
92							}
93
94							#topic & topic_link
95	0	0					if($downloaded_entry_file=~/([^<>]*)<\/span><\/a>[^<]/){
96	0						$entry{'topic_link'}=$link_topic.$1;
97	0						$entry{'topic'}=$2;
98							}
99
100
101							#number_in_topic
102	0	0					if($downloaded_entry_file=~/
103	0						$entry{'number_in_topic'}=$1;
104							}
105
106							#date_published, is_modified, date_modified, date_print
107	0	0					if($downloaded_entry_file=~/"commentTime">(\d\d)\.(\d\d)\.(\d\d\d\d)(\s\d\d\:\d\d)?/){
108	0	0					$entry{'date_published'}=$1.".".$2.".".$3.((defined $4) ? $4 : "");
109	0						$entry{'date_print'}=$entry{'date_published'};
110	0	0					if($downloaded_entry_file=~/"son g.ncelleme zaman.">([^<>]*)<\/time>/){
111	0						$entry{'is_modified'}=1;
112	0						$entry{'date_modified'}=$1;
113	0						$entry{'date_print'}.=" ~ ".$entry{'date_modified'};
114							}
115							}
116
117							#author
118	0	0					if($downloaded_entry_file=~/data-author="(.*)" data-flags/){
119	0						$entry{'author'}=$1;
120	0						$entry{'author_link'}=$link_author.$1;
121							}
122
123							#body_raw, body
124	0	0					if($downloaded_entry_file=~/commentText">(.*)<\/div>/){
125	0						$entry{'body_raw'}=$1;
126	0						$entry{'body'}=$1;
127							}
128
129							#body: open goo.gl
130	0						while($entry{'body'}=~/href="(http:\/\/goo.gl[^"]*)"/){
131	0						my $temp=&longgoogl($1);
132	0						$entry{'body'}=~s/href="(http:\/\/goo.gl[^"]*)"/href="$temp"/;
133							}
134
135							#body: open hidden references (akıllı bkz)
136	0						$entry{'body'}=~s/^{<([^<])(data-query=\")([^>])\">\<\/a><\/sup>/<$1$2$3\">$ $3$<\/a>/g;}
137
138							#body: fix links so that they work outside eksisozluk.com + _blank
139	0						$entry{'body'}=~s/href="\//target="_blank" href="https:\/\/eksisozluk.com\//g;
140
141							#body: gmail underline fix
142	0						$entry{'body'}=~s/href="/style="text-decoration:none;" href="/g;
143
144							#body: fix imgur links ending without jpg
145	0						$entry{'body'}=~s/(href="https?:\/\/[^.]*\.?imgur.com\/\w{7})"/$1\.jpg"/g;
146
147							#body: add img src to display images that are jpg jpeg png gif
148	0						$entry{'body'}=~s/(href="([^"]\.(jpe?g\|png\|gif)(:large)?)"[^<]<\/a>)/$1 /g;
149
150							#body: add a northwest arrow, and domain name in parantheses
151	0						$entry{'body'}=~s/(https?:\/\/(?!eksisozluk.com)([^\/<]\.[^\/<])[^<]*<\/a>)/$1 $$2 ↗$/g;
152
153							#favcount
154	0	0					if($downloaded_entry_file=~/data-favorite-count="(\d+)"/){
155	0						$entry{'fav_count'}=$1;
156							}
157
158							#id_ref (first entry of the day, used for debe)
159	0						$response = $ua->get("$entry{'topic_link'}$link_search");
160	0						my $downloaded_search_file;
161	0	0					if($response->is_success){
162	0						$downloaded_search_file=$response->decoded_content;
163	0	0					if($downloaded_search_file=~/
164	0						$entry{'id_ref'}=$1;
165							}
166							}else{
167							#Return with minus 1.
168	0						$entry{'id_ref'}=-1;
169							#Another possible way of handling.
170							#die "Error on searching reference entry. Response: ".$response->status_line;
171							}
172
173
174	0						return %entry;
175
176							}
177
178
179							sub popular{
180
181	0			0	0		my $currentpage = 0;
182	0						my $pagecount = 1;
183	0						my %popular_topics;
184
185	0						while($currentpage<$pagecount){
186
187							#update pagecount
188	0						$currentpage++;
189
190	0						my $ua = LWP::UserAgent->new;
191	0						$ua->timeout(10);
192	0						$ua->env_proxy;
193	0						my $response = $ua->get("$link_popular$currentpage");
194	0						my $downloaded_popular_file;
195
196	0	0					if($response->is_success){
197	0						$downloaded_popular_file=$response->decoded_content;
198
199							#Die if downloaded page's number does not match what was expected.
200	0	0	0				if($downloaded_popular_file=~/data-currentpage="(\d+)"/ && $currentpage != $1){
201	0						die("Asked for page \#$currentpage, got page \#$1.");
202							}
203
204							#Get the pagecount value only once.
205	0	0	0				if($pagecount == 1 && $downloaded_popular_file=~/data-pagecount="(\d+)"/){
206	0						$pagecount = $1;
207							}
208
209							#We might have removed left frame populars here, but it doesn't really matter.
210
211							#Add topics to the hash, with the number of entries in it.
212	0						while($downloaded_popular_file =~ />(.*)\s?(\d+)
213
214							#Add if not added before
215	0	0					if(!($1 ~~ %popular_topics)){
216	0						$popular_topics{"$1"}=$2;
217							}
218							#Cross out the processed one
219	0						$downloaded_popular_file=~s/>(.*)\s(\d+)
220							}
221
222							}else{
223	0						die "Error on downloading popular page. Response: ".$response->status_line;
224							}
225
226							}
227
228	0						return %popular_topics;
229
230							}
231
232
233
234
235							sub debe_ids{
236
237	0			0	0		my @debe;
238	0						my $ua = LWP::UserAgent->new;
239	0						$ua->timeout(10);
240	0						$ua->env_proxy;
241	0						my $response = $ua->get("$link_debe");
242	0						my $downloaded_debe_file;
243
244	0	0					if($response->is_success){
245	0						$downloaded_debe_file=$response->decoded_content;
246
247
248	0						while($downloaded_debe_file =~ /%23(\d+)">/){
249
250							#If the matched entry id did not added before, then add.
251	0	0					if(!($1 ~~ @debe)){
252	0						push @debe,$1;
253							}
254							#Otherwise just cross it, don't add something twice.
255	0						$downloaded_debe_file=~s/%23(\d+)">/%23XXXX">/;
256							}
257
258	0	0					if(scalar(@debe)!=50){
259	0						my $miscount = scalar(@debe);
260	0						die "Debe list has $miscount entries";
261							}
262
263							}else{
264	0						die "Error on downloading data. Response: ".$response->status_line;
265							}
266
267	0						return @debe;
268							}
269
270
271							sub longgoogl{
272	0			0	0		my $googl = $_[0];
273	0						my $long = `curl -s $1 \|grep HREF`;
274	0	0					if($long =~/"(http[^"]*)"/){
275	0						$long = $1;
276							}
277	0						return $long;
278							}
279
280
281							1;
282							__END__