File Coverage

blib/lib/WWW/Eksisozluk.pm
Criterion Covered Total %
statement 18 118 15.2
branch 0 34 0.0
condition 0 9 0.0
subroutine 6 11 54.5
pod 0 5 0.0
total 24 177 13.5


line stmt bran cond sub pod time code
1             package WWW::Eksisozluk;
2             # ABSTRACT: Perl interface for Eksisozluk.com
3             $WWW::Eksisozluk::VERSION = '0.11';
4 1     1   55915 use strict;
  1         2  
  1         31  
5 1     1   5 use warnings;
  1         2  
  1         33  
6 1     1   1208 use DateTime;
  1         286792  
  1         35  
7 1     1   666 use LWP::UserAgent;
  1         26046  
  1         28  
8 1     1   481 use experimental 'smartmatch';
  1         2361  
  1         4  
9 1     1   485 use utf8::all;
  1         21025  
  1         4  
10              
11             #Exporting stuff
12             require Exporter;
13             our @ISA = qw(Exporter);
14             our %EXPORT_TAGS = ( 'all' => [ qw(
15             new
16             ) ] );
17             our @EXPORT_OK = ( 'new' );
18             our @EXPORT = qw();
19              
20             sub new{
21 0     0 0   my $class = shift;
22 0           my $self = {};
23 0           bless $self, $class;
24 0           return $self;
25             }
26              
27              
28             #Global variables.
29             my $date_now = DateTime->now->set_time_zone('Europe/Istanbul');
30             my $date_search = DateTime->now->subtract(days=>1)->ymd; #2015-04-25
31             my %link = (
32             'debe' => "https://eksisozluk.com/istatistik/dunun-en-begenilen-entryleri",
33             'author' => "https://eksisozluk.com/biri/",
34             'entry' => "https://eksisozluk.com/entry/",
35             'topic' => "https://eksisozluk.com/",
36             'search' => "?a=search&searchform.when.from=$date_search",
37             'popular' => "https://eksisozluk.com/basliklar/populer?p=",
38             'today' => "https://eksisozluk.com/basliklar/bugun/"
39             );
40             my $sleeptime = 5; #sleep after each request. 0 would mean disabled.
41              
42              
43              
44             sub entry{
45              
46             #Get id from arguments.
47 0     0 0   my $class = shift;
48 0           my $id = shift;
49              
50             #Test if satisfies id number format.
51 0 0 0       if($id !~ /^\d{1,9}$/ || $id==0){
52 0           die "Argument passed to the entry subroutine is not correct. Did you create an object as described in synopsis?";
53             }
54              
55 0           my %entry = (
56             'id' => $id,
57             'id_link' => "$link{entry}$id",
58             'id_ref' => 0,
59              
60             'is_found' => 0,
61              
62             'topic' => "",
63             'topic_link' => "",
64            
65             'date' => 0,
66              
67             'author' => "",
68             'author_link' => "",
69             'body_raw' => "",
70             'body' => "",
71             'fav_count' => 0
72             );
73            
74             #Get the entry file.
75 0           my $ua = LWP::UserAgent->new;
76 0           $ua->timeout(10);
77 0           $ua->env_proxy;
78 0           my $response = $ua->get("$link{entry}$id");
79 0           sleep($sleeptime);
80 0           my $downloaded_entry_file;
81            
82 0 0         if($response->is_success){
83 0           $entry{'is_found'}=1;
84 0           $downloaded_entry_file = $response->decoded_content;
85             }else{
86             #return with is_found=0
87 0           return %entry;
88             #Another possible way of handling could have been:
89             #die "Error on downloading entry. Response: ".$response->status_line;
90             #TODO ask user which way he/she wishes, ie. take parameters to handle this issue.
91             }
92              
93             #topic & topic_link
94 0 0         if($downloaded_entry_file=~/<a href="([^<>]*)" itemprop="url"><span itemprop="name">([^<>]*)<\/span><\/a>[^<]/){
95 0           $entry{'topic_link'}=$link{topic}.$1;
96 0           $entry{'topic'}=$2;
97             }
98              
99             #date
100 0 0         if($downloaded_entry_file=~/$entry{'id'}\s([\d\s\.\:~]+)/){
101 0           $entry{'date'}=$1;
102             }
103            
104             #author
105 0 0         if($downloaded_entry_file=~/data-author="([\w\d\s]+)" data-author-id/){
106 0           $entry{'author'}=$1;
107 0           $entry{'author_link'}=$link{author}.$1;
108             }
109              
110             #body_raw, body
111 0 0         if($downloaded_entry_file=~/class=\"content\">(.*?)<\/div>/){
112 0           $entry{'body_raw'}=$1;
113 0           $entry{'body'}=$1; #handled below.
114             }
115              
116             #body: open goo.gl
117 0           while($entry{'body'}=~/href="(http:\/\/goo.gl[^"]*)"/){
118 0           my $temp=&longgoogl($1);
119 0           $entry{'body'}=~s/href="(http:\/\/goo.gl[^"]*)"/href="$temp"/;
120             }
121            
122             #body: open hidden references (akıllı bkz)
123 0           $entry{'body'}=~s/<sup class=\"ab\"><([^<]*)(data-query=\")([^>]*)\">\*<\/a><\/sup>/<$1$2$3\">\(* $3\)<\/a>/g;
124            
125             #body: fix links so that they work outside eksisozluk.com + _blank
126 0           $entry{'body'}=~s/href="\//target="_blank" href="https:\/\/eksisozluk.com\//g;
127            
128             #body: gmail underline fix
129 0           $entry{'body'}=~s/href="/style="text-decoration:none;" href="/g;
130            
131             #body: fix imgur links ending without jpg
132 0           $entry{'body'}=~s/(href="https?:\/\/[^.]*\.?imgur.com\/\w{7})"/$1\.jpg"/g;
133              
134             #body: add img src to display images that are jpg jpeg png
135 0           $entry{'body'}=~s/(href="([^"]*\.(jpe?g|png)(:large)?)"[^<]*<\/a>)/$1<br><br><img src="$2"><br><br>/g;
136            
137             #body: add a northwest arrow, and domain name in parantheses
138 0           $entry{'body'}=~s/(https?:\/\/(?!eksisozluk.com)([^\/<]*\.[^\/<]*)[^<]*<\/a>)/$1 \($2 &#8599;\)/g;
139              
140             #favcount
141 0 0         if($downloaded_entry_file=~/data-favorite-count="(\d+)"/){
142 0           $entry{'fav_count'}=$1;
143             }
144              
145             #id_ref (first entry of the day, used for debe)
146 0           $response = $ua->get("$entry{'topic_link'}$link{search}");
147 0           sleep($sleeptime);
148 0           my $downloaded_search_file;
149 0 0         if($response->is_success){
150 0           $downloaded_search_file=$response->decoded_content;
151 0 0         if($downloaded_search_file=~/<li data-id="(\d+)"/){
152 0           $entry{'id_ref'}=$1;
153             }
154             }else{
155             #Return with minus 1.
156 0           $entry{'id_ref'}=-1;
157             #Another possible way of handling.
158             #die "Error on searching reference entry. Response: ".$response->status_line;
159             }
160              
161              
162 0           return %entry;
163              
164             }
165              
166              
167              
168             sub topiclist{
169              
170             #Get type from arguments.
171 0     0 0   my $class = shift;
172 0           my $type = shift;
173              
174             #Test if it's valid.
175 0 0 0       if($type ne "popular" && $type ne "today"){
176 0           die "Argument passed to topiclist subroutine has to be either \"popular\" or \"today\".";
177             }
178              
179 0           my $currentpage = 0;
180 0           my $pagecount = 2;
181 0           my %topiclist_topics;
182              
183 0           while($currentpage<$pagecount){
184              
185             #update pagecount
186 0           $currentpage++;
187 0           print "looking for page $currentpage\n";
188              
189 0           my $ua = LWP::UserAgent->new;
190 0           $ua->timeout(10);
191 0           $ua->env_proxy;
192 0           my $response = $ua->get("$link{$type}$currentpage");
193 0           sleep($sleeptime);
194 0           my $downloaded_topiclist_file;
195              
196 0 0         if($response->is_success){
197 0           $downloaded_topiclist_file=$response->decoded_content;
198              
199             #Get the pagecount value only once.
200             #First page doesn't have pagecount.. Check it at second page.
201 0 0 0       if($currentpage == 2 && $downloaded_topiclist_file=~/data-pagecount="(\d+)"/){
202 0           $pagecount = $1;
203 0           print "pagecount becomes $pagecount\n";
204             }
205              
206             #We might have removed left frame populars here, but it doesn't really matter.
207              
208             #Add topics to the hash, with the number of entries in it.
209 0           while($downloaded_topiclist_file =~ />(.*)\s?<small>(\d+)</){
210              
211             #Add if not added before
212 0 0         if(!($1 ~~ %topiclist_topics)){
213 0           $topiclist_topics{"$1"}=$2;
214             }
215             #Cross out the processed one
216 0           $downloaded_topiclist_file=~s/>(.*)\s<small>(\d+)</-----/;
217             }
218              
219             }else{
220 0           die "Error on downloading topic list. Response: ".$response->status_line;
221             }
222              
223             }
224              
225 0           return %topiclist_topics;
226              
227             }
228              
229              
230              
231              
232             sub debe_ids{
233              
234 0     0 0   my @debe;
235 0           my $ua = LWP::UserAgent->new;
236 0           $ua->timeout(10);
237 0           $ua->env_proxy;
238 0           my $response = $ua->get("$link{debe}");
239 0           sleep($sleeptime);
240 0           my $downloaded_debe_file;
241            
242 0 0         if($response->is_success){
243 0           $downloaded_debe_file=$response->decoded_content;
244              
245              
246 0           while($downloaded_debe_file =~ /%23(\d+)">/){
247              
248             #If the matched entry id did not added before, then add.
249 0 0         if(!($1 ~~ @debe)){
250 0           push @debe,$1;
251             }
252             #Cross it to avoid duplicates.
253 0           $downloaded_debe_file=~s/%23(\d+)">/%23XXXX">/;
254             }
255              
256 0 0         if(scalar(@debe)!=50){
257 0           my $miscount = scalar(@debe);
258 0           warn "Debe list has $miscount entries";
259             }
260            
261             }else{
262 0           die "Error on downloading data. Response: ".$response->status_line;
263             }
264              
265 0           return @debe;
266             }
267              
268              
269             sub longgoogl{
270 0     0 0   my $googl = $_[0];
271 0           my $long = `curl -s $1 |grep HREF`;
272 0 0         if($long =~/"(http[^"]*)"/){
273 0           $long = $1;
274             }
275 0           return $long;
276             }
277              
278              
279             1;
280              
281             =pod
282              
283             =encoding UTF-8
284              
285              
286             =head1 DEPRECATED
287              
288             This module will be renamed as L<WWW::Eksi> and removed on 2016-10-29.
289              
290             =head1 NAME
291              
292             WWW::Eksisozluk - Perl interface for Eksisozluk.com
293              
294             =head1 VERSION
295              
296             version 0.12
297              
298             =head1 SYNOPSIS
299              
300             use WWW::Eksisozluk;
301             #You should create an object as shown below.
302             my $eksi = WWW::Eksisozluk->new();
303              
304             #IDs for today's debe list (element at index 0 is the top one)
305             my @debe = $eksi->debe_ids();
306              
307             #Details (body, author, date etc) of an entry with given id.
308             my %entry = $eksi->entry($debe[0]);
309              
310             #Popular topics with number of recent entries in it.
311             my %popular = $eksi->topiclist(popular);
312              
313             #Today's topics with number of recent entries in it.
314             my %today = $eksi->topiclist(today);
315              
316             =head1 DESCRIPTION
317              
318             This module provides a simple perl interface for eksisozluk, which is a user-based
319             web dictionary written mostly in Turkish, active since 1999. You can get debe list
320             (top entries of yesterday) by using this module. You can also reach topic list for
321             today, and popular topic lists.
322              
323             As a friendly note, data you reach by using this module might be subject to copyright
324             terms of Eksisozluk. See eksisozluk.com for details.
325              
326             =head1 AUTHOR
327              
328             Kivanc Yazan
329              
330             =head1 COPYRIGHT AND LICENSE
331              
332             This software is copyright (c) 2015 by Kivanc Yazan.
333              
334             This is free software; you can redistribute it and/or modify it under
335             the same terms as the Perl 5 programming language system itself.
336              
337             =cut
338              
339             __END__
340              
341             ===========================
342