File Coverage

blib/lib/WWW/Eksisozluk.pm
Criterion Covered Total %
statement 18 117 15.3
branch 0 40 0.0
condition 0 9 0.0
subroutine 6 11 54.5
pod 0 5 0.0
total 24 182 13.1


line stmt bran cond sub pod time code
1             package WWW::Eksisozluk;
2              
3 1     1   15696 use strict;
  1         1  
  1         32  
4 1     1   3 use warnings;
  1         1  
  1         22  
5 1     1   859 use DateTime;
  1         99599  
  1         34  
6 1     1   706 use LWP::UserAgent;
  1         39645  
  1         78  
7 1     1   472 use experimental 'smartmatch';
  1         2844  
  1         4  
8 1     1   475 use utf8::all;
  1         39216  
  1         7  
9              
10             require Exporter;
11             our @ISA = qw(Exporter);
12             our %EXPORT_TAGS = ( 'all' => [ qw(
13             new
14             ) ] );
15              
16             #our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
17             our @EXPORT_OK = ( 'new' );
18             our @EXPORT = qw();
19             our $VERSION = '0.09';
20              
21             #Variables to be used throughout the program.
22             my $date_now = DateTime->now->set_time_zone('Europe/Istanbul');
23             my $date_search = DateTime->now->subtract(days=>1)->ymd;
24             #my $date_file = DateTime->now->subtract(days=>1)->dmy;
25             #my $date_today = DateTime->now->dmy;
26             my $link_debe="https://eksisozluk.com/istatistik/dunun-en-begenilen-entryleri";
27             my $link_author="https://eksisozluk.com/biri/";
28             my $link_entry="https://eksisozluk.com/entry/";
29             my $link_topic="https://eksisozluk.com/";
30             my $link_search = "?a=search&searchform.when.from=$date_search";
31             my $link_popular="https://eksisozluk.com/basliklar/populer?p=";
32              
33             sub new
34             {
35 0     0 0   my $class = shift;
36 0           my $self = {};
37 0           bless $self, $class;
38 0           return $self;
39             }
40              
41             sub entry{
42              
43             #Get id from arguments.
44 0     0 0   my $class = shift;
45 0           my $id = shift;
46              
47             #Test if satisfies id number format.
48 0 0 0       if($id !~ /^\d{1,9}$/ || $id==0){
49 0           die "Argument passed to the get_entry_by_id is of wrong format. It might be possible that you didn't call this method from an object. See readme file.";
50             }
51              
52 0           my %entry = (
53             'id' => $id,
54             'id_link' => "$link_entry$id",
55             'id_ref' => 0,
56             'date_accessed' => $date_now,
57              
58             'is_found' => 0,
59              
60             'topic' => "",
61             'topic_link' => "",
62             'number_in_topic' => 0,
63            
64             'date_published' => 0,
65             'is_modified' => 0,
66             'date_modified' => 0,
67             'date_print' => 0,
68              
69             'author' => "",
70             'author_link' => "",
71             'body_raw' => "",
72             'body' => "",
73             'fav_count' => 0
74             );
75            
76             #Get the entry file.
77 0           my $ua = LWP::UserAgent->new;
78 0           $ua->timeout(10);
79 0           $ua->env_proxy;
80 0           my $response = $ua->get("$link_entry$id");
81 0           my $downloaded_entry_file;
82            
83 0 0         if($response->is_success){
84 0           $entry{'is_found'}=1;
85 0           $downloaded_entry_file = $response->decoded_content;
86             }else{
87             #return with is_found=0
88 0           return %entry;
89             #Another possible way of handling could have been:
90             #die "Error on downloading entry. Response: ".$response->status_line;
91             #TODO ask user which way he/she wishes, ie. take parameters to handle this issue.
92             }
93              
94             #topic & topic_link
95 0 0         if($downloaded_entry_file=~/
96 0           $entry{'topic_link'}=$link_topic.$1;
97 0           $entry{'topic'}=$2;
98             }
99              
100            
101             #number_in_topic
102 0 0         if($downloaded_entry_file=~/
  • 103 0           $entry{'number_in_topic'}=$1;
    104             }
    105              
    106             #date_published, is_modified, date_modified, date_print
    107 0 0         if($downloaded_entry_file=~/"commentTime">(\d\d)\.(\d\d)\.(\d\d\d\d)(\s\d\d\:\d\d)?/){
    108 0 0         $entry{'date_published'}=$1.".".$2.".".$3.((defined $4) ? $4 : "");
    109 0           $entry{'date_print'}=$entry{'date_published'};
    110 0 0         if($downloaded_entry_file=~/"son g.ncelleme zaman.">([^<>]*)<\/time>/){
    111 0           $entry{'is_modified'}=1;
    112 0           $entry{'date_modified'}=$1;
    113 0           $entry{'date_print'}.=" ~ ".$entry{'date_modified'};
    114             }
    115             }
    116            
    117             #author
    118 0 0         if($downloaded_entry_file=~/data-author="(.*)" data-flags/){
    119 0           $entry{'author'}=$1;
    120 0           $entry{'author_link'}=$link_author.$1;
    121             }
    122              
    123             #body_raw, body
    124 0 0         if($downloaded_entry_file=~/commentText">(.*)<\/div>/){
    125 0           $entry{'body_raw'}=$1;
    126 0           $entry{'body'}=$1;
    127             }
    128              
    129             #body: open goo.gl
    130 0           while($entry{'body'}=~/href="(http:\/\/goo.gl[^"]*)"/){
    131 0           my $temp=&longgoogl($1);
    132 0           $entry{'body'}=~s/href="(http:\/\/goo.gl[^"]*)"/href="$temp"/;
    133             }
    134            
    135             #body: open hidden references (akıllı bkz)
    136 0           $entry{'body'}=~s/<([^<]*)(data-query=\")([^>]*)\">\*<\/a><\/sup>/<$1$2$3\">\(* $3\)<\/a>/g;
    137            
    138             #body: fix links so that they work outside eksisozluk.com + _blank
    139 0           $entry{'body'}=~s/href="\//target="_blank" href="https:\/\/eksisozluk.com\//g;
    140            
    141             #body: gmail underline fix
    142 0           $entry{'body'}=~s/href="/style="text-decoration:none;" href="/g;
    143            
    144             #body: fix imgur links ending without jpg
    145 0           $entry{'body'}=~s/(href="https?:\/\/[^.]*\.?imgur.com\/\w{7})"/$1\.jpg"/g;
    146              
    147             #body: add img src to display images that are jpg jpeg png gif
    148 0           $entry{'body'}=~s/(href="([^"]*\.(jpe?g|png|gif)(:large)?)"[^<]*<\/a>)/$1



    /g;
    149            
    150             #body: add a northwest arrow, and domain name in parantheses
    151 0           $entry{'body'}=~s/(https?:\/\/(?!eksisozluk.com)([^\/<]*\.[^\/<]*)[^<]*<\/a>)/$1 \($2 ↗\)/g;
    152              
    153             #favcount
    154 0 0         if($downloaded_entry_file=~/data-favorite-count="(\d+)"/){
    155 0           $entry{'fav_count'}=$1;
    156             }
    157              
    158             #id_ref (first entry of the day, used for debe)
    159 0           $response = $ua->get("$entry{'topic_link'}$link_search");
    160 0           my $downloaded_search_file;
    161 0 0         if($response->is_success){
    162 0           $downloaded_search_file=$response->decoded_content;
    163 0 0         if($downloaded_search_file=~/
  • 164 0           $entry{'id_ref'}=$1;
    165             }
    166             }else{
    167             #Return with minus 1.
    168 0           $entry{'id_ref'}=-1;
    169             #Another possible way of handling.
    170             #die "Error on searching reference entry. Response: ".$response->status_line;
    171             }
    172              
    173              
    174 0           return %entry;
    175              
    176             }
    177              
    178              
    179             sub popular{
    180              
    181 0     0 0   my $currentpage = 0;
    182 0           my $pagecount = 1;
    183 0           my %popular_topics;
    184              
    185 0           while($currentpage<$pagecount){
    186              
    187             #update pagecount
    188 0           $currentpage++;
    189              
    190 0           my $ua = LWP::UserAgent->new;
    191 0           $ua->timeout(10);
    192 0           $ua->env_proxy;
    193 0           my $response = $ua->get("$link_popular$currentpage");
    194 0           my $downloaded_popular_file;
    195              
    196 0 0         if($response->is_success){
    197 0           $downloaded_popular_file=$response->decoded_content;
    198              
    199             #Die if downloaded page's number does not match what was expected.
    200 0 0 0       if($downloaded_popular_file=~/data-currentpage="(\d+)"/ && $currentpage != $1){
    201 0           die("Asked for page \#$currentpage, got page \#$1.");
    202             }
    203            
    204             #Get the pagecount value only once.
    205 0 0 0       if($pagecount == 1 && $downloaded_popular_file=~/data-pagecount="(\d+)"/){
    206 0           $pagecount = $1;
    207             }
    208              
    209             #We might have removed left frame populars here, but it doesn't really matter.
    210              
    211             #Add topics to the hash, with the number of entries in it.
    212 0           while($downloaded_popular_file =~ />(.*)\s?(\d+)
    213              
    214             #Add if not added before
    215 0 0         if(!($1 ~~ %popular_topics)){
    216 0           $popular_topics{"$1"}=$2;
    217             }
    218             #Cross out the processed one
    219 0           $downloaded_popular_file=~s/>(.*)\s(\d+)
    220             }
    221              
    222             }else{
    223 0           die "Error on downloading popular page. Response: ".$response->status_line;
    224             }
    225              
    226             }
    227              
    228 0           return %popular_topics;
    229              
    230             }
    231              
    232              
    233              
    234              
    235             sub debe_ids{
    236              
    237 0     0 0   my @debe;
    238 0           my $ua = LWP::UserAgent->new;
    239 0           $ua->timeout(10);
    240 0           $ua->env_proxy;
    241 0           my $response = $ua->get("$link_debe");
    242 0           my $downloaded_debe_file;
    243            
    244 0 0         if($response->is_success){
    245 0           $downloaded_debe_file=$response->decoded_content;
    246              
    247              
    248 0           while($downloaded_debe_file =~ /%23(\d+)">/){
    249              
    250             #If the matched entry id did not added before, then add.
    251 0 0         if(!($1 ~~ @debe)){
    252 0           push @debe,$1;
    253             }
    254             #Otherwise just cross it, don't add something twice.
    255 0           $downloaded_debe_file=~s/%23(\d+)">/%23XXXX">/;
    256             }
    257              
    258 0 0         if(scalar(@debe)!=50){
    259 0           my $miscount = scalar(@debe);
    260 0           die "Debe list has $miscount entries";
    261             }
    262            
    263             }else{
    264 0           die "Error on downloading data. Response: ".$response->status_line;
    265             }
    266              
    267 0           return @debe;
    268             }
    269              
    270              
    271             sub longgoogl{
    272 0     0 0   my $googl = $_[0];
    273 0           my $long = `curl -s $1 |grep HREF`;
    274 0 0         if($long =~/"(http[^"]*)"/){
    275 0           $long = $1;
    276             }
    277 0           return $long;
    278             }
    279              
    280              
    281             1;
    282             __END__