| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::Eksisozluk; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 15696 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 4 | 1 |  |  | 1 |  | 3 | use warnings; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 22 |  | 
| 5 | 1 |  |  | 1 |  | 859 | use DateTime; | 
|  | 1 |  |  |  |  | 99599 |  | 
|  | 1 |  |  |  |  | 34 |  | 
| 6 | 1 |  |  | 1 |  | 706 | use LWP::UserAgent; | 
|  | 1 |  |  |  |  | 39645 |  | 
|  | 1 |  |  |  |  | 78 |  | 
| 7 | 1 |  |  | 1 |  | 472 | use experimental 'smartmatch'; | 
|  | 1 |  |  |  |  | 2844 |  | 
|  | 1 |  |  |  |  | 4 |  | 
| 8 | 1 |  |  | 1 |  | 475 | use utf8::all; | 
|  | 1 |  |  |  |  | 39216 |  | 
|  | 1 |  |  |  |  | 7 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | require Exporter; | 
| 11 |  |  |  |  |  |  | our @ISA = qw(Exporter); | 
| 12 |  |  |  |  |  |  | our %EXPORT_TAGS = ( 'all' => [ qw( | 
| 13 |  |  |  |  |  |  | new | 
| 14 |  |  |  |  |  |  | ) ] ); | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | #our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); | 
| 17 |  |  |  |  |  |  | our @EXPORT_OK = ( 'new' ); | 
| 18 |  |  |  |  |  |  | our @EXPORT = qw(); | 
| 19 |  |  |  |  |  |  | our $VERSION = '0.09'; | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | #Variables to be used throughout the program. | 
| 22 |  |  |  |  |  |  | my $date_now   = DateTime->now->set_time_zone('Europe/Istanbul'); | 
| 23 |  |  |  |  |  |  | my $date_search = DateTime->now->subtract(days=>1)->ymd; | 
| 24 |  |  |  |  |  |  | #my $date_file   = DateTime->now->subtract(days=>1)->dmy; | 
| 25 |  |  |  |  |  |  | #my $date_today  = DateTime->now->dmy; | 
| 26 |  |  |  |  |  |  | my $link_debe="https://eksisozluk.com/istatistik/dunun-en-begenilen-entryleri"; | 
| 27 |  |  |  |  |  |  | my $link_author="https://eksisozluk.com/biri/"; | 
| 28 |  |  |  |  |  |  | my $link_entry="https://eksisozluk.com/entry/"; | 
| 29 |  |  |  |  |  |  | my $link_topic="https://eksisozluk.com/"; | 
| 30 |  |  |  |  |  |  | my $link_search = "?a=search&searchform.when.from=$date_search"; | 
| 31 |  |  |  |  |  |  | my $link_popular="https://eksisozluk.com/basliklar/populer?p="; | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | sub new | 
| 34 |  |  |  |  |  |  | { | 
| 35 | 0 |  |  | 0 | 0 |  | my $class = shift; | 
| 36 | 0 |  |  |  |  |  | my $self = {}; | 
| 37 | 0 |  |  |  |  |  | bless $self, $class; | 
| 38 | 0 |  |  |  |  |  | return $self; | 
| 39 |  |  |  |  |  |  | } | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | sub entry{ | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | #Get id from arguments. | 
| 44 | 0 |  |  | 0 | 0 |  | my $class = shift; | 
| 45 | 0 |  |  |  |  |  | my $id = shift; | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | #Test if satisfies id number format. | 
| 48 | 0 | 0 | 0 |  |  |  | if($id !~ /^\d{1,9}$/ || $id==0){ | 
| 49 | 0 |  |  |  |  |  | die "Argument passed to the get_entry_by_id is of wrong format. It might be possible that you didn't call this method from an object. See readme file."; | 
| 50 |  |  |  |  |  |  | } | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 0 |  |  |  |  |  | my %entry = ( | 
| 53 |  |  |  |  |  |  | 'id' => $id, | 
| 54 |  |  |  |  |  |  | 'id_link' => "$link_entry$id", | 
| 55 |  |  |  |  |  |  | 'id_ref' => 0, | 
| 56 |  |  |  |  |  |  | 'date_accessed' => $date_now, | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | 'is_found' => 0, | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | 'topic' => "", | 
| 61 |  |  |  |  |  |  | 'topic_link' => "", | 
| 62 |  |  |  |  |  |  | 'number_in_topic' => 0, | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | 'date_published' => 0, | 
| 65 |  |  |  |  |  |  | 'is_modified' => 0, | 
| 66 |  |  |  |  |  |  | 'date_modified' => 0, | 
| 67 |  |  |  |  |  |  | 'date_print' => 0, | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | 'author' => "", | 
| 70 |  |  |  |  |  |  | 'author_link' => "", | 
| 71 |  |  |  |  |  |  | 'body_raw' => "", | 
| 72 |  |  |  |  |  |  | 'body' => "", | 
| 73 |  |  |  |  |  |  | 'fav_count' => 0 | 
| 74 |  |  |  |  |  |  | ); | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | #Get the entry file. | 
| 77 | 0 |  |  |  |  |  | my $ua = LWP::UserAgent->new; | 
| 78 | 0 |  |  |  |  |  | $ua->timeout(10); | 
| 79 | 0 |  |  |  |  |  | $ua->env_proxy; | 
| 80 | 0 |  |  |  |  |  | my $response = $ua->get("$link_entry$id"); | 
| 81 | 0 |  |  |  |  |  | my $downloaded_entry_file; | 
| 82 |  |  |  |  |  |  |  | 
| 83 | 0 | 0 |  |  |  |  | if($response->is_success){ | 
| 84 | 0 |  |  |  |  |  | $entry{'is_found'}=1; | 
| 85 | 0 |  |  |  |  |  | $downloaded_entry_file = $response->decoded_content; | 
| 86 |  |  |  |  |  |  | }else{ | 
| 87 |  |  |  |  |  |  | #return with is_found=0 | 
| 88 | 0 |  |  |  |  |  | return %entry; | 
| 89 |  |  |  |  |  |  | #Another possible way of handling could have been: | 
| 90 |  |  |  |  |  |  | #die "Error on downloading entry. Response: ".$response->status_line; | 
| 91 |  |  |  |  |  |  | #TODO ask user which way he/she wishes, ie. take parameters to handle this issue. | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | #topic & topic_link | 
| 95 | 0 | 0 |  |  |  |  | if($downloaded_entry_file=~/([^<>]*)<\/span><\/a>[^<]/){ | 
| 96 | 0 |  |  |  |  |  | $entry{'topic_link'}=$link_topic.$1; | 
| 97 | 0 |  |  |  |  |  | $entry{'topic'}=$2; | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | #number_in_topic | 
| 102 | 0 | 0 |  |  |  |  | if($downloaded_entry_file=~/ | 
| 103 | 0 |  |  |  |  |  | $entry{'number_in_topic'}=$1; | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | #date_published, is_modified, date_modified, date_print | 
| 107 | 0 | 0 |  |  |  |  | if($downloaded_entry_file=~/"commentTime">(\d\d)\.(\d\d)\.(\d\d\d\d)(\s\d\d\:\d\d)?/){ | 
| 108 | 0 | 0 |  |  |  |  | $entry{'date_published'}=$1.".".$2.".".$3.((defined $4) ? $4 : ""); | 
| 109 | 0 |  |  |  |  |  | $entry{'date_print'}=$entry{'date_published'}; | 
| 110 | 0 | 0 |  |  |  |  | if($downloaded_entry_file=~/"son g.ncelleme zaman.">([^<>]*)<\/time>/){ | 
| 111 | 0 |  |  |  |  |  | $entry{'is_modified'}=1; | 
| 112 | 0 |  |  |  |  |  | $entry{'date_modified'}=$1; | 
| 113 | 0 |  |  |  |  |  | $entry{'date_print'}.=" ~ ".$entry{'date_modified'}; | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  | } | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | #author | 
| 118 | 0 | 0 |  |  |  |  | if($downloaded_entry_file=~/data-author="(.*)" data-flags/){ | 
| 119 | 0 |  |  |  |  |  | $entry{'author'}=$1; | 
| 120 | 0 |  |  |  |  |  | $entry{'author_link'}=$link_author.$1; | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | #body_raw, body | 
| 124 | 0 | 0 |  |  |  |  | if($downloaded_entry_file=~/commentText">(.*)<\/div>/){ | 
| 125 | 0 |  |  |  |  |  | $entry{'body_raw'}=$1; | 
| 126 | 0 |  |  |  |  |  | $entry{'body'}=$1; | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | #body: open goo.gl | 
| 130 | 0 |  |  |  |  |  | while($entry{'body'}=~/href="(http:\/\/goo.gl[^"]*)"/){ | 
| 131 | 0 |  |  |  |  |  | my $temp=&longgoogl($1); | 
| 132 | 0 |  |  |  |  |  | $entry{'body'}=~s/href="(http:\/\/goo.gl[^"]*)"/href="$temp"/; | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | #body: open hidden references (akıllı bkz) | 
| 136 | 0 |  |  |  |  |  | $entry{'body'}=~s/<([^<]*)(data-query=\")([^>]*)\">\*<\/a><\/sup>/<$1$2$3\">\(* $3\)<\/a>/g; | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | #body: fix links so that they work outside eksisozluk.com + _blank | 
| 139 | 0 |  |  |  |  |  | $entry{'body'}=~s/href="\//target="_blank" href="https:\/\/eksisozluk.com\//g; | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | #body: gmail underline fix | 
| 142 | 0 |  |  |  |  |  | $entry{'body'}=~s/href="/style="text-decoration:none;" href="/g; | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | #body: fix imgur links ending without jpg | 
| 145 | 0 |  |  |  |  |  | $entry{'body'}=~s/(href="https?:\/\/[^.]*\.?imgur.com\/\w{7})"/$1\.jpg"/g; | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | #body: add img src to display images that are jpg jpeg png gif | 
| 148 | 0 |  |  |  |  |  | $entry{'body'}=~s/(href="([^"]*\.(jpe?g|png|gif)(:large)?)"[^<]*<\/a>)/$1 
 
  
 /g;
 | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | #body: add a northwest arrow, and domain name in parantheses | 
| 151 | 0 |  |  |  |  |  | $entry{'body'}=~s/(https?:\/\/(?!eksisozluk.com)([^\/<]*\.[^\/<]*)[^<]*<\/a>)/$1 \($2 ↗\)/g; | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | #favcount | 
| 154 | 0 | 0 |  |  |  |  | if($downloaded_entry_file=~/data-favorite-count="(\d+)"/){ | 
| 155 | 0 |  |  |  |  |  | $entry{'fav_count'}=$1; | 
| 156 |  |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | #id_ref (first entry of the day, used for debe) | 
| 159 | 0 |  |  |  |  |  | $response = $ua->get("$entry{'topic_link'}$link_search"); | 
| 160 | 0 |  |  |  |  |  | my $downloaded_search_file; | 
| 161 | 0 | 0 |  |  |  |  | if($response->is_success){ | 
| 162 | 0 |  |  |  |  |  | $downloaded_search_file=$response->decoded_content; | 
| 163 | 0 | 0 |  |  |  |  | if($downloaded_search_file=~/ | 
| 164 | 0 |  |  |  |  |  | $entry{'id_ref'}=$1; | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  | }else{ | 
| 167 |  |  |  |  |  |  | #Return with minus 1. | 
| 168 | 0 |  |  |  |  |  | $entry{'id_ref'}=-1; | 
| 169 |  |  |  |  |  |  | #Another possible way of handling. | 
| 170 |  |  |  |  |  |  | #die "Error on searching reference entry. Response: ".$response->status_line; | 
| 171 |  |  |  |  |  |  | } | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  |  | 
| 174 | 0 |  |  |  |  |  | return %entry; | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | } | 
| 177 |  |  |  |  |  |  |  | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | sub popular{ | 
| 180 |  |  |  |  |  |  |  | 
| 181 | 0 |  |  | 0 | 0 |  | my $currentpage = 0; | 
| 182 | 0 |  |  |  |  |  | my $pagecount = 1; | 
| 183 | 0 |  |  |  |  |  | my %popular_topics; | 
| 184 |  |  |  |  |  |  |  | 
| 185 | 0 |  |  |  |  |  | while($currentpage<$pagecount){ | 
| 186 |  |  |  |  |  |  |  | 
| 187 |  |  |  |  |  |  | #update pagecount | 
| 188 | 0 |  |  |  |  |  | $currentpage++; | 
| 189 |  |  |  |  |  |  |  | 
| 190 | 0 |  |  |  |  |  | my $ua = LWP::UserAgent->new; | 
| 191 | 0 |  |  |  |  |  | $ua->timeout(10); | 
| 192 | 0 |  |  |  |  |  | $ua->env_proxy; | 
| 193 | 0 |  |  |  |  |  | my $response = $ua->get("$link_popular$currentpage"); | 
| 194 | 0 |  |  |  |  |  | my $downloaded_popular_file; | 
| 195 |  |  |  |  |  |  |  | 
| 196 | 0 | 0 |  |  |  |  | if($response->is_success){ | 
| 197 | 0 |  |  |  |  |  | $downloaded_popular_file=$response->decoded_content; | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | #Die if downloaded page's number does not match what was expected. | 
| 200 | 0 | 0 | 0 |  |  |  | if($downloaded_popular_file=~/data-currentpage="(\d+)"/ && $currentpage != $1){ | 
| 201 | 0 |  |  |  |  |  | die("Asked for page \#$currentpage, got page \#$1."); | 
| 202 |  |  |  |  |  |  | } | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | #Get the pagecount value only once. | 
| 205 | 0 | 0 | 0 |  |  |  | if($pagecount == 1 && $downloaded_popular_file=~/data-pagecount="(\d+)"/){ | 
| 206 | 0 |  |  |  |  |  | $pagecount = $1; | 
| 207 |  |  |  |  |  |  | } | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | #We might have removed left frame populars here, but it doesn't really matter. | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | #Add topics to the hash, with the number of entries in it. | 
| 212 | 0 |  |  |  |  |  | while($downloaded_popular_file =~ />(.*)\s?(\d+)){ | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | #Add if not added before | 
| 215 | 0 | 0 |  |  |  |  | if(!($1 ~~ %popular_topics)){ | 
| 216 | 0 |  |  |  |  |  | $popular_topics{"$1"}=$2; | 
| 217 |  |  |  |  |  |  | } | 
| 218 |  |  |  |  |  |  | #Cross out the processed one | 
| 219 | 0 |  |  |  |  |  | $downloaded_popular_file=~s/>(.*)\s(\d+)-----/; | 
| 220 |  |  |  |  |  |  | } | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | }else{ | 
| 223 | 0 |  |  |  |  |  | die "Error on downloading popular page. Response: ".$response->status_line; | 
| 224 |  |  |  |  |  |  | } | 
| 225 |  |  |  |  |  |  |  | 
| 226 |  |  |  |  |  |  | } | 
| 227 |  |  |  |  |  |  |  | 
| 228 | 0 |  |  |  |  |  | return %popular_topics; | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | } | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  |  | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | sub debe_ids{ | 
| 236 |  |  |  |  |  |  |  | 
| 237 | 0 |  |  | 0 | 0 |  | my @debe; | 
| 238 | 0 |  |  |  |  |  | my $ua = LWP::UserAgent->new; | 
| 239 | 0 |  |  |  |  |  | $ua->timeout(10); | 
| 240 | 0 |  |  |  |  |  | $ua->env_proxy; | 
| 241 | 0 |  |  |  |  |  | my $response = $ua->get("$link_debe"); | 
| 242 | 0 |  |  |  |  |  | my $downloaded_debe_file; | 
| 243 |  |  |  |  |  |  |  | 
| 244 | 0 | 0 |  |  |  |  | if($response->is_success){ | 
| 245 | 0 |  |  |  |  |  | $downloaded_debe_file=$response->decoded_content; | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  |  | 
| 248 | 0 |  |  |  |  |  | while($downloaded_debe_file =~ /%23(\d+)">/){ | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | #If the matched entry id did not added before, then add. | 
| 251 | 0 | 0 |  |  |  |  | if(!($1 ~~ @debe)){ | 
| 252 | 0 |  |  |  |  |  | push @debe,$1; | 
| 253 |  |  |  |  |  |  | } | 
| 254 |  |  |  |  |  |  | #Otherwise just cross it, don't add something twice. | 
| 255 | 0 |  |  |  |  |  | $downloaded_debe_file=~s/%23(\d+)">/%23XXXX">/; | 
| 256 |  |  |  |  |  |  | } | 
| 257 |  |  |  |  |  |  |  | 
| 258 | 0 | 0 |  |  |  |  | if(scalar(@debe)!=50){ | 
| 259 | 0 |  |  |  |  |  | my $miscount = scalar(@debe); | 
| 260 | 0 |  |  |  |  |  | die "Debe list has $miscount entries"; | 
| 261 |  |  |  |  |  |  | } | 
| 262 |  |  |  |  |  |  |  | 
| 263 |  |  |  |  |  |  | }else{ | 
| 264 | 0 |  |  |  |  |  | die "Error on downloading data. Response: ".$response->status_line; | 
| 265 |  |  |  |  |  |  | } | 
| 266 |  |  |  |  |  |  |  | 
| 267 | 0 |  |  |  |  |  | return @debe; | 
| 268 |  |  |  |  |  |  | } | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | sub longgoogl{ | 
| 272 | 0 |  |  | 0 | 0 |  | my $googl = $_[0]; | 
| 273 | 0 |  |  |  |  |  | my $long = `curl -s $1 |grep HREF`; | 
| 274 | 0 | 0 |  |  |  |  | if($long =~/"(http[^"]*)"/){ | 
| 275 | 0 |  |  |  |  |  | $long = $1; | 
| 276 |  |  |  |  |  |  | } | 
| 277 | 0 |  |  |  |  |  | return $long; | 
| 278 |  |  |  |  |  |  | } | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | 1; | 
| 282 |  |  |  |  |  |  | __END__ |