| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::Mechanize::Chrome::URLBlacklist; | 
| 2 | 1 |  |  | 1 |  | 736 | use Moo 2; | 
|  | 1 |  |  |  |  | 29 |  | 
|  | 1 |  |  |  |  | 9 |  | 
| 3 | 1 |  |  | 1 |  | 411 | use Filter::signatures; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 9 |  | 
| 4 | 1 |  |  | 1 |  | 33 | use feature 'signatures'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 191 |  | 
| 5 | 1 |  |  | 1 |  | 8 | no warnings 'experimental::signatures'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 649 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | our $VERSION = '0.70'; | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | =head1 NAME | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | WWW::Mechanize::Chrome::URLBlacklist - blacklist URLs from fetching | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | use WWW::Mechanize::Chrome; | 
| 16 |  |  |  |  |  |  | use WWW::Mechanize::Chrome::URLBlacklist; | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | my $mech = WWW::Mechanize::Chrome->new(); | 
| 19 |  |  |  |  |  |  | my $bl = WWW::Mechanize::Chrome::URLBlacklist->new( | 
| 20 |  |  |  |  |  |  | blacklist => [ | 
| 21 |  |  |  |  |  |  | qr!\bgoogleadservices\b!, | 
| 22 |  |  |  |  |  |  | ], | 
| 23 |  |  |  |  |  |  | whitelist => [ | 
| 24 |  |  |  |  |  |  | qr!\bcorion\.net\b!, | 
| 25 |  |  |  |  |  |  | ], | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | # fail all unknown URLs | 
| 28 |  |  |  |  |  |  | default => 'failRequest', | 
| 29 |  |  |  |  |  |  | # allow all unknown URLs | 
| 30 |  |  |  |  |  |  | # default => 'continueRequest', | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | on_default => sub { | 
| 33 |  |  |  |  |  |  | warn "Ignored URL $_[0] (action was '$_[1]')", | 
| 34 |  |  |  |  |  |  | }, | 
| 35 |  |  |  |  |  |  | ); | 
| 36 |  |  |  |  |  |  | $bl->enable($mech); | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | This module allows an easy approach to whitelisting/blacklisting URLs | 
| 41 |  |  |  |  |  |  | so that Chrome does not make requests to the blacklisted URLs. | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | =head1 ATTRIBUTES | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | =head2 C<< whitelist >> | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | Arrayref containing regular expressions of URLs to always allow fetching. | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | =cut | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | has 'whitelist' => ( | 
| 52 |  |  |  |  |  |  | is => 'lazy', | 
| 53 |  |  |  |  |  |  | default => sub { [] }, | 
| 54 |  |  |  |  |  |  | ); | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | =head2 C<< blacklist >> | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | Arrayref containing regular expressions of URLs to always deny fetching unless | 
| 59 |  |  |  |  |  |  | they are matched by something in the C<whitelist>. | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | =cut | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | has 'blacklist' => ( | 
| 64 |  |  |  |  |  |  | is => 'lazy', | 
| 65 |  |  |  |  |  |  | default => sub { [] }, | 
| 66 |  |  |  |  |  |  | ); | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | =head2 C<< default >> | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | default => 'continueRequest' | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | The action to take if an URL appears neither in the C<whitelist> nor | 
| 73 |  |  |  |  |  |  | in the C<blacklist>. The default is C<continueRequest>. If you want to block | 
| 74 |  |  |  |  |  |  | all unknown URLs, use C<failRequest> | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | =cut | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | has 'default' => ( | 
| 79 |  |  |  |  |  |  | is => 'rw', | 
| 80 |  |  |  |  |  |  | default => 'continueRequest', | 
| 81 |  |  |  |  |  |  | ); | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | =head2 C<< on_default >> | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | on_default => sub { | 
| 86 |  |  |  |  |  |  | my( $url, $action ) = @_; | 
| 87 |  |  |  |  |  |  | warn "Unknown URL <$url>"; | 
| 88 |  |  |  |  |  |  | }; | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | This callback is invoked for every URL that is neither in the whitelist nor | 
| 91 |  |  |  |  |  |  | in the blacklist. This is useful to see what URLs are still missing a category. | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | =cut | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | has 'on_default' => ( | 
| 97 |  |  |  |  |  |  | is => 'rw', | 
| 98 |  |  |  |  |  |  | ); | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | =head2 C<< _mech >> | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | (internal) The WWW::Mechanize::Chrome instance we are connected to | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | =cut | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | has '_mech' => ( | 
| 107 |  |  |  |  |  |  | is => 'rw', | 
| 108 |  |  |  |  |  |  | ); | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | =head2 C<< _request_listener >> | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | (internal) The request listener created by WWW::Mechanize::Chrome while listening | 
| 113 |  |  |  |  |  |  | for URL messages | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | =cut | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | has '_request_listener' => ( | 
| 118 |  |  |  |  |  |  | is => 'rw', | 
| 119 |  |  |  |  |  |  | ); | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | =head1 METHODS | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | =head2 C<< ->new >> | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | my $bl = WWW::Mechanize::Chrome::URLBlacklist->new( | 
| 126 |  |  |  |  |  |  | blacklist => [ | 
| 127 |  |  |  |  |  |  | qr!\bgoogleadservices\b!, | 
| 128 |  |  |  |  |  |  | qr!\ioam\.de\b!, | 
| 129 |  |  |  |  |  |  | qr!\burchin\.js$!, | 
| 130 |  |  |  |  |  |  | qr!.*\.(?:woff|ttf)$!, | 
| 131 |  |  |  |  |  |  | qr!.*\.css(\?\w+)?$!, | 
| 132 |  |  |  |  |  |  | qr!.*\.png$!, | 
| 133 |  |  |  |  |  |  | qr!.*\bfavicon.ico$!, | 
| 134 |  |  |  |  |  |  | ], | 
| 135 |  |  |  |  |  |  | ); | 
| 136 |  |  |  |  |  |  | $bl->enable( $mech ); | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | Creates a new instance of a blacklist, but does B<not> activate it yet. | 
| 139 |  |  |  |  |  |  | See C<< ->enable >> for that. | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | =cut | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 0 |  |  | 0 | 0 |  | sub on_requestPaused( $self, $info ) { | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 144 | 0 |  |  |  |  |  | my $id = $info->{params}->{requestId}; | 
| 145 | 0 |  |  |  |  |  | my $request = $info->{params}->{request}; | 
| 146 | 0 |  |  |  |  |  | my $mech = $self->_mech; | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 0 | 0 |  |  |  |  | if( grep { $request->{url} =~ /$_/ } @{ $self->whitelist } ) { | 
|  | 0 | 0 |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | #warn "Whitelisted URL $request->{url}"; | 
| 150 | 0 |  |  |  |  |  | $mech->target->send_message('Fetch.continueRequest', requestId => $id, )->retain; | 
| 151 |  |  |  |  |  |  |  | 
| 152 | 0 |  |  |  |  |  | } elsif( grep { $request->{url} =~ /$_/ } @{ $self->blacklist }) { | 
|  | 0 |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | #warn "Whitelisted URL $request->{url}"; | 
| 154 | 0 |  |  |  |  |  | $mech->target->send_message('Fetch.failRequest', requestId => $id, errorReason => 'AddressUnreachable' )->retain; | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | } else { | 
| 157 |  |  |  |  |  |  |  | 
| 158 | 0 |  |  |  |  |  | my $action; | 
| 159 | 0 | 0 |  |  |  |  | if( $self->default eq 'continueRequest' ) { | 
| 160 | 0 |  |  |  |  |  | $mech->target->send_message('Fetch.continueRequest', requestId => $id, )->retain; | 
| 161 | 0 |  |  |  |  |  | $action = 'continue'; | 
| 162 |  |  |  |  |  |  | } else { | 
| 163 | 0 |  |  |  |  |  | $mech->target->send_message('Fetch.failRequest', requestId => $id, errorReason => 'AddressUnreachable' ); | 
| 164 | 0 |  |  |  |  |  | $action = 'fail'; | 
| 165 |  |  |  |  |  |  | }; | 
| 166 | 0 | 0 |  |  |  |  | if( my $cb = $self->on_default ) { | 
| 167 | 0 |  |  |  |  |  | local $@; | 
| 168 | 0 |  |  |  |  |  | my $ok = eval { | 
| 169 | 0 |  |  |  |  |  | $cb->($request->{url}, $action); | 
| 170 | 0 |  |  |  |  |  | 1; | 
| 171 |  |  |  |  |  |  | }; | 
| 172 | 0 | 0 |  |  |  |  | warn $@ if !$ok; | 
| 173 |  |  |  |  |  |  | }; | 
| 174 |  |  |  |  |  |  | }; | 
| 175 |  |  |  |  |  |  | }; | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | =head2 C<< ->enable >> | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | $bl->enable( $mech ); | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | Attaches the blacklist to a WWW::Mechanize::Chrome object. | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | =cut | 
| 184 |  |  |  |  |  |  |  | 
| 185 | 0 |  |  | 0 | 1 |  | sub enable( $self, $mech ) { | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 186 | 0 |  |  |  |  |  | $self->_mech( $mech ); | 
| 187 | 0 |  |  |  |  |  | $self->_mech->target->send_message('Fetch.enable'); | 
| 188 |  |  |  |  |  |  | my $request_listener = $mech->add_listener('Fetch.requestPaused', sub { | 
| 189 | 0 |  |  | 0 |  |  | $self->on_requestPaused( @_ ); | 
| 190 | 0 |  |  |  |  |  | }); | 
| 191 | 0 |  |  |  |  |  | $self->_request_listener( $request_listener ); | 
| 192 |  |  |  |  |  |  | }; | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | =head2 C<< ->enable >> | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | $bl->disable( $mech ); | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | Removes the blacklist to a WWW::Mechanize::Chrome object. | 
| 199 |  |  |  |  |  |  |  | 
| 200 |  |  |  |  |  |  | =cut | 
| 201 |  |  |  |  |  |  |  | 
| 202 | 0 |  |  | 0 | 0 |  | sub disable( $self ) { | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 203 | 0 |  |  |  |  |  | $self->request_listener(undef); | 
| 204 | 0 |  |  |  |  |  | $self->_mech->target->send_message('Fetch.disable'); | 
| 205 | 0 |  |  |  |  |  | $self->_mech(undef); | 
| 206 |  |  |  |  |  |  | }; | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | 1; | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | __END__ | 
| 211 |  |  |  |  |  |  |  | 
| 212 |  |  |  |  |  |  | =head1 REPOSITORY | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | The public repository of this module is | 
| 215 |  |  |  |  |  |  | L<https://github.com/Corion/www-mechanize-chrome>. | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  | =head1 SUPPORT | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | The public support forum of this module is L<https://perlmonks.org/>. | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  | =head1 TALKS | 
| 222 |  |  |  |  |  |  |  | 
| 223 |  |  |  |  |  |  | I've given a German talk at GPW 2017, see L<http://act.yapc.eu/gpw2017/talk/7027> | 
| 224 |  |  |  |  |  |  | and L<https://corion.net/talks> for the slides. | 
| 225 |  |  |  |  |  |  |  | 
| 226 |  |  |  |  |  |  | At The Perl Conference 2017 in Amsterdam, I also presented a talk, see | 
| 227 |  |  |  |  |  |  | L<http://act.perlconference.org/tpc-2017-amsterdam/talk/7022>. | 
| 228 |  |  |  |  |  |  | The slides for the English presentation at TPCiA 2017 are at | 
| 229 |  |  |  |  |  |  | L<https://corion.net/talks/WWW-Mechanize-Chrome/www-mechanize-chrome.en.html>. | 
| 230 |  |  |  |  |  |  |  | 
| 231 |  |  |  |  |  |  | =head1 BUG TRACKER | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | Please report bugs in this module via the Github bug queue at | 
| 234 |  |  |  |  |  |  | L<https://github.com/Corion/WWW-Mechanize-Chrome/issues> | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | =head1 AUTHOR | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | Max Maischein C<corion@cpan.org> | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | =head1 COPYRIGHT (c) | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  | Copyright 2010-2023 by Max Maischein C<corion@cpan.org>. | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | =head1 LICENSE | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | This module is released under the same terms as Perl itself. | 
| 247 |  |  |  |  |  |  |  | 
| 248 |  |  |  |  |  |  | =cut |