File Coverage

blib/lib/WordPress/Grep.pm
Criterion Covered Total %
statement 23 138 16.6
branch 0 54 0.0
condition 0 12 0.0
subroutine 8 27 29.6
pod 3 3 100.0
total 34 234 14.5


line stmt bran cond sub pod time code
1             package WordPress::Grep;
2 1     1   898 use v5.14;
  1         3  
3 1     1   4 use strict;
  1         2  
  1         16  
4 1     1   4 use warnings;
  1         1  
  1         26  
5              
6 1     1   509 use utf8;
  1         12  
  1         5  
7 1     1   470 use subs qw();
  1         21  
  1         24  
8 1     1   4 use vars qw($VERSION);
  1         2  
  1         46  
9              
10 1     1   5 use Carp qw(croak);
  1         2  
  1         43  
11 1     1   1278 use DBI;
  1         15246  
  1         1552  
12              
13             $VERSION = '0.010_004';
14              
15             =encoding utf8
16              
17             =head1 NAME
18              
19             WordPress::Grep - Search Wordpress titles and content
20              
21             =head1 SYNOPSIS
22              
23             use WordPress::Grep;
24              
25             my $wp_grep = WordPress::Grep->connect(
26             # required
27             user => $user,
28             database => $db,
29              
30             # optional
31             password => $pass,
32              
33             # has defaults
34             host => 'localhost',
35             port => '3306',
36             );
37              
38             my $posts = $wp_grep->search(
39             sql_like => '....',
40             regex => qr/ ... /,
41             code => sub { ... },
42             include_columns => [ ], # not implemented
43             exclude_columns => [ ], # not implemented
44             );
45              
46             foreach my $post_id ( keys %$post ) {
47             printf "%4d %s\n",
48             $posts->{$post_id}{ID}, $posts->{$post_id}{post_title};
49             }
50              
51             =head1 DESCRIPTION
52              
53             [This is alpha software.]
54              
55             This module allows you to search through the posts in a WordPress
56             database by directly examining the C table. Forget about
57             these limited APIs. Use the power of Perl directly on the content.
58              
59             I've long wanted this tool to examine consistency in my posts. I want
60             to check my use of CSS and HTML across all posts to check what I may
61             need to change when I change how I do things. This sort of thing is hard
62             to do with existing tools and the WordPress API (although there is a
63             L).
64              
65             I want to go through all posts with all the power of Perl, so my
66             grep:
67              
68             =over 4
69              
70             =item 1 Takes an optional LIKE argument that it applies to C and C.
71              
72             =item 2 Takes an optional regex argument that it uses to filter the returned rows, keeping only the rows whose titles or content that satisfy the regex.
73              
74             =item 3 Takes a code argument that it uses to filter the returned rows, keeping only the rows which return true for that subroutine.
75              
76             =item 4 Returns the matching rows in the same form that C's C returns. The top-level key is the value in the
77             C column.
78              
79             =back
80              
81             Right now, there are some limitations based on my particular use:
82              
83             =over 4
84              
85             =item * I only select the C types.
86              
87             =item * I assume UTF-8 everywhere, including in the database.
88              
89             =item * Applying a regex or code filter always return (at least) the C and C.
90              
91             =item * The LIKE and regex filters only work on C and C. The code filter gets the entire row as a hash reference and can do what it likes.
92              
93             =back
94              
95             I've set up a slave of the MySQL server that runs my WordPress
96             installations. In that slave, I set up a read-only user for this tool.
97              
98             =head2 Methods
99              
100             =over 4
101              
102             =item connect
103              
104             Connect to the WordPress database. You must specify these parameters,
105             which should be the same ones in your I (although if
106             you need this tool frequently, consider setting up a read-only user
107             for this, or run it against a slave).
108              
109             user
110             database
111              
112             If you need a password, you'll have to provide that:
113              
114             password
115              
116             These parameters have defaults
117              
118             host defaults to localhost
119             port defaults to 3306
120             user defaults to root
121              
122             =cut
123              
124             sub connect {
125 0     0 1   my( $class, %args ) = @_;
126              
127 0           foreach my $required ( qw(database) ) {
128             croak "You must set '$required' in connect()"
129 0 0         unless defined $args{$required};
130             }
131              
132 0   0       $args{host} //= 'localhost';
133 0   0       $args{port} //= 3306;
134 0   0       $args{user} //= 'root';
135              
136 0           my $dsn = "dbi:mysql:db=$args{database};host=$args{host};port=$args{port}";
137              
138             #dbi:DriverName:database_name
139             #dbi:DriverName:database_name@hostname:port
140             #dbi:DriverName:database=database_name;host=hostname;port=port
141              
142 0           my $db = DBI->connect( $dsn, $args{user}, $args{password} );
143 0 0         croak "Could not connect to database [$args{host}:$args{port}]\n$DBI::Error"
144             unless defined $db;
145 0           my $self = bless {
146             db => $db,
147             args => \%args,
148             }, $class;
149 0           $self->_db_init;
150              
151 0           return $self;
152             }
153              
154             sub _db_init {
155 0     0     my( $self ) = @_;
156              
157 0           $self->_db_utf8;
158             }
159              
160             sub _db_utf8 {
161 0     0     my( $self ) = @_;
162              
163 0           my $sql = qq{SET NAMES 'utf8';};
164 0           $self->db->do($sql);
165 0           $self->db->{'mysql_enable_utf8'} = 1;
166             }
167              
168             =item db
169              
170             Return the db connection. This is a vanilla DBI connection to MySQL.
171             If you subclass this, you can do further setup by overriding C<_db_init>.
172              
173             =cut
174              
175 0     0 1   sub db { $_[0]->{db} }
176              
177             =item search
178              
179             The possible arguments:
180              
181             sql_like - a string
182             regex - a regular expression reference (qr//)
183             code - a subroutine reference
184              
185             categories - an array reference of category names
186             tags - an array reference of tags names
187              
188             This method first builds a query to search through the C
189             table.
190              
191             If you specify C, it limits the returned rows to those whose
192             C or C match that argument.
193              
194             If you specify C or C, another query annotates the
195             return rows with term information. If the C or C have
196             values, the return rows are reduced to those that have those categories
197             or tags. If you don't want to reduce the rows just yet, you can use C
198             to examine the row yourself.
199              
200             If you specify C, it filters the returned rows to those whose
201             C or C satisfy the regular expression.
202              
203             If you specify C, it filters the returned rows to those for
204             which the subroutine reference returns true. The coderef gets a hash
205             reference of the current row. It's up to you to decide what to do with
206             it.
207              
208             These filters are consecutive. You can specify any combination of them
209             but they always happen in that order. The C only gets the rows
210             that satisfied the C, and the C only gets the rows
211             that satisfied C and C.
212              
213             =cut
214              
215             sub search {
216 0     0 1   my( $self, %args ) = @_;
217              
218 0           $self->_set_args( \%args );
219 0           $self->_check_args;
220              
221 0           my $query = $self->_get_sql;
222              
223             # filter results by the LIKE, directly in the SQL
224 0 0         $query .= $self->_like_where_clause if defined $args{sql_like};
225 0           $self->_set_query( $query );
226              
227 0           my $posts = $self->_get_posts;
228              
229             # filter posts by the regex
230 0 0         if( defined $self->_args->{regex} ) {
231 0           my $re = $self->_args->{regex};
232 0           foreach my $post_id ( keys %$posts ) {
233             delete $posts->{$post_id} unless
234             (
235             $posts->{$post_id}{post_title} =~ m/$re/
236             or
237 0 0 0       $posts->{$post_id}{post_content} =~ m/$re/
238             );
239             }
240             }
241              
242             # filter posts by the sub
243 0 0         if( defined $args{code} ) {
244 0           foreach my $post_id ( keys %$posts ) {
245             delete $posts->{$post_id}
246 0 0         unless $args{code}->( $posts->{$post_id} );
247             }
248             }
249              
250 0           $self->_clear_search;
251              
252 0           return $posts;
253             }
254              
255 0 0   0     sub _query { exists $_[0]->{query} ? $_[0]->{query} : '' }
256             sub _set_query {
257 0     0     my( $self, $query ) = @_;
258              
259             #XXX Can I figure out the number dynamically in a better way?
260 0           my $param_count = () = $query =~ /\?/g;
261             $self->_set_bind_params( [ ( $self->_args->{sql_like} ) x $param_count ] )
262 0 0         if defined $self->_args->{sql_like};
263              
264 0           $self->{query} = $query;
265             }
266              
267 0 0   0     sub _bind_params { exists $_[0]->{bind_params} ? @{$_[0]->{bind_params}} : () }
  0            
268             sub _set_bind_params {
269 0 0   0     croak "_set_bind_params must be an array reference" unless
270             ref $_[1] eq ref [];
271 0           $_[0]->{bind_params} = $_[1];
272             }
273              
274 0 0   0     sub _args { exists $_[0]->{args} ? $_[0]->{args} : {} }
275             sub _set_args {
276 0 0   0     croak "_set_args must be a hash reference" unless
277             ref $_[1] eq ref {};
278 0           $_[0]->{args} = $_[1];
279             }
280              
281              
282             sub _clear_search {
283 0     0     my @clear_keys = qw( args sql bind_params );
284 0           delete @{ $_[0] }{ @clear_keys };
  0            
285             }
286              
287             sub _check_args {
288 0     0     my( $self ) = @_;
289              
290 0 0         if( exists $self->_args->{regex} ) {
291 0           croak "'regex' value must be a regex reference [@{[$self->_args->{regex}]}]"
292 0 0         unless ref $self->_args->{regex} eq ref qr//;
293             }
294              
295 0 0         if( exists $self->_args->{code} ) {
296             croak "'code' value must be a code reference"
297 0 0   0     unless ref $self->_args->{code} eq ref sub {};
298             }
299              
300 0           my @array_keys = qw( type categories tags include_columns exclude_columns );
301 0           foreach my $array_arg ( @array_keys ) {
302 0 0         next unless exists $self->_args->{$array_arg};
303             croak "'$array_arg' value must be an array reference"
304 0 0         unless ref $self->_args->{$array_arg} eq ref [];
305             }
306              
307 0           return 1;
308             }
309              
310             sub _get_sql {
311 0     0     'SELECT * FROM wp_posts WHERE post_type = "post"'
312             }
313              
314             sub _like_where_clause {
315 0     0     ' AND (post_title LIKE ? OR post_content LIKE ?)'
316             }
317              
318             sub _get_posts {
319 0     0     my( $self ) = @_;
320 0           my $sth = $self->db->prepare( $self->_query );
321 0 0         croak
322             "Could not create statement handle\n\n" .
323             "DBI Error: $DBI::Error\n\n" .
324 0           "Statement-----\n@{[$self->_query]}\n-----\n"
325             unless defined $sth;
326 0           $sth->execute( $self->_bind_params );
327              
328 0           my $posts = $sth->fetchall_hashref( 'ID' );
329              
330 0 0         if( $self->_include_terms ) {
331 0           my @post_ids = keys %$posts;
332 0           my $terms = $self->_get_terms;
333              
334 0           my %categories = map { $_, 1 } @{ $self->_args->{categories} };
  0            
  0            
335 0           my %tags = map { $_, 1 } @{ $self->_args->{tags} };
  0            
  0            
336              
337             # reduce the posts in
338 0           foreach my $post_key ( @post_ids ) {
339 0           my $this = $terms->{$post_key};
340              
341 0           my $found = 0;
342              
343             # if none are specified, include all
344 0 0 0       $found = 1 if( 0 == keys %tags && 0 == keys %categories );
345              
346             my %this_tags =
347 0           map { $this->{$_}{name}, $this->{$_}{term_taxonomy_id} }
348 0           grep { $this->{$_}{taxonomy} eq 'post_tag' }
  0            
349             keys %$this;
350              
351 0           my %Seen_tags;
352 0           my @found_tags = grep { ++$Seen_tags{$_} > 1 }
  0            
353             keys %this_tags, keys %tags;
354 0           $found += do {
355 0 0         if( $self->_args->{tags_and} ) { @found_tags == keys %tags }
  0            
356 0           else { scalar @found_tags }
357             };
358              
359             my %this_categories =
360 0           map { $this->{$_}{name}, $this->{$_}{term_taxonomy_id} }
361 0           grep { $this->{$_}{taxonomy} eq 'category' }
  0            
362             keys %$this;
363 0           my %Seen_categories;
364 0           my @found_categories = grep { ++$Seen_categories{$_} > 1 }
  0            
365             keys %this_categories, keys %categories;
366 0           $found += do {
367 0 0         if( $self->_args->{categories_and} ) { @found_categories == keys %categories }
  0            
368 0           else { scalar @found_categories }
369             };
370              
371 0 0         if( $found ) {
372 0           $posts->{$post_key}{terms} = $terms->{$post_key};
373 0           $posts->{$post_key}{tags} = [ keys %this_tags ];
374 0           $posts->{$post_key}{categories} = [ keys %this_categories ];
375             }
376             else {
377 0           delete $posts->{$post_key};
378             }
379              
380             }
381             }
382              
383 0           return $posts;
384             }
385              
386             sub _include_terms {
387 0 0   0     $_[0]->_args->{categories} or $_[0]->_args->{tags};
388             }
389              
390             sub _get_terms {
391 0     0     my( $self, $post_ids ) = @_;
392              
393 0           my $query =<<'SQL';
394             SELECT
395             wp_posts.ID,
396             wp_posts.post_title,
397             wp_terms.term_id,
398             wp_terms.name,
399             wp_term_taxonomy.term_taxonomy_id,
400             wp_term_taxonomy.parent,
401             wp_term_taxonomy.taxonomy
402             FROM
403             wp_posts
404             LEFT JOIN
405             wp_term_relationships ON wp_term_relationships.object_id = wp_posts.ID
406             LEFT JOIN
407             wp_term_taxonomy ON wp_term_taxonomy.term_taxonomy_id = wp_term_relationships.term_taxonomy_id
408             LEFT JOIN
409             wp_terms ON wp_terms.term_id = wp_term_taxonomy.term_id
410             WHERE
411             wp_term_taxonomy.taxonomy IS NOT NULL
412             SQL
413              
414 0           my $sth = $self->db->prepare( $query );
415 0 0         croak
416             "Could not create statement handle\n\n" .
417             "DBI Error: $DBI::Error\n\n" .
418             "Statement-----\n$query\n-----\n"
419             unless defined $sth;
420              
421 0           $sth->execute;
422              
423 0           my $terms = $sth->fetchall_hashref( [ qw( ID term_id) ] );
424             }
425              
426             =back
427              
428             =head1 TO DO
429              
430              
431             =head1 SEE ALSO
432              
433             L
434              
435             =head1 SOURCE AVAILABILITY
436              
437             This source is in Github:
438              
439             http://github.com/briandfoy/wordpress-grep/
440              
441             =head1 AUTHOR
442              
443             brian d foy, C<< >>
444              
445             =head1 COPYRIGHT AND LICENSE
446              
447             Copyright © 2013-2018, brian d foy . All rights reserved.
448              
449             You may redistribute this under the Artistic License 2.0.
450              
451             =cut
452              
453             1;