| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::HyperGlossary; | 
| 2 | 1 |  |  | 1 |  | 1151 | use base qw(WWW::HyperGlossary::Base); | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 92 |  | 
| 3 | 1 |  |  | 1 |  | 5 | use Class::Std; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 7 |  | 
| 4 | 1 |  |  | 1 |  | 97 | use Class::Std::Utils; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 21 |  | 
| 5 | 1 |  |  | 1 |  | 32 | use DBI; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 45 |  | 
| 6 | 1 |  |  | 1 |  | 4 | use DBIx::MySperql qw(DBConnect SQLExec $dbh); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 107 |  | 
| 7 | 1 |  |  | 1 |  | 1075 | use LWP::Simple; | 
|  | 1 |  |  |  |  | 76647 |  | 
|  | 1 |  |  |  |  | 15 |  | 
| 8 | 1 |  |  | 1 |  | 504 | use Encode; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 74 |  | 
| 9 | 1 |  |  | 1 |  | 563 | use HTML::Encoding 'encoding_from_http_message'; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | use Set::Infinite; | 
| 11 |  |  |  |  |  |  | use Regexp::List; | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | use warnings; | 
| 14 |  |  |  |  |  |  | use strict; | 
| 15 |  |  |  |  |  |  | use Carp; | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | use version; our $VERSION = qv('0.0.2'); | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | our @colors = qw( x 009900 66FF66 FF6666 990000 660000); | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | { | 
| 22 |  |  |  |  |  |  | my %category_id_of   : ATTR( ); | 
| 23 |  |  |  |  |  |  | my %category_name_of : ATTR( :default<''> ); | 
| 24 |  |  |  |  |  |  | my %matches_of       : ATTR( :default<''> );  # Used only once below; added at package testing :RAH | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | sub BUILD { | 
| 27 |  |  |  |  |  |  | my ($self, $ident, $arg_ref) = @_; | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | # Set category id | 
| 30 |  |  |  |  |  |  | $category_id_of{$ident}   = $arg_ref->{category_id} ? $arg_ref->{category_id} : 0; | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | # Set category name if hg_categories included | 
| 33 |  |  |  |  |  |  | if ( (defined $arg_ref->{hg_categories}) && (defined $arg_ref->{category_id}) ) { | 
| 34 |  |  |  |  |  |  | foreach my $category ( @{ $arg_ref->{hg_categories} } ) { | 
| 35 |  |  |  |  |  |  | if ( $category->[0] == $arg_ref->{category_id} ) { | 
| 36 |  |  |  |  |  |  | $category_name_of{$ident} = $category->[1]; | 
| 37 |  |  |  |  |  |  | } | 
| 38 |  |  |  |  |  |  | } | 
| 39 |  |  |  |  |  |  | } | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | return; | 
| 42 |  |  |  |  |  |  | } | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | sub get_category_id    { my ($self) = @_; return $category_id_of{ident $self}; } | 
| 45 |  |  |  |  |  |  | sub get_category_name  { my ($self) = @_; return $category_name_of{ident $self}; } | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | sub start_url  { | 
| 48 |  |  |  |  |  |  | my ( $self, $arg_ref ) = @_; | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | # Get the page | 
| 51 |  |  |  |  |  |  | my $url       = $arg_ref->{url}  ? $self->fill_url( $arg_ref->{url} ) : ""; | 
| 52 |  |  |  |  |  |  | my $html      = get( $url ); | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | # Parse head and body and add the body control div | 
| 55 |  |  |  |  |  |  | $html      =~ m/(.*)<\/head>/six; | 
| 56 |  |  |  |  |  |  | my $head_tag  = $1; | 
| 57 |  |  |  |  |  |  | my $head_text = $2; | 
| 58 |  |  |  |  |  |  | $html      =~ m/(.*)<\/body>/six; | 
| 59 |  |  |  |  |  |  | my $body_tag  = $1; | 
| 60 |  |  |  |  |  |  | my $body_text = " \n" . $2 . "    \n"; | 
| 61 |  |  |  |  |  |  | # TODO Add Base URL tag (search Kyles::base_tag) | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | # TODO Retrieve Cached Pages | 
| 64 |  |  |  |  |  |  | # TODO ELSE | 
| 65 |  |  |  |  |  |  | # Save page | 
| 66 |  |  |  |  |  |  | my ($page_id) = $self->new_page({ category_id => $arg_ref->{category_id}, | 
| 67 |  |  |  |  |  |  | url         => $url, | 
| 68 |  |  |  |  |  |  | html        => $html, | 
| 69 |  |  |  |  |  |  | body        => $body_text }); | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | # Rebuild the page | 
| 72 |  |  |  |  |  |  | my $text  = "\n"; | 
| 73 |  |  |  |  |  |  | $text .= "    \n"; | 
| 74 |  |  |  |  |  |  | $text .= "    $head_text\n"; | 
| 75 |  |  |  |  |  |  | $text .= "    \n"; | 
| 76 |  |  |  |  |  |  | $text .= "    \n"; | 
| 77 |  |  |  |  |  |  | $text .= "    \n"; | 
| 78 |  |  |  |  |  |  | $text .= "    $body_text\n"; | 
| 79 |  |  |  |  |  |  | $text .= "    \n"; | 
| 80 |  |  |  |  |  |  | $text .= "\n"; | 
| 81 |  |  |  |  |  |  | # TODO END ELSE | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | return $text; | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | sub new_page  { | 
| 87 |  |  |  |  |  |  | my ( $self, $arg_ref ) = @_; | 
| 88 |  |  |  |  |  |  | my $category_id  = $arg_ref->{category_id} ? $self->_sql_escape( $arg_ref->{category_id} ) : ""; | 
| 89 |  |  |  |  |  |  | my $html         = $arg_ref->{html}        ? $self->_sql_escape( $arg_ref->{html} ) : ""; | 
| 90 |  |  |  |  |  |  | my $body         = $arg_ref->{body}        ? $self->_sql_escape( $arg_ref->{body} ) : ""; | 
| 91 |  |  |  |  |  |  | my $url          = $arg_ref->{url}         ? $self->_sql_escape( $arg_ref->{url} )  : ""; | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | # Save page and return the id | 
| 94 |  |  |  |  |  |  | my $sql   = "insert into hg_pages (category_id, url, html, body) "; | 
| 95 |  |  |  |  |  |  | $sql  .= "values ('$category_id', '$url', '$html', '$body')"; | 
| 96 |  |  |  |  |  |  | SQLExec( $sql ); | 
| 97 |  |  |  |  |  |  | $sql   = "select LAST_INSERT_ID()"; | 
| 98 |  |  |  |  |  |  | return SQLExec( $sql, '@' ); | 
| 99 |  |  |  |  |  |  | } | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | sub next_set  { | 
| 102 |  |  |  |  |  |  | my ( $self, $arg_ref ) = @_; | 
| 103 |  |  |  |  |  |  | my $hg_words    = $arg_ref->{hg_words}; | 
| 104 |  |  |  |  |  |  | my $page_id     = $arg_ref->{page_id}     ? $arg_ref->{page_id}     : 0; | 
| 105 |  |  |  |  |  |  | # defined below :RAH		my $category_id = $arg_ref->{category_id} ? $arg_ref->{category_id} : 1; | 
| 106 |  |  |  |  |  |  | my $match; | 
| 107 |  |  |  |  |  |  | # Retrieve the body | 
| 108 |  |  |  |  |  |  | my $sql  = "select category_id, body, set_id from hg_pages where page_id = '$page_id'"; | 
| 109 |  |  |  |  |  |  | my ( $category_id, $body, $set_id ) = SQLExec( $sql, '@' ); | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | #		my $safe_set = $self->create_safe_set( $body ); | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | # TODO Include Kyles::safe_set and otherwise make BETTER | 
| 114 |  |  |  |  |  |  | # Parse and replace | 
| 115 |  |  |  |  |  |  | my $words = $hg_words->{$category_id}->{$set_id}->{'words'}; | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | my $wordregex = $self->_build_regex( $words ); | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | # Create set of safe substituion zones | 
| 120 |  |  |  |  |  |  | my $safe_set = $self->create_safe_set( $body ); | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | # Replace matched words | 
| 123 |  |  |  |  |  |  | $body = $self->search_replace_word( $body, $wordregex, $safe_set, $set_id ); | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | # Update the set_id | 
| 126 |  |  |  |  |  |  | $sql  = "update hg_pages set body = '" . $self->_sql_escape( $body ) . "', set_id = '" . ($set_id + 1) . "' where page_id = '$page_id'"; | 
| 127 |  |  |  |  |  |  | SQLExec( $sql ); | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | return $body; | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | # COPIED CODE | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | sub get_url  { | 
| 135 |  |  |  |  |  |  | my ( $self, $url ) = @_; | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | # Create basetag | 
| 138 |  |  |  |  |  |  | $url        =~ m/(http.*\/)/i; | 
| 139 |  |  |  |  |  |  | my $basetag = "<\/base>"; | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | # Get the html | 
| 142 |  |  |  |  |  |  | my $ua      = LWP::UserAgent->new(); | 
| 143 |  |  |  |  |  |  | my $html    = $ua->get( $url ); | 
| 144 |  |  |  |  |  |  | my $content = $html->decoded_content; | 
| 145 |  |  |  |  |  |  | $content =~ s//$basetag/gi; | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | return $content; | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | sub create_safe_set { | 
| 151 |  |  |  |  |  |  | my ( $self, $body ) = @_; | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | $body =~ m/(^(.|\n)*)/gi; | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | my $start      = length($`); | 
| 156 |  |  |  |  |  |  | my $stop       = length($&) + $start; | 
| 157 |  |  |  |  |  |  | my $danger_set = Set::Infinite->new($start, $stop); | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | # Manage |