File Coverage

blib/lib/VANAMBURG/SEMPROG/SimpleGraph.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             package VANAMBURG::SEMPROG::SimpleGraph;
2              
3 10     10   418024 use vars qw($VERSION);
  10         28  
  10         618  
4             $VERSION = '0.010';
5              
6 10     10   14443 use Moose;
  0            
  0            
7             use Text::CSV_XS;
8             use Set::Scalar;
9             use List::MoreUtils qw(each_array);
10             use JSON;
11             use File::Slurp;
12              
13             use English;
14              
15             #
16             # Store triples in nested hashrefs with a Set::Scalar instance
17             # at the leaf nodes.
18             # Keep several hashes for accessing based on need
19             # in calls to 'triples' method. Three indexes are:
20             # 1) subject, then predicate then object set, or
21             # 2) predicate, object, then subject set,
22             # 3) object, then subject then predicate set.
23             #
24             # example:
25             #
26             # my $obj_set = $self->_spo()->{sub}->{pred};
27             #
28              
29             has '_spo' => ( isa => 'HashRef', is => 'rw', default => sub { {} } );
30             has '_pos' => ( isa => 'HashRef', is => 'rw', default => sub { {} } );
31             has '_osp' => ( isa => 'HashRef', is => 'rw', default => sub { {} } );
32              
33             sub add {
34             my ( $self, $sub, $pred, $obj ) = @_;
35              
36             $self->_addToIndex( $self->_spo(), $sub, $pred, $obj );
37             $self->_addToIndex( $self->_pos(), $pred, $obj, $sub );
38             $self->_addToIndex( $self->_osp(), $obj, $sub, $pred );
39             }
40              
41             sub _addToIndex {
42             my ( $self, $index, $a, $b, $c ) = @ARG;
43              
44             return if ( !defined($a) || !defined($b) || !defined($c) );
45              
46             if ( !defined( $index->{$a}->{$b} ) ) {
47             my $set = Set::Scalar->new();
48             $set->insert($c);
49             $index->{$a}->{$b} = $set;
50             }
51             else {
52             $index->{$a}->{$b}->insert($c);
53             }
54             }
55              
56             sub remove {
57             my ( $self, $sub, $pred, $obj ) = @ARG;
58              
59             my @tripls = $self->triples( $sub, $pred, $obj );
60             for my $t (@tripls) {
61             $self->_removeFromIndex( $self->_spo(), $t->[0], $t->[1], $t->[2] );
62             $self->_removeFromIndex( $self->_pos(), $t->[1], $t->[2], $t->[0] );
63             $self->_removeFromIndex( $self->_osp(), $t->[2], $t->[0], $t->[1] );
64             }
65             }
66              
67             sub _removeFromIndex {
68             my ( $self, $index, $a, $b, $c ) = @ARG;
69              
70             eval {
71             my $bs = $index->{$a};
72             my $cset = $bs->{$b};
73             $cset->delete($c);
74             delete $bs->{$b} if ( $cset->size == 0 );
75             delete $index->{$a} if ( keys(%$bs) == 0 );
76             };
77             if ($EVAL_ERROR) { print "ERROR: $EVAL_ERROR\n"; }
78             }
79              
80             sub triples {
81             my ( $self, $sub, $pred, $obj ) = @ARG;
82              
83             my @result;
84              
85             # check which terms are present in order to use the correct index:
86              
87             if ( defined($sub) ) {
88             if ( defined($pred) ) {
89              
90             # sub pred obj
91             if ( defined($obj) && defined( $self->_spo()->{$sub}->{$pred} ) ) {
92             push @result, [ $sub, $pred, $obj ]
93             if ( $self->_spo()->{$sub}->{$pred}->has($obj) );
94              
95             }
96             else {
97              
98             # sub pred undef
99             map { push @result, [ $sub, $pred, $_ ]; }
100             $self->_spo()->{$sub}->{$pred}->members()
101             if defined( $self->_spo()->{$sub}->{$pred} );
102             }
103             }
104             else {
105              
106             # sub undef obj
107             if ( defined($obj) && defined( $self->_osp()->{$obj}->{$sub} ) ) {
108             push @result, [ $sub, $obj, $_ ]
109             for $self->_osp()->{$obj}->{$sub}->members();
110             }
111             else {
112              
113             # sub undef undef
114             while ( my ( $retPred, $objSet ) =
115             each %{ $self->_spo()->{$sub} } )
116             {
117             push @result, [ $sub, $retPred, $_ ] for $objSet->members();
118             }
119             }
120             }
121             }
122             else {
123             if ( defined($pred) ) {
124              
125             # undef pred obj
126             if ( defined($obj) ) {
127              
128             map { push @result, [ $_, $pred, $obj ] }
129             $self->_pos()->{$pred}->{$obj}->members()
130             if ( defined( $self->_pos()->{$pred}->{$obj} ) );
131             }
132             else {
133              
134             # undef pred undef
135             while ( my ( $retObj, $subSet ) =
136             each %{ $self->_pos()->{$pred} } )
137             {
138             push @result, [ $_, $pred, $retObj ] for $subSet->members();
139             }
140             }
141             }
142             else {
143              
144             # undef undef obj
145             if ( defined($obj) ) {
146             while ( my ( $retSub, $predSet ) =
147             each %{ $self->_osp()->{$obj} } )
148             {
149             push @result, [ $retSub, $_, $obj ] for $predSet->members();
150             }
151             }
152             else {
153              
154             # undef undef undef
155             while ( my ( $retSub, $predHash ) = each %{ $self->_spo() } ) {
156             while ( my ( $retPred, $objSet ) = each %{$predHash} ) {
157             push @result, [ $retSub, $retPred, $_ ]
158             for $objSet->members();
159             }
160             }
161             }
162             }
163              
164             }
165              
166             return @result;
167             }
168              
169             sub value {
170             my ( $self, $sub, $pred, $obj ) = @ARG;
171              
172             for my $t ( $self->triples( $sub, $pred, $obj ) ) {
173             return $t->[0] if !defined($sub);
174             return $t->[1] if !defined($pred);
175             return $t->[2] if !defined($obj);
176             last;
177             }
178             }
179              
180             sub load {
181             my ( $self, $filename ) = @ARG;
182              
183             my $csv = Text::CSV_XS->new(
184             { allow_whitespace => 1, binary => 1, blank_is_undef => 1 } )
185             or die "Cannot use CSV: " . Text::CSV_XS->error_diag();
186              
187             open my $fh, "<:encoding(utf8)", $filename or die "$!";
188              
189             while ( my $row = $csv->getline($fh) ) {
190             $self->add( $row->[0], $row->[1], $row->[2] );
191             }
192              
193             close $fh or die "$!";
194             }
195              
196             sub load_json {
197             my ( $self, $filename ) = @ARG;
198              
199             my $text = read_file($filename) or die "Cannot read_file: $!";
200             my $data = from_json( $text, { utf8 => 1 } );
201              
202             for my $t ( @{ $data->{triples} } ) {
203             $self->add( $t->{s}, $t->{p}, $t->{o} );
204             }
205             }
206              
207             sub save {
208             my ( $self, $filename ) = @ARG;
209              
210             open my $fh, ">", $filename or die "Cannot open file for save: $!";
211              
212             my $csv =
213             Text::CSV_XS->new( { allow_whitespace => 1, blank_is_undef => 1 } )
214             or die "Cannot use CSV: " . Text::CSV_XS->error_diag();
215              
216             $csv->eol("\r\n");
217              
218             $csv->print( $fh, $_ )
219             or csv->error_diag()
220             for $self->triples( undef, undef, undef );
221              
222             close $fh or die "Cannot close file for save: $!";
223             }
224              
225             sub query {
226             my ( $self, $clauses ) = @ARG;
227              
228             my @bindings;
229              
230             my @trpl_inx = ( 0 .. 2 );
231              
232             for my $clause (@$clauses) {
233             my %bpos;
234             my @qparams;
235             my @rows;
236              
237             # Check each three indexes of clause to see if
238             # it is a binding variable (starts with '?').
239             # Generate a store for the binding variables,
240             # implimented as a hash keyed by binding variable name,
241             # and holding the triple index indicating if it
242             # represents a subject, predicate, or object.
243             #
244             # Also define parameters for subsequent call to
245             # 'triples'.
246              
247             my $each = each_array( @$clause, @trpl_inx );
248             while ( my ( $x, $pos ) = $each->() ) {
249             if ( $x =~ /^\?/ ) {
250             push @qparams, undef;
251             my $key = substr( $x, 1 );
252             $bpos{$key} = $pos;
253             }
254             else {
255             push @qparams, $x;
256             }
257             }
258              
259             @rows = $self->triples( $qparams[0], $qparams[1], $qparams[2] );
260             if ( !@bindings ) {
261             for my $row (@rows) {
262             my %binding;
263             while ( my ( $var, $pos ) = each %bpos ) {
264             $binding{$var} = $row->[$pos];
265             }
266              
267             push @bindings, \%binding;
268             }
269             }
270             else {
271             my @newb;
272             for my $binding (@bindings) {
273             for my $row (@rows) {
274             my $validmatch = 1;
275             my %tempbinding = %$binding;
276             while ( my ( $var, $pos ) = each %bpos ) {
277             if ( defined( $tempbinding{$var} ) ) {
278             if ( $tempbinding{$var} ne $row->[$pos] ) {
279             $validmatch = 0;
280             }
281             }
282             else {
283             $tempbinding{$var} = $row->[$pos];
284             }
285             }
286             if ($validmatch) {
287             push @newb, \%tempbinding;
288             }
289              
290             }
291             }
292             @bindings = @newb;
293             }
294             }
295             return @bindings;
296             }
297              
298             sub applyinference {
299             my ( $self, $rule ) = @ARG;
300              
301             my @bindings = $self->query( $rule->getqueries() );
302              
303             for my $binding (@bindings) {
304             for my $triple ( @{ $rule->maketriples($binding) } ) {
305             $self->add(@$triple);
306             }
307             }
308              
309             }
310              
311             1;
312              
313             __END__
314              
315              
316             =head1 SYNOPSIS
317              
318             A Perl interpretation of the SimpleGraph developed in Python by Toby Segaran in his book "Programming the Semantic Web", published by O'Reilly, 2009. CPAN modules are used in place of the Python standard library modules used by Mr. Segaran.
319              
320             my $graph = VANAMBURG::SEMPROG::SimpleGraph->new();
321              
322             $graph->load("data/place_triples.txt");
323              
324             $graph->add("Morgan Stanley", "headquarters", "New_York_New_York");
325              
326             my @sanfran_key = $graph->value(undef,'name','San Francisco');
327              
328             my @sanfran_triples = $graph->triples($sanfram_key, undef, undef);
329              
330             my @bindings = $g->query([
331             ['?company', 'headquarters', 'New_York_New_York'],
332             ['?company', 'industry', 'Investment Banking'],
333             ['?contrib', 'contributor', '?company'],
334             ['?contrib', 'recipient', 'Orrin Hatch'],
335             ['?contrib', 'amount', '?dollars'],
336             ]);
337              
338             for my $binding (@bindings){
339             printf "company=%s, contrib=%s, dollars=%s\n",
340             ($binding->{company},$binding->{contrib},$binding->{dollars});
341             }
342            
343              
344             $graph->applyinference( VANAMBURG::SEMPROG::GeocodeRule->new() );
345              
346              
347             =head1 SimpleGraph
348              
349            
350             This module and it's test suite is inspired by the simple triple store implimentation
351             developed in chapters 2 and 3 of "Programming the Semantic Web" by Toby Segaran,
352             Evans Colin, Taylor Jamie, 2009, O'Reilly. Mr. Segaran uses Python and
353             it's standard library to show the workins of a triple store. This module
354             and it's test make the same demonstration using Perl and CPAN modules, which
355             may be thought of as a Perl companion to the book for readers who are interested in Perl. Copies of Mr. Segaran's test data files are included in this distribution for your convenience.
356              
357             In addition to SimpleGraph, the triple store, the other exercises presented in chapters 2 and 3 are here interpreted as a set of perl test programs, using
358             Test::More and are found in the modules 't/' directory.
359            
360              
361             B<Triple Store Modules>
362              
363             lib/VANAMBURG/SEMPROG/SimpleGraph.pm
364            
365             lib/VANAMBURG/SEMPROG/CloseToRule.pm
366             lib/VANAMBURG/SEMPROG/GeocodeRule.pm
367             lib/VANAMBURG/SEMPROG/InferenceRule.pm
368             lib/VANAMBURG/SEMPROG/TouristyRule.pm
369             lib/VANAMBURG/SEMPROG/WestCoastRule.pm
370              
371             B<Module Usage Shown in Tests>
372              
373             t/semprog_ch02_03_places.t
374             t/semprog_ch02_04_celebs.t
375             t/semprog_ch02_05_business.t
376             t/semprog_ch02_moviegraph.t
377             t/semprog_ch03_01_queries.t
378             t/semprog_ch03_02_inference.t
379             t/semprog_ch03_03_chain_of_rules.t
380             t/semprog_ch03_04_shortest_path.t
381             t/semprog_ch03_05_join_graph.t
382             qt/semprog_ch03_chain_of_rules.t
383              
384              
385             Find out more about, or get the book at http://semprog.com, the Semantic Programming web site.
386              
387             =head1 INSTALLATION NOTES
388              
389             This module can be installed via cpan. This method resolves dependency
390             issues and is convenient. In brief, it looks something like this in a
391             terminal on linux:
392            
393             $sudo cpan
394             cpan>install VANAMBURG::SEMPROG::SimpleGraph
395             ...
396             cpan>quit
397             $
398              
399             All dependencies, as well as the modules are now installed. Leave out 'sudo' if using Strawberry perl on Windows.
400              
401             You can then download the source package and read and run the test programs.
402              
403             $tar xzvf VANAMBURG-SEMPROG-SimpleGraph-0.001.tar.gz
404             $cd VANAMBURG-SEMPROG-SimpleGraph-0.001/
405             $ perl Makefile.PL
406             ...
407             $make
408             ...
409              
410             Run 'dmake' instead of 'make' if using Strawberry Perl on Windows.
411              
412             To run all the test programs:
413            
414             $make test
415              
416             -- Note that some tests require internet access for geo code data.
417              
418             To run one test:
419              
420             $prove -Tvl lib - t/semprog_ch03_05_join_graph.t
421              
422              
423              
424              
425              
426             =head1 MooseX::Declare Experiment
427              
428             Version 0.007 was an experiment in using MooseX::Declare. The code remaind the same as version 0.006,
429             except that classes were defined by the 'class' keyword instead of 'package' and methods are
430             defined using 'method' keyword and well defined parameter lists in place of 'sub' and '@_'.
431             'class' and 'method' are supplied by MooseX::Declare.
432              
433             =head2 Types of Changes to Source Files
434              
435             The types of changes to the source looks like this.
436            
437             1) CLASS DECLARATIONS WERE CHANGED
438              
439             OLD PACKAGE STATEMENTS REMOVED:
440             <<package VANAMBURG::SEMPROG::SimpleGraph;
441             <<use Moose;
442              
443             REPLACED WITH MUCH CLEANER DECLARATIONS:
444             >>use MooseX::Declare;
445             >>class VANAMBURG::SEMPROG::SimpleGraph{
446              
447             2) METHOD DECLARATIONS WERE CHANGED
448              
449             OLD SUB AND @ARG REMOVED:
450             <<sub _addToIndex{
451             << my ($self, $index, $a, $b, $c) = @ARG;
452            
453             REPLACED WITH METHOD AND DEFINED OPTIONAL PARAMS:
454             >>method add($sub?, $pred?, $obj?){
455              
456             =head2 Performance Changes
457              
458             Version 0.007, using MooseX::Declare took ten times as long as using Moose alone.Subsequent to this test, version 0.008 was created by rolling back to the Version 0.006 sources.
459              
460             =head2 Devel::NYTProf For Version 0.006 (Moose only)
461              
462              
463             Performance Profile Index
464             For t/semprog_ch03_01_queries.t
465             Run on Sun Jan 10 01:14:07 2010
466             Reported on Sun Jan 10 01:16:44 2010
467              
468             Profile of t/semprog_ch03_01_queries.t for 64.0s, executing 10616075 statements and 3106325 subroutine calls in 119 source files and 194 string evals.
469             Top 15 Subroutines — ordered by exclusive time
470             Calls P F Exclusive
471             Time Inclusive
472             Time Subroutine
473             374201 2 2 6.10s 8.74s Set::Scalar::Base::::_insert_elements Set::Scalar::Base::_insert_elements
474             187100 1 1 3.90s 15.2s Set::Scalar::::_insert_hook Set::Scalar::_insert_hook
475             451618 2 2 3.18s 3.18s Set::Scalar::Base::::_invalidate_cached Set::Scalar::Base::_invalidate_cached
476             264518 4 3 2.61s 3.88s Set::Scalar::Base::::_make_elements Set::Scalar::Base::_make_elements
477             109683 3 1 2.45s 26.6s VANAMBURG::SEMPROG::SimpleGraph::::_addToIndexVANAMBURG::SEMPROG::SimpleGraph::_addToIndex
478             109683 2 1 1.86s 14.0s Set::Scalar::Real::::insert Set::Scalar::Real::insert
479             187100 2 2 1.85s 17.1s Set::Scalar::Base::::_insert Set::Scalar::Base::_insert
480             187101 2 2 1.72s 6.10s Set::Scalar::Virtual::::_extend Set::Scalar::Virtual::_extend
481             77417 1 1 1.63s 9.01s Set::Scalar::::_new_hook Set::Scalar::_new_hook
482             36561 1 1 1.39s 28.6s VANAMBURG::SEMPROG::SimpleGraph::::addVANAMBURG::SEMPROG::SimpleGraph::add
483             77417 1 1 1.36s 1.90s Set::Scalar::Real::::_delete Set::Scalar::Real::_delete
484             77417 1 1 1.32s 6.82s Set::Scalar::Real::::clear Set::Scalar::Real::clear
485             219366 1 1 1.26s 1.26s Set::Scalar::Base::::_strval Set::Scalar::Base::_strval
486             77417 1 1 1.11s 4.73s Set::Scalar::Real::::delete Set::Scalar::Real::delete
487             77419 3 2 1.11s 10.1s Set::Scalar::Base::::new Set::Scalar::Base::new
488              
489             =head2 Devel::NYTProf For Version 0.007 (MooseX::Declare)
490              
491             Performance Profile Index
492             For t/semprog_ch03_01_queries.t
493             Run on Sun Jan 10 01:28:09 2010
494             Reported on Sun Jan 10 01:38:25 2010
495              
496             Profile of t/semprog_ch03_01_queries.t for 489s, executing 74793743 statements and 24836936 subroutine calls in 371 source files and 407 string evals.
497             Top 15 Subroutines — ordered by exclusive time
498             Calls P F Exclusive
499             Time Inclusive
500             Time Subroutine
501             1426306 23 11 38.3s 250s MooseX::Types::TypeDecorator::::AUTOLOAD MooseX::Types::TypeDecorator::AUTOLOAD
502             438753 1 1 36.5s 186s MooseX::Types::Structured::::__ANON__[MooseX/Types/Structured.pm:745] MooseX::Types::Structured::__ANON__[MooseX/Types/Structured.pm:745]
503             2304288 3 1 26.0s 30.6s MooseX::Types::TypeDecorator::::__type_constraint MooseX::Types::TypeDecorator::__type_constraint
504             1280098 6 5 21.2s 163s Moose::Meta::TypeConstraint::::check Moose::Meta::TypeConstraint::check
505             548425 2 2 15.1s 18.8s Moose::Meta::TypeConstraint::::Defined Moose::Meta::TypeConstraint::Defined
506             146251 1 1 12.2s 16.8s MooseX::Method::Signatures::Meta::Method::::__ANON__[MooseX/Method/Signatures/Meta/Method.pm:430] MooseX::Method::Signatures::Meta::Method::__ANON__[MooseX/Method/Signatures/Meta/Method.pm:430]
507             585005 2 1 11.2s 303s MooseX::Meta::TypeConstraint::Structured::::__ANON__[MooseX/Meta/TypeConstraint/Structured.pm:115] MooseX::Meta::TypeConstraint::Structured::__ANON__[MooseX/Meta/TypeConstraint/Structured.pm:115]
508             1426306 1 2 9.06s 9.06s MooseX::Types::TypeDecorator::::CORE:match MooseX::Types::TypeDecorator::CORE:match (opcode)
509             438881 9 7 8.41s 17.2s MooseX::Types::TypeDecorator::::isa MooseX::Types::TypeDecorator::isa
510             3924871 56 34 7.80s 7.80s Scalar::Util::::blessed Scalar::Util::blessed (xsub)
511             1426685 7 6 7.22s 7.22s Moose::Meta::TypeConstraint::::_compiled_type_constraint Moose::Meta::TypeConstraint::_compiled_type_constraint
512             374201 2 2 6.31s 9.05s Set::Scalar::Base::::_insert_elements Set::Scalar::Base::_insert_elements
513             219366 4 2 5.35s 227s VANAMBURG::SEMPROG::SimpleGraph::::_addToIndex VANAMBURG::SEMPROG::SimpleGraph::_addToIndex
514             146251 1 1 4.87s 256s MooseX::Meta::TypeConstraint::ForceCoercion::::validateMooseX::Meta::TypeConstraint::ForceCoercion::validate
515             146251 1 1 4.87s 263s MooseX::Method::Signatures::Meta::Method::::validate MooseX::Method::Signatures::Meta::Method::validate
516              
517              
518             =head1 METHODS
519              
520             =head2 add
521              
522             Adds a triple to the graph.
523              
524             $g->add("San Francisco", "inside", "California");
525             $g->add("Ann Arbor", "inside", "Michigan");
526              
527             =head2 remove
528              
529             Remove a triple pattern from the graph.
530              
531             # remove all triples with predicate "inside"
532             $g->remove(undef, "inside", undef);
533              
534              
535             =head2 triples
536              
537             # retrieve all triples with predicate "inside"
538             my @triples = $g->triples(undef, "inside", undef);
539              
540             # @triples looks like this:
541             # (
542             # ["San Francisco", "inside", "California"],
543             # ["Ann Arbor", "inside", "Michigan"],
544             # )
545              
546             =head2 value
547              
548             Retrieve a single value from a triple.
549              
550             my $x = $g->value(undef, 'inside', 'Michigan');
551             # $x contains "Ann Arbor" given examples added.
552              
553              
554             =head2 query
555              
556             Returns array of hashrefs where keys are binding variables for triples.
557              
558             my @bindings = $g->query([
559             ['?company','headquarters','New_York_New_York'],
560             ['?company','industry','Investment Banking'],
561             ['?cont','contributor','?company'],
562             ['?cont', 'recipient', 'Orrin Hatch'],
563             ['?cont', 'amount', '?dollars'],
564             ]);
565              
566             =head2 applyinference
567              
568             Given an InferenceRule, generates additional triples in the triple store.
569              
570              
571             =head2 load
572            
573             Loads a csv file in utf8 encoding.
574              
575             $g->load("some/file.csv");
576              
577              
578             =head2 load_json
579              
580             Loads a json file into a graph. The json file should be formated as follows:
581              
582             {
583             "triples" : [
584             { "s": "your subject 1",
585             "p": "your predicate 1",
586             "o": "your object 1"
587             }, { "s": "your subject 2",
588             "p": "your predicate 2",
589             "o": "your object 2"
590             }
591             ]
592             }
593              
594              
595             =head2 save
596            
597             Saves a csv file in utf8 encoding.
598              
599             $g->load("some/file.csv");
600              
601             =head2 _addToIndex
602              
603             See source for details.
604              
605              
606             =head2 _removeFromIndex
607              
608             Removes a triple from an index and clears up empty indermediate structures.
609              
610              
611             =cut