File Coverage

blib/lib/WebFetch/Output/TWiki.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             #
2             # WebFetch::Output::TWiki - save data into a TWiki web site
3             #
4             # Copyright (c) 2009 Ian Kluft. This program is free software; you can
5             # redistribute it and/or modify it under the terms of the GNU General Public
6             # License Version 3. See http://www.webfetch.org/GPLv3.txt
7              
8             package WebFetch::Output::TWiki;
9              
10 1     1   1713 use warnings;
  1         2  
  1         36  
11 1     1   6 use strict;
  1         2  
  1         29  
12 1     1   43 use WebFetch;
  0            
  0            
13             use base "WebFetch";
14             use DB_File;
15              
16             # define exceptions/errors
17             use Exception::Class (
18             "WebFetch::Output::TWiki::Exception::NoRoot" => {
19             isa => "WebFetch::Exception",
20             alias => "throw_twiki_no_root",
21             description => "WebFetch::Output::TWiki needs to be provided "
22             ."a twiki_root parameter",
23             },
24             "WebFetch::Output::TWiki::Exception::NotFound" => {
25             isa => "WebFetch::Exception",
26             alias => "throw_twiki_not_found",
27             description => "the directory in the twiki_root parameter "
28             ."doesn't exist or doesn't have a lib subdirectory",
29             },
30             "WebFetch::Output::TWiki::Exception::Require" => {
31             isa => "WebFetch::Exception",
32             alias => "throw_twiki_require",
33             description => "failed to import TWiki or TWiki::Func modules",
34             },
35             "WebFetch::Output::TWiki::Exception::NoConfig" => {
36             isa => "WebFetch::Exception",
37             alias => "throw_twiki_no_config",
38             description => "WebFetch::Output::TWiki needs to be provided "
39             ."a config_topic parameter",
40             },
41             "WebFetch::Output::TWiki::Exception::ConfigMissing" => {
42             isa => "WebFetch::Exception",
43             alias => "throw_twiki_config_missing",
44             description => "WebFetch::Output::TWiki is missing a required "
45             ."configuration parameter",
46             },
47             "WebFetch::Output::TWiki::Exception::Oops" => {
48             isa => "WebFetch::Exception",
49             alias => "throw_twiki_oops",
50             description => "WebFetch::Output::TWiki returned errors from "
51             ."saving one or more entries",
52             },
53             "WebFetch::Output::TWiki::Exception::FieldNotSpecified" => {
54             isa => "WebFetch::Exception",
55             alias => "throw_field_not_specified",
56             description => "a required field was not defined or found",
57             },
58             );
59              
60             =head1 NAME
61              
62             WebFetch::Output::TWiki - WebFetch output to TWiki web site
63              
64             =cut
65              
66             # globals/defaults
67             our @Options = ( "twiki_root=s", "config_topic=s", "config_key=s" );
68             our $Usage = "--twiki_root path-to-twiki --config_topic web.topic "
69             ."--config_key keyword";
70             our @default_field_names = ( qw( key web parent prefix template form
71             options ));
72              
73             # no user-servicable parts beyond this point
74              
75             # register capabilities with WebFetch
76             __PACKAGE__->module_register( "cmdline", "output:twiki" );
77              
78             =head1 SYNOPSIS
79              
80             This is an output module for WebFetch which places the data in pages
81             on a TWiki web site. Some of its configuration information is read from
82             a TWiki page. Calling or command-line parameters point to the TWiki page
83             which has the configuration and a search key to locate the correct line
84             in a table.
85              
86             From the command line...
87              
88             perl -w -I$libdir -MWebFetch::Input::Atom -MWebFetch::Output::TWiki -e "&fetch_main" -- --dir "/path/to/fetch/worskspace" --source "http://search.twitter.com/search.atom?q=%23twiki" --dest=twiki --twiki_root=/var/www/twiki --config_topic=Feeds.WebFetchConfig --config_key=twiki
89              
90             From Perl code...
91              
92             use WebFetch;
93              
94             my $obj = WebFetch->new(
95             "dir" => "/path/to/fetch/workspace",
96             "source" => "http://search.twitter.com/search.atom?q=%23twiki",
97             "source_format" => "atom",
98             "dest" => "twiki",
99             "dest_format" = "twiki",
100             "twiki_root" => "/var/www/twiki",
101             "config_topic" => "Feeds.WebFetchConfig",
102             "config_key" => "twiki",
103             );
104             $obj->do_actions; # process output
105             $obj->save; # save results
106              
107             =head1 configuration from TWiki topic
108              
109             The configuration information on feeds is kept in a TWiki page. You can
110             specify any page with a web and topic name, for example C<--config_topic=Feeds.WebFetchConfig> .
111              
112             The contents of that configuration page could look like this, though with
113             any feeds you want to configure. The "Key" field matches the --config_key
114             command-line parameter, and then brings in the rest of the configuration
115             info from that line. An example is shown below.
116              
117             =over
118             C<< ---+ !WebFetch Configuration >>
119              
120             C<< The following table is used by !WebFetch to configure news feeds >>
121              
122             C<< %STARTINCLUDE% >>
123             C<< | *Key* | *Web* | *Parent* | *Prefix* | *Template* | *Form* | *Options* | *Modul >>
124             e* | *Source* |
125             C<< | ikluft-twitter | Feeds | TwitterIkluftFeed | TwitterIkluft | AtomFeedTemplate | AtomFeedForm | separate_topics | Atom | http://twitter.com/statuses/user_timeline/37786023.rss | >>
126             C<< | twiki-twitter | Feeds | TwitterTwikiFeed | TwitterTwiki | AtomFeedTemplate | AtomFeedForm | separate_topics | Atom | http://search.twitter.com/search.atom?q=%23twiki | >>
127             C<< | cnn | Feeds | RssCnn | RssCnn | RssFeedTemplate | RssFeedForm | separate_topics | RSS | http://rss.cnn.com/rss/cnn_topstories.rss | >>
128             C<< %STOPINCLUDE% >>
129             =back
130              
131             The C<%STARTINCLUDE%> and C<%STOPINCLUDE%> are not required. However, if
132             present, they are used as boundaries for the inclusion like in a normal
133             INCLUDE operation on TWiki.
134              
135             =cut
136              
137             # read the TWiki configuation
138             sub get_twiki_config
139             {
140             my $self = shift;
141             WebFetch::debug "in get_twiki_config";
142              
143             # find the TWiki modules
144             if ( ! exists $self->{twiki_root}) {
145             throw_twiki_no_root( "TWiki root directory not defined" );
146             }
147             if (( ! -d $self->{twiki_root}) or ( ! -d $self->{twiki_root}."/lib" ))
148             {
149             throw_twiki_not_found( "can't find TWiki root or lib at "
150             .$self->{twiki_root});
151             }
152              
153             # load the TWiki modules
154             WebFetch::debug "loading TWiki modules";
155             push @INC, $self->{twiki_root}."/lib";
156             eval { require TWiki; require TWiki::Func; };
157             if ( $@ ) {
158             throw_twiki_require ( $@ );
159             }
160              
161             # initiate TWiki library, create session as user "WebFetch"
162             $self->{twiki_obj} = TWiki->new( "WebFetch" );
163              
164             # get the contents of the TWiki topic which contains our configuration
165             if ( !exists $self->{config_topic}) {
166             throw_twiki_no_config( "TWiki configuration page for WebFetch "
167             ."not defined" );
168             }
169             my ( $web, $topic ) = split /\./, $self->{config_topic};
170             WebFetch::debug "config_topic: ".$self->{config_topic}
171             ." -> $web, $topic";
172             if (( ! defined $web ) or ( ! defined $topic )) {
173             throw_twiki_no_config( "TWiki configuration page for WebFetch "
174             ."must be defined in the format web.topic" );
175             }
176              
177             # check if a config_key was specified before we read the configuration
178             if ( !exists $self->{config_key}) {
179             throw_twiki_no_config( "TWiki configuration key for WebFetch "
180             ."not defined" );
181             }
182              
183             # read the configuration info
184             my $config = TWiki::Func::readTopic( $web, $topic );
185              
186             # if STARTINCLUDE and STOPINCLUDE are present, use only what's between
187             if ( $config =~ /%STARTINCLUDE%\s*(.*)\s*%STOPINCLUDE%/s ) {
188             $config = $1;
189             }
190              
191             # parse the configuration
192             WebFetch::debug "parsing configuration";
193             my ( @fnames, $line );
194             $self->{twiki_config_all} = [];
195             $self->{twiki_keys} = {};
196             foreach $line ( split /\r*\n+/s, $config ) {
197             if ( $line =~ /^\|\s*(.*)\s*\|\s*$/ ) {
198             my @entries = split /\s*\|\s*/, $1;
199             WebFetch::debug "read entries: ".join( ', ', @entries );
200              
201             # first line contains field headings
202             if ( ! @fnames) {
203             # save table headings as field names
204             my $field;
205             foreach $field ( @entries ) {
206             my $tmp = lc($field);
207             $tmp =~ s/\W//g;
208             push @fnames, $tmp;
209             }
210             next;
211             }
212             WebFetch::debug "field names: ".join " ", @fnames;
213              
214             # save the entries
215             # it isn't a heading row if we got here
216             # transfer array @entries to named fields in %config
217             WebFetch::debug "data row: ".join " ", @entries;
218             my ( $i, $key, %config );
219             for ( $i=0; $i < scalar @fnames; $i++ ) {
220             $config{ $fnames[$i]} = $entries[$i];
221             if ( $fnames[$i] eq "key" ) {
222             $key = $entries[$i];
223             }
224             }
225              
226             # save the %config row in @{$self->{twiki_config_all}}
227             if (( defined $key )
228             and ( !exists $self->{twiki_keys}{$key}))
229             {
230             push @{$self->{twiki_config_all}}, \%config;
231             $self->{twiki_keys}{$key} = ( scalar
232             @{$self->{twiki_config_all}}) - 1;
233             }
234             }
235             }
236              
237             # select the line which is for this request
238             if ( ! exists $self->{twiki_keys}{$self->{config_key}}) {
239             throw_twiki_no_config "no configuration found for key "
240             .$self->{config_key};
241             }
242             $self->{twiki_config} = $self->{twiki_config_all}[$self->{twiki_keys}{$self->{config_key}}];
243             WebFetch::debug "twiki_config: ".join( " ", %{$self->{twiki_config}});
244             }
245              
246             # write to a TWiki page
247             sub write_to_twiki
248             {
249             my $self = shift;
250             my ( $config, $name );
251              
252             # get config variables
253             $config = $self->{twiki_config};
254              
255             # parse options
256             my ( $option );
257             $self->{twiki_options} = {};
258             foreach $option ( split /\s+/, $self->{twiki_config}{options}) {
259             if ( $option =~ /^([^=]+)=(.*)/ ) {
260             $self->{twiki_options}{$1} = $2;
261             } else {
262             $self->{twiki_options}{$option} = 1;
263             }
264             }
265              
266             # determine unique identifier field
267             my $id_field;
268             if ( exists $self->{twiki_options}{id_field}) {
269             $id_field = $self->{twiki_options}{id_field};
270             }
271             if ( ! defined $id_field ) {
272             $id_field = $self->wk2fname( "id" );
273             }
274             if ( ! defined $id_field ) {
275             $id_field = $self->wk2fname( "url" );
276             }
277             if ( ! defined $id_field ) {
278             $id_field = $self->wk2fname( "title" );
279             }
280             if ( ! defined $id_field ) {
281             throw_field_not_specified "identifier field not specified";
282             }
283             $self->{id_field} = $id_field;
284              
285             # determine from options whether each item is making metadata or topics
286             if ( exists $self->{twiki_options}{separate_topics}) {
287             $self->write_to_twiki_topics;
288             } else {
289             $self->write_to_twiki_metadata;
290             }
291             }
292              
293             # write to separate TWiki topics
294             sub write_to_twiki_topics
295             {
296             my $self = shift;
297              
298             # get config variables
299             my $config = $self->{twiki_config};
300             my $name;
301             foreach $name ( qw( key web parent prefix template form )) {
302             if ( !exists $self->{twiki_config}{$name}) {
303             throw_twiki_config_missing( "missing config parameter "
304             .$name );
305             }
306             }
307              
308             # get text of template topic
309             my ($meta, $template ) = TWiki::Func::readTopic( $config->{web},
310             $config->{template});
311              
312             # open DB file for tracking unique IDs of articles already processed
313             my %id_index;
314             tie %id_index, 'DB_File',
315             $self->{dir}."/".$config->{key}."_id_index.db",
316             &DB_File::O_CREAT|&DB_File::O_RDWR, 0640;
317              
318             # determine initial topic name
319             my ( %topics, @topics );
320             @topics = TWiki::Func::getTopicList( $config->{web});
321             foreach ( @topics ) {
322             $topics{$_} = 1;
323             }
324             my $tnum_counter = 0;
325             my $tnum_format = $config->{prefix}."-%07d";
326              
327             # create topics with metadata from each WebFetch data record
328             my $entry;
329             my @oopses;
330             my $id_field = $self->{id_field};
331             $self->data->reset_pos;
332             while ( $entry = $self->data->next_record ) {
333              
334             # check that this entry hasn't already been forwarded to TWiki
335             if ( exists $id_index{$entry->byname( $id_field )}) {
336             next;
337             }
338             $id_index{$entry->byname( $id_field )} = time;
339              
340             # select topic name
341             my $topicname = sprintf $tnum_format, $tnum_counter;
342             while ( exists $topics{$topicname}) {
343             $tnum_counter++;
344             $topicname = sprintf $tnum_format, $tnum_counter;
345             }
346             $tnum_counter++;
347             $topics{$topicname} = 1;
348             my $text = $template;
349             WebFetch::debug "write_to_twiki_topics: writing $topicname";
350              
351             # create topic metadata
352             #my $meta = TWiki::Meta->new ( $self->{twiki_obj}, $config->{web}, $topicname );
353             $meta->put( "TOPICPARENT",
354             { name => $config->{parent}});
355             $meta->put( "FORM", { name => $config->{form}});
356             my $fnum;
357             for ( $fnum = 0; $fnum <= $self->data->num_fields; $fnum++ ) {
358             WebFetch::debug "meta: "
359             .$self->data->field_bynum($fnum)
360             ." = ".$entry->bynum($fnum);
361             ( defined $self->data->field_bynum($fnum)) or next;
362             ( $self->data->field_bynum($fnum) eq "xml") and next;
363             ( defined $entry->bynum($fnum)) or next;
364             WebFetch::debug "meta: OK";
365             $meta->putKeyed( "FIELD", {
366             name => $self->data->field_bynum($fnum),
367             value => $entry->bynum($fnum)});
368             }
369              
370             # save a special title field for TWiki indexes
371             my $index_title = $entry->title;
372             $index_title =~ s/[\t\r\n\|]+/ /gs;
373             $index_title =~ s/^\s*//;
374             $index_title =~ s/\s*$//;
375             if ( length($index_title) > 60 ) {
376             substr( $index_title, 56 ) = "...";
377             }
378             WebFetch::debug "title: $index_title";
379             $meta->putKeyed( "FIELD", {
380             name => "IndexTitle",
381             title => "Indexing title",
382             value => $index_title });
383              
384             # save the topic
385             my $oopsurl = TWiki::Func::saveTopic( $config->{web},
386             $topicname, $meta, $text );
387             if ( $oopsurl ) {
388             WebFetch::debug "write_to_twiki_topics: "
389             ."$topicname - $oopsurl";
390             push @oopses, $entry->title." -> "
391             .$topicname." ".$oopsurl;
392             }
393             }
394              
395             # check for errors
396             if ( @oopses ) {
397             throw_twiki_oops( "TWiki saves failed:\n".join "\n", @oopses );
398             }
399             }
400              
401             # write to successive items of TWiki metadata
402             sub write_to_twiki_metadata
403             {
404             my $self = shift;
405              
406             # get config variables
407             my $config = $self->{twiki_config};
408             my $name;
409             foreach $name ( qw( key web parent )) {
410             if ( !exists $self->{twiki_config}{$name}) {
411             throw_twiki_config_missing( "missing config parameter "
412             .$name );
413             }
414             }
415              
416             # determine metadata title field
417             my $title_field;
418             if ( exists $self->{twiki_options}{title_field}) {
419             $title_field = $self->{twiki_options}{title_field};
420             }
421             if ( ! defined $title_field ) {
422             $title_field = $self->wk2fname( "title" );
423             }
424             if ( ! defined $title_field ) {
425             throw_field_not_specified "title field not specified";
426             }
427              
428             # determine metadata value field
429             my $value_field;
430             if ( exists $self->{twiki_options}{value_field}) {
431             $value_field = $self->{twiki_options}{value_field};
432             }
433             if ( ! defined $value_field ) {
434             $value_field = $self->wk2fname( "summary" );
435             }
436             if ( ! defined $value_field ) {
437             throw_field_not_specified "value field not specified";
438             }
439              
440             # open DB file for tracking unique IDs of articles already processed
441             my %id_index;
442             tie %id_index, 'DB_File',
443             $self->{dir}."/".$config->{key}."_id_index.db",
444             &DB_File::O_CREAT|&DB_File::O_RDWR, 0640;
445              
446             # get text of topic
447             my ($meta, $text) = TWiki::Func::readTopic( $config->{web},
448             $config->{parent});
449            
450             # start metadata line counter
451             my $mnum_counter = 0;
452             my $mnum_format = "line-%07d";
453              
454             # create metadata lines for each entry
455             my $entry;
456             my @oopses;
457             my $id_field = $self->{id_field};
458             $self->data->reset_pos;
459             while ( $entry = $self->data->next_record ) {
460             # check that this entry hasn't already been forwarded to TWiki
461             if ( exists $id_index{$entry->byname( $id_field )}) {
462             next;
463             }
464             $id_index{$entry->byname( $id_field )} = time;
465              
466             # select metadata field name
467             my ( $value, $metaname );
468             $value = $meta->get( "FIELD",
469             $metaname = sprintf( $mnum_format, $mnum_counter ));
470             while ( defined $value ) {
471             $value = $meta->get( "FIELD",
472             $metaname = sprintf( $mnum_format,
473             ++$mnum_counter ));
474             }
475              
476             # write the value
477             $meta->putKeyed( "FIELD", {
478             name => $metaname,
479             title => $entry->byname( $title_field ),
480             value => $entry->byname( $value_field ),
481             });
482             }
483              
484             # save the topic
485             my $oopsurl = TWiki::Func::saveTopic( $config->{web},
486             $config->{parent}, $meta, $text );
487             if ( $oopsurl ) {
488             throw_twiki_oops "TWiki saves failed: "
489             .$config->{parent}." ".$oopsurl;
490             }
491             }
492              
493             # TWiki format handler
494             sub fmt_handler_twiki
495             {
496             my $self = shift;
497             my $filename = shift;
498              
499             # get configuration from TWiki
500             $self->get_twiki_config;
501              
502             # write to TWiki topic
503             $self->write_to_twiki;
504              
505             # no savables - mark it OK so WebFetch::save won't call it an error
506             $self->no_savables_ok;
507             1;
508             }
509              
510             =head1 TWiki software
511              
512             TWiki is a wiki (user-editable web site) with features enabling
513             collaboration in an enterprise environment.
514             It implements the concept of a "structured wiki", allowing structure
515             and automation as needed and retaining the informality of a wiki.
516             Automated input/updates such as from WebFetch::Output::TWiki is one example.
517              
518             See http://twiki.org/ for the Open Source community-maintained software
519             or http://twiki.net/ for enterprise support.
520              
521             WebFetch::Output::TWiki was developed for TWiki Inc (formerly TWiki.Net).
522              
523             =head1 AUTHOR
524              
525             WebFetch was written by Ian Kluft
526             Send patches, bug reports, suggestions and questions to
527             C.
528              
529             =head1 BUGS
530              
531             Please report any bugs or feature requests to C, or through
532             the web interface at L. I will be notified, and then you'll
533             automatically be notified of progress on your bug as I make changes.
534              
535             =head1 SEE ALSO
536              
537             =for html
538             WebFetch
539              
540             =for text
541             WebFetch
542              
543             =for man
544             WebFetch
545              
546             =cut
547              
548             1; # End of WebFetch::Output::TWiki