blib/lib/Mail/Digest/Tools.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 572 | 854 | 66.9 |
branch | 166 | 422 | 39.3 |
condition | 9 | 68 | 13.2 |
subroutine | 30 | 34 | 88.2 |
pod | 7 | 7 | 100.0 |
total | 784 | 1385 | 56.6 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package Mail::Digest::Tools; | ||||||
2 | $VERSION = 2.12; # 05/14/2011 | ||||||
3 | 2 | 2 | 49382 | use strict; | |||
2 | 7 | ||||||
2 | 159 | ||||||
4 | 2 | 2 | 14 | use warnings; | |||
2 | 4 | ||||||
2 | 66 | ||||||
5 | 2 | 2 | 3589 | use Time::Local; | |||
2 | 5002 | ||||||
2 | 32009 | ||||||
6 | our @ISA = ("Exporter"); | ||||||
7 | our @EXPORT_OK = qw( | ||||||
8 | process_new_digests | ||||||
9 | reprocess_ALL_digests | ||||||
10 | reply_to_digest_message | ||||||
11 | repair_message_order | ||||||
12 | consolidate_threads_multiple | ||||||
13 | consolidate_threads_single | ||||||
14 | delete_deletables | ||||||
15 | ); | ||||||
16 | our %EXPORT_TAGS = ( | ||||||
17 | all => \@EXPORT_OK, | ||||||
18 | ); | ||||||
19 | |||||||
20 | ########################## Package Variables ################################### | ||||||
21 | |||||||
22 | our %month30 = map {$_, 1} (4,6,9,11); | ||||||
23 | our %month31 = map {$_, 1} (1,3,5,7,8,10,12); | ||||||
24 | our %unix = map {$_, 1} | ||||||
25 | qw| Unix linux darwin freebsd netbsd openbsd mirbsd cygwin solaris |; | ||||||
26 | |||||||
27 | ############################### Initializer ################################### | ||||||
28 | |||||||
29 | sub _config_check { | ||||||
30 | 6 | 6 | 24 | my ($config_in_ref, $config_out_ref) = @_; | |||
31 | 0 | 0 | die "Cannot find ${$config_out_ref}{'dir_digest'}: $!" | ||||
6 | 314 | ||||||
32 | 6 | 50 | 104 | unless (-d ${$config_out_ref}{'dir_digest'}); | |||
33 | 6 | 178 | die "Missing threads directory: $!" | ||||
34 | 6 | 50 | 16 | unless (-d ${$config_out_ref}{'dir_threads'}); | |||
35 | 6 | 87 | die "Except for '\n' newline, backslashes are not permitted\n in Thread Message Delimiter: $!" | ||||
36 | 6 | 50 | 24 | if (${$config_out_ref}{'thread_msg_delimiter'} =~ /\\[^n]|\\$/); | |||
37 | # to do: | ||||||
38 | # here do error checking on other digest.data info that is | ||||||
39 | # absolutely necessary for all conceivable uses of Mail::Digest::Tools | ||||||
40 | } | ||||||
41 | |||||||
42 | ############################ Public Methods #################################### | ||||||
43 | |||||||
44 | sub process_new_digests { | ||||||
45 | 2 | 2 | 1 | 86320 | my ($config_in_ref, $config_out_ref) = @_; | ||
46 | 2 | 27 | _config_check($config_in_ref, $config_out_ref); | ||||
47 | 2 | 8 | my $choice = _start_new_only(${$config_out_ref}{'title'}); | ||||
2 | 28 | ||||||
48 | 2 | 1157 | _main_processor($config_in_ref, $config_out_ref, $choice); | ||||
49 | } | ||||||
50 | |||||||
51 | sub reprocess_ALL_digests { | ||||||
52 | 0 | 0 | 1 | 0 | my ($config_in_ref, $config_out_ref) = @_; | ||
53 | 0 | 0 | _config_check($config_in_ref, $config_out_ref); | ||||
54 | 0 | 0 | my $choice = _start_ALL(${$config_out_ref}{'title'}); | ||||
0 | 0 | ||||||
55 | 0 | 0 | _main_processor($config_in_ref, $config_out_ref, $choice); | ||||
56 | } | ||||||
57 | |||||||
58 | sub reply_to_digest_message { | ||||||
59 | 2 | 2 | 1 | 65539 | my ($config_in_ref, $config_out_ref, | ||
60 | $dig_number, $dig_entry, $dir_for_reply) = @_; | ||||||
61 | 2 | 15 | _config_check($config_in_ref, $config_out_ref); | ||||
62 | 2 | 12 | my $digests_ref = _get_digest_list( | ||||
63 | $config_in_ref, | ||||||
64 | $config_out_ref, | ||||||
65 | ); | ||||||
66 | 2 | 11 | my $digest_verified = _identify_target_digest( | ||||
67 | $config_in_ref, | ||||||
68 | $config_out_ref, | ||||||
69 | $dig_number, | ||||||
70 | $dig_entry, | ||||||
71 | $digests_ref | ||||||
72 | ); | ||||||
73 | 2 | 90 | my $replyfile = _strip_down_for_reply( | ||||
74 | $config_in_ref, | ||||||
75 | $config_out_ref, | ||||||
76 | $digest_verified, | ||||||
77 | $dig_entry, | ||||||
78 | $dir_for_reply, | ||||||
79 | ); | ||||||
80 | 2 | 16 | return $replyfile; | ||||
81 | } | ||||||
82 | |||||||
83 | sub repair_message_order { | ||||||
84 | # But what about todays_topics.txt? It will be out of order as well. | ||||||
85 | 2 | 2 | 1 | 6339 | my ($config_in_ref, $config_out_ref, $error_date_ref) = @_; | ||
86 | 2 | 11 | _config_check($config_in_ref, $config_out_ref); | ||||
87 | 2 | 5 | local $_; | ||||
88 | 2 | 19 | my $date_threshold = _verify_date($error_date_ref); | ||||
89 | 2 | 313 | my $delimiter = ${$config_out_ref}{'thread_msg_delimiter'}; | ||||
2 | 9 | ||||||
90 | 2 | 10 | my $dir_threads = ${$config_out_ref}{'dir_threads'}; | ||||
2 | 6 | ||||||
91 | 2 | 4 | my (@threadfiles, @resorted_threadfiles); | ||||
92 | 2 | 50 | 49 | chdir $dir_threads or die "Unable to change to $dir_threads: $!"; | |||
93 | 2 | 50 | 87 | opendir DIR, $dir_threads or die "Unable to open $dir_threads: $!"; | |||
94 | 2 | 51 | @threadfiles = grep {! m/^\./ } readdir DIR; | ||||
8 | 42 | ||||||
95 | 2 | 50 | 27 | closedir DIR or die "Unable to close $dir_threads: $!"; | |||
96 | 2 | 15 | foreach my $in (@threadfiles) { | ||||
97 | 4 | 6 | my (@msgids); | ||||
98 | my (%messages); | ||||||
99 | 4 | 53 | my $mtime = (stat($in))[9]; | ||||
100 | 4 | 50 | 15 | if ($date_threshold < $mtime) { | |||
101 | 4 | 13 | my $msgs_ref = _get_array_of_messages($in, $delimiter); | ||||
102 | 4 | 7 | foreach my $msg (@{$msgs_ref}) { | ||||
4 | 11 | ||||||
103 | 16 | 179 | my @lines = split(/\n/, $msg); | ||||
104 | 16 | 33 | my ($ln); | ||||
105 | 16 | 38 | while (defined($ln = shift(@lines))) { | ||||
106 | 32 | 100 | 142 | if ($ln =~ /^Message: ([\d_]+)$/) { | |||
107 | 16 | 39 | push(@msgids, $1); | ||||
108 | 16 | 45 | $messages{$1} = $msg; | ||||
109 | 16 | 65 | last; | ||||
110 | } | ||||||
111 | } | ||||||
112 | } | ||||||
113 | 4 | 8 | my ($need_resort_flag); | ||||
114 | 4 | 15 | for (my $el = 1; $el <= $#msgids; $el++) { | ||||
115 | 6 | 100 | 25 | if ($msgids[$el] lt $msgids[$el-1]) { | |||
116 | 4 | 6 | $need_resort_flag++; | ||||
117 | 4 | 6 | last; | ||||
118 | } | ||||||
119 | } | ||||||
120 | 4 | 50 | 11 | if ($need_resort_flag) { | |||
121 | 4 | 10 | my $out = "$in.bak"; | ||||
122 | 4 | 50 | 361 | open OUT, ">$out" or die "Couldn't open $out for writing: $!"; | |||
123 | 4 | 29 | foreach my $msg (sort keys %messages) { | ||||
124 | 16 | 62 | print OUT $messages{$msg}, $delimiter; | ||||
125 | } | ||||||
126 | 4 | 50 | 170 | close OUT or die "Couldn't close $out after writing: $!"; | |||
127 | 4 | 50 | 369 | rename($out, $in) or die "Couldn't rename $out to $in: $!"; | |||
128 | 4 | 32 | push(@resorted_threadfiles, $in); | ||||
129 | } | ||||||
130 | } | ||||||
131 | } | ||||||
132 | 2 | 50 | 12 | if (@resorted_threadfiles) { | |||
133 | 2 | 254 | print "Message order has been re-sorted in\n"; | ||||
134 | 2 | 285 | print " $_\n" foreach @resorted_threadfiles; | ||||
135 | } | ||||||
136 | } | ||||||
137 | |||||||
138 | sub consolidate_threads_multiple { | ||||||
139 | 0 | 0 | 1 | 0 | my ($config_in_ref, $config_out_ref); | ||
140 | 0 | 0 | $config_in_ref = shift; | ||||
141 | 0 | 0 | $config_out_ref = shift; | ||||
142 | 0 | 0 | 0 | my $first_common_letters = defined $_[0] ? $_[0] : 20; | |||
143 | 0 | 0 | my $delimiter = ${$config_out_ref}{'thread_msg_delimiter'}; | ||||
0 | 0 | ||||||
144 | 0 | 0 | my $dir_threads = ${$config_out_ref}{'dir_threads'}; | ||||
0 | 0 | ||||||
145 | 0 | 0 | local $_; | ||||
146 | 0 | 0 | my (@threadfiles, %threadstubs, %stubs_for_consol); | ||||
147 | 0 | 0 | 0 | chdir $dir_threads or die "Unable to change to $dir_threads: $!"; | |||
148 | 0 | 0 | 0 | opendir DIR, $dir_threads or die "Unable to open $dir_threads: $!"; | |||
149 | 0 | 0 | @threadfiles = map {/(.*)\.thr\.txt$/} readdir DIR; | ||||
0 | 0 | ||||||
150 | 0 | 0 | 0 | closedir DIR or die "Unable to close $dir_threads: $!"; | |||
151 | 0 | 0 | foreach (@threadfiles) { | ||||
152 | 0 | 0 | my $stub = substr($_, 0, $first_common_letters); | ||||
153 | 0 | 0 | push @{$threadstubs{$stub}}, "$_.thr.txt"; | ||||
0 | 0 | ||||||
154 | } | ||||||
155 | 0 | 0 | my ($k,$v, $consolcount); | ||||
156 | 0 | 0 | CONSOL: while ( ($k,$v) = each(%threadstubs)) { | ||||
157 | 0 | 0 | 0 | if (@{$v} > 1) { | |||
0 | 0 | ||||||
158 | 0 | 0 | $consolcount++; | ||||
159 | 0 | 0 | print "Candidates for consolidation:\n"; | ||||
160 | 0 | 0 | foreach my $thrfile (@{$v}) { | ||||
0 | 0 | ||||||
161 | 0 | 0 | print " $thrfile\n"; | ||||
162 | } | ||||||
163 | 0 | 0 | while () { | ||||
164 | 0 | 0 | my ($selection); | ||||
165 | 0 | 0 | print "\nTo consolidate, type YES: "; | ||||
166 | 0 | 0 | chomp ($selection = <>); | ||||
167 | 0 | 0 | 0 | if ($selection eq 'YES') { | |||
168 | 0 | 0 | print "\n Files will be consolidated\n\n"; | ||||
169 | 0 | 0 | $stubs_for_consol{$k} = $v; | ||||
170 | } else { | ||||||
171 | 0 | 0 | print "\n Files will not be consolidated\n\n"; | ||||
172 | } | ||||||
173 | 0 | 0 | next CONSOL; | ||||
174 | } | ||||||
175 | } | ||||||
176 | } | ||||||
177 | 0 | 0 | 0 | unless ($consolcount) { | |||
178 | 0 | 0 | warn "\nAnalysis of the first $first_common_letters letters of each file in\n $dir_threads\n shows no candidates for consolidation. Please hard-code\n names of files you wish to consolidate as arguments to\n \&consolidate_threads_single:\n $!"; | ||||
179 | } | ||||||
180 | 0 | 0 | foreach my $k (keys %stubs_for_consol) { | ||||
181 | 0 | 0 | consolidate_threads_single( | ||||
182 | $config_in_ref, | ||||||
183 | $config_out_ref, | ||||||
184 | 0 | 0 | \@{$stubs_for_consol{$k}} | ||||
185 | ); | ||||||
186 | } | ||||||
187 | } | ||||||
188 | |||||||
189 | sub consolidate_threads_single { | ||||||
190 | 2 | 2 | 1 | 25666 | my ($config_in_ref, $config_out_ref, $filesref) = @_; | ||
191 | 2 | 5 | my $delimiter = ${$config_out_ref}{'thread_msg_delimiter'}; | ||||
2 | 10 | ||||||
192 | 2 | 4 | my $dir_threads = ${$config_out_ref}{'dir_threads'}; | ||||
2 | 7 | ||||||
193 | 2 | 4 | local $_; | ||||
194 | 2 | 5 | my (%messages, @superseded); | ||||
195 | 2 | 4 | foreach my $in (@{$filesref}) { | ||||
2 | 10 | ||||||
196 | 4 | 50 | 107 | unless ($in =~ /^$dir_threads/) { | |||
197 | 4 | 15 | $in = "$dir_threads/$in"; | ||||
198 | } | ||||||
199 | 4 | 18 | my $msgs_ref = _get_array_of_messages($in, $delimiter); | ||||
200 | 4 | 9 | foreach my $msg (@{$msgs_ref}) { | ||||
4 | 12 | ||||||
201 | 9 | 1303 | my @lines = split(/\n/, $msg); | ||||
202 | 9 | 26 | my ($ln); | ||||
203 | 9 | 29 | while (defined($ln = shift(@lines))) { | ||||
204 | 18 | 100 | 84 | if ($ln =~ /^Message: ([\d_]+)$/) { | |||
205 | 9 | 50 | 39 | die "Message $1 already exists: $!" | |||
206 | if (exists $messages{$1}); | ||||||
207 | 9 | 45 | $messages{$1} = [ $msg, $in ]; | ||||
208 | 9 | 51 | last; | ||||
209 | } | ||||||
210 | } | ||||||
211 | } | ||||||
212 | 4 | 16 | push(@superseded, $in); | ||||
213 | } | ||||||
214 | 2 | 21 | my @msgids = sort keys %messages; | ||||
215 | 2 | 11 | my $first_in_thread = "$messages{$msgids[0]}[1]"; | ||||
216 | 2 | 7 | my $out = $first_in_thread . '.bak'; | ||||
217 | 2 | 50 | 244 | open OUT, ">$out" or die "Couldn't open $out for writing: $!"; | |||
218 | 2 | 16 | foreach (sort keys %messages) { | ||||
219 | 9 | 38 | print OUT $messages{$_}[0], $delimiter; | ||||
220 | } | ||||||
221 | 2 | 50 | 116 | close OUT or die "Couldn't close $out after writing: $!"; | |||
222 | 2 | 8 | foreach (@superseded) { | ||||
223 | 4 | 50 | 352 | rename($_, $_ . '.DELETABLE') or die "Couldn't rename $_: $!"; | |||
224 | } | ||||||
225 | 2 | 50 | 158 | rename($out, $first_in_thread) | |||
226 | || die "Couldn't rename $out to $first_in_thread: $!"; | ||||||
227 | } | ||||||
228 | |||||||
229 | sub delete_deletables { | ||||||
230 | 2 | 2 | 1 | 8262 | my $config_out_ref = shift; | ||
231 | 2 | 6 | my $dir_threads = ${$config_out_ref}{'dir_threads'}; | ||||
2 | 10 | ||||||
232 | 2 | 4 | local $_; | ||||
233 | 2 | 4 | my (@deletables); | ||||
234 | 2 | 50 | 906 | chdir $dir_threads or die "Unable to change to $dir_threads: $!"; | |||
235 | 2 | 50 | 5286 | opendir DIR, $dir_threads or die "Unable to open $dir_threads: $!"; | |||
236 | 2 | 41 | @deletables = grep { /\.DELETABLE$/ } readdir DIR; | ||||
10 | 53 | ||||||
237 | 2 | 50 | 33 | closedir DIR or die "Unable to close $dir_threads: $!"; | |||
238 | 2 | 26 | foreach (@deletables) { | ||||
239 | 4 | 251 | print "Deleting $_\n"; | ||||
240 | 4 | 50 | 539 | unlink $_ or die "Couldn't unlink $_: $!"; | |||
241 | } | ||||||
242 | } | ||||||
243 | |||||||
244 | ############################ Private Methods ################################### | ||||||
245 | |||||||
246 | sub _start_new_only { | ||||||
247 | 2 | 2 | 15 | my $full_title = shift; | |||
248 | 2 | 309 | print "\nProcessing new $full_title digest files only!\n\n"; | ||||
249 | 2 | 19 | return ''; | ||||
250 | } | ||||||
251 | |||||||
252 | sub _start_ALL { | ||||||
253 | # prints screen prompts which ask user to choose between | ||||||
254 | # default version (process newly arrived digests only) and | ||||||
255 | # full version (process or re-process all digests) | ||||||
256 | 0 | 0 | 0 | my $full_title = shift @_; | |||
257 | 0 | 0 | my ($choice); | ||||
258 | 0 | 0 | print "\n " . uc($full_title) . "\n"; | ||||
259 | 0 | 0 | print < | ||||
260 | |||||||
261 | By default, this program processes only NEWLY ARRIVED | ||||||
262 | $full_title files found in this directory. Messages in | ||||||
263 | these new digests are sorted and appended to the appropriate | ||||||
264 | ".thr.txt" files in the "Threads" subdirectory. | ||||||
265 | |||||||
266 | However, by choosing method 'reprocess_ALL_digests()' you have | ||||||
267 | indicated that you wish to process ALL digest files found in this | ||||||
268 | directory -- regardless of whether or not they have previously been | ||||||
269 | processed. This is recommended ONLY for initialization and testing | ||||||
270 | of this program. | ||||||
271 | |||||||
272 | Since this will wipe out all threads files ('.thr.txt') as well -- | ||||||
273 | including threads files for which you no longer have their source | ||||||
274 | digest files -- please confirm that this is your intent by typing | ||||||
275 | ALL at the prompt. | ||||||
276 | |||||||
277 | |||||||
278 | GOT IT? | ||||||
279 | |||||||
280 | XQ18 | ||||||
281 | |||||||
282 | 0 | 0 | print qq{Hit 'Enter' -- or, to process ALL digests in this directory, | ||||
283 | type 'ALL' and hit 'Enter': }; | ||||||
284 | 0 | 0 | chomp ($choice = |
||||
285 | 0 | 0 | 0 | if ($choice eq 'ALL') { | |||
286 | 0 | 0 | print qq{ | ||||
287 | You have chosen to WIPE OUT all '.thr.txt' files currently | ||||||
288 | existing in the 'Threads' subdirectory and reprocess all | ||||||
289 | $full_title digest files from scratch. | ||||||
290 | |||||||
291 | Please re-confirm your choice by once again typing 'ALL' | ||||||
292 | and hitting 'Enter': }; | ||||||
293 | |||||||
294 | 0 | 0 | chomp (my $confirm = |
||||
295 | 0 | 0 | 0 | if ($choice eq $confirm) { | |||
296 | 0 | 0 | print "\n Processing ALL digests in this directory!\n"; | ||||
297 | } else { | ||||||
298 | 0 | 0 | die "\n Choice not confirmed; exiting program. $!\n"; | ||||
299 | } | ||||||
300 | } else { | ||||||
301 | 0 | 0 | print "\n Processing new digest files only!\n"; | ||||
302 | 0 | 0 | $choice = ''; | ||||
303 | } | ||||||
304 | 0 | 0 | print "\n"; | ||||
305 | 0 | 0 | return $choice; | ||||
306 | } | ||||||
307 | |||||||
308 | sub _main_processor { | ||||||
309 | 2 | 2 | 8 | my ($config_in_ref, $config_out_ref, $choice) = @_; | |||
310 | |||||||
311 | 2 | 25 | my $recentref = _archive_or_kill($config_out_ref); | ||||
312 | |||||||
313 | 2 | 77 | my $digests_ref = _get_digest_list($config_in_ref, $config_out_ref); | ||||
314 | |||||||
315 | 2 | 12 | my $in_out_ref = _prep_source_file( | ||||
316 | $config_in_ref, $config_out_ref, $digests_ref); #v1.94 | ||||||
317 | |||||||
318 | 2 | 24 | $in_out_ref = _get_log_data($config_out_ref, $choice, $in_out_ref); | ||||
319 | |||||||
320 | 2 | 4 | my ($message_count, $thread_count); | ||||
321 | 2 | 20 | ($in_out_ref, $message_count, $thread_count) = _strip_down( | ||||
322 | $in_out_ref, | ||||||
323 | $config_in_ref, | ||||||
324 | $config_out_ref, | ||||||
325 | $recentref, | ||||||
326 | ); | ||||||
327 | |||||||
328 | 2 | 22 | _update_all_topics($choice, $config_out_ref, $in_out_ref); | ||||
329 | |||||||
330 | 2 | 11 | _print_results( | ||||
331 | scalar(keys %$in_out_ref), | ||||||
332 | $message_count, | ||||||
333 | $config_out_ref, | ||||||
334 | $thread_count, | ||||||
335 | ); | ||||||
336 | } | ||||||
337 | |||||||
338 | sub _archive_or_kill { | ||||||
339 | 2 | 2 | 9 | my $config_out_ref = shift; | |||
340 | 2 | 10 | my $dir_threads = ${$config_out_ref}{'dir_threads'}; | ||||
2 | 8 | ||||||
341 | 2 | 8 | my $trigger = ${$config_out_ref}{'archive_kill_trigger'}; | ||||
2 | 153 | ||||||
342 | 2 | 19 | my $threshold = defined ${$config_out_ref}{'archive_kill_days'} | ||||
2 | 8 | ||||||
343 | 2 | 50 | 4 | ? ${$config_out_ref}{'archive_kill_days'} | |||
344 | : 14; # v1.95 | ||||||
345 | 2 | 10 | my ($thr, %recent, %nonrecent, $recentref); | ||||
346 | 2 | 50 | 87 | chdir($dir_threads) || die "cannot chdir to $dir_threads $!"; | |||
347 | 2 | 50 | 116 | opendir THR, $dir_threads or die "cannot open $dir_threads: $!"; | |||
348 | 2 | 102 | while ($thr = readdir THR) { | ||||
349 | 6 | 50 | 33 | 46 | next unless ( ($thr =~ /\.thr\.txt$/) and (-f $thr) ); | ||
350 | 0 | 0 | 0 | if ($trigger == 0) { | |||
351 | 0 | 0 | $recent{$thr}++; | ||||
352 | } else { | ||||||
353 | 0 | 0 | 0 | -M $thr <= $threshold # v1.95 | |||
354 | ? $recent{$thr}++ | ||||||
355 | : $nonrecent{$thr}++; | ||||||
356 | } | ||||||
357 | } | ||||||
358 | 2 | 50 | 36 | closedir THR or die "Cannot close $dir_threads: $!"; | |||
359 | 2 | 50 | 10 | return \%recent if ($trigger == 0); | |||
360 | 2 | 50 | 8 | if ($trigger == 1) { | |||
0 | |||||||
361 | 2 | 17 | _archive_old_files($config_out_ref, \%nonrecent); | ||||
362 | } elsif ($trigger == -1) { | ||||||
363 | 0 | 0 | _kill_old_files($config_out_ref, \%nonrecent); | ||||
364 | } else { | ||||||
365 | 0 | 0 | die "$trigger is invalid value for archive_kill_trigger: $!"; | ||||
366 | } | ||||||
367 | 2 | 19 | return \%recent; | ||||
368 | } | ||||||
369 | |||||||
370 | sub _archive_old_files { | ||||||
371 | 2 | 2 | 8 | my ($config_out_ref, $nonrecentref) = @_; | |||
372 | 2 | 3 | my $dir_threads = ${$config_out_ref}{'dir_threads'}; | ||||
2 | 8 | ||||||
373 | 2 | 82 | my $archfile = defined ${$config_out_ref}{'archived_today'} | ||||
2 | 9 | ||||||
374 | 2 | 50 | 12 | ? ${$config_out_ref}{'archived_today'} | |||
375 | 0 | 0 | : "${$config_out_ref}{'dir_digest'}/archived_today.txt"; | ||||
376 | 2 | 9 | my $dir_archive_top = ${$config_out_ref}{'dir_archive_top'}; | ||||
2 | 7 | ||||||
377 | 2 | 50 | 67 | die "Missing top archive directory: $!" unless (-d $dir_archive_top); | |||
378 | 2 | 17 | foreach ('a'..'z') { | ||||
379 | 52 | 50 | 3285 | die "Missing archive subdirectory $_: $!" unless (-d "$dir_archive_top/$_"); | |||
380 | } | ||||||
381 | 2 | 50 | 67 | die "Missing archive subdirectory 'other': $!" unless (-d "$dir_archive_top/other"); | |||
382 | |||||||
383 | 2 | 50 | 237 | open ARCH, ">$archfile" or die "Couldn't open $archfile for writing: $!"; | |||
384 | 2 | 144 | print ARCH 'Archived today (', scalar(localtime), "):\n"; | ||||
385 | 2 | 225 | print ARCH '-' x 41, "\n"; | ||||
386 | |||||||
387 | 2 | 6 | my ($thr, $archstr); | ||||
388 | 2 | 6 | my $toarchive = 0; | ||||
389 | 2 | 5 | foreach $thr (sort keys %{$nonrecentref}) { | ||||
2 | 19 | ||||||
390 | 0 | 0 | my $initial = lc(substr $thr, 0, 1); | ||||
391 | 0 | 0 | print "Archiving: $thr\n"; | ||||
392 | 0 | 0 | $archstr .= $thr . "\n"; | ||||
393 | 0 | 0 | 0 | if ($initial =~ /[a-zA-Z]/) { | |||
394 | 0 | 0 | 0 | rename($thr, "$dir_archive_top/$initial/$thr") or die "Couldn't move $thr: $!"; | |||
395 | } else { | ||||||
396 | 0 | 0 | 0 | rename($thr, "$dir_archive_top/other/$thr") or die "Couldn't move $thr: $!"; | |||
397 | } | ||||||
398 | 0 | 0 | $toarchive++; | ||||
399 | 0 | 0 | 0 | print "$toarchive files archived\n\n" if ($toarchive % 100 == 0); | |||
400 | } | ||||||
401 | 2 | 2244 | print "$toarchive files archived\n\n"; | ||||
402 | 2 | 50 | 18 | $toarchive ? print ARCH $archstr : print ARCH "[None.]\n"; | |||
403 | 2 | 50 | 303 | close ARCH or die "Couldn't close $archfile after writing: $!"; | |||
404 | } | ||||||
405 | |||||||
406 | sub _kill_old_files { | ||||||
407 | 0 | 0 | 0 | my ($config_out_ref, $nonrecentref) = @_; | |||
408 | 0 | 0 | my $dir_threads = ${$config_out_ref}{'dir_threads'}; | ||||
0 | 0 | ||||||
409 | 0 | 0 | my $killfile = defined ${$config_out_ref}{'deleted_today'} | ||||
0 | 0 | ||||||
410 | 0 | 0 | 0 | ? ${$config_out_ref}{'deleted_today'} | |||
411 | 0 | 0 | : "${$config_out_ref}{'dir_digest'}/deleted_today.txt"; # v1.95 | ||||
412 | 0 | 0 | 0 | open KILL, ">$killfile" or die "Couldn't open $killfile for writing: $!"; | |||
413 | 0 | 0 | print KILL 'Deleted today (', scalar(localtime), "):\n"; | ||||
414 | 0 | 0 | print KILL '-' x 40, "\n"; | ||||
415 | |||||||
416 | 0 | 0 | my ($thr, $killstr); | ||||
417 | 0 | 0 | my $tokill = 0; | ||||
418 | 0 | 0 | foreach $thr (sort keys %{$nonrecentref}) { | ||||
0 | 0 | ||||||
419 | 0 | 0 | print "Unlinking: $thr\n"; | ||||
420 | 0 | 0 | $killstr .= $thr . "\n"; | ||||
421 | 0 | 0 | 0 | unlink $thr or die "Couldn't unlink $thr: $!"; | |||
422 | 0 | 0 | $tokill++; | ||||
423 | 0 | 0 | 0 | print "$tokill files deleted\n" if ($tokill % 100 == 0); | |||
424 | } | ||||||
425 | 0 | 0 | print "$tokill files deleted\n"; | ||||
426 | 0 | 0 | 0 | $tokill ? print KILL $killstr : print KILL "[None.]\n"; | |||
427 | 0 | 0 | 0 | close KILL or die "Couldn't close $killfile after writing: $!"; | |||
428 | } | ||||||
429 | |||||||
430 | sub _get_digest_list { | ||||||
431 | 4 | 4 | 11 | my ($config_in_ref, $config_out_ref) = @_; | |||
432 | 4 | 50 | 8 | opendir(DIR, ${$config_out_ref}{'dir_digest'}) || die "no ${$config_out_ref}{'dir_digest'}?: $!"; | |||
0 | 0 | ||||||
4 | 218 | ||||||
433 | 8 | 42 | my @digests = | ||||
434 | 40 | 45 | sort { lc($a) cmp lc($b) } | ||||
435 | 4 | 81 | grep { /${$config_in_ref}{'grep_formula'}/ } | ||||
40 | 486 | ||||||
436 | readdir(DIR); | ||||||
437 | 4 | 50 | 82 | closedir(DIR) || die "Could not close ${$config_out_ref}{'dir_digest'}: $!"; | |||
0 | 0 | ||||||
438 | 4 | 17 | return \@digests; | ||||
439 | } | ||||||
440 | |||||||
441 | sub _prep_source_file { | ||||||
442 | 2 | 2 | 6 | my ($config_in_ref, $config_out_ref, $digests_ref) = @_; # v1.94 | |||
443 | # %in_out: hash of all instances in directory of a given digest, | ||||||
444 | # value refers to digest's title and its message topics | ||||||
445 | 2 | 5 | my (%in_out, $id); | ||||
446 | 2 | 6 | foreach (@{$digests_ref}) { | ||||
2 | 9 | ||||||
447 | 5 | 9 | $_ =~ m/${$config_in_ref}{'pattern_target'}/; | ||||
5 | 83 | ||||||
448 | 5 | 7 | $id = eval(${$config_out_ref}{'id_format'}); # v1.94 | ||||
5 | 1016 | ||||||
449 | 5 | 36 | $in_out{$id} = [ $_ ]; | ||||
450 | } | ||||||
451 | 2 | 7 | return \%in_out; | ||||
452 | } | ||||||
453 | |||||||
454 | sub _identify_target_digest { | ||||||
455 | 2 | 2 | 7 | my ($config_in_ref, $config_out_ref, | |||
456 | $dig_number, $dig_entry, $digests_ref) = @_; | ||||||
457 | 2 | 3 | my ($hit); | ||||
458 | 2 | 6 | foreach my $digfile (@{$digests_ref}) { | ||||
2 | 18 | ||||||
459 | 5 | 10 | $digfile =~ m/${$config_in_ref}{'pattern_target'}/; | ||||
5 | 74 | ||||||
460 | 5 | 100 | 33 | 39 | if (defined $2) { | ||
50 | |||||||
461 | 2 | 100 | 12 | next unless ($2 == $dig_number); | |||
462 | 1 | 3 | $hit = $digfile; | ||||
463 | 1 | 4 | last; | ||||
464 | } elsif ((defined $1) and (! defined $2)) { | ||||||
465 | 3 | 100 | 11 | next unless ($1 == $dig_number); | |||
466 | 1 | 3 | $hit = $digfile; | ||||
467 | 1 | 4 | last; | ||||
468 | } else { | ||||||
469 | 0 | 0 | die "Could'nt process digest filename to identify target digest: $!"; | ||||
470 | } | ||||||
471 | } | ||||||
472 | 2 | 50 | 12 | if (defined $hit) { | |||
473 | 2 | 2855 | return $hit; | ||||
474 | } else { | ||||||
475 | 0 | 0 | print STDERR "No ${$config_out_ref}{'title'} digest numbered $dig_number could be found in directory\n"; | ||||
0 | 0 | ||||||
476 | 0 | 0 | print STDERR " ${$config_out_ref}{'dir_digest'}\n"; | ||||
0 | 0 | ||||||
477 | 0 | 0 | exit 0; | ||||
478 | } | ||||||
479 | } | ||||||
480 | |||||||
481 | sub _get_log_data { | ||||||
482 | 2 | 2 | 6 | my ($config_out_ref, $choice, $in_out_ref) = @_; | |||
483 | 2 | 6 | my $dir_digest = ${$config_out_ref}{'dir_digest'}; | ||||
2 | 6 | ||||||
484 | 2 | 4 | my $dir_threads = ${$config_out_ref}{'dir_threads'}; | ||||
2 | 9 | ||||||
485 | 2 | 4 | my $logfile = ${$config_out_ref}{'digests_log'}; | ||||
2 | 7 | ||||||
486 | 2 | 9 | my $readfile = defined ${$config_out_ref}{'digests_read'} # new in 1.95 | ||||
2 | 6 | ||||||
487 | 2 | 50 | 4 | ? ${$config_out_ref}{'digests_read'} | |||
488 | : "$dir_digest/digests_read.txt"; | ||||||
489 | |||||||
490 | # hash which pulls in data from an external log file that | ||||||
491 | # records which digests have been previously processed | ||||||
492 | 2 | 6 | my (%hashlog); | ||||
493 | 2 | 50 | 205 | open(LOG, $logfile) || die "cannot open $logfile for reading: $!"; | |||
494 | 2 | 66 | while ( |
||||
495 | 0 | 0 | chomp; | ||||
496 | 0 | 0 | my @entrydata = split(/;/); | ||||
497 | 0 | 0 | $hashlog{$entrydata[0]} = [ @entrydata[1..$#entrydata] ]; | ||||
498 | } | ||||||
499 | 2 | 50 | 30 | close(LOG) || die "cannot close $logfile: $!"; | |||
500 | |||||||
501 | 2 | 13 | foreach ( sort keys %$in_out_ref ) { | ||||
502 | # if this is 1st time this digest has been seen for processing ... | ||||||
503 | 5 | 50 | 15 | if (! exists $hashlog{$_}) { | |||
504 | 5 | 131 | $hashlog{$_}[1] = $hashlog{$_}[0] = scalar localtime; | ||||
505 | |||||||
506 | # if this digest has been seen for processing already ... | ||||||
507 | } else { | ||||||
508 | |||||||
509 | # either we're going to re-process every digest ... | ||||||
510 | 0 | 0 | 0 | if ($choice eq 'ALL') { | |||
511 | 0 | 0 | 0 | chdir($dir_threads) || die "cannot chdir to $dir_threads $!"; | |||
512 | 0 | 0 | my ($thrfile); | ||||
513 | 0 | 0 | 0 | opendir(THREADS, $dir_threads) || die "no $dir_threads?: $!"; | |||
514 | 0 | 0 | while ($thrfile = readdir(THREADS) ) { | ||||
515 | 0 | 0 | 0 | next unless $thrfile =~ /\.thr\.txt$/; | |||
516 | 0 | 0 | 0 | unlink $thrfile || warn "having trouble deleting $thrfile: $!"; | |||
517 | } | ||||||
518 | 0 | 0 | 0 | closedir(THREADS) or die "Couldn't close $dir_threads: $!"; | |||
519 | 0 | 0 | 0 | chdir($dir_digest) || die "cannot chdir to $dir_digest $!"; | |||
520 | 0 | 0 | $hashlog{$_}[1] = scalar localtime; | ||||
521 | |||||||
522 | # or we're only going to process new digest files | ||||||
523 | } else { | ||||||
524 | 0 | 0 | delete ${$in_out_ref}{$_}; | ||||
0 | 0 | ||||||
525 | } | ||||||
526 | } | ||||||
527 | } | ||||||
528 | 2 | 11 | _update_digests_log(\%hashlog, $logfile); | ||||
529 | 2 | 19 | _update_digests_read( | ||||
530 | 2 | 11 | ${$config_out_ref}{'title'}, | ||||
531 | \%hashlog, | ||||||
532 | $readfile, # new in v1.95 | ||||||
533 | 2 | 50 | 3 | ) if ${$config_out_ref}{'digests_read_flag'}; | |||
534 | 2 | 10 | return ($in_out_ref); | ||||
535 | } | ||||||
536 | |||||||
537 | sub _update_digests_log { # must be supplied with ref to %hashlog | ||||||
538 | 2 | 2 | 5 | my ($hashlog_ref, $logfile) = @_; | |||
539 | 2 | 5 | my ($logstring); | ||||
540 | 2 | 11 | foreach ( sort keys %$hashlog_ref ) { | ||||
541 | # $logstring .= $_ . ';' . ${%$hashlog_ref}{$_}[0] . ';' . | ||||||
542 | # ${%$hashlog_ref}{$_}[1]. "\n"; | ||||||
543 | 5 | 299 | $logstring .= $_ . ';' . ${$hashlog_ref}{$_}[0] . ';' . | ||||
5 | 19 | ||||||
544 | 5 | 9 | ${$hashlog_ref}{$_}[1]. "\n"; | ||||
545 | } | ||||||
546 | 2 | 50 | 156 | open(LOG, ">$logfile") || die "cannot open $logfile for writing: $!"; | |||
547 | 2 | 25 | print LOG $logstring; | ||||
548 | 2 | 50 | 81 | close(LOG) || die "cannot close $logfile: $!"; | |||
549 | } | ||||||
550 | |||||||
551 | sub _update_digests_read { # must be supplied with $title and ref to %hashlog | ||||||
552 | 2 | 2 | 7 | my ($title, $hashlog_ref, $readfile) = @_; | |||
553 | 2 | 10 | my $readstring = ''; | ||||
554 | 2 | 9 | $readstring .= "$title Digest\n"; | ||||
555 | 2 | 12 | foreach ( sort keys %$hashlog_ref ) { | ||||
556 | 5 | 11 | $readstring .= "\n$_:\n"; | ||||
557 | # $readstring .= " first processed at ${%$hashlog_ref}{$_}[0]\n"; | ||||||
558 | # $readstring .= " most recently processed at ${%$hashlog_ref}{$_}[1]\n"; | ||||||
559 | 5 | 8 | $readstring .= " first processed at ${$hashlog_ref}{$_}[0]\n"; | ||||
5 | 17 | ||||||
560 | 5 | 7 | $readstring .= " most recently processed at ${$hashlog_ref}{$_}[1]\n"; | ||||
5 | 17 | ||||||
561 | } | ||||||
562 | 2 | 50 | 220 | open(READ, ">$readfile") || die "cannot open $readfile for writing: $!"; | |||
563 | 2 | 9 | print READ $readstring; | ||||
564 | 2 | 50 | 66 | close(READ) || die "can't close $readfile:$!"; | |||
565 | } | ||||||
566 | |||||||
567 | sub _strip_down { | ||||||
568 | 2 | 2 | 6 | my ($in_out_ref, $config_in_ref, $config_out_ref, $recentref) = @_; | |||
569 | 2 | 5 | my $MIME_cleanup_flag = ${$config_in_ref}{'MIME_cleanup_flag'}; | ||||
2 | 5 | ||||||
570 | 2 | 4 | my $topics_intro = ${$config_in_ref}{'topics_intro'}; | ||||
2 | 7 | ||||||
571 | 2 | 5 | my $post_topics_delimiter = ${$config_in_ref}{'post_topics_delimiter'}; | ||||
2 | 6 | ||||||
572 | 2 | 4 | my $source_msg_delimiter = ${$config_in_ref}{'source_msg_delimiter'}; | ||||
2 | 7 | ||||||
573 | 0 | 0 | my $subject_constant = ${$config_in_ref}{'subject_constant'} | ||||
2 | 11 | ||||||
574 | 2 | 50 | 51 | if (defined ${$config_in_ref}{'subject_constant'}); | |||
575 | 2 | 3 | my $archive_kill_trigger = ${$config_out_ref}{'archive_kill_trigger'}; | ||||
2 | 5 | ||||||
576 | 2 | 5 | my $dir_digest = ${$config_out_ref}{'dir_digest'}; | ||||
2 | 4 | ||||||
577 | 2 | 5 | my $dir_threads = ${$config_out_ref}{'dir_threads'}; | ||||
2 | 6 | ||||||
578 | 2 | 3 | my $thread_msg_delimiter = ${$config_out_ref}{'thread_msg_delimiter'}; | ||||
2 | 5 | ||||||
579 | 0 | 0 | my $optional_fields_ref = ${$config_out_ref}{'optional_fields'} | ||||
2 | 9 | ||||||
580 | 2 | 50 | 4 | if (defined ${$config_out_ref}{'optional_fields'}); | |||
581 | 2 | 5 | my $MIME_cleanup_log_flag = ${$config_out_ref}{'MIME_cleanup_log_flag'} | ||||
2 | 9 | ||||||
582 | 2 | 50 | 5 | if (defined ${$config_out_ref}{'MIME_cleanup_log_flag'}); | |||
583 | |||||||
584 | 2 | 4 | my (%recent, $mimelog, %optional_fields); | ||||
585 | 2 | 50 | 15 | %recent = defined $recentref ? %$recentref : (); | |||
586 | 2 | 50 | 9 | if (defined $optional_fields_ref) { | |||
587 | 0 | 0 | my $i = 0; | ||||
588 | 0 | 0 | foreach my $opt (@{$optional_fields_ref}) { | ||||
0 | 0 | ||||||
589 | 0 | 0 | my $longkey = $opt . '_style_flag'; | ||||
590 | 0 | 0 | 0 | if (defined ${$config_in_ref}{$longkey}) { | |||
0 | 0 | ||||||
591 | 0 | 0 | 0 | next unless (${$config_in_ref}{$longkey} =~ /\^(.*?):/); | |||
0 | 0 | ||||||
592 | 0 | 0 | $optional_fields{$i} = [ $opt, $1 ]; | ||||
593 | 0 | 0 | $i++; | ||||
594 | } else { | ||||||
595 | 0 | 0 | warn "WARNING:\n '$opt' is not available as a header field for digest ${$config_out_ref}{'title'}\n"; | ||||
0 | 0 | ||||||
596 | } | ||||||
597 | } | ||||||
598 | } | ||||||
599 | |||||||
600 | # Analysis of source message delimiter: | ||||||
601 | 2 | 12 | my $delimiter_core = | ||||
602 | substr( $source_msg_delimiter, 0, index($source_msg_delimiter, "\n") ); | ||||||
603 | |||||||
604 | 2 | 6 | my $message_count = 0; | ||||
605 | 2 | 5 | my %seen = (); | ||||
606 | 2 | 4 | my $seen_ref = \%seen; | ||||
607 | 2 | 3 | my ($output_ref); | ||||
608 | 2 | 100 | 8 | if ($MIME_cleanup_flag) { | |||
609 | 1 | 4 | $mimelog = defined ${$config_out_ref}{'mimelog'} # v1.96 | ||||
1 | 2 | ||||||
610 | 1 | 50 | 1 | ? ${$config_out_ref}{'mimelog'} | |||
611 | 0 | 0 | : "${$config_out_ref}{'dir_digest'}/mimelog.txt"; | ||||
612 | 1 | 50 | 4 | if ($MIME_cleanup_log_flag) { | |||
613 | 1 | 50 | 72 | open MIME, ">$mimelog" or die "Couldn't open $mimelog for writing: $!"; | |||
614 | 1 | 25 | print MIME < | ||||
615 | Processed Problem | ||||||
616 | |||||||
617 | MIMELOG | ||||||
618 | } | ||||||
619 | } | ||||||
620 | 2 | 50 | 94 | chdir($dir_digest) || die "cannot chdir to $dir_digest $!"; | |||
621 | 2 | 58 | foreach my $digest_no ( sort keys %$in_out_ref ) { | ||||
622 | 5 | 11 | my (@newfile, %messages_sorted_by_thread); | ||||
623 | 5 | 8 | my $file = ${$in_out_ref}{$digest_no}[0]; | ||||
5 | 18 | ||||||
624 | 5 | 13 | my ($bigstr, $digest_head, $digest_bal, @digest_header, @digest_balance); | ||||
625 | 5 | 50 | 413 | open(IN, $file) || die "cannot open $file for reading: $!"; | |||
626 | { | ||||||
627 | 5 | 18 | local $/ = undef; | ||||
5 | 31 | ||||||
628 | 5 | 285 | $bigstr = |
||||
629 | } | ||||||
630 | 5 | 50 | 75 | close (IN) || die "can't close $file:$!"; | |||
631 | |||||||
632 | 5 | 50 | 366 | if ($bigstr =~ /(.*?)$post_topics_delimiter(.*)/s) { | |||
633 | 5 | 24 | $digest_head = $1; | ||||
634 | 5 | 71 | $digest_bal = $2; | ||||
635 | } else { | ||||||
636 | 0 | 0 | die "Couldn't extract: $!"; | ||||
637 | } | ||||||
638 | |||||||
639 | 5 | 91 | @digest_header = split(/\n/, $digest_head); | ||||
640 | |||||||
641 | 5 | 309 | @digest_balance = split(/$source_msg_delimiter/, $digest_bal); | ||||
642 | 5 | 15 | pop @digest_balance; | ||||
643 | 5 | 14 | $message_count += scalar(@digest_balance); | ||||
644 | |||||||
645 | # extract topics listing | ||||||
646 | 5 | 34 | $in_out_ref = _prepare_todays_topics( | ||||
647 | \@digest_header, | ||||||
648 | $topics_intro, | ||||||
649 | $delimiter_core, | ||||||
650 | $in_out_ref, | ||||||
651 | $digest_no, | ||||||
652 | ); | ||||||
653 | |||||||
654 | # process each message in a digest file | ||||||
655 | 5 | 20 | foreach my $el (@digest_balance) { | ||||
656 | # analyze message's header | ||||||
657 | 43 | 97 | my $header_ref = _analyze_message_header( | ||||
658 | $el, $config_in_ref, $config_out_ref | ||||||
659 | ); | ||||||
660 | # clean up message's title to eliminate characters | ||||||
661 | # forbidden as filenames on this system | ||||||
662 | 43 | 823 | my $thread = _clean_up_thread_title( | ||||
663 | 43 | 224 | ${$header_ref}{'subject'}, $subject_constant); | ||||
664 | 43 | 90 | my $full_id = $digest_no . '_' . ${$header_ref}{'message_no'}; | ||||
43 | 129 | ||||||
665 | 43 | 130 | my $thread_full_id = lc($thread . $full_id); | ||||
666 | |||||||
667 | # clean up message's text to eliminate MIME multiparts | ||||||
668 | 43 | 171 | my $text = _analyze_message_body( | ||||
669 | $el, $MIME_cleanup_flag, $full_id, $MIME_cleanup_log_flag); | ||||||
670 | |||||||
671 | |||||||
672 | # add info to hash from which output will be generated | ||||||
673 | 43 | 691 | $messages_sorted_by_thread{$thread_full_id} = [ | ||||
674 | $thread, | ||||||
675 | $full_id, | ||||||
676 | $header_ref, | ||||||
677 | $text, | ||||||
678 | ]; | ||||||
679 | } | ||||||
680 | # prepare output for this digest file | ||||||
681 | 5 | 63 | foreach ( sort keys %messages_sorted_by_thread ) { | ||||
682 | 43 | 150 | ($seen_ref, $output_ref) = _prepare_output_string( | ||||
683 | \%messages_sorted_by_thread, | ||||||
684 | $seen_ref, | ||||||
685 | $dir_threads, | ||||||
686 | $thread_msg_delimiter, | ||||||
687 | $output_ref, | ||||||
688 | \%optional_fields, # new in v1.67 | ||||||
689 | ); | ||||||
690 | } | ||||||
691 | } | ||||||
692 | 2 | 100 | 13 | if ($MIME_cleanup_log_flag) { | |||
693 | 1 | 50 | 106 | close MIME, ">$mimelog" or die "Couldn't close $mimelog after writing: $!"; | |||
694 | } | ||||||
695 | |||||||
696 | # If I am not archiving a particular digest, then I would never be calling a | ||||||
697 | # thread file for that digest back from the archive. | ||||||
698 | # Hence, I can simply append. | ||||||
699 | 2 | 50 | 50 | 39 | if ($archive_kill_trigger == 0 or $archive_kill_trigger = -1) { | ||
0 | |||||||
700 | 2 | 6 | foreach (keys %{$output_ref}) { | ||||
2 | 11 | ||||||
701 | 11 | 50 | 2364 | open(NOARCH, ">>$_") || die "cannot open $_ for appending: $!"; | |||
702 | 11 | 29 | print NOARCH ${$output_ref}{$_}; | ||||
11 | 82 | ||||||
703 | 11 | 50 | 748 | close(NOARCH) || die "can't close $_: $!"; | |||
704 | } | ||||||
705 | } elsif ($archive_kill_trigger == 1) { | ||||||
706 | 0 | 0 | my $fromarchive = 0; | ||||
707 | 0 | 0 | my $dearchfile = defined ${$config_out_ref}{'de_archived_today'} | ||||
0 | 0 | ||||||
708 | 0 | 0 | 0 | ? ${$config_out_ref}{'de_archived_today'} | |||
709 | 0 | 0 | : "${$config_out_ref}{'dir_digest'}/de_archived_today.txt"; | ||||
710 | 0 | 0 | my $dir_archive_top = ${$config_out_ref}{'dir_archive_top'}; | ||||
0 | 0 | ||||||
711 | 0 | 0 | my ($dearchstr); | ||||
712 | 0 | 0 | 0 | open DEARCH, ">$dearchfile" | |||
713 | or die "Couldn't open $dearchfile for writing: $!"; | ||||||
714 | 0 | 0 | print DEARCH 'De-archived today (', scalar(localtime), "):\n"; | ||||
715 | 0 | 0 | print DEARCH '-' x 44, "\n"; | ||||
716 | |||||||
717 | # 1st: See if recent thread exists; if so, open for appending | ||||||
718 | # 2nd: See if archive thread exists; | ||||||
719 | # if so, move from archive to current and open for appending | ||||||
720 | # [of course, if a thread has not been active for 14 days, | ||||||
721 | # we may wish to treat a message | ||||||
722 | # with the same name as a temporarily new thread and only append it | ||||||
723 | # when archiving once it's stale ] | ||||||
724 | # 3rd: If no recent/archive thread can be found, open new file for writing | ||||||
725 | |||||||
726 | 0 | 0 | foreach (keys %{$output_ref}) { | ||||
0 | 0 | ||||||
727 | 0 | 0 | my ($stub); | ||||
728 | 0 | 0 | 0 | if ($_ =~ m|[/\\]([^/\\]*)$|) { | |||
729 | 0 | 0 | $stub = $1; | ||||
730 | } else { | ||||||
731 | 0 | 0 | die "Couldn't extract stub from $_: $!"; | ||||
732 | } | ||||||
733 | 0 | 0 | 0 | if ($recent{$stub}) { | |||
734 | 0 | 0 | 0 | open(OUT2, ">>$_") || die "cannot open $_ for appending: $!"; | |||
735 | } else { | ||||||
736 | 0 | 0 | my ($initial, $newstub); | ||||
737 | 0 | 0 | $initial = lc(substr $stub, 0, 1); | ||||
738 | 0 | 0 | $newstub = "$dir_threads/$stub"; | ||||
739 | 0 | 0 | 0 | 0 | if ( ($initial =~ /[a-zA-Z]/) and | ||
0 | |||||||
740 | (-f "$dir_archive_top/$initial/$stub") ) { | ||||||
741 | 0 | 0 | 0 | rename("$dir_archive_top/$initial/$stub", $newstub ) or | |||
742 | die "Couldn't de-archive $stub: $!"; | ||||||
743 | 0 | 0 | print "De-archiving: $stub\n"; | ||||
744 | 0 | 0 | $dearchstr .= $stub . "\n"; | ||||
745 | 0 | 0 | $fromarchive++; | ||||
746 | 0 | 0 | 0 | open(OUT2, ">>$newstub") || | |||
747 | die "cannot open $newstub for appending: $!"; | ||||||
748 | } elsif (-f "$dir_archive_top/other/$stub") { | ||||||
749 | 0 | 0 | 0 | rename("$dir_archive_top/other/$stub", $newstub ) or | |||
750 | die "Couldn't de-archive $stub: $!"; | ||||||
751 | 0 | 0 | print "De-archiving: $stub\n"; | ||||
752 | 0 | 0 | $dearchstr .= $stub . "\n"; | ||||
753 | 0 | 0 | $fromarchive++; | ||||
754 | 0 | 0 | 0 | open(OUT2, ">>$newstub") || | |||
755 | die "cannot open $newstub for appending: $!"; | ||||||
756 | } else { | ||||||
757 | 0 | 0 | 0 | open(OUT2, ">$_") || die "cannot open $_ for writing: $!"; | |||
758 | } | ||||||
759 | } | ||||||
760 | 0 | 0 | print OUT2 ${$output_ref}{$_}; | ||||
0 | 0 | ||||||
761 | 0 | 0 | 0 | close(OUT2) || die "can't close $_: $!"; | |||
762 | } | ||||||
763 | 0 | 0 | print "$fromarchive files de-archived\n"; | ||||
764 | 0 | 0 | 0 | $fromarchive ? print DEARCH $dearchstr : print DEARCH "[None.]\n"; | |||
765 | 0 | 0 | 0 | close DEARCH or die "Couldn't close $dearchfile after writing: $!"; | |||
766 | } else { | ||||||
767 | 0 | 0 | die "Bad value for archive/kill trigger: $!"; | ||||
768 | } | ||||||
769 | 2 | 7 | return ($in_out_ref, $message_count, scalar(keys %{$seen_ref})); | ||||
2 | 91 | ||||||
770 | } | ||||||
771 | |||||||
772 | sub _strip_down_for_reply { | ||||||
773 | 2 | 2 | 20 | my ($config_in_ref, $config_out_ref, | |||
774 | $digest_verified, $dig_entry, $dir_for_reply) = @_; | ||||||
775 | 2 | 5 | my $MIME_cleanup_flag = ${$config_in_ref}{'MIME_cleanup_flag'}; | ||||
2 | 12 | ||||||
776 | 2 | 3 | my $post_topics_delimiter = ${$config_in_ref}{'post_topics_delimiter'}; | ||||
2 | 7 | ||||||
777 | 2 | 4 | my $source_msg_delimiter = ${$config_in_ref}{'source_msg_delimiter'}; | ||||
2 | 7 | ||||||
778 | 0 | 0 | my $subject_constant = ${$config_in_ref}{'subject_constant'} | ||||
2 | 20 | ||||||
779 | 2 | 50 | 12 | if (defined ${$config_in_ref}{'subject_constant'}); | |||
780 | 2 | 5 | my $dir_digest = ${$config_out_ref}{'dir_digest'}; | ||||
2 | 7 | ||||||
781 | |||||||
782 | 2 | 50 | 74 | chdir($dir_digest) || die "cannot chdir to $dir_digest $!"; | |||
783 | |||||||
784 | # slurp the digest file in, splitting on message delimiters | ||||||
785 | # so that each message is an array element | ||||||
786 | 2 | 5 | my ($bigstr, $digest_head, $digest_bal, @digest_header, @digest_balance); | ||||
787 | 2 | 50 | 393 | open(IN, $digest_verified) || | |||
788 | die "cannot open $digest_verified for reading: $!"; | ||||||
789 | { | ||||||
790 | 2 | 6 | local $/ = undef; | ||||
2 | 16 | ||||||
791 | 2 | 350 | $bigstr = |
||||
792 | } | ||||||
793 | 2 | 50 | 46 | close (IN) || die "can't close $digest_verified:$!"; | |||
794 | |||||||
795 | 2 | 50 | 110 | if ($bigstr =~ /(.*?)$post_topics_delimiter(.*)/s) { | |||
796 | 2 | 12 | $digest_head = $1; | ||||
797 | 2 | 48 | $digest_bal = $2; | ||||
798 | } else { | ||||||
799 | 0 | 0 | die "Couldn't extract: $!"; | ||||
800 | } | ||||||
801 | |||||||
802 | 2 | 167 | @digest_balance = split(/$source_msg_delimiter/, $digest_bal); | ||||
803 | 2 | 8 | pop @digest_balance; | ||||
804 | |||||||
805 | 2 | 10 | my ($el, $replyfile); | ||||
806 | 2 | 12 | while (defined ($el = shift @digest_balance)) { | ||||
807 | # analyze message's header | ||||||
808 | 17 | 44 | my $header_ref = | ||||
809 | _analyze_message_header($el, $config_in_ref, $config_out_ref); # v1.94 | ||||||
810 | 17 | 100 | 3166 | next unless (${$header_ref}{'message_no'} == $dig_entry); | |||
17 | 128 | ||||||
811 | |||||||
812 | # clean up message's title to eliminate characters | ||||||
813 | # forbidden as filenames on this system | ||||||
814 | 2 | 12 | my $thread = _clean_up_thread_title( | ||||
815 | 2 | 51 | ${$header_ref}{'subject'}, $subject_constant); | ||||
816 | 2 | 66 | $replyfile = "$dir_for_reply/${thread}.reply.txt"; | ||||
817 | |||||||
818 | # clean up message's text to eliminate MIME multiparts | ||||||
819 | 2 | 11 | my $text = _analyze_message_body($el, $MIME_cleanup_flag, undef, 0); | ||||
820 | 2 | 22 | my @lines = split(/\n/, $text); | ||||
821 | 2 | 4 | my ($replytext); | ||||
822 | 2 | 21 | foreach my $l (@lines) { | ||||
823 | 29 | 30 | chomp($l); | ||||
824 | 29 | 51 | $replytext .= '> ' . $l . "\n"; | ||||
825 | } | ||||||
826 | |||||||
827 | # print reply | ||||||
828 | 2 | 48 | my $old_fh = select(REPLY); | ||||
829 | 2 | 50 | 297 | open REPLY, ">$replyfile" or die "Couldn't open $replyfile: $!"; | |||
830 | 2 | 50 | 4 | if (defined ${$header_ref}{'reply_to'}) { | |||
2 | 100 | 11 | |||||
2 | 11 | ||||||
831 | 0 | 0 | print "Reply-To:\n"; | ||||
832 | 0 | 0 | print "${$header_ref}{'reply_to'}\n\n"; | ||||
0 | 0 | ||||||
833 | } elsif (defined ${$header_ref}{'to'}) { | ||||||
834 | 1 | 9 | print "To:\n"; | ||||
835 | 1 | 2 | print "${$header_ref}{'to'}\n\n"; | ||||
1 | 4 | ||||||
836 | } | ||||||
837 | 2 | 50 | 3 | if (defined ${$header_ref}{'subject'}) { | |||
2 | 10 | ||||||
838 | 2 | 5 | my ($subject_clean); | ||||
839 | 2 | 50 | 3 | if (${$header_ref}{'subject'} =~ | |||
2 | 18 | ||||||
840 | /^(?:(Re2?|RE2?|re2?|FWD?|Fwd?|AW):?\s+)*(.*)$/) { | ||||||
841 | 2 | 128 | $subject_clean = $2; | ||||
842 | } else { | ||||||
843 | 0 | 0 | $subject_clean = ${$header_ref}{'subject'}; | ||||
0 | 0 | ||||||
844 | } | ||||||
845 | 2 | 8 | print "Subject:\n"; | ||||
846 | 2 | 6 | print "$subject_clean\n\n"; | ||||
847 | } | ||||||
848 | 2 | 4 | print "On ${$header_ref}{'date'}, ${$header_ref}{'from'} wrote:\n\n"; | ||||
2 | 7 | ||||||
2 | 6 | ||||||
849 | 2 | 5 | print $replytext; | ||||
850 | 2 | 3 | print "\n"; | ||||
851 | 2 | 50 | 106622 | close REPLY or die "Couldn't close $replyfile: $!"; | |||
852 | 2 | 23 | select $old_fh; | ||||
853 | 2 | 22 | last; | ||||
854 | } | ||||||
855 | 2 | 18 | return $replyfile; | ||||
856 | } | ||||||
857 | |||||||
858 | sub _prepare_todays_topics { | ||||||
859 | 5 | 5 | 15 | my ($digest_header_ref, $topics_intro, | |||
860 | $delimiter_core, $in_out_ref, $digest_no) = @_; | ||||||
861 | 5 | 10 | my $counter = 0; | ||||
862 | 5 | 17 | my @todays_topics = (); # empty out @todays_topics | ||||
863 | 5 | 9 | foreach ( @{$digest_header_ref} ) { | ||||
5 | 19 | ||||||
864 | 138 | 100 | 1257 | if (m/^$topics_intro/) { # digest-specific | |||
865 | 5 | 7 | $counter = 1; | ||||
866 | } | ||||||
867 | 138 | 100 | 261 | if ($counter == 1) { | |||
868 | 79 | 100 | 1058 | if (m/^$topics_intro|^$/) { next; } # digest-specific | |||
10 | 50 | 27 | |||||
869 | elsif ($_ !~ m/$delimiter_core/) | ||||||
870 | 69 | 283 | { push (@todays_topics, $_); } | ||||
871 | 0 | 0 | else { last; } | ||||
872 | } | ||||||
873 | # ${%$in_out_ref}{$digest_no}[1] = [ @todays_topics ]; | ||||||
874 | # # Note: this is 1st point at which ${%$in_out_ref}{$digest_no}[1] | ||||||
875 | 128 | 410 | ${$in_out_ref}{$digest_no}[1] = [ @todays_topics ]; | ||||
128 | 767 | ||||||
876 | # Note: this is 1st point at which ${$in_out_ref}{$digest_no}[1] | ||||||
877 | # gets meaningful content | ||||||
878 | } | ||||||
879 | 5 | 124 | return $in_out_ref; | ||||
880 | } | ||||||
881 | |||||||
882 | sub _analyze_message_header { | ||||||
883 | 60 | 60 | 106 | my ($el, $config_in_ref, $config_out_ref) = @_; # v1.94 | |||
884 | 60 | 848 | my @all = split(/\n/, $el); | ||||
885 | 60 | 227 | my ($hl, @lines); | ||||
886 | 60 | 160 | while (defined ($hl = shift(@all)) ) { | ||||
887 | 371 | 100 | 5197 | last if $hl =~ /^\s*$/; | |||
888 | 311 | 1359 | push(@lines, $hl); | ||||
889 | } | ||||||
890 | 60 | 69 | my (%header, %init, $last_analyzed); | ||||
891 | 60 | 66 | foreach my $key (keys %{$config_in_ref}) { | ||||
60 | 276 | ||||||
892 | 840 | 100 | 4751 | next unless ($key =~ /_style_flag$/); | |||
893 | 480 | 2515 | my ($shortkey); | ||||
894 | 480 | 50 | 1905 | if ($key =~ /(.*)_style_flag$/) { | |||
895 | 480 | 854 | $shortkey = $1; | ||||
896 | } else { | ||||||
897 | 0 | 0 | warn "Problem in analyzing message header: $!"; | ||||
898 | } | ||||||
899 | 480 | 100 | 537 | $init{$shortkey}++ unless defined ${$config_in_ref}{$key}; | |||
480 | 1915 | ||||||
900 | } | ||||||
901 | 60 | 697 | foreach (@lines) { | ||||
902 | 311 | 575 | chomp; | ||||
903 | 311 | 624 | my ($matched); | ||||
904 | 311 | 100 | 1224 | unless ($init{'message'}) { | |||
905 | 60 | 50 | 64 | if (/${$config_in_ref}{'message_style_flag'}/) { | |||
60 | 566 | ||||||
906 | 60 | 6434 | $header{'message_no'} = | ||||
907 | 60 | 66 | eval(${$config_out_ref}{'output_id_format'}); | ||||
908 | 60 | 267 | $init{'message'}++; | ||||
909 | 60 | 82 | $last_analyzed = 'message'; | ||||
910 | 60 | 88 | $matched++; | ||||
911 | } | ||||||
912 | } | ||||||
913 | 311 | 100 | 724 | unless ($init{'from'}) { | |||
914 | 173 | 100 | 268 | if (/${$config_in_ref}{'from_style_flag'}/) { | |||
173 | 1084 | ||||||
915 | 60 | 231 | $header{'from'} = $1; | ||||
916 | 60 | 94 | $init{'from'}++; | ||||
917 | 60 | 71 | $last_analyzed = 'from'; | ||||
918 | 60 | 223 | $matched++; | ||||
919 | } | ||||||
920 | } | ||||||
921 | 311 | 100 | 789 | unless ($init{'subject'}) { | |||
922 | 260 | 100 | 323 | if (/${$config_in_ref}{'subject_style_flag'}/) { | |||
260 | 1190 | ||||||
923 | 60 | 234 | $header{'subject'} = $1; | ||||
924 | 60 | 246 | $init{'subject'}++; | ||||
925 | 60 | 71 | $last_analyzed = 'subject'; | ||||
926 | 60 | 77 | $matched++; | ||||
927 | } | ||||||
928 | } | ||||||
929 | 311 | 100 | 18910 | unless ($init{'to'}) { | |||
930 | 125 | 100 | 2369 | if (/${$config_in_ref}{'to_style_flag'}/) { | |||
125 | 425 | ||||||
931 | 31 | 86 | $header{'to'} = $1; | ||||
932 | 31 | 43 | $init{'to'}++; | ||||
933 | 31 | 47 | $last_analyzed = 'to'; | ||||
934 | 31 | 34 | $matched++; | ||||
935 | } | ||||||
936 | } | ||||||
937 | 311 | 100 | 719 | unless ($init{'reply_to'}) { | |||
938 | 163 | 100 | 155 | if (/${$config_in_ref}{'reply_to_style_flag'}/) { | |||
163 | 491 | ||||||
939 | 8 | 23 | $header{'reply_to'} = $1; | ||||
940 | 8 | 14 | $init{'reply_to'}++; | ||||
941 | 8 | 10 | $last_analyzed = 'reply_to'; | ||||
942 | 8 | 10 | $matched++; | ||||
943 | } | ||||||
944 | } | ||||||
945 | 311 | 100 | 711 | unless ($init{'cc'}) { | |||
946 | 169 | 100 | 154 | if (/${$config_in_ref}{'cc_style_flag'}/i) { | |||
169 | 573 | ||||||
947 | 17 | 58 | $header{'cc'} = $1; | ||||
948 | 17 | 24 | $init{'cc'}++; | ||||
949 | 17 | 26 | $last_analyzed = 'cc'; | ||||
950 | 17 | 25 | $matched++; | ||||
951 | } | ||||||
952 | } | ||||||
953 | 311 | 100 | 2106 | unless ($init{'date'}) { | |||
954 | 195 | 100 | 577 | if (/${$config_in_ref}{'date_style_flag'}/) { | |||
195 | 905 | ||||||
955 | 60 | 204 | $header{'date'} = $1; | ||||
956 | 60 | 99 | $init{'date'}++; | ||||
957 | 60 | 73 | $last_analyzed = 'date'; | ||||
958 | 60 | 143 | $matched++; | ||||
959 | } | ||||||
960 | } | ||||||
961 | 311 | 100 | 641 | unless ($init{'org'}) { | |||
962 | 195 | 50 | 175 | if (/${$config_in_ref}{'org_style_flag'}/) { | |||
195 | 543 | ||||||
963 | 0 | 0 | $header{'org'} = $1; | ||||
964 | 0 | 0 | $init{'org'}++; | ||||
965 | 0 | 0 | $last_analyzed = 'org'; | ||||
966 | 0 | 0 | $matched++; | ||||
967 | } | ||||||
968 | } | ||||||
969 | 311 | 100 | 773 | unless ($matched) { | |||
970 | 15 | 50 | 41 | if ($last_analyzed ne 'subject') { | |||
971 | 15 | 58 | $_ =~ s/^\s+//; | ||||
972 | 15 | 51 | $header{$last_analyzed} .= "\n" . ' ' x 14 . $_; | ||||
973 | } | ||||||
974 | } | ||||||
975 | } | ||||||
976 | 60 | 670 | return \%header; | ||||
977 | } | ||||||
978 | |||||||
979 | sub _clean_up_thread_title { | ||||||
980 | 45 | 45 | 72 | my $subj = shift; | |||
981 | 45 | 50 | 101 | my $subject_constant = shift if defined $_[0]; | |||
982 | 45 | 87 | my ($thread, @thread); | ||||
983 | 45 | 50 | 160 | $subj = "No subject" unless $subj; #messages on some lists can be subject-less | |||
984 | |||||||
985 | 45 | 372 | $subj =~ | ||||
986 | /^(?:(Re\d?|RE\d?|re\d?|Re\[\d?\]|RE\[\d?\]|re\[\d?\]|FWD?|Fwd?|AW):?\s+)*(.*)$/; | ||||||
987 | 45 | 128 | $thread = $2; | ||||
988 | 45 | 50 | 33 | 159 | if (defined $subject_constant and $thread =~ /^$subject_constant\s+(.*)/) { | ||
989 | 0 | 0 | $thread = $1; | ||||
990 | } | ||||||
991 | 45 | 445 | @thread = split(//, $thread); | ||||
992 | 45 | 50 | 568 | if ($^O eq 'MSWin32') { | |||
993 | 0 | 0 | $thread = join("", (grep m/[^*|\\:"<>?\/]/, @thread) ); #" | ||||
994 | } | ||||||
995 | 45 | 50 | 224 | if ($unix{$^O}) { # v2.08 | |||
996 | 45 | 770 | $thread = join("", (grep m/[^\/]/, @thread) ); | ||||
997 | } | ||||||
998 | # squish repeated periods anywhere in file name | ||||||
999 | 45 | 80 | $thread =~ tr/././s; | ||||
1000 | # Win32 allows periods in file names, | ||||||
1001 | # but I don't want any periods or spaces immediately before '.thr.txt' | ||||||
1002 | # or at the beginning of the file name | ||||||
1003 | 45 | 206 | $thread =~ s/[.\s]+$//; | ||||
1004 | 45 | 95 | $thread =~ s/^[.\s]+//; | ||||
1005 | # squish repeated whitespace anywhere in file name (EPP, Item 20, p. 76) | ||||||
1006 | 45 | 65 | $thread =~ tr/ \n\r\t\f/ /s; | ||||
1007 | 45 | 50 | 119 | $thread = '[Illegal subject]' unless $thread; | |||
1008 | 45 | 437 | return $thread; | ||||
1009 | } | ||||||
1010 | |||||||
1011 | sub _analyze_message_body { | ||||||
1012 | 45 | 45 | 89 | my ($el, $MIME_cleanup_flag, $postid, $MIME_cleanup_log_flag) = @_; | |||
1013 | 45 | 499 | my @chunks = split(/\n{2,}/, $el); | ||||
1014 | 45 | 100 | 256 | return join("\n\n", @chunks[1 .. ($#chunks)] ) | |||
1015 | unless $MIME_cleanup_flag; | ||||||
1016 | 18 | 23 | my (@nextparts); | ||||
1017 | 18 | 50 | 104 | if ($chunks[1] =~ /Content-Type:\smultipart\/alternative/o) { | |||
50 | |||||||
50 | |||||||
1018 | # New in v1.84 1/23/04 | ||||||
1019 | 0 | 0 | for (my $i=1; $i<=$#chunks; $i++) { | ||||
1020 | 0 | 0 | 0 | push(@nextparts, $i) if ($chunks[$i] =~ /Content-Type:/); | |||
1021 | } | ||||||
1022 | 0 | 0 | 0 | if (@nextparts == 4) { | |||
1023 | 0 | 0 | 0 | print MIME "$postid CASE I\n" if $MIME_cleanup_log_flag; | |||
1024 | 0 | 0 | splice @chunks, $nextparts[2], $nextparts[3] - $nextparts[2] + 1; | ||||
1025 | 0 | 0 | splice @chunks, 1, 2; | ||||
1026 | 0 | 0 | return join("\n\n", @chunks[1 .. ($#chunks-1)] ); | ||||
1027 | } else { | ||||||
1028 | 0 | 0 | 0 | print MIME ' ' x 30, | |||
1029 | "$postid; count: ", sprintf("%3d", scalar(@nextparts)), " CASE I\n" | ||||||
1030 | if $MIME_cleanup_log_flag; | ||||||
1031 | 0 | 0 | return join("\n\n", @chunks ); | ||||
1032 | } | ||||||
1033 | } elsif ($chunks[1] =~ /--Apple-Mail-/o) { # New in v1.85 1/23/04 | ||||||
1034 | 0 | 0 | for (my $i=1; $i<=$#chunks; $i++) { | ||||
1035 | 0 | 0 | 0 | push(@nextparts, $i) if ($chunks[$i] =~ /--Apple-Mail-/o); | |||
1036 | } | ||||||
1037 | 0 | 0 | 0 | 0 | if (@nextparts == 3 or @nextparts == 4) { | ||
1038 | 0 | 0 | 0 | print MIME "$postid CASE J\n" if $MIME_cleanup_log_flag; | |||
1039 | 0 | 0 | my ($fragment); | ||||
1040 | 0 | 0 | 0 | if (@nextparts == 4) { | |||
1041 | 0 | 0 | splice @chunks, $nextparts[-1], 1; | ||||
1042 | } | ||||||
1043 | 0 | 0 | 0 | if ($chunks[$nextparts[1]] =~ /(.*?)--Apple-Mail-/os) { | |||
1044 | 0 | 0 | $fragment = $1; | ||||
1045 | } | ||||||
1046 | 0 | 0 | splice @chunks, $nextparts[1]; | ||||
1047 | 0 | 0 | 0 | push @chunks, $fragment if ($fragment); | |||
1048 | 0 | 0 | splice @chunks, $nextparts[0], 1; | ||||
1049 | 0 | 0 | return join("\n\n", @chunks[1 .. $#chunks] ); | ||||
1050 | } else { | ||||||
1051 | 0 | 0 | 0 | print MIME ' ' x 30, "$postid; count: ", | |||
1052 | sprintf("%3d", scalar(@nextparts)), " CASE J\n" | ||||||
1053 | if $MIME_cleanup_log_flag; | ||||||
1054 | 0 | 0 | return join("\n\n", @chunks ); | ||||
1055 | } | ||||||
1056 | } elsif ($chunks[1] !~ /^This.+?message.+?MIME format/o) { | ||||||
1057 | 18 | 153 | return join("\n\n", @chunks[1 .. ($#chunks)] ); | ||||
1058 | } else { | ||||||
1059 | 0 | 0 | 0 | if ($chunks[1] =~ /--=_alternative/) { | |||
0 | |||||||
1060 | 0 | 0 | for (my $i=1; $i<=$#chunks; $i++) { | ||||
1061 | 0 | 0 | 0 | push(@nextparts, $i) if ($chunks[$i] =~ /--=_alternative/); | |||
1062 | } | ||||||
1063 | 0 | 0 | 0 | if (@nextparts == 3) { | |||
1064 | 0 | 0 | 0 | print MIME "$postid CASE A\n" if $MIME_cleanup_log_flag; | |||
1065 | 0 | 0 | splice @chunks, | ||||
1066 | $nextparts[1] + 1, $nextparts[2] - $nextparts[1] + 1; | ||||||
1067 | 0 | 0 | $nextparts[1] =~ /^(.*\n)--=_alternative/s; | ||||
1068 | 0 | 0 | my $fragment = $1; | ||||
1069 | 0 | 0 | $chunks[$nextparts[1]] = $fragment; | ||||
1070 | 0 | 0 | splice @chunks, 1, 2; | ||||
1071 | 0 | 0 | return join("\n\n", @chunks[1 .. ($#chunks-1)] ); | ||||
1072 | } else { | ||||||
1073 | 0 | 0 | 0 | print MIME ' ' x 30, "$postid; count: ", | |||
1074 | sprintf("%3d", scalar(@nextparts)), " CASE B\n" | ||||||
1075 | if $MIME_cleanup_log_flag; | ||||||
1076 | 0 | 0 | return join("\n\n", @chunks ); | ||||
1077 | } | ||||||
1078 | } elsif ($chunks[1] =~ /cryptographically\ssigned/) { | ||||||
1079 | 0 | 0 | 0 | print MIME "$postid CASE H\n" if $MIME_cleanup_log_flag; | |||
1080 | 0 | 0 | splice @chunks, -3, 2; | ||||
1081 | 0 | 0 | splice @chunks, 1, 2; | ||||
1082 | 0 | 0 | return join("\n\n", @chunks[1 .. ($#chunks-1)] ); | ||||
1083 | } else { | ||||||
1084 | 0 | 0 | for (my $i=2; $i<=$#chunks; $i++) { | ||||
1085 | 0 | 0 | 0 | push(@nextparts, $i) if ( | |||
1086 | $chunks[$i] =~ /-{4,6}[_\s]?=_NextPart| | ||||||
1087 | --Boundary_| | ||||||
1088 | --------------InterScan_NT_MIME_Boundary/x | ||||||
1089 | ); | ||||||
1090 | } | ||||||
1091 | 0 | 0 | 0 | 0 | if (@nextparts == 3) { | ||
0 | 0 | ||||||
0 | |||||||
0 | |||||||
1092 | 0 | 0 | 0 | print MIME "$postid CASE C\n" if $MIME_cleanup_log_flag; | |||
1093 | 0 | 0 | splice @chunks, $nextparts[1], $nextparts[2] - $nextparts[1] + 1; | ||||
1094 | 0 | 0 | splice @chunks, 1, 2; | ||||
1095 | 0 | 0 | return join("\n\n", @chunks[1 .. ($#chunks-1)] ); | ||||
1096 | } elsif (@nextparts == 1) { | ||||||
1097 | 0 | 0 | 0 | print MIME "$postid CASE D\n" if $MIME_cleanup_log_flag; | |||
1098 | 0 | 0 | splice @chunks, 1, 1; | ||||
1099 | 0 | 0 | return join("\n\n", @chunks[1 .. ($#chunks-1)] ); | ||||
1100 | } elsif (@nextparts == 5 or @nextparts == 6) { | ||||||
1101 | 0 | 0 | 0 | print MIME "$postid CASE E\n" if $MIME_cleanup_log_flag; | |||
1102 | 0 | 0 | splice @chunks, $nextparts[2], $nextparts[-1] - $nextparts[2] + 1; | ||||
1103 | 0 | 0 | splice @chunks, 1, 3; | ||||
1104 | 0 | 0 | return join("\n\n", @chunks[1 .. ($#chunks-1)] ); | ||||
1105 | } elsif (@nextparts == 7 or @nextparts == 8) { | ||||||
1106 | 0 | 0 | 0 | print MIME "$postid CASE F\n" if $MIME_cleanup_log_flag; | |||
1107 | 0 | 0 | splice @chunks, $nextparts[3], $nextparts[-1] - $nextparts[3] + 1; | ||||
1108 | 0 | 0 | splice @chunks, 1, 3; | ||||
1109 | 0 | 0 | return join("\n\n", @chunks[1 .. ($#chunks-1)] ); | ||||
1110 | } else { | ||||||
1111 | 0 | 0 | 0 | print MIME ' ' x 30, "$postid; count: ", | |||
1112 | sprintf("%3d", scalar(@nextparts)), " CASE G\n" | ||||||
1113 | if $MIME_cleanup_log_flag; | ||||||
1114 | 0 | 0 | return join("\n\n", @chunks[1 .. ($#chunks-1)] ); | ||||
1115 | } | ||||||
1116 | } | ||||||
1117 | } | ||||||
1118 | } | ||||||
1119 | |||||||
1120 | sub _prepare_output_string { | ||||||
1121 | 43 | 43 | 83 | my ($threads_hash_ref, $seen_ref, $dir_threads, $thread_msg_delimiter, | |||
1122 | $output_ref, $optional_fields_ref) = @_; | ||||||
1123 | 43 | 56 | my %messages = %{$threads_hash_ref}; | ||||
43 | 3101 | ||||||
1124 | 43 | 91 | my %seen = %{$seen_ref}; | ||||
43 | 247 | ||||||
1125 | 43 | 71 | my (%output, %opt_fields); | ||||
1126 | 43 | 100 | 102 | %output = %{$output_ref} if defined $output_ref; | |||
41 | 177 | ||||||
1127 | 43 | 62 | %opt_fields = %{$optional_fields_ref}; | ||||
43 | 86 | ||||||
1128 | 43 | 52 | my ($pathsep, $out, $lc_out, $outstr); | ||||
1129 | 43 | 50 | 428 | $pathsep = ($^O eq 'MSWin32') ? "\\" : '/'; | |||
1130 | 43 | 130 | $out = $dir_threads . $pathsep . $messages{$_}[0] . '.thr.txt'; | ||||
1131 | 43 | 117 | $lc_out = lc($out); | ||||
1132 | 43 | 94 | $seen{$lc_out}++; | ||||
1133 | 43 | 219 | $outstr = "Thread: $messages{$_}[0]\n"; | ||||
1134 | 43 | 84 | $outstr .= "Message: $messages{$_}[1]\n"; | ||||
1135 | 43 | 110 | $outstr .= "From: $messages{$_}[2]{'from'}\n"; | ||||
1136 | 43 | 142 | foreach my $i (sort keys %opt_fields) { | ||||
1137 | 0 | 0 | 0 | next unless (defined $messages{$_}[2]{$opt_fields{$i}[0]}); | |||
1138 | 0 | 0 | my $space = 13 - length($opt_fields{$i}[1]); | ||||
1139 | 0 | 0 | $outstr .= $opt_fields{$i}[1] . ':' . ' ' x $space . | ||||
1140 | "$messages{$_}[2]{$opt_fields{$i}[0]}\n"; | ||||||
1141 | } | ||||||
1142 | 43 | 195 | $outstr .= 'Text:' . "\n\n" . $messages{$_}[3] . "\n"; | ||||
1143 | 43 | 85 | $outstr .= "\n"; | ||||
1144 | 43 | 50 | 439 | $outstr .= "$thread_msg_delimiter" | |||
1145 | unless (! defined $thread_msg_delimiter); | ||||||
1146 | 43 | 244 | $output{$out} .= $outstr; | ||||
1147 | 43 | 683 | return \%seen, \%output; | ||||
1148 | } | ||||||
1149 | |||||||
1150 | sub _update_all_topics { | ||||||
1151 | 2 | 2 | 7 | my ($choice, $config_out_ref, $in_out_ref) = @_; | |||
1152 | 2 | 4 | my $title = ${$config_out_ref}{'title'}; | ||||
2 | 9 | ||||||
1153 | 2 | 12 | my $topicsfile = defined ${$config_out_ref}{'todays_topics'} # v1.96 | ||||
2 | 6 | ||||||
1154 | 2 | 50 | 5 | ? ${$config_out_ref}{'todays_topics'} | |||
1155 | 0 | 0 | : "${$config_out_ref}{'dir_digest'}/todays_topics.txt"; | ||||
1156 | 2 | 5 | my ($topic, $topicstring); | ||||
1157 | 2 | 50 | 12 | if ($choice eq 'ALL') { | |||
1158 | 0 | 0 | $topicstring = "$title Digest: Today's Topics\n"; | ||||
1159 | 0 | 0 | foreach ( sort keys %$in_out_ref ) { | ||||
1160 | 0 | 0 | $topicstring .= "\n${$in_out_ref}{$_}[0]\n"; | ||||
0 | 0 | ||||||
1161 | 0 | 0 | foreach $topic ( @{${$in_out_ref}{$_}[1]} ) { | ||||
0 | 0 | ||||||
0 | 0 | ||||||
1162 | 0 | 0 | $topicstring .= "$topic\n"; | ||||
1163 | } | ||||||
1164 | } | ||||||
1165 | 0 | 0 | 0 | open(TOPICS, ">$topicsfile") | |||
1166 | || die "cannot open $topicsfile for writing: $!"; | ||||||
1167 | 0 | 0 | print TOPICS $topicstring; | ||||
1168 | 0 | 0 | 0 | close(TOPICS) || die "can't close $topicsfile:$!"; | |||
1169 | } else { | ||||||
1170 | 2 | 14 | $topicstring = ''; | ||||
1171 | 2 | 17 | foreach ( sort keys %$in_out_ref ) { | ||||
1172 | 5 | 9 | $topicstring .= "\n${$in_out_ref}{$_}[0]\n"; | ||||
5 | 25 | ||||||
1173 | 5 | 8 | foreach $topic ( @{${$in_out_ref}{$_}[1]} ) { | ||||
5 | 6 | ||||||
5 | 19 | ||||||
1174 | 69 | 121 | $topicstring .= "$topic\n"; | ||||
1175 | } | ||||||
1176 | } | ||||||
1177 | 2 | 50 | 115 | open(TOPICS, ">>$topicsfile") | |||
1178 | || die "cannot open $topicsfile for appending: $!"; | ||||||
1179 | 2 | 13 | print TOPICS $topicstring; | ||||
1180 | 2 | 50 | 87 | close(TOPICS) || die "can't close $topicsfile:$!"; | |||
1181 | } | ||||||
1182 | } | ||||||
1183 | |||||||
1184 | sub _print_results { | ||||||
1185 | 2 | 2 | 6 | my ($total_digests_processed, $message_count, | |||
1186 | $config_out_ref, $thread_count) = @_; | ||||||
1187 | 2 | 12 | print < | ||||
1188 | |||||||
1189 | |||||||
1190 | RESULTS | ||||||
1191 | |||||||
1192 | 2 | 3097 | Digests processed:\t\t$total_digests_processed | ||||
1193 | Messages processed:\t\t$message_count | ||||||
1194 | Threads directory:\t\t${$config_out_ref}{'dir_threads'} | ||||||
1195 | Threads created/modified:\t$thread_count | ||||||
1196 | XQ19 | ||||||
1197 | } | ||||||
1198 | |||||||
1199 | sub _verify_date { | ||||||
1200 | 2 | 2 | 6 | my $dateref = shift; | |||
1201 | 2 | 21 | die "Incorrect date specification: $!" | ||||
1202 | unless ( | ||||||
1203 | 2 | 20 | (exists ${$dateref}{'year'}) && | ||||
1204 | 2 | 12 | (exists ${$dateref}{'month'}) && | ||||
1205 | 2 | 50 | 33 | 4 | (exists ${$dateref}{'day'}) | ||
33 | |||||||
1206 | ); | ||||||
1207 | 0 | 0 | die "${$dateref}{'year'} is incorrect year specification: $!" | ||||
2 | 13 | ||||||
1208 | 2 | 50 | 4 | unless (1900 <= ${$dateref}{'year'}); | |||
1209 | 0 | 0 | die "${$dateref}{'month'} is incorrect month specification: $!" | ||||
2 | 17 | ||||||
1210 | unless ( | ||||||
1211 | 2 | 21 | (1 <= ${$dateref}{'month'}) && | ||||
1212 | 2 | 50 | 33 | 4 | (${$dateref}{'month'} <= 12) | ||
1213 | ); | ||||||
1214 | 0 | 0 | die "${$dateref}{'day'} is incorrect day of month specification: $!" | ||||
2 | 20 | ||||||
1215 | unless ( | ||||||
1216 | ( | ||||||
1217 | 2 | 12 | ${$dateref}{'day'} >= 1 and | ||||
1218 | 0 | 0 | ${$dateref}{'day'} <= 28 | ||||
1219 | ) | ||||||
1220 | || | ||||||
1221 | ( | ||||||
1222 | 0 | 0 | $month31{${$dateref}{'month'}} and | ||||
1223 | 0 | 0 | ${$dateref}{'day'} >= 29 and | ||||
1224 | 0 | 0 | ${$dateref}{'day'} <= 31 | ||||
1225 | ) | ||||||
1226 | || | ||||||
1227 | ( | ||||||
1228 | 0 | 0 | $month30{${$dateref}{'month'}} and | ||||
1229 | 0 | 0 | ${$dateref}{'day'} >= 29 and | ||||
1230 | 0 | 0 | ${$dateref}{'day'} <= 30 | ||||
1231 | ) | ||||||
1232 | || | ||||||
1233 | ( | ||||||
1234 | 0 | 0 | ${$dateref}{'month'} == 2 and | ||||
1235 | ${$dateref}{'day'} == 29 and | ||||||
1236 | ( | ||||||
1237 | ${$dateref}{'year'} % 400 == 0 or | ||||||
1238 | ( | ||||||
1239 | ${$dateref}{'year'} % 100 != 0 and | ||||||
1240 | 2 | 0 | 33 | 4 | ${$dateref}{'year'} % 4 == 0 | ||
0 | |||||||
0 | |||||||
33 | |||||||
0 | |||||||
0 | |||||||
0 | |||||||
0 | |||||||
0 | |||||||
0 | |||||||
0 | |||||||
1241 | ) | ||||||
1242 | ) | ||||||
1243 | ) | ||||||
1244 | ); | ||||||
1245 | 2 | 5 | return timelocal( | ||||
1246 | 0, 0, 0, | ||||||
1247 | 2 | 6 | ${$dateref}{'day'}, | ||||
1248 | 2 | 14 | ${$dateref}{'month'} - 1, | ||||
1249 | 2 | 5 | ${$dateref}{'year'} | ||||
1250 | ); | ||||||
1251 | } | ||||||
1252 | |||||||
1253 | sub _get_array_of_messages { | ||||||
1254 | 8 | 8 | 23 | my ($in, $delimiter) = @_; | |||
1255 | 8 | 11 | my ($fh, $bigstr); | ||||
1256 | 8 | 50 | 486 | open $fh, $in or die "Couldn't open $in for reading: $!"; | |||
1257 | { | ||||||
1258 | 8 | 16 | local $/ = undef; | ||||
8 | 31 | ||||||
1259 | 8 | 215 | $bigstr = <$fh>; | ||||
1260 | } | ||||||
1261 | 8 | 50 | 232 | close $fh or die "Couldn't close $in after reading: $!"; | |||
1262 | 8 | 135 | my @messages = split(/$delimiter/, $bigstr); | ||||
1263 | 8 | 52 | return \@messages; | ||||
1264 | } | ||||||
1265 | |||||||
1266 | 1; | ||||||
1267 | |||||||
1268 | ############################ DOCUMENTATION ##################################### | ||||||
1269 | |||||||
1270 | =head1 NAME | ||||||
1271 | |||||||
1272 | Mail::Digest::Tools - Tools for digest versions of mailing lists | ||||||
1273 | |||||||
1274 | =head1 VERSION | ||||||
1275 | |||||||
1276 | This document refers to version 2.12 of digest.pl, released May 14, 2011. | ||||||
1277 | |||||||
1278 | =head1 SYNOPSIS | ||||||
1279 | |||||||
1280 | use Mail::Digest::Tools qw( | ||||||
1281 | process_new_digests | ||||||
1282 | reprocess_ALL_digests | ||||||
1283 | reply_to_digest_message | ||||||
1284 | repair_message_order | ||||||
1285 | consolidate_threads_multiple | ||||||
1286 | consolidate_threads_single | ||||||
1287 | delete_deletables | ||||||
1288 | ); | ||||||
1289 | |||||||
1290 | C<%config_in> and C<%config_out> are two configuration hashes whose setup | ||||||
1291 | is discussed in detail below. | ||||||
1292 | |||||||
1293 | process_new_digests(\%config_in, \%config_out); | ||||||
1294 | |||||||
1295 | reprocess_ALL_digests(\%config_in, \%config_out); | ||||||
1296 | |||||||
1297 | $full_reply_file = reply_to_digest_message( | ||||||
1298 | \%config_in, | ||||||
1299 | \%config_out, | ||||||
1300 | $digest_number, | ||||||
1301 | $digest_entry, | ||||||
1302 | $directory_for_reply, | ||||||
1303 | ); | ||||||
1304 | |||||||
1305 | repair_message_order( | ||||||
1306 | \%config_in, | ||||||
1307 | \%config_out, | ||||||
1308 | { | ||||||
1309 | year => 2004, | ||||||
1310 | month => 01, | ||||||
1311 | day => 27, | ||||||
1312 | } | ||||||
1313 | ); | ||||||
1314 | |||||||
1315 | consolidate_threads_multiple( | ||||||
1316 | \%config_in, | ||||||
1317 | \%config_out, | ||||||
1318 | $first_common_letters, # optional integer argument; defaults to 20 | ||||||
1319 | ); | ||||||
1320 | |||||||
1321 | consolidate_threads_single( | ||||||
1322 | \%config_in, | ||||||
1323 | \%config_out, | ||||||
1324 | [ | ||||||
1325 | 'first_dummy_file_for_consolidation.thr.txt', | ||||||
1326 | 'second_dummy_file_for_consolidation.thr.txt', | ||||||
1327 | ], | ||||||
1328 | ); | ||||||
1329 | |||||||
1330 | delete_deletables(\%config_out); | ||||||
1331 | |||||||
1332 | =head1 DESCRIPTION | ||||||
1333 | |||||||
1334 | Mail::Digest::Tools provides useful tools for processing mail which an | ||||||
1335 | individual receives in a 'daily digest' version from a mailing list. | ||||||
1336 | Digest versions of mailing lists are provided by a variety of mail processing | ||||||
1337 | programs and by a variety of list hosts. Within the Perl community, digest | ||||||
1338 | versions of mailing lists are offered by such sponsors as Active State, | ||||||
1339 | Sourceforge, Yahoo! Groups and London.pm. However, you do not have to be | ||||||
1340 | interested in Perl to make use of Mail::Digest::Tools. Mail from I |
||||||
1341 | the thousands of Yahoo! Groups, for example, may be processed with this module. | ||||||
1342 | |||||||
1343 | If, when you receive e-mail from the digest version of a mailing list, you | ||||||
1344 | simply read the digest in an e-mail client and then discard it, you may stop | ||||||
1345 | reading here. If, however, you wish to read or store such mail by subject, | ||||||
1346 | read on. As printed in a normal web browser, this document contains 40 | ||||||
1347 | pages of documentation. You are urged to print this documentation out and | ||||||
1348 | study it before using this module. | ||||||
1349 | |||||||
1350 | To understand how to use Mail::Digest::Tools, we will first take a look at a | ||||||
1351 | typical mailing list digest. We will then sketch how that digest looks once | ||||||
1352 | processed by Mail::Digest::Tool. We will then discuss Mail::Digest::Tool's | ||||||
1353 | exportable functions. Next, we will study how to prepare the two configuration | ||||||
1354 | hashes which hold the configuration data. Finally, we will provide some tips | ||||||
1355 | for everyday use of Mail::Digest::Tools. | ||||||
1356 | |||||||
1357 | =head1 A TYPICAL MAILING LIST DIGEST | ||||||
1358 | |||||||
1359 | Here is a dummied-up version of a typical mailing list digest as it appears | ||||||
1360 | once saved to a plain-text file. For illustrative purposes, let us suppose | ||||||
1361 | that the file is named: 'Perl-Win32-Users Digest, Vol 1 Issue 9999.txt' | ||||||
1362 | |||||||
1363 | Send Perl-Win32-Users mailing list submissions to | ||||||
1364 | perl-win32-users@listserv.ActiveState.com | ||||||
1365 | |||||||
1366 | When replying, please edit your Subject line so it is more specific | ||||||
1367 | than "Re: Contents of Perl-Win32-Users digest..." | ||||||
1368 | |||||||
1369 | Today's Topics: | ||||||
1370 | |||||||
1371 | 1. Introducing Mail::Digest::Tools (James E Keenan) | ||||||
1372 | 2. A Different Discussion (steve) | ||||||
1373 | 3. Re: Introducing Mail::Digest::Tools (David H Adler) | ||||||
1374 | |||||||
1375 | ---------------------------------------------------------------------- | ||||||
1376 | |||||||
1377 | Message: 1 | ||||||
1378 | From: "James E Keenan" |
||||||
1379 | To: |
||||||
1380 | Subject: Introducing Mail::Digest::Tools | ||||||
1381 | Date: Sat, 31 Jan 2004 14:10:20 -0600 | ||||||
1382 | |||||||
1383 | Mail::Digest::Tools is the greatest thing since sliced bread. | ||||||
1384 | Go download it now! | ||||||
1385 | |||||||
1386 | ------------------------------ | ||||||
1387 | |||||||
1388 | Message: 2 | ||||||
1389 | From: "steve" |
||||||
1390 | To: |
||||||
1391 | Subject: A Different Discussion | ||||||
1392 | Date: Sat, 31 Jan 2004 14:40:20 -0600 | ||||||
1393 | |||||||
1394 | This is a new topic. I am not discussing Mail::Digest::Tools in this | ||||||
1395 | submission. | ||||||
1396 | |||||||
1397 | ------------------------------ | ||||||
1398 | |||||||
1399 | Message: 3 | ||||||
1400 | From: "David H Adler" |
||||||
1401 | To: |
||||||
1402 | Subject: Re: Introducing Mail::Digest::Tools | ||||||
1403 | Date: Sat, 31 Jan 2004 14:50:20 -0600 | ||||||
1404 | |||||||
1405 | Jim, what's this nonsense about sliced bread. Weren't you on the Atkins | ||||||
1406 | diet? Unlike beer, sliced bread is Off Topic. | ||||||
1407 | |||||||
1408 | ------------------------------ | ||||||
1409 | |||||||
1410 | _______________________________________________ | ||||||
1411 | Perl-Win32-Users mailing list | ||||||
1412 | Perl-Win32-Users@listserv.ActiveState.com | ||||||
1413 | To unsubscribe: http://listserv.ActiveState.com/mailman/mysubs | ||||||
1414 | |||||||
1415 | End of Perl-Win32-Users Digest | ||||||
1416 | |||||||
1417 | Note that the digest has an I |
||||||
1418 | the digest has its own structure. | ||||||
1419 | |||||||
1420 | The digest's overall structure consists of: | ||||||
1421 | |||||||
1422 | =over 4 | ||||||
1423 | |||||||
1424 | =item * | ||||||
1425 | |||||||
1426 | I |
||||||
1427 | |||||||
1428 | The digest header consists of one or more paragraphs providing instructions | ||||||
1429 | on how to subscribe, post messages, unsubscribe and contact the list | ||||||
1430 | administrator. | ||||||
1431 | |||||||
1432 | In processing a digest, Mail::Digest::Tools generally discards the digest | ||||||
1433 | header. | ||||||
1434 | |||||||
1435 | =item * | ||||||
1436 | |||||||
1437 | I |
||||||
1438 | |||||||
1439 | Next, each daily digest contains a list of the subjects of the messages found | ||||||
1440 | in that particular digest. This list is introduced by a paragraph such as: | ||||||
1441 | |||||||
1442 | Today's Topics | ||||||
1443 | |||||||
1444 | and is followed by a numbered list of the message subjects and authors. Some | ||||||
1445 | digests break the authors into two lines for names and e-mail addresses. | ||||||
1446 | Others, such as the example above, list only names. | ||||||
1447 | |||||||
1448 | When Mail::Digest::Tools process a digest, it extracts the list of topics as a | ||||||
1449 | single chunk and appends it to a file containing the topics from all previous | ||||||
1450 | digests which the user has similarly processed. | ||||||
1451 | |||||||
1452 | =item * | ||||||
1453 | |||||||
1454 | I |
||||||
1455 | |||||||
1456 | The list of topics is separated from the first message by a string of | ||||||
1457 | characters which the list sponsor has, we hope, determined is not likely to | ||||||
1458 | occur in the text of any message posted to that list. In the example above, | ||||||
1459 | the source message delimiter is the string: | ||||||
1460 | |||||||
1461 | ---------------------------------------------------------------------- | ||||||
1462 | |||||||
1463 | followed by two C<\n> newlines (so that the delimiter is a paragraph unto | ||||||
1464 | itself). Other digests may use a two-line delimiter such as: | ||||||
1465 | |||||||
1466 | _______________________________________________________ | ||||||
1467 | _______________________________________________________ | ||||||
1468 | |||||||
1469 | or | ||||||
1470 | |||||||
1471 | --__--__-- | ||||||
1472 | |||||||
1473 | =item * | ||||||
1474 | |||||||
1475 | I | ||||||
1476 | |||||||
1477 | Most mailing list digests use the same string to delimit individual messages | ||||||
1478 | within the digest that they use to delimit the list of today's topics from the | ||||||
1479 | very first message in the digest. (The author tracked one digest for more | ||||||
1480 | than three-and-a-half years that used the same string for both functions -- | ||||||
1481 | only to see that digest's provider change its format while this module was | ||||||
1482 | being prepared for CPAN!) But the digest may use a different string to | ||||||
1483 | separate individual messages from each other. In the sample digest above, | ||||||
1484 | the source message delimiter is the string: | ||||||
1485 | |||||||
1486 | ------------------------------ | ||||||
1487 | |||||||
1488 | followed by two C<\n> newlines (so that the delimiter is a paragraph unto | ||||||
1489 | itself). | ||||||
1490 | |||||||
1491 | As we shall see below, correctly identifying the post-topics delimiter and | ||||||
1492 | source message delimiter used in a particular digest is essential to correct | ||||||
1493 | configuration of Mail::Digest::Tools, as the module will repeatedly C |
||||||
1494 | digests on this delimiter. | ||||||
1495 | |||||||
1496 | =item * | ||||||
1497 | |||||||
1498 | I |
||||||
1499 | |||||||
1500 | Individual messages have their own structure. | ||||||
1501 | |||||||
1502 | =over 4 | ||||||
1503 | |||||||
1504 | =item * | ||||||
1505 | |||||||
1506 | I |
||||||
1507 | |||||||
1508 | In addition to normal mail headers, a message in a digest must have a | ||||||
1509 | message number representing its position within that day's digest. So a | ||||||
1510 | message in a digest will typically have some or all of the following headers: | ||||||
1511 | |||||||
1512 | Message: | ||||||
1513 | From: | ||||||
1514 | Organization: | ||||||
1515 | Reply-To: | ||||||
1516 | To: | ||||||
1517 | CC: | ||||||
1518 | Date: | ||||||
1519 | Subject: | ||||||
1520 | |||||||
1521 | =item * | ||||||
1522 | |||||||
1523 | I |
||||||
1524 | |||||||
1525 | One of more paragraphs of text, frequently including citations from earlier | ||||||
1526 | postings to the mailing list. | ||||||
1527 | |||||||
1528 | The main objective of Mail::Digest::Tools is to extract headers and bodies | ||||||
1529 | from particular digest entries and to append them to plain-text files which | ||||||
1530 | hold all postings on a particular subject. See discussion of | ||||||
1531 | C |
||||||
1532 | |||||||
1533 | Many mailing lists allow subscribers to post in either plain-text or HTML. | ||||||
1534 | Some allow users to post attachments; others do not. Others still | ||||||
1535 | incorporate the attachments into the message body, often using 'multipart | ||||||
1536 | MIME' format. Regrettably, certain mailing list digest programs fail to | ||||||
1537 | eliminate redundant MIME parts before posting a message to a digest. This | ||||||
1538 | leads to severe bloat once Mail::Digest::Tools extracts a message's content | ||||||
1539 | and posts it to a thread file. Mail::Digest::Tools, however, provides its | ||||||
1540 | users with the option of stripping redundant MIME parts from a message | ||||||
1541 | before posting. | ||||||
1542 | |||||||
1543 | =item * | ||||||
1544 | |||||||
1545 | I | ||||||
1546 | |||||||
1547 | As discussed above, each message within a digest is delimited by a string | ||||||
1548 | which may or may not be the same string which separates the list of Today's | ||||||
1549 | Topics from the first message in the digest. | ||||||
1550 | |||||||
1551 | =back | ||||||
1552 | |||||||
1553 | =item * | ||||||
1554 | |||||||
1555 | I |
||||||
1556 | |||||||
1557 | The digest footer consists of one or more paragraphs containing | ||||||
1558 | additional information on the digest and signaling the end of the digest. It | ||||||
1559 | follows the source message delimiter corresponding to the last message in a | ||||||
1560 | particular digest. | ||||||
1561 | |||||||
1562 | In processing a given digest, Mail::Digest::Tools generally discards the | ||||||
1563 | digest footer. | ||||||
1564 | |||||||
1565 | =back | ||||||
1566 | |||||||
1567 | =head2 The Typical Digest After Processing with Mail::Digest::Tools | ||||||
1568 | |||||||
1569 | Using the dummy messages provided above, typical use of Mail::Digest::Tools | ||||||
1570 | would produce (in a bare-bones configuration) the following results: | ||||||
1571 | |||||||
1572 | =over 4 | ||||||
1573 | |||||||
1574 | =item * | ||||||
1575 | |||||||
1576 | Two plain-text 'thread' files holding the ongoing discussion of each topic: | ||||||
1577 | |||||||
1578 | =over 4 | ||||||
1579 | |||||||
1580 | =item * | ||||||
1581 | |||||||
1582 | F |
||||||
1583 | |||||||
1584 | Thread: Introducing Mail::Digest::Tools | ||||||
1585 | Message: 001_9999_001 | ||||||
1586 | From: "James E Keenan" |
||||||
1587 | Text: | ||||||
1588 | |||||||
1589 | Mail::Digest::Tools is the greatest thing since sliced bread. | ||||||
1590 | Go download it now! | ||||||
1591 | |||||||
1592 | --__--__-- | ||||||
1593 | |||||||
1594 | Thread: Introducing Mail::Digest::Tools | ||||||
1595 | Message: 001_9999_003 | ||||||
1596 | From: "David H Adler" |
||||||
1597 | Text: | ||||||
1598 | |||||||
1599 | Jim, what's this nonsense about sliced bread. Weren't you on the Atkins | ||||||
1600 | diet? Unlike beer, sliced bread is Off Topic. | ||||||
1601 | |||||||
1602 | --__--__-- | ||||||
1603 | |||||||
1604 | =item * | ||||||
1605 | |||||||
1606 | F | ||||||
1607 | |||||||
1608 | Thread: A Different Discussion | ||||||
1609 | Message: 001_9999_002 | ||||||
1610 | From: "steve" |
||||||
1611 | Text: | ||||||
1612 | |||||||
1613 | This is a new topic. I am not discussing Mail::Digest::Tools in this | ||||||
1614 | submission. | ||||||
1615 | |||||||
1616 | --__--__-- | ||||||
1617 | |||||||
1618 | =back | ||||||
1619 | |||||||
1620 | =item * | ||||||
1621 | |||||||
1622 | A new entry at the end of file F |
||||||
1623 | |||||||
1624 | Today's Topics | ||||||
1625 | |||||||
1626 | ... | ||||||
1627 | |||||||
1628 | Perl-Win32-Users digest, Vol 1 #9999 - 3 msgs.txt | ||||||
1629 | 1. Introducing Mail::Digest::Tools (James E Keenan) | ||||||
1630 | 2. A Different Discussion (steve) | ||||||
1631 | 3. Re: Introducing Mail::Digest::Tools (David H Adler) | ||||||
1632 | |||||||
1633 | =item * | ||||||
1634 | |||||||
1635 | A new entry at the end of file F |
||||||
1636 | |||||||
1637 | 001_9999;Fri Feb 6 18:57:41 2004;Fri Feb 6 18:57:41 2004 | ||||||
1638 | |||||||
1639 | =back | ||||||
1640 | |||||||
1641 | =head1 FUNCTIONS | ||||||
1642 | |||||||
1643 | Mail::Digest::Tools exports no functions by default. Each of its current | ||||||
1644 | seven functions is imported only on request by your script. | ||||||
1645 | |||||||
1646 | In everyday use, you will probably call just I |
||||||
1647 | exportable functions in a particular Perl script. Typically, you will import | ||||||
1648 | the function as described in the SYNOPSIS above, populate two configuration | ||||||
1649 | hashes, and finally call the one function you have imported. | ||||||
1650 | |||||||
1651 | As will become evident, the most challenging part of using Mail::Digest::Tools | ||||||
1652 | is I |
||||||
1653 | of configuration files from which the two configuration hashes passed as | ||||||
1654 | arguments to the various Mail::Digest::Tools functions are drawn. | ||||||
1655 | |||||||
1656 | More on those configuration hashes later. For now, let's look at the | ||||||
1657 | exportable functions. | ||||||
1658 | |||||||
1659 | =head2 C |
||||||
1660 | |||||||
1661 | process_new_digests(\%config_in, \%config_out); | ||||||
1662 | |||||||
1663 | C |
||||||
1664 | use most frequently on a daily basis. Based on information supplied in the | ||||||
1665 | two configuration hashes passed to it as arguments, C |
||||||
1666 | does the following: | ||||||
1667 | |||||||
1668 | =over 4 | ||||||
1669 | |||||||
1670 | =item * | ||||||
1671 | |||||||
1672 | Validates the configuration data. | ||||||
1673 | |||||||
1674 | =item * | ||||||
1675 | |||||||
1676 | Conducts an analysis of the directory in which thread files for a given | ||||||
1677 | digest are stored to determine are old enough: | ||||||
1678 | |||||||
1679 | =over 4 | ||||||
1680 | |||||||
1681 | =item * | ||||||
1682 | |||||||
1683 | I |
||||||
1684 | configuration file that you wish to archive older threads in a subdirectory | ||||||
1685 | |||||||
1686 | =item * | ||||||
1687 | |||||||
1688 | I |
||||||
1689 | I |
||||||
1690 | |||||||
1691 | =back | ||||||
1692 | |||||||
1693 | =item * | ||||||
1694 | |||||||
1695 | Conducts an analysis of the directory in which digest files (I |
||||||
1696 | plain-text versions of mailing list digests you have received) are stored to | ||||||
1697 | determine which digest files are new and need processing and which have | ||||||
1698 | previously been processed. | ||||||
1699 | |||||||
1700 | =item * | ||||||
1701 | |||||||
1702 | Updates a log file to put a timestamp on the processing of the new digest | ||||||
1703 | file or files. Based on options set in the configuration file, this function | ||||||
1704 | may also update a more human-readable version of this log file. | ||||||
1705 | |||||||
1706 | =item * | ||||||
1707 | |||||||
1708 | Opens each of the digest files identified as needing processing and proceeds | ||||||
1709 | to 'strip down' those files. This 'stripping down' includes the following: | ||||||
1710 | |||||||
1711 | =over 4 | ||||||
1712 | |||||||
1713 | =item * | ||||||
1714 | |||||||
1715 | The digest file's name is analyzed to extract the digest's number as issued by | ||||||
1716 | the provider's mailing list program. This number is used to form part of the | ||||||
1717 | unique identifier which Mail::Digest::Tools assigns to each message within | ||||||
1718 | each digest. | ||||||
1719 | |||||||
1720 | =item * | ||||||
1721 | |||||||
1722 | The list of today's topics in the digest is extracted and appended to a | ||||||
1723 | permanent log file of such topics. | ||||||
1724 | |||||||
1725 | =item * | ||||||
1726 | |||||||
1727 | The digest's contents are split into individual messages. Each message, in | ||||||
1728 | turn, is split into headers and body. | ||||||
1729 | |||||||
1730 | =item * | ||||||
1731 | |||||||
1732 | If you have requested in the configuration file that superfluous multipart | ||||||
1733 | MIME content be purged from messages before posting to thread files, this | ||||||
1734 | purging is now conducted. | ||||||
1735 | |||||||
1736 | =item * | ||||||
1737 | |||||||
1738 | Each message is appended to an appropriate, plain-text thread file which | ||||||
1739 | holds the ongoing discussion of that topic. The following factors are taken | ||||||
1740 | into consideration: | ||||||
1741 | |||||||
1742 | =over 4 | ||||||
1743 | |||||||
1744 | =item * | ||||||
1745 | |||||||
1746 | The name of the thread file is derived from the message's subject, though | ||||||
1747 | characters in the message's subject which would not be valid in file names | ||||||
1748 | on your operating system are skipped over. | ||||||
1749 | |||||||
1750 | =item * | ||||||
1751 | |||||||
1752 | To the greatest extent possible, extraneous words in a message's subject | ||||||
1753 | such as 'Re:' or 'Fwd:' are deleted so that all relevant postings on a given | ||||||
1754 | subject can be included in a single thread file. (Should this not succeed | ||||||
1755 | and a new thread file beginning with 'Re:' or some similar term be created, | ||||||
1756 | you can fix this later by using Mail::Digest::Tool's | ||||||
1757 | C |
||||||
1758 | |||||||
1759 | =back | ||||||
1760 | |||||||
1761 | =item * | ||||||
1762 | |||||||
1763 | A brief summation of results is printed to standard output. | ||||||
1764 | |||||||
1765 | =back | ||||||
1766 | |||||||
1767 | =back | ||||||
1768 | |||||||
1769 | =head2 C |
||||||
1770 | |||||||
1771 | reprocess_ALL_digests(\%config_in, \%config_out); | ||||||
1772 | |||||||
1773 | C |
||||||
1774 | should use ONLY when you are setting up and fine-tuning Mail::Digest::Tools | ||||||
1775 | to process a given digest -- and you should NEVER use it thereafter! | ||||||
1776 | |||||||
1777 | Why? Read on! | ||||||
1778 | |||||||
1779 | C |
||||||
1780 | C |
||||||
1781 | directory in which you store such digests -- not just on those previously | ||||||
1782 | processed. But in the process it does not merely append new messages to | ||||||
1783 | already existing thread files, leaving older thread files untouched. Instead, | ||||||
1784 | C |
||||||
1785 | rebuilds it from scratch. | ||||||
1786 | |||||||
1787 | That's cool if you have retained all instances of a given digest which you | ||||||
1788 | wish to process into thread files. But if you've thrown out older instances | ||||||
1789 | of a given digest and call C |
||||||
1790 | to process the messages contained in those discarded digests. The message | ||||||
1791 | sources are gone. That's cool once you're certain that you've got a given | ||||||
1792 | digest configured just the way you want it -- but not until that moment. | ||||||
1793 | |||||||
1794 | =over 4 | ||||||
1795 | |||||||
1796 | =item * Example | ||||||
1797 | |||||||
1798 | Let's make this more concrete. Suppose that you have begun to subscribe to | ||||||
1799 | the digest version of the London Perlmongers mailing list. When you receive | ||||||
1800 | e-mails from this provider, you store them in a directory whose contents look | ||||||
1801 | like this: | ||||||
1802 | |||||||
1803 | london.pm digest, Vol 1 #1856 - 7 msgs.txt | ||||||
1804 | london.pm digest, Vol 1 #1857 - 18 msgs.txt | ||||||
1805 | london.pm digest, Vol 1 #1858 - 15 msgs.txt | ||||||
1806 | london.pm digest, Vol 1 #1859 - 17 msgs.txt | ||||||
1807 | london.pm digest, Vol 1 #1860 - 11 msgs.txt | ||||||
1808 | |||||||
1809 | Initially, you decide that you want to post the messages in these digests | ||||||
1810 | to thread files that are discarded after three days. You set up your | ||||||
1811 | configuration files to do precisely this. (See below for how this is done.) | ||||||
1812 | You then write a script which calls | ||||||
1813 | |||||||
1814 | reprocess_ALL_digests(\%config_in, \%config_out); | ||||||
1815 | |||||||
1816 | Three days go by. One or two new london.pm digests arrive each day. You | ||||||
1817 | want to process only the newly arrived files, so each day you simply call: | ||||||
1818 | |||||||
1819 | process_new_digests(\%config_in, \%config_out); | ||||||
1820 | |||||||
1821 | and on Day 4 Mail::Digest::Tools starts to notify you on standard output | ||||||
1822 | that it is discarding thread files which have not been changed (I |
||||||
1823 | received new postings) in three days. | ||||||
1824 | |||||||
1825 | But then you decide that London.pm's contributors are the most witty and | ||||||
1826 | erudite Perlmongers anywhere and you wish to archive their contributions | ||||||
1827 | until the end of time (or until the first production release of | ||||||
1828 | Perl 6, whichever comes first). Fortunately, you've still got all your | ||||||
1829 | London.pm digest files going back to the beginning of your subscription. | ||||||
1830 | You make appropriate changes to your configuration setup to say, ''Instead | ||||||
1831 | of killing these thread files after 3 days of inactivity, archive them after | ||||||
1832 | 3 days instead.'' (Again, we'll see how to do this below.) You then call: | ||||||
1833 | |||||||
1834 | reprocess_ALL_digests(\%config_in, \%config_out); | ||||||
1835 | |||||||
1836 | one last time. All your previously existing thread files are wiped out, and | ||||||
1837 | all your London.pm digests are reprocessed from scratch. But that's okay, | ||||||
1838 | because you've decided to live with your configuration decisions. So you | ||||||
1839 | can now begin to discard older digest files and process newly arrived files | ||||||
1840 | only with | ||||||
1841 | |||||||
1842 | process_new_digests(\%config_in, \%config_out); | ||||||
1843 | |||||||
1844 | Your London.pm thread archive grows exponentially, and you live happily ever | ||||||
1845 | after. | ||||||
1846 | |||||||
1847 | =back | ||||||
1848 | |||||||
1849 | The ALL CAPS in C |
||||||
1850 | Mail::Digest::Tools function is very powerful, but potentially very dangerous. | ||||||
1851 | You are also alerted to this danger by this screen prompt which appears when | ||||||
1852 | you call this function: | ||||||
1853 | |||||||
1854 | By default, this program processes only NEWLY ARRIVED | ||||||
1855 | [London.pm/other digest] files found in this directory. Messages in | ||||||
1856 | these new digests are sorted and appended to the appropriate | ||||||
1857 | '.thr.txt' files in the 'Threads' subdirectory. | ||||||
1858 | |||||||
1859 | However, by choosing method 'reprocess_ALL_digests()' you have | ||||||
1860 | indicated that you wish to process ALL digest files found in this | ||||||
1861 | directory -- regardless of whether or not they have previously been | ||||||
1862 | processed. This is recommended ONLY for initialization and testing | ||||||
1863 | of this program. | ||||||
1864 | |||||||
1865 | Since this will wipe out all threads files ('.thr.txt') as well -- | ||||||
1866 | including threads files for which you no longer have their source | ||||||
1867 | digest files -- please confirm that this is your intent by typing | ||||||
1868 | ALL at the prompt. | ||||||
1869 | |||||||
1870 | |||||||
1871 | GOT IT? | ||||||
1872 | |||||||
1873 | To proceed, you must type C |
||||||
1874 | yet another prompt: | ||||||
1875 | |||||||
1876 | You have chosen to WIPE OUT all '.thr.txt' files currently | ||||||
1877 | existing in the 'Threads' subdirectory and reprocess all | ||||||
1878 | [London.pm/other digest] digest files from scratch. | ||||||
1879 | |||||||
1880 | Please re-confirm your choice by once again typing 'ALL' | ||||||
1881 | and hitting [Enter]: | ||||||
1882 | |||||||
1883 | You must again type C |
||||||
1884 | digests. Should you fail to type C |
||||||
1885 | script will default to C |
||||||
1886 | arrived digest files. | ||||||
1887 | |||||||
1888 | =head2 C |
||||||
1889 | |||||||
1890 | $full_reply_file = reply_to_digest_message( | ||||||
1891 | \%config_in, | ||||||
1892 | \%config_out, | ||||||
1893 | $digest_number, | ||||||
1894 | $digest_entry, | ||||||
1895 | $directory_for_reply, | ||||||
1896 | ); | ||||||
1897 | |||||||
1898 | Once you have begun to follow discussion threads on a mailing list with the | ||||||
1899 | aid of Mail::Digest::Tools, you may wish to join the discussion and reply to | ||||||
1900 | a message. | ||||||
1901 | |||||||
1902 | If you tried to do this by hitting the 'Reply' button in your e-mail client, | ||||||
1903 | you would probably end up with a 'Subject' line in your e-mail that looked | ||||||
1904 | this: | ||||||
1905 | |||||||
1906 | Re: london.pm digest, Vol 1 #1814 - 2 msgs | ||||||
1907 | |||||||
1908 | Needless to say, this is tacky. So tacky that many mailing list digest | ||||||
1909 | programs insert this message into each digest's headers: | ||||||
1910 | |||||||
1911 | When replying, please edit your Subject line so it is more specific | ||||||
1912 | than "Re: Contents of london.pm digest, Vol 1, #xxxx..." | ||||||
1913 | |||||||
1914 | You don't want to be tacky; you want to be lazy. You want Perl to do the | ||||||
1915 | work of initiating an e-mail with a meaningful subject header for you. | ||||||
1916 | Mail::Digest::Tool's C |
||||||
1917 | a plain-text file for you that has a meaningful subject line and prepends | ||||||
1918 | each line of the body of the message with C<\> >. You then open this | ||||||
1919 | plain-text file, edit it to reply to its contents, copy-and-paste it into | ||||||
1920 | your e-mail client, and send it. | ||||||
1921 | |||||||
1922 | The arguments passed to C |
||||||
1923 | |||||||
1924 | =over 4 | ||||||
1925 | |||||||
1926 | =item * | ||||||
1927 | |||||||
1928 | a reference to the 'in' configuration hash | ||||||
1929 | |||||||
1930 | =item * | ||||||
1931 | |||||||
1932 | a reference to the 'out' configuration hash | ||||||
1933 | |||||||
1934 | =item * | ||||||
1935 | |||||||
1936 | the number of the digest containing the message to which you are replying | ||||||
1937 | |||||||
1938 | =item * | ||||||
1939 | |||||||
1940 | the number of the message to which you are replying within that digest | ||||||
1941 | |||||||
1942 | =item * | ||||||
1943 | |||||||
1944 | a path to the directory in which you want the plain-text reply file to be | ||||||
1945 | created | ||||||
1946 | |||||||
1947 | =back | ||||||
1948 | |||||||
1949 | =over 4 | ||||||
1950 | |||||||
1951 | =item * Example | ||||||
1952 | |||||||
1953 | Suppose that you wished to reply to message #2 in London.pm digest #1814: | ||||||
1954 | |||||||
1955 | Message: 2 | ||||||
1956 | From: James E Keenan |
||||||
1957 | To: London Perlmongers |
||||||
1958 | Date: Fri, 2 Jan 2004 23:41:01 -0500 | ||||||
1959 | Subject: re: language courses | ||||||
1960 | Reply-To: london.pm@london.pm.org | ||||||
1961 | |||||||
1962 | On Fri, 2 Jan 2004 22:38:40 +0000 (GMT), Ali Young wrote concerning: | ||||||
1963 | language courses | ||||||
1964 | |||||||
1965 | > Depends what you count as useful. Learning Esperanto means that you | ||||||
1966 | > can read the current London.pm website. | ||||||
1967 | |||||||
1968 | BTW, wasn't the Esperanto on the website supposed to expire on 31 Dec? | ||||||
1969 | |||||||
1970 | Jim Keenan | ||||||
1971 | Brooklyn, NY | ||||||
1972 | |||||||
1973 | You would call the function as follows: | ||||||
1974 | |||||||
1975 | $full_reply_file = reply_to_digest_message( | ||||||
1976 | \%config_in, | ||||||
1977 | \%config_out, | ||||||
1978 | 1814, | ||||||
1979 | 2, | ||||||
1980 | '/home/jimk/mail/digest/london', | ||||||
1981 | ); | ||||||
1982 | |||||||
1983 | Mail::Digest::Tools will then create a plain-text file which you can use as | ||||||
1984 | the first draft of your reply. It will print this screen prompt: | ||||||
1985 | |||||||
1986 | To complete reply, edit text in: | ||||||
1987 | /home/jimk/mail/digest/london/language_courses.reply.txt | ||||||
1988 | |||||||
1989 | When you open F |
||||||
1990 | like this: | ||||||
1991 | |||||||
1992 | Reply-To: | ||||||
1993 | london.pm@london.pm.org | ||||||
1994 | |||||||
1995 | Subject: | ||||||
1996 | language courses | ||||||
1997 | |||||||
1998 | On Fri, 2 Jan 2004 23:41:01 -0500, James E Keenan | ||||||
1999 | |
||||||
2000 | |||||||
2001 | > On Fri, 2 Jan 2004 22:38:40 +0000 (GMT), Ali Young wrote concerning: | ||||||
2002 | > language courses | ||||||
2003 | > | ||||||
2004 | > > Depends what you count as useful. Learning Esperanto means that you | ||||||
2005 | > can | ||||||
2006 | > > read the current London.pm website. | ||||||
2007 | > | ||||||
2008 | > BTW, wasn't the Esperanto on the website supposed to expire on 31 Dec? | ||||||
2009 | > | ||||||
2010 | > Jim Keenan | ||||||
2011 | > Brooklyn, NY | ||||||
2012 | > | ||||||
2013 | |||||||
2014 | The 'Reply-To' and 'Subject' paragraphs are provided simply to give you | ||||||
2015 | something to cut-and-paste into a GUI e-mail client. The 'Reply-To' | ||||||
2016 | paragraph will only appear if in C<%config_in> the key | ||||||
2017 | C |
||||||
2018 | |||||||
2019 | You edit this plain-text file, pop it into the body of your e-mail | ||||||
2020 | window and send it. Not elegant, but it at least gives you a first draft. | ||||||
2021 | |||||||
2022 | =back | ||||||
2023 | |||||||
2024 | =head2 C |
||||||
2025 | |||||||
2026 | repair_message_order( | ||||||
2027 | \%config_in, | ||||||
2028 | \%config_out, | ||||||
2029 | { | ||||||
2030 | year => 2004, | ||||||
2031 | month => 01, | ||||||
2032 | day => 27, | ||||||
2033 | } | ||||||
2034 | ); | ||||||
2035 | |||||||
2036 | From time to time you may receive digest versions of mailing lists out of | ||||||
2037 | chronological/numerical sequence. This is especially true when e-mail | ||||||
2038 | traffic is being disrupted by worms or viruses. You may discover that you | ||||||
2039 | have received and processed | ||||||
2040 | |||||||
2041 | london.pm digest, Vol 1 #1856 - 7 msgs | ||||||
2042 | london.pm digest, Vol 1 #1858 - 15 msgs | ||||||
2043 | |||||||
2044 | before realizing that you were missing | ||||||
2045 | |||||||
2046 | london.pm digest, Vol 1 #1857 - 18 msgs | ||||||
2047 | |||||||
2048 | If you were to now process digest 1857 with C |
||||||
2049 | from that digest would be appended to their respective thread files I |
||||||
2050 | messages from digest 1858. Since the whole point of Mail::Digest::Tools is to | ||||||
2051 | be able to read a discussion thread in chronological order, this would not be | ||||||
2052 | desirable. | ||||||
2053 | |||||||
2054 | Fortunately, you can fix this problem as follows: | ||||||
2055 | |||||||
2056 | =over 4 | ||||||
2057 | |||||||
2058 | =item * Apply C |
||||||
2059 | |||||||
2060 | Call C |
||||||
2061 | go ahead and call it on digest 1857 even though it creates thread files with | ||||||
2062 | messages out of chronological order. | ||||||
2063 | |||||||
2064 | =item * Determine date where need for repair begins | ||||||
2065 | |||||||
2066 | Examine the timestamps on your digest files for the date of the first digest | ||||||
2067 | you received out of sequence. In the above example, that would be the date | ||||||
2068 | of digest 1858. Since digest files were received out of proper sequence on or | ||||||
2069 | after that date, all thread files generated after that date may have | ||||||
2070 | out-of-sequence messages and need re-ordering. | ||||||
2071 | |||||||
2072 | =item * Apply C |
||||||
2073 | |||||||
2074 | Call C |
||||||
2075 | |||||||
2076 | =over 4 | ||||||
2077 | |||||||
2078 | =item * | ||||||
2079 | |||||||
2080 | a reference to the 'in' configuration hash | ||||||
2081 | |||||||
2082 | =item * | ||||||
2083 | |||||||
2084 | a reference to the 'out' configuration hash | ||||||
2085 | |||||||
2086 | =item * | ||||||
2087 | |||||||
2088 | a reference to an anonymous hash whose keys are C |
||||||
2089 | the values for which keys are the elements of the repair date. | ||||||
2090 | |||||||
2091 | =back | ||||||
2092 | |||||||
2093 | Mail::Digest::Tools will examine all thread files from midnight local time on | ||||||
2094 | that date. Where messages have been posted to the thread files out of proper | ||||||
2095 | sequence, they will be reposted in the correct order. The thread file with | ||||||
2096 | the correct sequence will overwrite the file with the incorrect sequence. | ||||||
2097 | |||||||
2098 | =back | ||||||
2099 | |||||||
2100 | =head2 C |
||||||
2101 | |||||||
2102 | consolidate_threads_multiple( | ||||||
2103 | \%config_in, | ||||||
2104 | \%config_out, | ||||||
2105 | ); | ||||||
2106 | |||||||
2107 | or | ||||||
2108 | |||||||
2109 | consolidate_threads_multiple( | ||||||
2110 | \%config_in, | ||||||
2111 | \%config_out, | ||||||
2112 | $first_common_letters, # optional integer argument | ||||||
2113 | ); | ||||||
2114 | |||||||
2115 | As described above, Mail::Digest::Tool's C |
||||||
2116 | will, to the greatest extent possible, delete extraneous words such as 'Re:' | ||||||
2117 | or 'Fwd:' from a message's subject so that all relevant postings on a given | ||||||
2118 | subject can be included in a single thread file. What happens when this is | ||||||
2119 | not sufficient? For example, suppose someone posts a message to a list with a | ||||||
2120 | slightly misspelled or altered subject line: | ||||||
2121 | |||||||
2122 | =over 4 | ||||||
2123 | |||||||
2124 | =item * Original thread file: | ||||||
2125 | |||||||
2126 | Help telnetting to remote host through CGI.thr.txt | ||||||
2127 | |||||||
2128 | =item * Thread file created due to altered subject line: | ||||||
2129 | |||||||
2130 | Help telnetting to remote host thru CGI.thr.txt | ||||||
2131 | |||||||
2132 | =back | ||||||
2133 | |||||||
2134 | Mail::Digest::Tools offers two functions to address this problem. | ||||||
2135 | C |
||||||
2136 | first. This function presumes that people who re-type e-mail subject lines | ||||||
2137 | when replying tend to type the first several words correctly, then make errors | ||||||
2138 | or alterations toward the end of the subject line. If the first I |
||||||
2139 | of the subject line of two or more messages are identical, there is a strong | ||||||
2140 | chance that the messages are discussing the same topic and should be posted to | ||||||
2141 | the same discussion thread. Mail::Digest::Tool's default value for I |
||||||
2142 | 20, but you can set a different value for a particular digest by passing an | ||||||
2143 | optional third argument as shown above. C |
||||||
2144 | accordingly: | ||||||
2145 | |||||||
2146 | =over 4 | ||||||
2147 | |||||||
2148 | =item * | ||||||
2149 | |||||||
2150 | Makes a list of all thread files for a particular digest. | ||||||
2151 | |||||||
2152 | =item * | ||||||
2153 | |||||||
2154 | Identifies groups of thread files whose names share the first 20 letters. | ||||||
2155 | |||||||
2156 | =item * | ||||||
2157 | |||||||
2158 | Displays a prompt on standard output asking you whether you wish to | ||||||
2159 | consolidate the files in each such group: | ||||||
2160 | |||||||
2161 | Candidates for consolidation: | ||||||
2162 | Help telnetting to remote host through CGI.thr.txt | ||||||
2163 | Help telnetting to remote host thru CGI.thr.txt | ||||||
2164 | |||||||
2165 | To consolidate, type YES: | ||||||
2166 | |||||||
2167 | =over 4 | ||||||
2168 | |||||||
2169 | =item * | ||||||
2170 | |||||||
2171 | If you type C |
||||||
2172 | thread file whose name will be derived from the Subject line of the very first | ||||||
2173 | posting to the discussion thread. Standard output will display: | ||||||
2174 | |||||||
2175 | Files will be consolidated | ||||||
2176 | |||||||
2177 | =item * | ||||||
2178 | |||||||
2179 | If you type anything other than C |
||||||
2180 | then the files will not be consolidated and standard output will display: | ||||||
2181 | |||||||
2182 | Files will not be consolidated | ||||||
2183 | |||||||
2184 | =item * | ||||||
2185 | |||||||
2186 | If the files are consolidated, the original thread files will not automatically | ||||||
2187 | be deleted. Rather, they are renamed with the extension C<.DELETABLE>. | ||||||
2188 | |||||||
2189 | Help telnetting to remote host through CGI.thr.txt.DELETABLE | ||||||
2190 | Help telnetting to remote host thru CGI.thr.txt.DELETABLE | ||||||
2191 | |||||||
2192 | This is a safety precaution. The user can then delete the deletable files | ||||||
2193 | by calling the C |
||||||
2194 | |||||||
2195 | =back | ||||||
2196 | |||||||
2197 | =item * | ||||||
2198 | |||||||
2199 | If there are no files in the threads directory which share the first 20 letters | ||||||
2200 | in common (or the first I |
||||||
2201 | argument), then you are warned at standard output: | ||||||
2202 | |||||||
2203 | Analysis of the first 20 letters of each file in | ||||||
2204 | [threads directory] | ||||||
2205 | shows no candidates for consolidation. Please hard-code | ||||||
2206 | names of files you wish to consolidate as arguments to | ||||||
2207 | &consolidate_threads_single | ||||||
2208 | |||||||
2209 | =back | ||||||
2210 | |||||||
2211 | =head2 C |
||||||
2212 | |||||||
2213 | consolidate_threads_single( | ||||||
2214 | \%config_in, | ||||||
2215 | \%config_out, | ||||||
2216 | [ | ||||||
2217 | 'first_dummy_file_for_consolidation.thr.txt', | ||||||
2218 | 'second_dummy_file_for_consolidation.thr.txt', | ||||||
2219 | ], | ||||||
2220 | ); | ||||||
2221 | |||||||
2222 | Suppose that the thread files which you wish to consolidate have names whose | ||||||
2223 | spelling diverges before the 21st letter. The algorithm which | ||||||
2224 | C |
||||||
2225 | rationale for consolidation. This could happen when someone tries to change | ||||||
2226 | the subject of discussion from: | ||||||
2227 | |||||||
2228 | Best book for extreme Newbie to programming | ||||||
2229 | |||||||
2230 | to: | ||||||
2231 | |||||||
2232 | De incunabula nostra (Was Best book for extreme Newbie to programming) | ||||||
2233 | |||||||
2234 | I |
||||||
2235 | anonymous array. Pass a reference to that anonymous array as the third | ||||||
2236 | argument to C |
||||||
2237 | |||||||
2238 | As with C |
||||||
2239 | will bear the name of the source file containing the very first posting to | ||||||
2240 | the discussion thread. The files so consolidated will not automatically be | ||||||
2241 | deleted. Rather, they will be renamed with the extension C<.DELETABLE> as a | ||||||
2242 | safety precaution and left for you to delete with C |
||||||
2243 | |||||||
2244 | =head2 C |
||||||
2245 | |||||||
2246 | delete_deletables(\%config_out); | ||||||
2247 | |||||||
2248 | Mail::Digest::Tools function C |
||||||
2249 | either C |
||||||
2250 | Unlike all other public functions provided by Mail::Digest::Tools, | ||||||
2251 | C |
||||||
2252 | two configuration hashes, I |
||||||
2253 | function simply changes to the directory where thread files for a given | ||||||
2254 | digest are stored and deletes all files with the extension C<.DELETABLE>. | ||||||
2255 | |||||||
2256 | =head1 CONFIGURATION SETUP OVERVIEW | ||||||
2257 | |||||||
2258 | To use a Mail::Digest::Tool function, you need to answer two fundamental | ||||||
2259 | questions: | ||||||
2260 | |||||||
2261 | =over 4 | ||||||
2262 | |||||||
2263 | =item 1 | ||||||
2264 | |||||||
2265 | What internal structure has the mailing list sponsor provided for a given | ||||||
2266 | digest? | ||||||
2267 | |||||||
2268 | =item 2 | ||||||
2269 | |||||||
2270 | How do I want to structure the results of applying Mail::Digest::Tools to a | ||||||
2271 | particular digest on my system? | ||||||
2272 | |||||||
2273 | =back | ||||||
2274 | |||||||
2275 | Each of these two questions breaks down into sub-parts. Their answers | ||||||
2276 | supply you with the information with which you will construct the two | ||||||
2277 | configuration hashes passed to most Mail::Digest::Tools functions. | ||||||
2278 | Let us take each in turn. | ||||||
2279 | |||||||
2280 | =head1 C<%config_in>: THE INTERNAL STRUCTURE OF A DIGEST | ||||||
2281 | |||||||
2282 | The best way to learn about the internal structure of a mailing list digest | ||||||
2283 | (other than to study the application which created the digest in the first | ||||||
2284 | place) is to accumulate several instances of the digest on your system in a | ||||||
2285 | directory devoted to that purpose. Examine the way the digest's filename is | ||||||
2286 | formed. Then examine the digest file itself. You will soon pick up a feel | ||||||
2287 | for the structure of the digest, which will guide you in configuring | ||||||
2288 | Mail::Digest::Tools for your system. That configuration will take the form | ||||||
2289 | of a Perl hash which, for illustrative purposes, we shall here call | ||||||
2290 | C<%xxx_config_in> where C |
||||||
2291 | |||||||
2292 | For heuristic purposes we will examine the characteristics of two mailing | ||||||
2293 | list digests which the author has been following and archiving for several | ||||||
2294 | years: ActiveState's 'Perl-Win32-Users' digest and Yahoo! Groups' Perl | ||||||
2295 | Beginners group digest. | ||||||
2296 | |||||||
2297 | =head2 Analysis of Digest's File Name | ||||||
2298 | |||||||
2299 | We must study a digest's file name in order to be able to write a pattern | ||||||
2300 | with which we will be able to distinguish a digest file from any non-digest | ||||||
2301 | file sitting in the same directory, as well as to be able to extract the | ||||||
2302 | digest number from that file name. | ||||||
2303 | |||||||
2304 | Once saved as plain-text files, Perl-Win32-Users digest files typically look | ||||||
2305 | like this in a directory: | ||||||
2306 | |||||||
2307 | Perl-Win32-Users Digest, Vol 1 Issue 1771.txt | ||||||
2308 | Perl-Win32-Users Digest, Vol 1 Issue 1772.txt | ||||||
2309 | |||||||
2310 | Similarly, the Perl Beginner digest files look like this: | ||||||
2311 | |||||||
2312 | [PBML] Digest Number 1491.txt | ||||||
2313 | [PBML] Digest Number 1492.txt | ||||||
2314 | |||||||
2315 | To correctly identify Perl-Win32-Users digest files from any other files in | ||||||
2316 | the same directory, we compose a string which would form the core of a Perl | ||||||
2317 | regular expression, I |
||||||
2318 | delimiters. Internally, Mail::Digest::Tools passes the file name through a | ||||||
2319 | C |
||||||
2320 | |||||||
2321 | %pw32u_config_in = ( | ||||||
2322 | grep_formula => 'Perl-Win32-Users Digest', | ||||||
2323 | ... | ||||||
2324 | ); | ||||||
2325 | |||||||
2326 | The equivalent pattern for the Perl Beginners digest would be: | ||||||
2327 | |||||||
2328 | %pbml_config_in = ( | ||||||
2329 | grep_formula => '\[PBML\]', | ||||||
2330 | ... | ||||||
2331 | ); | ||||||
2332 | |||||||
2333 | Note that the C<[> and C<]> characters have to be escaped with a C<\> | ||||||
2334 | backslash because they are normally metacharacters inside Perl regular | ||||||
2335 | expressions. | ||||||
2336 | |||||||
2337 | We next have to extract the digest number from the digest's file name. | ||||||
2338 | Certain mailing list programs give individual digests both a 'Volume' number | ||||||
2339 | as well as an individual digest number. Perl-Win32-Users typifies this. In | ||||||
2340 | the example above we need to capture both the C<1> as volume number and C<1771> | ||||||
2341 | as digest number. The next key in our configuration hash is called | ||||||
2342 | C |
||||||
2343 | |||||||
2344 | %pw32u_config_in = ( | ||||||
2345 | grep_formula => 'Perl-Win32-Users Digest', | ||||||
2346 | pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt', | ||||||
2347 | ... | ||||||
2348 | ); | ||||||
2349 | |||||||
2350 | Note the two sets of capturing parentheses. | ||||||
2351 | |||||||
2352 | Other digests, such as those at Yahoo! Groups, dispense with a volume number | ||||||
2353 | and simply increment each digest number: | ||||||
2354 | |||||||
2355 | %pbml_config_in = ( | ||||||
2356 | grep_formula => '\[PBML\]', | ||||||
2357 | pattern_target => '.*\s(\d+)\.txt$', | ||||||
2358 | ... | ||||||
2359 | ); | ||||||
2360 | |||||||
2361 | Note that this C |
||||||
2362 | parentheses. | ||||||
2363 | |||||||
2364 | =head2 Analysis of Digest's Internal Structure | ||||||
2365 | |||||||
2366 | A digest's internal structure is discussed in detail above (see | ||||||
2367 | 'A TYPICAL MAILING LIST DIGEST'). Here we need to identify two | ||||||
2368 | characteristics: the way the digest introduces its list of today's topics | ||||||
2369 | and the string it uses to delimit the list of today's topics from the first | ||||||
2370 | individual message in the digest and all subsequent messages from one another. | ||||||
2371 | Continuing with our two examples from above, we provide values for keys | ||||||
2372 | C |
||||||
2373 | |||||||
2374 | %pw32u_config_in = ( | ||||||
2375 | grep_formula => 'Perl-Win32-Users digest', | ||||||
2376 | pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt', | ||||||
2377 | topics_intro => 'Today\'s Topics:', | ||||||
2378 | source_msg_delimiter => "--__--__--\n\n", | ||||||
2379 | ... | ||||||
2380 | ); | ||||||
2381 | |||||||
2382 | Note the escaped C<'> apostrophe character in the value for key | ||||||
2383 | C |
||||||
2384 | |||||||
2385 | %pbml_config_in = ( | ||||||
2386 | grep_formula => '\[PBML\]', | ||||||
2387 | pattern_target => '.*\s(\d+)\.txt$', | ||||||
2388 | topics_intro => 'Topics in this digest:', | ||||||
2389 | source_msg_delimiter => "________________________________________________________________________\n________________________________________________________________________\n\n", | ||||||
2390 | ... | ||||||
2391 | ); | ||||||
2392 | |||||||
2393 | Note that the values provided for the respective C |
||||||
2394 | had to be double-quoted strings. That's because all such delimiters include | ||||||
2395 | two or more C<\n> newline characters so that they form paragraphs unto | ||||||
2396 | themselves. Unless indicated otherwise, the values for all other values in | ||||||
2397 | the configuration hash are single-quoted strings. | ||||||
2398 | |||||||
2399 | Note: In early 2004, while Mail::Digest::Tools was being prepared for its | ||||||
2400 | initial distribution on CPAN, ActiveState changed certain features in the | ||||||
2401 | daily digest versions of its mailing lists. Hence, the code example presented | ||||||
2402 | above should not be 'copied-and-pasted' into a configuration hash with which | ||||||
2403 | you, the user, might follow the current Perl-Win32-Users digest. In | ||||||
2404 | particular, the source message delimiter was changed to a string of 30 | ||||||
2405 | hyphens followed by 2 C<\n> newline characters: | ||||||
2406 | |||||||
2407 | "------------------------------\n\n" | ||||||
2408 | |||||||
2409 | However, since it is not unheard of for contributors to a mailing list to use | ||||||
2410 | such a string of hyphens within their postings or signatures, using a string | ||||||
2411 | of hyphens is not a particularly apt choice for a source message delimiter. | ||||||
2412 | In this particular case, the author is getting better (but not fully tested) | ||||||
2413 | results by including an additional newline I |
||||||
2414 | order to more uniquely identify the source message delimiter: | ||||||
2415 | |||||||
2416 | "\n------------------------------\n\n" | ||||||
2417 | |||||||
2418 | =head2 Analysis of Individual Messages | ||||||
2419 | |||||||
2420 | The internal structure of an individual message within a digest is also | ||||||
2421 | discussed in detail above. Here we need to identify patterns with which we | ||||||
2422 | can extract the content of the message's headers. | ||||||
2423 | |||||||
2424 | Certain mailing list digest programs allow a wide variety of headers to appear | ||||||
2425 | in digested messages. The Perl-Win32-Users digest typifies this. Each | ||||||
2426 | message in a Perl-Win32_Users digest I |
||||||
2427 | for the message's author, recipients, subject and date. | ||||||
2428 | |||||||
2429 | Message: 1 | ||||||
2430 | From: Chris Smithson |
||||||
2431 | To: "'Carter Kraus'" |
||||||
2432 | "Perl-Win32-Users (E-mail)" |
||||||
2433 | Subject: RE: OO Perl Issue. | ||||||
2434 | Date: Wed, 4 Feb 2004 14:17:24 -0600 | ||||||
2435 | |||||||
2436 | But a message in this digest may have additional headers for the author's | ||||||
2437 | organization, reply address and/or carbon-copy recipients. | ||||||
2438 | |||||||
2439 | Message: 5 | ||||||
2440 | Date: Wed, 4 Feb 2004 15:15:44 -0800 | ||||||
2441 | From: Sam Spade |
||||||
2442 | Organization: Some Web Address | ||||||
2443 | Reply-To: Sam Spade |
||||||
2444 | To: "Time" |
||||||
2445 | CC: "Perl List" |
||||||
2446 | Subject: Re: New IE Update causes script problems | ||||||
2447 | |||||||
2448 | Patterns are easily developed to capture this information and store it in the | ||||||
2449 | configuration hash: | ||||||
2450 | |||||||
2451 | %pw32u_config_in = ( | ||||||
2452 | grep_formula => 'Perl-Win32-Users digest', | ||||||
2453 | pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt', | ||||||
2454 | topics_intro => 'Today\'s Topics:', | ||||||
2455 | source_msg_delimiter => "--__--__--\n\n", | ||||||
2456 | message_style_flag => '^Message:\s+(\d+)$', | ||||||
2457 | from_style_flag => '^From:\s+(.+)$', | ||||||
2458 | org_style_flag => '^Organization:\s+(.+)$', | ||||||
2459 | to_style_flag => '^To:\s+(.+)$', | ||||||
2460 | cc_style_flag => '^CC:\s+(.+)$', | ||||||
2461 | subject_style_flag => '^Subject:\s+(.+)$', | ||||||
2462 | date_style_flag => '^Date:\s+(.+)$', | ||||||
2463 | reply_to_style_flag => '^Reply-To:\s+(.+)$', | ||||||
2464 | ... | ||||||
2465 | ); | ||||||
2466 | |||||||
2467 | Other mailing list digest programs allow much fewer headers in digested | ||||||
2468 | messages. The Yahoo! Groups digests such as Perl Beginner typify this. | ||||||
2469 | |||||||
2470 | Message: 4 | ||||||
2471 | Date: Sun, 7 Dec 2003 19:24:03 +1100 | ||||||
2472 | From: Philip Streets |
||||||
2473 | Subject: RH9.0, perl 5.8.2 and qmail-localfilter question | ||||||
2474 | |||||||
2475 | The patterns developed to capture this information and store it in the | ||||||
2476 | configuration hash would be as follows: | ||||||
2477 | |||||||
2478 | %pbml_config_in = ( | ||||||
2479 | grep_formula => '\[PBML\]', | ||||||
2480 | pattern_target => '.*\s(\d+)\.txt$', | ||||||
2481 | topics_intro => 'Topics in this digest:', | ||||||
2482 | source_msg_delimiter => "________________________________________________________________________\n________________________________________________________________________\n\n", | ||||||
2483 | message_style_flag => '^Message:\s+(\d+)$', | ||||||
2484 | from_style_flag => '^\s+From:\s+(.+)$', | ||||||
2485 | subject_style_flag => '^Subject:\s+(.+)$', | ||||||
2486 | date_style_flag => '^\s+Date:\s+(.+)$', | ||||||
2487 | ... | ||||||
2488 | ); | ||||||
2489 | |||||||
2490 | Note that this pattern is written to expect 1 or more whitespaces at the | ||||||
2491 | beginning of the C |
||||||
2492 | |||||||
2493 | We could -- but do not need to -- add the following key-value pairs to the | ||||||
2494 | C<%pbml_config_in> hash. | ||||||
2495 | |||||||
2496 | org_style_flag => undef, | ||||||
2497 | to_style_flag => undef, | ||||||
2498 | cc_style_flag => undef, | ||||||
2499 | reply_to_style_flag => undef, | ||||||
2500 | |||||||
2501 | =head2 Inspection of Messages for Multipart MIME Content | ||||||
2502 | |||||||
2503 | Certain mailing lists allow subscribers to post messages in either plain-text | ||||||
2504 | or HTML. Certain lists allow subscribers to post attachments; others do not. | ||||||
2505 | When it comes to preparing digests of these messages, the programs which | ||||||
2506 | different lists take lead to different results. The most annoying situation | ||||||
2507 | occurs when a list allows a subscriber to post in 'multipart MIME format' and | ||||||
2508 | then fails to strip out the redundant HTML part after printing the needed | ||||||
2509 | plain-text part. | ||||||
2510 | |||||||
2511 | I |
||||||
2512 | list digest. (ActiveState changed the format of its digests in early 2004 to | ||||||
2513 | strip out HTML attachments. Hence, the following code no longer accurately | ||||||
2514 | represents what a subscriber to an ActiveState digest will see. Other mailing | ||||||
2515 | lists still suffer from MIME bloat, however, so treat the following code as | ||||||
2516 | illustrative.) The message begins: | ||||||
2517 | |||||||
2518 | Message: 1 | ||||||
2519 | To: Perl-Win32-Users@activestate.com | ||||||
2520 | Subject: Can not tie STDOUT to scolled Tk widget | ||||||
2521 | From: John_Wonderman@some.web.address.ca | ||||||
2522 | Date: Thu, 15 Jan 2004 16:25:17 -0500 | ||||||
2523 | This is a multipart message in MIME format. | ||||||
2524 | --=_alternative 00750F0485256E1C_= | ||||||
2525 | Content-Type: text/plain; charset="US-ASCII" | ||||||
2526 | Hi; | ||||||
2527 | I am trying to implement a scrolling text widget to capture output for for | ||||||
2528 | at tk app. Without scrolling: | ||||||
2529 | my $text = $mw->Text(-width => 78, | ||||||
2530 | -height => 32, | ||||||
2531 | -wrap => 'word', | ||||||
2532 | -font => ['Courier New','11'] | ||||||
2533 | )->pack(-side => 'bottom', | ||||||
2534 | -expand => 1, | ||||||
2535 | -fill => 'both', | ||||||
2536 | ); | ||||||
2537 | ... | ||||||
2538 | |||||||
2539 | When the plain-text part of the message is finished, it is then repeated in | ||||||
2540 | HTML: | ||||||
2541 | |||||||
2542 | --=_alternative 00750F0485256E1C_= | ||||||
2543 | Content-Type: text/html; charset="US-ASCII" | ||||||
2544 | Hi; |
||||||
2545 | I am trying to implement a scrolling text |
||||||
2546 | widget to capture output for for at tk app. Without scrolling: | ||||||
2547 | my $text = $mw->Text(-width |
||||||
2548 | => 78, | ||||||
2549 | |
||||||
2550 | -height => 32, | ||||||
2551 | |
||||||
2552 | -wrap => 'word', | ||||||
2553 | |
||||||
2554 | -font => ['Courier New','11'] | ||||||
2555 | )->pack(-side => |
||||||
2556 | 'bottom', | ||||||
2557 | |
||||||
2558 | -expand => 1, | ||||||
2559 | |
||||||
2560 | -fill => 'both', | ||||||
2561 | |||||||
2562 | There is no reason to retain this bloat in your thread file. The digest | ||||||
2563 | providers should have stripped it out, but the program they were using failed | ||||||
2564 | to do so. Other digests, such as those at Yahoo! Groups, eliminate all this | ||||||
2565 | blather. | ||||||
2566 | |||||||
2567 | Now, with Mail::Digest::Tools, you can eliminate much of the bloat yourself. | ||||||
2568 | After examining 6-10 instances of a particular mailing list digest, you should | ||||||
2569 | be able to determine whether the digest needs a dose of digital castor oil or | ||||||
2570 | not, and you set key C |
||||||
2571 | unnecessary multipart MIME content, you set this flag to C<1>; otherwise, to | ||||||
2572 | C<0>. | ||||||
2573 | |||||||
2574 | And with that you have completed your analysis of the internal structure of a | ||||||
2575 | given digest and entered the relevant information into the first configuration | ||||||
2576 | hash: | ||||||
2577 | |||||||
2578 | %pw32u_config_in = ( | ||||||
2579 | grep_formula => 'Perl-Win32-Users digest', | ||||||
2580 | pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt', | ||||||
2581 | topics_intro => 'Today\'s Topics:', | ||||||
2582 | source_msg_delimiter => "--__--__--\n\n", | ||||||
2583 | message_style_flag => '^Message:\s+(\d+)$', | ||||||
2584 | from_style_flag => '^From:\s+(.+)$', | ||||||
2585 | org_style_flag => '^Organization:\s+(.+)$', | ||||||
2586 | to_style_flag => '^To:\s+(.+)$', | ||||||
2587 | cc_style_flag => '^CC:\s+(.+)$', | ||||||
2588 | subject_style_flag => '^Subject:\s+(.+)$', | ||||||
2589 | date_style_flag => '^Date:\s+(.+)$', | ||||||
2590 | reply_to_style_flag => '^Reply-To:\s+(.+)$', | ||||||
2591 | MIME_cleanup_flag => 1, | ||||||
2592 | ); | ||||||
2593 | |||||||
2594 | %pbml_config_in = ( | ||||||
2595 | grep_formula => '\[PBML\]', | ||||||
2596 | pattern_target => '.*\s(\d+)\.txt$', | ||||||
2597 | topics_intro => 'Topics in this digest:', | ||||||
2598 | source_msg_delimiter => "________________________________________________________________________\n________________________________________________________________________\n\n", | ||||||
2599 | message_style_flag => '^Message:\s+(\d+)$', | ||||||
2600 | from_style_flag => '^\s+From:\s+(.+)$', | ||||||
2601 | subject_style_flag => '^Subject:\s+(.+)$', | ||||||
2602 | date_style_flag => '^\s+Date:\s+(.+)$', | ||||||
2603 | MIME_cleanup_flag => 0, | ||||||
2604 | ); | ||||||
2605 | |||||||
2606 | =head1 C<%config_out>: HOW TO PROCESS A DIGEST ON YOUR SYSTEM | ||||||
2607 | |||||||
2608 | C<%config_in> holds the answers to the question: What internal structure has | ||||||
2609 | the mailing list sponsor provided for a given digest? In contrast, | ||||||
2610 | C<%config_out> will hold the answer to this question: How do I want to | ||||||
2611 | structure the results of applying Mail::Digest::Tools to a particular digest | ||||||
2612 | on my system? | ||||||
2613 | |||||||
2614 | For purpose of illustration, we will continue to assume that we are processing | ||||||
2615 | digest files received from the Perl-Win32-Users and Perl Beginner lists. We | ||||||
2616 | will make slightly different choices as to how we process those digest files | ||||||
2617 | so as to illustrate different options available from Mail::Digest::Tools. | ||||||
2618 | |||||||
2619 | We shall also assume that we going to place the scripts from which we call | ||||||
2620 | Mail::Digest::Tools functions in the directory I |
||||||
2621 | which we store the digest files once they have been saved as plain-text files. | ||||||
2622 | If we call this directory C |
||||||
2623 | then we will have a directory structure that starts out like this: | ||||||
2624 | |||||||
2625 | digest/ | ||||||
2626 | process_new.pl | ||||||
2627 | process_ALL.pl | ||||||
2628 | reply_digest_message.pl | ||||||
2629 | repair_digest_order.pl | ||||||
2630 | consolidate_threads.pl | ||||||
2631 | deletables.pl | ||||||
2632 | pw32u/ | ||||||
2633 | Perl-Win32-Users Digest, Vol 1 Issue 1771.txt | ||||||
2634 | Perl-Win32-Users Digest, Vol 1 Issue 1772.txt | ||||||
2635 | pbml/ | ||||||
2636 | [PBML] Digest Number 1491.txt | ||||||
2637 | [PBML] Digest Number 1492.txt | ||||||
2638 | |||||||
2639 | =head2 Required C<%config_out> Keys | ||||||
2640 | |||||||
2641 | There are 9 keys which are required in C<%config_out> in order for | ||||||
2642 | Mail::Digest::Tools to function properly. They correspond to 9 decisions | ||||||
2643 | which you must make in setting up a Mail::Digest::Tools configuration on | ||||||
2644 | your system. | ||||||
2645 | |||||||
2646 | =over 4 | ||||||
2647 | |||||||
2648 | =item 1 Title | ||||||
2649 | |||||||
2650 | Each digest must be given a title which is used whenever Mail::Digest::Tools | ||||||
2651 | needs to prompt or warn you on standard output. The key which holds this | ||||||
2652 | information in C<%config_out> must be called C |
||||||
2653 | element should be sensible. | ||||||
2654 | |||||||
2655 | %pw32u_config_out = ( | ||||||
2656 | title => 'Perl-Win32-Users', | ||||||
2657 | ... | ||||||
2658 | ); | ||||||
2659 | |||||||
2660 | %pbml_config_out = ( | ||||||
2661 | title => 'Perl Beginner', | ||||||
2662 | ... | ||||||
2663 | ); | ||||||
2664 | |||||||
2665 | =item 2 Digest Directory | ||||||
2666 | |||||||
2667 | For each digest a directory must be designated where individual digest files | ||||||
2668 | are stored in plain-text format. The key which holds this information in | ||||||
2669 | C<%config_out> must be called C |
||||||
2670 | directories are named relative to the 'current' directory (C<..>), | ||||||
2671 | I |
||||||
2672 | Mail::Digest::Function is stored. | ||||||
2673 | |||||||
2674 | %pw32u_config_out = ( | ||||||
2675 | title => 'Perl-Win32-Users', | ||||||
2676 | dir_digest => "../pw32u", | ||||||
2677 | ... | ||||||
2678 | ); | ||||||
2679 | |||||||
2680 | %pbml_config_out = ( | ||||||
2681 | title => 'Perl Beginner', | ||||||
2682 | dir_digest => "../pbml", | ||||||
2683 | ... | ||||||
2684 | ); | ||||||
2685 | |||||||
2686 | =item 3 Threads Directory | ||||||
2687 | |||||||
2688 | For each digest a directory must be designated where the thread files created | ||||||
2689 | by use of Mail::Digest::Tools functions are stored. The key which holds this | ||||||
2690 | information in C<%config_out> must be called C |
||||||
2691 | below the threads directory is a subdirectory of the digest directory, but | ||||||
2692 | you may make other choices. | ||||||
2693 | |||||||
2694 | %pw32u_config_out = ( | ||||||
2695 | title => 'Perl-Win32-Users', | ||||||
2696 | dir_digest => "../pw32u", | ||||||
2697 | dir_threads => "../pw32u/Threads", | ||||||
2698 | ... | ||||||
2699 | ); | ||||||
2700 | |||||||
2701 | %pbml_config_out = ( | ||||||
2702 | title => 'Perl Beginner', | ||||||
2703 | dir_digest => "../pbml", | ||||||
2704 | dir_threads => "../pbml/Threads", | ||||||
2705 | ... | ||||||
2706 | ); | ||||||
2707 | |||||||
2708 | =item 4 Digests Log File | ||||||
2709 | |||||||
2710 | For each digest a file must be kept which logs whether a given digest file | ||||||
2711 | has already been processed or not and, if so, when. The key which holds this | ||||||
2712 | information in C<%config_out> must be called C |
||||||
2713 | found convenient to keep this file in the digests directory, but you may make | ||||||
2714 | other choices. | ||||||
2715 | |||||||
2716 | %pw32u_config_out = ( | ||||||
2717 | title => 'Perl-Win32-Users', | ||||||
2718 | dir_digest => "../pw32u", | ||||||
2719 | dir_threads => "../pw32u/Threads", | ||||||
2720 | digests_log => "../pw32u/digests_log.txt", | ||||||
2721 | ... | ||||||
2722 | ); | ||||||
2723 | |||||||
2724 | %pbml_config_out = ( | ||||||
2725 | title => 'Perl Beginner', | ||||||
2726 | dir_digest => "../pbml", | ||||||
2727 | dir_threads => "../pbml/Threads", | ||||||
2728 | digests_log => "../pbml/digests_log.txt", | ||||||
2729 | ... | ||||||
2730 | ); | ||||||
2731 | |||||||
2732 | =item 5 Today's Topics | ||||||
2733 | |||||||
2734 | For each digest a file must be kept which holds an ongoing record of the | ||||||
2735 | list of topics found in each individual digest file. The key which holds this | ||||||
2736 | information in C<%config_out> must be called |
||||||
2737 | found convenient to keep this file in the digests directory, but you may make | ||||||
2738 | other choices. | ||||||
2739 | |||||||
2740 | %pw32u_config_out = ( | ||||||
2741 | title => 'Perl-Win32-Users', | ||||||
2742 | dir_digest => "../pw32u", | ||||||
2743 | dir_threads => "../pw32u/Threads", | ||||||
2744 | digests_log => "../pw32u/digests_log.txt", | ||||||
2745 | todays_topics => "../pw32u/todays_topics.txt", | ||||||
2746 | ... | ||||||
2747 | ); | ||||||
2748 | |||||||
2749 | %pbml_config_out = ( | ||||||
2750 | title => 'Perl Beginner', | ||||||
2751 | dir_digest => "../pbml", | ||||||
2752 | dir_threads => "../pbml/Threads", | ||||||
2753 | digests_log => "../pbml/digests_log.txt", | ||||||
2754 | todays_topics => "../pbml/todays_topics.txt", | ||||||
2755 | ... | ||||||
2756 | ); | ||||||
2757 | |||||||
2758 | =item 6 Format for Identifying Digest Number in Output | ||||||
2759 | |||||||
2760 | For each digest you must choose how to format the number(s) of the individual | ||||||
2761 | digest file being processed when messages from that file are written to a | ||||||
2762 | threads file. What you are doing here is formatting the information captured | ||||||
2763 | by the C |
||||||
2764 | You express this choice as a single-quoted string which formats the data | ||||||
2765 | captured by Perl regular expression which in C |
||||||
2766 | formatting is done via the Perl C |
||||||
2767 | is assigned to be the value of C<%config_out> key |
||||||
2768 | |||||||
2769 | We saw above that digests from the Perl-Win32-Users list carried both a volume | ||||||
2770 | number and an individual digest number. | ||||||
2771 | |||||||
2772 | Perl-Win32-Users Digest, Vol 1 Issue 1771.txt | ||||||
2773 | Perl-Win32-Users Digest, Vol 1 Issue 1772.txt | ||||||
2774 | |||||||
2775 | Both numbers were captured by the Perl regular expression in | ||||||
2776 | C<%pw32u_config_in> key |
||||||
2777 | |||||||
2778 | '.*Vol\s(\d+),\sIssue\s(\d+)\.txt', | ||||||
2779 | |||||||
2780 | Here we have chosen to format the volume number as a 3-digit, 0-padded number | ||||||
2781 | and the individual digest number as a 4-digit, 0-padded number. We then join | ||||||
2782 | these two data with an underscore. | ||||||
2783 | |||||||
2784 | %pw32u_config_out = ( | ||||||
2785 | title => 'Perl-Win32-Users', | ||||||
2786 | dir_digest => "../pw32u", | ||||||
2787 | dir_threads => "../pw32u/Threads", | ||||||
2788 | digests_log => "../pw32u/digests_log.txt", | ||||||
2789 | todays_topics => "../pw32u/todays_topics.txt", | ||||||
2790 | id_format => 'sprintf("%03d",$1) . \'_\' . sprintf("%04d",$2)', | ||||||
2791 | ... | ||||||
2792 | ); | ||||||
2793 | |||||||
2794 | We saw above that digests from the Perl Beginners list carried only an | ||||||
2795 | digest number -- no volume number. | ||||||
2796 | |||||||
2797 | [PBML] Digest Number 1491.txt | ||||||
2798 | [PBML] Digest Number 1492.txt | ||||||
2799 | |||||||
2800 | This number was captured by the Perl regular expression in C<%pbml_config_in> | ||||||
2801 | key |
||||||
2802 | |||||||
2803 | '.*\s(\d+)\.txt$' | ||||||
2804 | |||||||
2805 | Here we have chosen to format the digest number as a 5-digit, 0-padded number. | ||||||
2806 | |||||||
2807 | %pbml_config_out = ( | ||||||
2808 | title => 'Perl Beginner', | ||||||
2809 | dir_digest => "../pbml", | ||||||
2810 | dir_threads => "../pbml/Threads", | ||||||
2811 | digests_log => "../pbml/digests_log.txt", | ||||||
2812 | todays_topics => "../pbml/todays_topics.txt", | ||||||
2813 | id_format => 'sprintf("%05d",$1)', | ||||||
2814 | ... | ||||||
2815 | ); | ||||||
2816 | |||||||
2817 | Note that if you allow for a 4-digit number, the highest numbered digest you | ||||||
2818 | can process off a given mailing list will be C<9999>. If you allow for a | ||||||
2819 | 5-digit number, the upper limit will be C<99999>. The latter should be | ||||||
2820 | sufficient for a lifetime even for a mailing list (I |
||||||
2821 | generates 3 or 4 digest files per day or over 1000 per year. | ||||||
2822 | |||||||
2823 | =item 7 Format for Numbering Individual Messages in Output | ||||||
2824 | |||||||
2825 | For each digest you must choose how to format the number which the digest | ||||||
2826 | assigns to its individual messages. Experience suggests that 2 digits should | ||||||
2827 | be more than sufficient to format this number, as all digests which the author | ||||||
2828 | has observed have fewer than 100 entries. However, below we have arbitrarily | ||||||
2829 | decided to allow for up to 9999 entries in a given digest. As with the digest | ||||||
2830 | number, the formatting is accomplished via the Perl C |
||||||
2831 | The result is stored in a C<%config_out> key which must be called | ||||||
2832 | C |
||||||
2833 | |||||||
2834 | %pw32u_config_out = ( | ||||||
2835 | title => 'Perl-Win32-Users', | ||||||
2836 | dir_digest => "../pw32u", | ||||||
2837 | dir_threads => "../pw32u/Threads", | ||||||
2838 | digests_log => "../pw32u/digests_log.txt", | ||||||
2839 | todays_topics => "../pw32u/todays_topics.txt", | ||||||
2840 | id_format => 'sprintf("%03d",$1) . | ||||||
2841 | \'_\' . sprintf("%04d",$2)', | ||||||
2842 | output_id_format => 'sprintf("%04d",$1)', | ||||||
2843 | ... | ||||||
2844 | ); | ||||||
2845 | |||||||
2846 | %pbml_config_out = ( | ||||||
2847 | title => 'Perl Beginner', | ||||||
2848 | dir_digest => "../pbml", | ||||||
2849 | dir_threads => "../pbml/Threads", | ||||||
2850 | digests_log => "../pbml/digests_log.txt", | ||||||
2851 | todays_topics => "../pbml/todays_topics.txt", | ||||||
2852 | id_format => 'sprintf("%05d",$1)', | ||||||
2853 | output_id_format => 'sprintf("%04d",$1)', | ||||||
2854 | ... | ||||||
2855 | ); | ||||||
2856 | |||||||
2857 | =item 8 Thread Message Delimiter | ||||||
2858 | |||||||
2859 | For each digest you must compose a string which will separate one message in | ||||||
2860 | a threads file from its successor. This string must be double-quoted and | ||||||
2861 | assigned to C<%config_out> key C |
||||||
2862 | string should terminate in two or more C<\n\n> newline characters so that the | ||||||
2863 | delimiter is always a paragraph unto itself. | ||||||
2864 | |||||||
2865 | This delimiter may -- or may not -- be the same string which the mailing list | ||||||
2866 | provider uses to separate messages in the digest files themselves. In other | ||||||
2867 | words, you may choose to use the same string for C |
||||||
2868 | C<%config_out> as you reported the list provider used in C<%config_in> key | ||||||
2869 | C |
||||||
2870 | |||||||
2871 | In the example below we make the C |
||||||
2872 | Perl-Win32-Users to be the same as its C |
||||||
2873 | |||||||
2874 | %pw32u_config_out = ( | ||||||
2875 | title => 'Perl-Win32-Users', | ||||||
2876 | dir_digest => "../pw32u", | ||||||
2877 | dir_threads => "../pw32u/Threads", | ||||||
2878 | digests_log => "../pw32u/digests_log.txt", | ||||||
2879 | todays_topics => "../pw32u/todays_topics.txt", | ||||||
2880 | id_format => 'sprintf("%03d",$1) . | ||||||
2881 | \'_\' . sprintf("%04d",$2)', | ||||||
2882 | output_id_format => 'sprintf("%04d",$1)', | ||||||
2883 | thread_msg_delimiter => "--__--__--\n\n", | ||||||
2884 | ... | ||||||
2885 | ); | ||||||
2886 | |||||||
2887 | Note: In light of the earlier discussion of the changes ActiveState made | ||||||
2888 | to its mailing list digests in early 2004, the reader is cautioned that the | ||||||
2889 | code above should not be directly 'copied-and-pasted' into a configuration | ||||||
2890 | hash with which you might follow an ActiveState mailing list. Treat it as | ||||||
2891 | educational. In particular, the author is now testing the following as a | ||||||
2892 | setting for C<$pw32u_config_out{'thread_msg_delimiter'}>: | ||||||
2893 | |||||||
2894 | "\n--__--__--\n\n", | ||||||
2895 | |||||||
2896 | For threads generated by appling Mail::Digest::Tools to the Perl | ||||||
2897 | Beginners list, we choose an output message delimiter which differs from the | ||||||
2898 | source message delimiter. | ||||||
2899 | |||||||
2900 | %pbml_config_out = ( | ||||||
2901 | title => 'Perl Beginner', | ||||||
2902 | dir_digest => "../pbml", | ||||||
2903 | dir_threads => "../pbml/Threads", | ||||||
2904 | digests_log => "../pbml/digests_log.txt", | ||||||
2905 | todays_topics => "../pbml/todays_topics.txt", | ||||||
2906 | id_format => 'sprintf("%05d",$1)', | ||||||
2907 | output_id_format => 'sprintf("%04d",$1)', | ||||||
2908 | thread_msg_delimiter => "_*_*_*_*_*_\n_*_*_*_*_*_\n\n\n", | ||||||
2909 | ... | ||||||
2910 | ); | ||||||
2911 | |||||||
2912 | Whatever choice you make for the C |
||||||
2913 | unlikely to occur within the text of a message and should terminate in two or | ||||||
2914 | more newlines. | ||||||
2915 | |||||||
2916 | =item 9 Archive or Delete Threads? | ||||||
2917 | |||||||
2918 | For each digest you process with Mail::Digest::Tools, you must decide whether | ||||||
2919 | to retain the resulting thread files in an archive them in a separate | ||||||
2920 | directory after a specified period of time, to delete them from disk | ||||||
2921 | after a specified period of time, or to do neither and allow them to | ||||||
2922 | accumulate indefinitely in the threads directory. Your decision is represented | ||||||
2923 | as the value of C<%config_out> key |
||||||
2924 | be expressed as one of three numerical values: | ||||||
2925 | |||||||
2926 | 0 Thread files are neither archived nor deleted | ||||||
2927 | |||||||
2928 | 1 Thread files are archived in a separate directory (or directories) | ||||||
2929 | after the number of days specified by key 'archive_kill_days' | ||||||
2930 | (see below) | ||||||
2931 | |||||||
2932 | -1 Thread files are deleted after I |
||||||
2933 | 'archive_kill_days' | ||||||
2934 | |||||||
2935 | In the examples below we have chosen to archive all threads generated by the | ||||||
2936 | Perl-Win32-Users list but to kill all threads generated by the Perl Beginner | ||||||
2937 | list after a number of days whose specification we shall come to shortly. | ||||||
2938 | |||||||
2939 | %pw32u_config_out = ( | ||||||
2940 | title => 'Perl-Win32-Users', | ||||||
2941 | dir_digest => "../pw32u", | ||||||
2942 | dir_threads => "../pw32u/Threads", | ||||||
2943 | digests_log => "../pw32u/digests_log.txt", | ||||||
2944 | todays_topics => "../pw32u/todays_topics.txt", | ||||||
2945 | id_format => 'sprintf("%03d",$1) . \'_\' . | ||||||
2946 | sprintf("%04d",$2)', | ||||||
2947 | output_id_format => 'sprintf("%04d",$1)', | ||||||
2948 | thread_msg_delimiter => "--__--__--\n\n", | ||||||
2949 | archive_kill_trigger => 1, | ||||||
2950 | ... | ||||||
2951 | ); | ||||||
2952 | |||||||
2953 | %pbml_config_out = ( | ||||||
2954 | title => 'Perl Beginner', | ||||||
2955 | dir_digest => "../pbml", | ||||||
2956 | dir_threads => "../pbml/Threads", | ||||||
2957 | digests_log => "../pbml/digests_log.txt", | ||||||
2958 | todays_topics => "../pbml/todays_topics.txt", | ||||||
2959 | id_format => 'sprintf("%05d",$1)', | ||||||
2960 | output_id_format => 'sprintf("%04d",$1)', | ||||||
2961 | thread_msg_delimiter => "_*_*_*_*_*_\n_*_*_*_*_*_\n\n\n", | ||||||
2962 | archive_kill_trigger => -1, | ||||||
2963 | ... | ||||||
2964 | ); | ||||||
2965 | |||||||
2966 | =back | ||||||
2967 | |||||||
2968 | This completes the 9 required keys for C<%config_out>. We now turn to keys | ||||||
2969 | which are either optional or which are required if you have assigned a value | ||||||
2970 | of C<1> or C<-1> to key C |
||||||
2971 | |||||||
2972 | =head2 Optional C<%config_out> Keys | ||||||
2973 | |||||||
2974 | =over 4 | ||||||
2975 | |||||||
2976 | =item * Digests Read File | ||||||
2977 | |||||||
2978 | As an option, Mail::Digest::Tools offers file to log which instances of a | ||||||
2979 | particular digest have previously been processed which is more | ||||||
2980 | human-readable than the file named in C<%config_out> key C |
||||||
2981 | That file logs a digest as follows: | ||||||
2982 | |||||||
2983 | 001_9999;Fri Feb 6 18:57:41 2004;Fri Feb 6 18:57:41 2004 | ||||||
2984 | |||||||
2985 | It is probably easier to read this data like this: | ||||||
2986 | |||||||
2987 | 09999: | ||||||
2988 | first processed at Fri Feb 6 18:57:41 2004 | ||||||
2989 | most recently processed at Fri Feb 6 18:57:41 2004 | ||||||
2990 | |||||||
2991 | To choose this option you need to set I |
||||||
2992 | |||||||
2993 | =over 4 | ||||||
2994 | |||||||
2995 | =item 1 C |
||||||
2996 | |||||||
2997 | This must be assigned a true value such as C<1>. This tells | ||||||
2998 | Mail::Digest::Tools that you indeed want a 'digests read' file. | ||||||
2999 | |||||||
3000 | =item 2 C |
||||||
3001 | |||||||
3002 | This should be assigned the name of the 'digests read' file, but it will | ||||||
3003 | default to a file F |
||||||
3004 | C |
||||||
3005 | |||||||
3006 | =back | ||||||
3007 | |||||||
3008 | Adding these keys to our C<%config_out>, we get: | ||||||
3009 | |||||||
3010 | %pw32u_config_out = ( | ||||||
3011 | title => 'Perl-Win32-Users', | ||||||
3012 | dir_digest => "../pw32u", | ||||||
3013 | dir_threads => "../pw32u/Threads", | ||||||
3014 | digests_log => "../pw32u/digests_log.txt", | ||||||
3015 | todays_topics => "../pw32u/todays_topics.txt", | ||||||
3016 | id_format => 'sprintf("%03d",$1) . \'_\' . | ||||||
3017 | sprintf("%04d",$2)', | ||||||
3018 | output_id_format => 'sprintf("%04d",$1)', | ||||||
3019 | thread_msg_delimiter => "--__--__--\n\n", | ||||||
3020 | archive_kill_trigger => 1, | ||||||
3021 | digests_read_flag => 1, | ||||||
3022 | digests_read => "../pw32u/digests_read.txt", | ||||||
3023 | ... | ||||||
3024 | ); | ||||||
3025 | |||||||
3026 | %pbml_config_out = ( | ||||||
3027 | title => 'Perl Beginner', | ||||||
3028 | dir_digest => "../pbml", | ||||||
3029 | dir_threads => "../pbml/Threads", | ||||||
3030 | digests_log => "../pbml/digests_log.txt", | ||||||
3031 | todays_topics => "../pbml/todays_topics.txt", | ||||||
3032 | id_format => 'sprintf("%05d",$1)', | ||||||
3033 | output_id_format => 'sprintf("%04d",$1)', | ||||||
3034 | thread_msg_delimiter => "_*_*_*_*_*_\n_*_*_*_*_*_\n\n\n", | ||||||
3035 | archive_kill_trigger => -1, | ||||||
3036 | digests_read_flag => 1, | ||||||
3037 | digests_read => "../pbml/digests_read.txt", | ||||||
3038 | ... | ||||||
3039 | ); | ||||||
3040 | |||||||
3041 | =item * Keys Needed When Archiving Thread Files | ||||||
3042 | |||||||
3043 | If, as discussed above, you have assigned the value C<1> to the | ||||||
3044 | C< |
||||||
3045 | will archive older thread files, I |
||||||
3046 | directory specified in key C |
||||||
3047 | thread file has not been modified in a specified number of days. If new | ||||||
3048 | messages need to be posted to a thread file which has been archived, that | ||||||
3049 | file will be de-archived and brought back to the C |
||||||
3050 | Thread files which are either archived or de-archived via a call to | ||||||
3051 | C |
||||||
3052 | appropriately named files. | ||||||
3053 | |||||||
3054 | Hence, the keys you will need to define when archiving thread files are: | ||||||
3055 | |||||||
3056 | =over 4 | ||||||
3057 | |||||||
3058 | =item 1 C |
||||||
3059 | |||||||
3060 | This key must be assigned the number of days after which a thread file sitting | ||||||
3061 | in the C |
||||||
3062 | specified, will default to 14 days. | ||||||
3063 | |||||||
3064 | =item 2 C |
||||||
3065 | |||||||
3066 | This key must be assigned the name of the I |
||||||
3067 | the directory at the top of a tree of archive directories. | ||||||
3068 | |||||||
3069 | When you track a particular mailing list digest for a number of years, the | ||||||
3070 | number of different thread files can grow to enormous proportions. For | ||||||
3071 | example, the author has tracked over 10,000 distinct thread files from the | ||||||
3072 | Perl-Win32-Users list over a three-and-a-half year period. 10,000 files in a | ||||||
3073 | single directory is completely unwieldy and slows directory read-times | ||||||
3074 | tremendously. Mail::Digest::Tools therefore by default provides a tree of | ||||||
3075 | archive directories: a top directory which contains no thread files but | ||||||
3076 | instead holds 27 subdirectories , one for each letter of the English alphabet | ||||||
3077 | and one for thread files which start with any other character (guaranteed to | ||||||
3078 | work with ASCII only; not tested with other character sets). | ||||||
3079 | |||||||
3080 | dir_archive_top | ||||||
3081 | a | ||||||
3082 | b | ||||||
3083 | c | ||||||
3084 | ... | ||||||
3085 | z | ||||||
3086 | other | ||||||
3087 | |||||||
3088 | The user gets to choose where to place the top archive directory but the 27 | ||||||
3089 | subdirectories are automatically placed beneath that one. The top archive | ||||||
3090 | directory is the value assigned to C<%config_out> key C |
||||||
3091 | |||||||
3092 | =item 3 C |
||||||
3093 | |||||||
3094 | This key should be assigned the name of a file which will log any and all | ||||||
3095 | files archived by a single call to C |
||||||
3096 | C |
||||||
3097 | an ongoing log; it only shows what happened today.) If not assigned a value, | ||||||
3098 | it will default to a file called F |
||||||
3099 | directory named by key C |
||||||
3100 | |||||||
3101 | =item 4 C |
||||||
3102 | |||||||
3103 | This key should be assigned the name of a file which will log any and all | ||||||
3104 | files de-archived by a single call to C |
||||||
3105 | C |
||||||
3106 | an ongoing log; it only shows what happened today.) If not assigned a value, | ||||||
3107 | it will default to a file called F |
||||||
3108 | directory named by key C |
||||||
3109 | |||||||
3110 | =item 5 C |
||||||
3111 | |||||||
3112 | This key is reserved for future use. In the current version of | ||||||
3113 | Mail::Digest::Tools it does not need to be set, but, should you be obsessive | ||||||
3114 | about this, set it to C<0>. | ||||||
3115 | |||||||
3116 | =back | ||||||
3117 | |||||||
3118 | Adding these keys to our sample C<%config_out> hashes, we get: | ||||||
3119 | |||||||
3120 | %pw32u_config_out = ( | ||||||
3121 | title => 'Perl-Win32-Users', | ||||||
3122 | dir_digest => "../pw32u", | ||||||
3123 | dir_threads => "../pw32u/Threads", | ||||||
3124 | digests_log => "../pw32u/digests_log.txt", | ||||||
3125 | todays_topics => "../pw32u/todays_topics.txt", | ||||||
3126 | id_format => 'sprintf("%03d",$1) . \'_\' . | ||||||
3127 | sprintf("%04d",$2)', | ||||||
3128 | output_id_format => 'sprintf("%04d",$1)', | ||||||
3129 | thread_msg_delimiter => "--__--__--\n\n", | ||||||
3130 | archive_kill_trigger => 1, | ||||||
3131 | digests_read_flag => 1, | ||||||
3132 | digests_read => "../pw32u/digests_read.txt", | ||||||
3133 | archive_kill_days => 14, | ||||||
3134 | dir_archive_top => "../pw32u/Threads/archive", | ||||||
3135 | archived_today => "../pw32u/archived_today.txt", | ||||||
3136 | de_archived_today => "../pw32u/de_archived_today.txt", | ||||||
3137 | ... | ||||||
3138 | ); | ||||||
3139 | |||||||
3140 | %pbml_config_out = ( | ||||||
3141 | title => 'Perl Beginner', | ||||||
3142 | dir_digest => "../pbml", | ||||||
3143 | dir_threads => "../pbml/Threads", | ||||||
3144 | digests_log => "../pbml/digests_log.txt", | ||||||
3145 | todays_topics => "../pbml/todays_topics.txt", | ||||||
3146 | id_format => 'sprintf("%05d",$1)', | ||||||
3147 | output_id_format => 'sprintf("%04d",$1)', | ||||||
3148 | thread_msg_delimiter => "_*_*_*_*_*_\n_*_*_*_*_*_\n\n\n", | ||||||
3149 | archive_kill_trigger => -1, | ||||||
3150 | digests_read_flag => 1, | ||||||
3151 | digests_read => "../pbml/digests_read.txt", | ||||||
3152 | ... | ||||||
3153 | ); | ||||||
3154 | |||||||
3155 | Note that since in our example we chose I |
||||||
3156 | the Perl Beginner list -- as evinced by the assignment of C<-1> to key | ||||||
3157 | C |
||||||
3158 | C |
||||||
3159 | C<%pbml_config_out>. | ||||||
3160 | |||||||
3161 | =item * Keys Needed When Deleting Thread Files | ||||||
3162 | |||||||
3163 | The keys needed for C<%config_out> when you have chosen to delete thread | ||||||
3164 | files after a specified interval parallel those you would have needed if you | ||||||
3165 | had chosen to archive those files instead. | ||||||
3166 | |||||||
3167 | =over 4 | ||||||
3168 | |||||||
3169 | =item 1 C |
||||||
3170 | |||||||
3171 | This key must be assigned the number of days after which a thread file sitting | ||||||
3172 | in the C |
||||||
3173 | to 14 days. | ||||||
3174 | |||||||
3175 | =item 2 C |
||||||
3176 | |||||||
3177 | This key should be assigned the name of a file which will log any and all | ||||||
3178 | files deleted by a single call to C |
||||||
3179 | C |
||||||
3180 | an ongoing log; it only shows what happened today.) If not assigned a value, | ||||||
3181 | it will default to a file called F |
||||||
3182 | directory named by key C |
||||||
3183 | |||||||
3184 | =back | ||||||
3185 | |||||||
3186 | Adding these keys to our sample C<%config_out> hashes, we get: | ||||||
3187 | |||||||
3188 | %pw32u_config_out = ( | ||||||
3189 | title => 'Perl-Win32-Users', | ||||||
3190 | dir_digest => "../pw32u", | ||||||
3191 | dir_threads => "../pw32u/Threads", | ||||||
3192 | digests_log => "../pw32u/digests_log.txt", | ||||||
3193 | todays_topics => "../pw32u/todays_topics.txt", | ||||||
3194 | id_format => 'sprintf("%03d",$1) . \'_\' . | ||||||
3195 | sprintf("%04d",$2)', | ||||||
3196 | output_id_format => 'sprintf("%04d",$1)', | ||||||
3197 | thread_msg_delimiter => "--__--__--\n\n", | ||||||
3198 | archive_kill_trigger => 1, | ||||||
3199 | digests_read_flag => 1, | ||||||
3200 | digests_read => "../pw32u/digests_read.txt", | ||||||
3201 | archive_kill_days => 14, | ||||||
3202 | dir_archive_top => "../pw32u/Threads/archive", | ||||||
3203 | archived_today => "../pw32u/archived_today.txt", | ||||||
3204 | de_archived_today => "../pw32u/de_archived_today.txt", | ||||||
3205 | ... | ||||||
3206 | ); | ||||||
3207 | |||||||
3208 | %pbml_config_out = ( | ||||||
3209 | title => 'Perl Beginner', | ||||||
3210 | dir_digest => "../pbml", | ||||||
3211 | dir_threads => "../pbml/Threads", | ||||||
3212 | digests_log => "../pbml/digests_log.txt", | ||||||
3213 | todays_topics => "../pbml/todays_topics.txt", | ||||||
3214 | id_format => 'sprintf("%05d",$1)', | ||||||
3215 | output_id_format => 'sprintf("%04d",$1)', | ||||||
3216 | thread_msg_delimiter => "_*_*_*_*_*_\n_*_*_*_*_*_\n\n\n", | ||||||
3217 | archive_kill_trigger => -1, | ||||||
3218 | digests_read_flag => 1, | ||||||
3219 | digests_read => "../pbml/digests_read.txt", | ||||||
3220 | archive_kill_days => 14, | ||||||
3221 | deleted_today => "../pbml/deleted_today.txt", | ||||||
3222 | ... | ||||||
3223 | ); | ||||||
3224 | |||||||
3225 | Note that since in our example we chose to archive thread files from | ||||||
3226 | the Perl-Win32-Users list -- as evinced by the assignment of C<1> to key | ||||||
3227 | C |
||||||
3228 | C |
||||||
3229 | |||||||
3230 | =item * Keys Needed When Stripping Multipart MIME Content from Thread Files | ||||||
3231 | |||||||
3232 | Recall from above that you had to study a given digest to determine whether or | ||||||
3233 | not it contained multipart MIME content in need of stripping out. If a digest, | ||||||
3234 | such as the ActiveState Perl-Win32-Users digest, contained a lot of such bloat, | ||||||
3235 | you set key C |
||||||
3236 | the other hand, the mailing list provider stripped out the multipart MIME | ||||||
3237 | content before distributing the digest, you set that key to a value of C<0>. | ||||||
3238 | |||||||
3239 | Mail::Digest::Tools will automatically strip out multipart MIME content once | ||||||
3240 | you have set C |
||||||
3241 | is: Do I want to view a log of which messages processed in a I |
||||||
3242 | C |
||||||
3243 | content stripped out -- or not? If so, you must set two keys in | ||||||
3244 | C<%config_out>: | ||||||
3245 | |||||||
3246 | =over 4 | ||||||
3247 | |||||||
3248 | =item 1 C |
||||||
3249 | |||||||
3250 | This key must be set to a true value such as C<1>. | ||||||
3251 | |||||||
3252 | =item 2 C |
||||||
3253 | |||||||
3254 | This key should be assigned the name of the 'mimelog' file, but if you do not | ||||||
3255 | specify a value it will default to a file F |
||||||
3256 | directory named by key C |
||||||
3257 | |||||||
3258 | =back | ||||||
3259 | |||||||
3260 | The logfile so created looks like this: | ||||||
3261 | |||||||
3262 | Processed Problem | ||||||
3263 | |||||||
3264 | 001_1775_0003 CASE C | ||||||
3265 | 001_1775_0015 CASE C | ||||||
3266 | 001_1775_0018 CASE C | ||||||
3267 | 001_1775_0021 CASE E | ||||||
3268 | |||||||
3269 | where items in the 'Processed' column were either (a) successfully stripped of | ||||||
3270 | multipart MIME content by Mail::Digest::Tools as specified by the internal rule | ||||||
3271 | denoted by the 'CASE'; or (b) were recognized by Mail::Digest::Tools as | ||||||
3272 | containing multipart MIME content that could not be stripped out. | ||||||
3273 | |||||||
3274 | This is relatively esoteric and probably of interest mainly to the module's | ||||||
3275 | developer. So if you are not interested in this feature set | ||||||
3276 | C |
||||||
3277 | Mail::Digest::Tools will still do its best to strip out extraneous multipart | ||||||
3278 | MIME content. | ||||||
3279 | |||||||
3280 | Our sample C<%config_out> hashes are now complete. They look like this: | ||||||
3281 | |||||||
3282 | %pw32u_config_out = ( | ||||||
3283 | title => 'Perl-Win32-Users', | ||||||
3284 | dir_digest => "../pw32u", | ||||||
3285 | dir_threads => "../pw32u/Threads", | ||||||
3286 | digests_log => "../pw32u/digests_log.txt", | ||||||
3287 | todays_topics => "../pw32u/todays_topics.txt", | ||||||
3288 | id_format => 'sprintf("%03d",$1) . \'_\' . | ||||||
3289 | sprintf("%04d",$2)', | ||||||
3290 | output_id_format => 'sprintf("%04d",$1)', | ||||||
3291 | thread_msg_delimiter => "--__--__--\n\n", | ||||||
3292 | archive_kill_trigger => 1, | ||||||
3293 | digests_read_flag => 1, | ||||||
3294 | digests_read => "../pw32u/digests_read.txt", | ||||||
3295 | archive_kill_days => 14, | ||||||
3296 | dir_archive_top => "../pw32u/Threads/archive", | ||||||
3297 | archived_today => "../pw32u/archived_today.txt", | ||||||
3298 | de_archived_today => "../pw32u/de_archived_today.txt", | ||||||
3299 | mimelog => "../pw32u/mimelog.txt", | ||||||
3300 | MIME_cleanup_log_flag => 1, | ||||||
3301 | ); | ||||||
3302 | |||||||
3303 | %pbml_config_out = ( | ||||||
3304 | title => 'Perl Beginner', | ||||||
3305 | dir_digest => "../pbml", | ||||||
3306 | dir_threads => "../pbml/Threads", | ||||||
3307 | digests_log => "../pbml/digests_log.txt", | ||||||
3308 | todays_topics => "../pbml/todays_topics.txt", | ||||||
3309 | id_format => 'sprintf("%05d",$1)', | ||||||
3310 | output_id_format => 'sprintf("%04d",$1)', | ||||||
3311 | thread_msg_delimiter => "_*_*_*_*_*_\n_*_*_*_*_*_\n\n\n", | ||||||
3312 | archive_kill_trigger => -1, | ||||||
3313 | digests_read_flag => 1, | ||||||
3314 | digests_read => "../pbml/digests_read.txt", | ||||||
3315 | archive_kill_days => 14, | ||||||
3316 | deleted_today => "../pbml/deleted_today.txt", | ||||||
3317 | ); | ||||||
3318 | |||||||
3319 | Note that C<%pbml_config_out> does not have C |
||||||
3320 | C |
||||||
3321 | Beginners mailing list Yahoo! Groups strips out unnecessary multipart | ||||||
3322 | MIME content before sending the digest to you. | ||||||
3323 | |||||||
3324 | =back | ||||||
3325 | |||||||
3326 | =head1 HELPFUL HINTS | ||||||
3327 | |||||||
3328 | ... in which the module author shares what he has learned using | ||||||
3329 | Mail::Digest::Tools and its predecessors since August 2000. | ||||||
3330 | |||||||
3331 | =head2 Initial Configuration and Testing | ||||||
3332 | |||||||
3333 | As mentioned above, if you are considering creating a local archive of threads | ||||||
3334 | originating in daily digest versions of a mailing list, you should first | ||||||
3335 | accumulate 6-10 instances of such digests and both: | ||||||
3336 | |||||||
3337 | =over 4 | ||||||
3338 | |||||||
3339 | =item 1 | ||||||
3340 | |||||||
3341 | study the internal structure of the digest -- needed to develop a | ||||||
3342 | C<%config_in> for the digest; and | ||||||
3343 | |||||||
3344 | =item 2 | ||||||
3345 | |||||||
3346 | carefully consider how you wish to structure the output from the module's | ||||||
3347 | use on your system -- needed to develop C<%config_out> for the digest | ||||||
3348 | |||||||
3349 | =back | ||||||
3350 | |||||||
3351 | Once you have developed the initial configuration, you should call | ||||||
3352 | C |
||||||
3353 | if the results are what you want. If they are I |
||||||
3354 | need to think about what you should change in C<%config_in> and/or | ||||||
3355 | C<%config_out>. Make those changes, then call C |
||||||
3356 | again. Repeat as needed, making sure not to delete any of the digest files | ||||||
3357 | you are using as sources until you are completely satisfied with your | ||||||
3358 | configuration. | ||||||
3359 | |||||||
3360 | Once, however, you I |
||||||
3361 | C |
||||||
3362 | C |
||||||
3363 | regenerate threads containing messages from digests you have deleted over | ||||||
3364 | time). | ||||||
3365 | |||||||
3366 | =head2 Where to Store the Configuration Hashes | ||||||
3367 | |||||||
3368 | As mentioned above, you will probably find it convenient to write separate | ||||||
3369 | Perl scripts to call each one of Mail::Digest::Tool's public functions. You | ||||||
3370 | could code C<%config_in> and C<%config_out> in each of those scripts just | ||||||
3371 | before the respective function calls. But that would violate the principle of | ||||||
3372 | 'Repeated Code Is a Mistake' and multiply maintenance problems. It's far | ||||||
3373 | better to code the two configuration hashes in a separate plain-text file and | ||||||
3374 | 'require' that file into your script. That way, any changes you make in the | ||||||
3375 | configuration will be automatically picked up by each script that calls a | ||||||
3376 | Mail::Digest::Tools function. | ||||||
3377 | |||||||
3378 | Here is an example of such a file holding the configuration hashes governing | ||||||
3379 | use of the Perl-Win32-Users digest, along with a script making use of that file. | ||||||
3380 | |||||||
3381 | # file: pw32u.digest.data | ||||||
3382 | $topdir = "E:/Digest/pw32u"; | ||||||
3383 | %config_in = ( | ||||||
3384 | grep_formula => 'Perl-Win32-Users digest', | ||||||
3385 | pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt', | ||||||
3386 | # next element's value must be double-quoted | ||||||
3387 | source_msg_delimiter => "--__--__--\n\n", | ||||||
3388 | topics_intro => 'Today\'s Topics:', | ||||||
3389 | message_style_flag => '^Message:\s+(\d+)$', | ||||||
3390 | from_style_flag => '^From:\s+(.+)$', | ||||||
3391 | org_style_flag => '^Organization:\s+(.+)$', | ||||||
3392 | to_style_flag => '^To:\s+(.+)$', | ||||||
3393 | cc_style_flag => '^CC:\s+(.+)$', | ||||||
3394 | subject_style_flag => '^Subject:\s+(.+)$', | ||||||
3395 | date_style_flag => '^Date:\s+(.+)$', | ||||||
3396 | reply_to_style_flag => '^Reply-To:\s+(.+)$', | ||||||
3397 | MIME_cleanup_flag => 1, | ||||||
3398 | ); | ||||||
3399 | |||||||
3400 | %config_out = ( | ||||||
3401 | title => 'Perl-Win32-Users', | ||||||
3402 | dir_digest => $topdir, | ||||||
3403 | dir_threads => "$topdir/Threads", | ||||||
3404 | dir_archive_top => "$topdir/Threads/archive", | ||||||
3405 | archived_today => "$topdir/archived_today.txt", | ||||||
3406 | de_archived_today => "$topdir/de_archived_today.txt", | ||||||
3407 | deleted_today => "$topdir/deleted_today.txt", | ||||||
3408 | digests_log => "$topdir/digests_log.txt", | ||||||
3409 | digests_read => "$topdir/digests_read.txt", | ||||||
3410 | todays_topics => "$topdir/todays_topics.txt", | ||||||
3411 | mimelog => "$topdir/mimelog.txt", | ||||||
3412 | id_format => 'sprintf("%03d",$1) . \'_\' . | ||||||
3413 | sprintf("%04d",$2)', | ||||||
3414 | output_id_format => 'sprintf("%04d",$1)', | ||||||
3415 | MIME_cleanup_log_flag => 1, | ||||||
3416 | # next element's value must be double-quoted | ||||||
3417 | thread_msg_delimiter => "--__--__--\n\n", | ||||||
3418 | archive_kill_trigger => 1, | ||||||
3419 | archive_kill_days => 14, | ||||||
3420 | digests_read_flag => 1, | ||||||
3421 | archive_config => 0, | ||||||
3422 | ); | ||||||
3423 | |||||||
3424 | # script: dig.pl | ||||||
3425 | # USAGE: perl dig.pl | ||||||
3426 | #!/usr/bin/perl | ||||||
3427 | use strict; | ||||||
3428 | use warnings; | ||||||
3429 | use Mail::Digest::Tools qw( process_new_digests ); | ||||||
3430 | |||||||
3431 | our (%config_in, %config_out); | ||||||
3432 | my $data_file = 'pw32u.digest.data'; | ||||||
3433 | require $data_file; | ||||||
3434 | |||||||
3435 | process_new_digests(\%config_in, \%config_out); | ||||||
3436 | |||||||
3437 | print "\nFinished\n"; | ||||||
3438 | |||||||
3439 | =head2 Maintaining Local Archives of More than One Digest | ||||||
3440 | |||||||
3441 | The module author has maintained local archives of more than a half dozen | ||||||
3442 | different mailing list digests over the past several years. He has found it | ||||||
3443 | convenient to maintain the configuration information for I |
||||||
3444 | he is following at a given time in a I |
||||||
3445 | advantage to this approach is that if two digests share a similar internal | ||||||
3446 | structure (perhaps due to being generated by the same mailing list program or | ||||||
3447 | list provider) and if the user chooses to structure the output from the two | ||||||
3448 | digests in similar or identical ways, then getting the configuration hashes | ||||||
3449 | becomes much easier and the potential for error is reduced. | ||||||
3450 | |||||||
3451 | Here is a sample directory and file structure for maintaining archives of | ||||||
3452 | two different digests on a Win32 system: | ||||||
3453 | |||||||
3454 | digest/ | ||||||
3455 | digest.data | ||||||
3456 | process_new.pl | ||||||
3457 | process_ALL.pl | ||||||
3458 | reply_digest_message.pl | ||||||
3459 | repair_digest_order.pl | ||||||
3460 | consolidate_threads.pl | ||||||
3461 | deletables.pl | ||||||
3462 | pw32u/ | ||||||
3463 | Perl-Win32-Users Digest, Vol 1 Issue 1771.txt | ||||||
3464 | Perl-Win32-Users Digest, Vol 1 Issue 1772.txt | ||||||
3465 | digest_log.txt | ||||||
3466 | digest_read.txt | ||||||
3467 | mimelog.txt | ||||||
3468 | Threads/ | ||||||
3469 | pbml/ | ||||||
3470 | [PBML] Digest Number 1491.txt | ||||||
3471 | [PBML] Digest Number 1492.txt | ||||||
3472 | digest_log.txt | ||||||
3473 | Threads/ | ||||||
3474 | |||||||
3475 | File F |
||||||
3476 | |||||||
3477 | # digest.data | ||||||
3478 | $topdir = "E:/Digest"; | ||||||
3479 | %digest_structure = ( | ||||||
3480 | pbml => { | ||||||
3481 | grep_formula => '\[PBML\]', | ||||||
3482 | pattern_target => '.*\s(\d+)\.txt$', | ||||||
3483 | ... | ||||||
3484 | }, | ||||||
3485 | pw32u => { | ||||||
3486 | grep_formula => 'Perl-Win32-Users digest', | ||||||
3487 | pattern_target => '.*Vol\s(\d+),\sIssue\s(\d+)\.txt', | ||||||
3488 | ... | ||||||
3489 | }, | ||||||
3490 | ); | ||||||
3491 | %digest_output_format = ( | ||||||
3492 | pbml => { | ||||||
3493 | title => 'Perl Beginner', | ||||||
3494 | dir_digest => "$topdir/pbml", | ||||||
3495 | dir_threads => "$topdir/pbml/Threads", | ||||||
3496 | ... | ||||||
3497 | }, | ||||||
3498 | pw32u => { | ||||||
3499 | title => 'Perl-Win32-Users', | ||||||
3500 | dir_digest => "$topdir/pw32u", | ||||||
3501 | dir_threads => "$topdir/pw32u/Threads", | ||||||
3502 | ... | ||||||
3503 | }, | ||||||
3504 | ); | ||||||
3505 | |||||||
3506 | To accomodate this slightly more complex structure in the configuration file, | ||||||
3507 | the calling script might be modified as follows: | ||||||
3508 | |||||||
3509 | # script: dig.pl | ||||||
3510 | # USAGE: perl dig.pl [short-name for digest] | ||||||
3511 | #!/usr/bin/perl | ||||||
3512 | use Mail::Digest::Tools qw( process_new_digests ); | ||||||
3513 | |||||||
3514 | my ($this_key, %config_in, %config_out); | ||||||
3515 | # variables imported from $data_file | ||||||
3516 | our (%digest_structure, %digest_output_format); | ||||||
3517 | |||||||
3518 | my $data_file = 'digest.data'; | ||||||
3519 | require $data_file; | ||||||
3520 | |||||||
3521 | $this_key = shift @ARGV; | ||||||
3522 | die "\n The command-line argument you typed: $this_key\n does not call an accessible digest$!" | ||||||
3523 | unless (defined $digest_structure{$this_key} | ||||||
3524 | and defined $digest_output_format{$this_key}); | ||||||
3525 | |||||||
3526 | my ($k,$v); | ||||||
3527 | while ( ($k, $v) = each %{$digest_structure{$this_key}} ) { | ||||||
3528 | $config_in{$k} = $v; | ||||||
3529 | } | ||||||
3530 | while ( ($k, $v) = each %{$digest_output_format{$this_key}} ) { | ||||||
3531 | $config_out{$k} = $v; | ||||||
3532 | } | ||||||
3533 | |||||||
3534 | process_new_digests(\%config_in, \%config_out); | ||||||
3535 | |||||||
3536 | print "\nFinished\n"; | ||||||
3537 | |||||||
3538 | =head2 Getting Your Mail to the Right Place on Your System | ||||||
3539 | |||||||
3540 | For several years the module author used the scripts which were predecessors | ||||||
3541 | to Mail::Digest::Tools on a Win32 system where mail was read with Microsoft | ||||||
3542 | Outlook Express. He would do a "File/Save as.." on an instance of a digest, | ||||||
3543 | select text format (*.txt) and save it to an appropriate directory. Later, | ||||||
3544 | the author used the shareware e-mail client Poco, in which the same operation | ||||||
3545 | was accomplished by highlighting a file and keying "Ctrl+S". | ||||||
3546 | |||||||
3547 | But as the number of digests the author was tracking grew, this procedure | ||||||
3548 | became more and more tedious. Fortunately, about that time the author was | ||||||
3549 | assigned to write a review of the second edition of the Perl Cookbook, and he | ||||||
3550 | learned how to use the Net::POP3 module to receive his e-mail directly. So | ||||||
3551 | now he uses a Perl script to get all his digests and save them as text files | ||||||
3552 | to appropriate directories -- and then lets a GUI e-mail client take care of | ||||||
3553 | the rest. | ||||||
3554 | |||||||
3555 | Here is a script which more or less accomplishes this: | ||||||
3556 | |||||||
3557 | # script: get_digests.pl | ||||||
3558 | #!/usr/bin/perl | ||||||
3559 | use strict; | ||||||
3560 | use warnings; | ||||||
3561 | use Net::POP3; | ||||||
3562 | use Term::ReadKey; | ||||||
3563 | |||||||
3564 | my ($site, $username, $password); | ||||||
3565 | my ($verref, $pop3, $messagesref, $undeleted, $msgnum, $message); | ||||||
3566 | my ($k,$v); | ||||||
3567 | my ($oldfh, $output); | ||||||
3568 | |||||||
3569 | my %digests = ( | ||||||
3570 | 'pbml' => "E:/Digest/pbml", | ||||||
3571 | 'pw32u' => "E:/Digest/pw32u", | ||||||
3572 | 'london' => "E:/Digest/london", | ||||||
3573 | ); | ||||||
3574 | |||||||
3575 | $site = 'pop3.someISP.com'; | ||||||
3576 | $username = 'myuserid'; | ||||||
3577 | |||||||
3578 | $pop3 = Net::POP3->new($site) | ||||||
3579 | or die "Couldn't open connection to $site: $!"; | ||||||
3580 | |||||||
3581 | print "Enter password for $username at $site: "; | ||||||
3582 | ReadMode('noecho'); | ||||||
3583 | $password = ReadLine(0); | ||||||
3584 | chomp $password; | ||||||
3585 | ReadMode(0); | ||||||
3586 | print "\n"; | ||||||
3587 | |||||||
3588 | defined ($pop3->login($username, $password)) | ||||||
3589 | or die "Can't authenticate: $!"; | ||||||
3590 | |||||||
3591 | $messagesref = $pop3->list | ||||||
3592 | or die "Can't get list of undeleted messages: $!"; | ||||||
3593 | |||||||
3594 | while ( ($k,$v) = each %$messagesref ) { | ||||||
3595 | my ($messageref, $line, %headers); | ||||||
3596 | print "$k:\t$v\n"; | ||||||
3597 | $messageref = $pop3->top($k); | ||||||
3598 | local $_; | ||||||
3599 | foreach (@$messageref) { | ||||||
3600 | chomp; | ||||||
3601 | last if (/^\s*$/); | ||||||
3602 | next unless (/^\s*(Date:|From:|Subject:|To:)/); | ||||||
3603 | if (/^\s*Date:\s*(.*)/) { | ||||||
3604 | $headers{'Date'} = $1; | ||||||
3605 | } | ||||||
3606 | if (/^\s*From:\s*(.*)/) { | ||||||
3607 | $headers{'From'} = $1; | ||||||
3608 | } | ||||||
3609 | if (/^\s*Subject:\s*(.*)/) { | ||||||
3610 | $headers{'Subject'} = $1; | ||||||
3611 | } | ||||||
3612 | if (/^\s*To:\s*(.*)/) { | ||||||
3613 | $headers{'To'} = $1; | ||||||
3614 | } | ||||||
3615 | } | ||||||
3616 | if ($headers{'Subject'} =~ /^\[PBML\]/) { | ||||||
3617 | get_digest($pop3, $k, 'pbml', $headers{'Subject'}); | ||||||
3618 | } | ||||||
3619 | if ($headers{'Subject'} =~ /^Perl-Win32-Users/) { | ||||||
3620 | get_digest($pop3, $k, 'pw32u', $headers{'Subject'}); | ||||||
3621 | } | ||||||
3622 | if ($headers{'Subject'} =~ /^london\.pm/) { | ||||||
3623 | get_digest($pop3, $k, 'london', $headers{'Subject'}); | ||||||
3624 | } | ||||||
3625 | } | ||||||
3626 | |||||||
3627 | $pop3->quit() or die "Couldn't quit cleanly: $!"; | ||||||
3628 | |||||||
3629 | print "Finished!\n"; | ||||||
3630 | |||||||
3631 | sub get_digest { | ||||||
3632 | my ($pop3, $msgnum, $digest, $subj) = @_; | ||||||
3633 | print "Retrieving $msgnum: $subj"; | ||||||
3634 | my $message = | ||||||
3635 | $pop3->get($msgnum) or die "Couldn't get message $msgnum: $!"; | ||||||
3636 | if ($message) { | ||||||
3637 | print "\n"; | ||||||
3638 | my $digestfile = "$digests{$digest}/$subj.txt"; | ||||||
3639 | _print_message($digestfile, $message); | ||||||
3640 | print "Marking $msgnum for deletion\n";; | ||||||
3641 | $pop3->delete($msgnum) or die "Couldn't delete message $msgnum: $!"; | ||||||
3642 | } else { | ||||||
3643 | print "Failed: $!\n"; | ||||||
3644 | } | ||||||
3645 | } | ||||||
3646 | |||||||
3647 | sub _print_message { | ||||||
3648 | my ($digestfile, $message) = @_; | ||||||
3649 | my @lines = @{$message}; | ||||||
3650 | my $counter = 0; | ||||||
3651 | open(FH, ">$digestfile") | ||||||
3652 | or die "Couldn't open $digestfile for writing: $!"; | ||||||
3653 | for (my $i = 0; $i<=$#lines; $i++) { | ||||||
3654 | chomp($lines[$i]); | ||||||
3655 | # Identify the first blank line in the digest, | ||||||
3656 | # i.e., the end of the headers | ||||||
3657 | if ($lines[$i] =~ /^$/) { | ||||||
3658 | $counter = $i; | ||||||
3659 | last; | ||||||
3660 | } | ||||||
3661 | }; | ||||||
3662 | # Transfer digest to appropriate directory, skipping over digest header | ||||||
3663 | # so as to start just above Today's Topics | ||||||
3664 | foreach my $line (@lines[$counter+1 .. $#lines]) { | ||||||
3665 | chomp($line); | ||||||
3666 | # For some reason the $pop3->get() puts a single whitespace at the | ||||||
3667 | # start of most (all but the first?) lines | ||||||
3668 | # That has to be cleaned up so digest.pl can correctly process | ||||||
3669 | # header info and identify beginning of Today's Topics | ||||||
3670 | if ($line =~ /^\s(.*)/) { | ||||||
3671 | print FH $1, "\n"; | ||||||
3672 | } else { | ||||||
3673 | print FH $line, "\n"; | ||||||
3674 | } | ||||||
3675 | } | ||||||
3676 | close FH or die "Couldn't close after writing: $!"; | ||||||
3677 | } | ||||||
3678 | |||||||
3679 | No promise is made that this script or any script contained in this | ||||||
3680 | documentation will work correctly on your system. Hack it up to get it to | ||||||
3681 | work the way you want it to. | ||||||
3682 | |||||||
3683 | =head1 ASSUMPTIONS AND QUALIFICATIONS | ||||||
3684 | |||||||
3685 | =over 4 | ||||||
3686 | |||||||
3687 | =item 1 No Change in Mailing List Digest Software | ||||||
3688 | |||||||
3689 | The main assumption on which Mail::Digest::Tools depends for its success is | ||||||
3690 | that the provider of a particular digest continues to use the same mailing | ||||||
3691 | list software to produce the digest. If the provider changes his/her software, | ||||||
3692 | you must modify Mail::Digest::Tools' configuration data accordingly. | ||||||
3693 | |||||||
3694 | =item 2 Digest Must Be One E-mail Without Attachments | ||||||
3695 | |||||||
3696 | At its current stage of development Mail::Digest::Tools is only applicable to | ||||||
3697 | mailing list digests which arrive as one continuous file. It is C |
||||||
3698 | applicable to digests (e.g., Cygwin, module-authors@perl.org) which are | ||||||
3699 | supplied in a format consisting of (a) one file with instructions and a table | ||||||
3700 | of contents and (b) all the individual messages provided as e-mail attachments. | ||||||
3701 | |||||||
3702 | =item 3 Perl 5.6+ Only | ||||||
3703 | |||||||
3704 | The program was created with Perl 5.6. Certain features, such as the use of | ||||||
3705 | the C |
||||||
3706 | account for pre-5.6 features are left as an exercise for the user. | ||||||
3707 | |||||||
3708 | =item 4 Time::Local | ||||||
3709 | |||||||
3710 | Mail::Digest::Tools internally uses Perl core extension Time::Local. If at | ||||||
3711 | some future point this module is not included as part of a Perl core | ||||||
3712 | distribution, you would have to install it manually from CPAN. | ||||||
3713 | |||||||
3714 | =back | ||||||
3715 | |||||||
3716 | =head1 HISTORY AND FUTURE DEVELOPMENT | ||||||
3717 | |||||||
3718 | =head2 PRE-CPAN HISTORY | ||||||
3719 | |||||||
3720 | ActiveState maintains Perl for Windows-based platforms and also maintains a | ||||||
3721 | variety of mailing lists for users of its Windows-compatible versions of Perl. | ||||||
3722 | Subscribers to these lists can receive messages either as individual e-mails | ||||||
3723 | or as part of a daily digest which contains a listing of the day's topics and | ||||||
3724 | the complete text of each message. The messages are often best followed as | ||||||
3725 | discussion 'threads' which may extend over several days' worth of digests. | ||||||
3726 | |||||||
3727 | In June of 2000, however, ActiveState had to temporarily take its mailing lists | ||||||
3728 | off-line for technical reasons. When these lists were restored to service, | ||||||
3729 | their archive capacities were not immediately restored. I had just begun my | ||||||
3730 | study of Perl and had come to enjoy reading the Perl-Win32-Users digest. As | ||||||
3731 | I set off for the Yet Another Perl Conference in Pittsburgh, I shouted out, | ||||||
3732 | 'I want my Perl-Win32-Users digest!' I wrote a Perl script called C |
||||||
3733 | to fill that gap. | ||||||
3734 | |||||||
3735 | ActiveState has since restored archiving capacity to their lists. For reasons | ||||||
3736 | that would perhaps best be explored in a psychotherapeutic context, however, I | ||||||
3737 | had become attached to my local archive of the 'pw32u' list, so I continued to | ||||||
3738 | maintain this program and fine-tune its coding. | ||||||
3739 | |||||||
3740 | In early 2001 it became apparent that this program could be applied to a wide | ||||||
3741 | variety of mailing list digests -- not just those provided by ActiveState. In | ||||||
3742 | particular, valuable digests provided by Yahoo Groups (formerly E-groups) such | ||||||
3743 | as NT Emacs Users, Perl 5 Porters and Perl Beginners could also be archived if | ||||||
3744 | C |
||||||
3745 | began to track several other digests. I was able to use the archive I had | ||||||
3746 | developed as a window into one part of the Perl community in a Lightning Talk | ||||||
3747 | I gave at YAPC::North America in Montreal in June 2001, ''An Index of | ||||||
3748 | Incivility in the Perl Community.'' | ||||||
3749 | |||||||
3750 | Maintaining C |
||||||
3751 | Perl. Along the way I incorporated my first profiler into the script -- and | ||||||
3752 | then discarded it. Some of the subroutines I had written for early versions of | ||||||
3753 | the program had applicability to other scripts -- and thus was born my first | ||||||
3754 | module -- also since discarded. By July 2003 I was up to version 1.3. | ||||||
3755 | Following a suggestion by Uri Guttman at the YAPC::EU conference held in Paris | ||||||
3756 | in July 2003, wherever possible the use of separate | ||||||
3757 | print statements for each line to be printed was eliminated in favor of | ||||||
3758 | concatenating strings to be printed into much larger strings which could be | ||||||
3759 | printed all at once. This revision reduced the number of times filehandles | ||||||
3760 | had to be opened for writing. A given thread file was now opened only once | ||||||
3761 | per call of this program, rather than once for each message in each digest | ||||||
3762 | processed per call of the program. | ||||||
3763 | |||||||
3764 | Various other improvements, such as the possibility of stripping out | ||||||
3765 | unnecessary multipart MIME content and the introduction of subdirectories | ||||||
3766 | for archiving, were made in late 2003. At that point I | ||||||
3767 | decided to transform the script into a full-fledged Perl module. At first I | ||||||
3768 | tried out an object-oriented structure (with which I was familiar from my first | ||||||
3769 | two CPAN modules, F |
||||||
3770 | necessitated one constructor and one method call per typical script, but since | ||||||
3771 | the constructor did nothing but some cursory validation of the configuration | ||||||
3772 | data, it was mostly superfluous. Hence, I jettisoned the OO structure in favor | ||||||
3773 | of a functional approach. The result: Mail::Digest::Tools. | ||||||
3774 | |||||||
3775 | =head2 CPAN | ||||||
3776 | |||||||
3777 | After these revisions, I was up to version 1.96. Why revert to a lower | ||||||
3778 | version number at this point? That is why Mail::Digest::Tools makes its CPAN | ||||||
3779 | debut in version 2.04. | ||||||
3780 | |||||||
3781 | v1.97 (2/18/2004): Dealing with problem that Win32 and Unix/Linux may create | ||||||
3782 | different thread names for the same set of source messages because they have | ||||||
3783 | different lists of characters forbidden in file names. This became a problem | ||||||
3784 | while writing tests for C |
||||||
3785 | the names of thread files created via that function more difficult to predict. | ||||||
3786 | Tests adjusted appropriately. | ||||||
3787 | |||||||
3788 | v1.98 (2/19/2004): Eliminated suspect uses of C modifier on regexes. | ||||||
3789 | This was causing problems when I called C |
||||||
3790 | different types of digests in the same script. Also, eliminated code | ||||||
3791 | referring to DOS (I |
||||||
3792 | DOS filenames) as I have no way to test this module on a DOS box. | ||||||
3793 | |||||||
3794 | v1.99 (2/22/2004): ActiveState introduced a new format for its | ||||||
3795 | Perl-Win32-Users digest -- the digest which originally inspired the creation | ||||||
3796 | of this module's predecessor in 2000. One aspect of this new format was a | ||||||
3797 | clear improvement: HTML attachments are now stripped before messages are | ||||||
3798 | posted to the digest, so multipart MIME content has either been reduced | ||||||
3799 | considerably or eliminated altogether. But another aspect of this new | ||||||
3800 | format upset code going back four years: The delimiter immediately | ||||||
3801 | following Today's Topics is now different from the delimiters separating each | ||||||
3802 | message in the digest. Working around this appeared to be surprisingly | ||||||
3803 | difficult, especially since this revision had to be done in the middle of | ||||||
3804 | writing a test suite for CPAN distribution. A new key has been added to the | ||||||
3805 | C<%config_in> hash for each digest: | ||||||
3806 | |||||||
3807 | $config_in{'post_topics_delimiter'} | ||||||
3808 | |||||||
3809 | v2.00 (2/23/2004): Testing conducted after the last revision revealed a bug | ||||||
3810 | going back several versions in the internal subroutine stripping multipart | ||||||
3811 | MIME content. The last paragraph of each message which did I |
||||||
3812 | content was being stripped off. The offending code was found within | ||||||
3813 | C<_analyze_message_body()>. (The author recently learned of the CPAN | ||||||
3814 | module F |
||||||
3815 | the hand-rolled subroutine used within Mail::Digest::Tools, but a full study | ||||||
3816 | of its possibilities will be deferred to a later version. Also in this | ||||||
3817 | version, POD was rewritten to reflect the introduction of the post-topics | ||||||
3818 | delimiter. | ||||||
3819 | |||||||
3820 | v2.01 (2/24/2004): Backslashes (except as part of C<\n> newline characters) | ||||||
3821 | are prohibited in C<%config_out> key C |
||||||
3822 | because in the test suite that key's value is used as a variable inside a | ||||||
3823 | regular expression which in turn is used as an argument to C |
||||||
3824 | Preliminary investigation suggests that to work around the backslash | ||||||
3825 | metacharacter in that situation would be very time-consuming. | ||||||
3826 | |||||||
3827 | v2.02 (2/26/2004): Revised C |
||||||
3828 | subroutine C<_strip_down_for_reply> to reflect distinction between post-topics | ||||||
3829 | delimiter and source message delimiter. | ||||||
3830 | |||||||
3831 | v2.03 (3/04/2004): Fixed bug in C |
||||||
3832 | Extensive reworking of test suite. | ||||||
3833 | |||||||
3834 | v2.04 (3/05/2004): No changes in module. Refinement of test suite only. | ||||||
3835 | |||||||
3836 | v2.05 (3/07/2004): Fixed accidental deletion of incrementation of | ||||||
3837 | C<$message_count> in C<_strip_down()>. | ||||||
3838 | |||||||
3839 | v2.06 (3/10/2004): Correction of errors in test suite. Elimination of use of List::Compare in test suite. | ||||||
3840 | |||||||
3841 | v2.07 (3/11/2004): Correction of error in t/03.t | ||||||
3842 | |||||||
3843 | v2.08 (3/11/2004): Correction in _clean_up_thread_title and in tests. | ||||||
3844 | |||||||
3845 | v2.10 (3/15/2004): Corrections to README and documentation only. | ||||||
3846 | |||||||
3847 | v2.11 (10/23/2004): Fixed several errors which resulted in "Bizarre copy of hash in leave" error when running test suite under Devel::Cover. | ||||||
3848 | |||||||
3849 | v2.12 (05/14/2011): Added 'mirbsd' to list of Unixish-OSes. | ||||||
3850 | |||||||
3851 | =head1 AUTHOR | ||||||
3852 | |||||||
3853 | James E. Keenan (F |
||||||
3854 | |||||||
3855 | Creation date: August 21, 2000. | ||||||
3856 | Last modification date: May 14, 2011. | ||||||
3857 | Copyright (c) 2000-2011 James E. Keenan. United States. All rights reserved. | ||||||
3858 | |||||||
3859 | This software is distributed with absolutely no warranty, express or implied. | ||||||
3860 | Use it at your own risk. This is free software which you may distribute under | ||||||
3861 | the same terms as Perl itself. | ||||||
3862 | |||||||
3863 | =cut | ||||||
3864 | |||||||
3865 | |||||||
3866 | |||||||
3867 |