line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catalyst::Controller::SimpleCAS::Role::TextTranscode; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
665
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
27
|
|
4
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
30
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
3
|
use MooseX::MethodAttributes::Role 0.29; |
|
1
|
|
|
|
|
24
|
|
|
1
|
|
|
|
|
6
|
|
7
|
|
|
|
|
|
|
requires qw(Content fetch_content); # <-- methods of Catalyst::Controller::SimpleCAS |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
32510
|
use Encode; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
75
|
|
10
|
1
|
|
|
1
|
|
620
|
use HTML::Encoding 'encoding_from_html_document', 'encoding_from_byte_order_mark'; |
|
1
|
|
|
|
|
7081
|
|
|
1
|
|
|
|
|
63
|
|
11
|
1
|
|
|
1
|
|
421
|
use HTML::TokeParser::Simple; |
|
1
|
|
|
|
|
9491
|
|
|
1
|
|
|
|
|
23
|
|
12
|
1
|
|
|
1
|
|
5
|
use Try::Tiny; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
48
|
|
13
|
1
|
|
|
1
|
|
4
|
use Email::MIME; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
16
|
|
14
|
1
|
|
|
1
|
|
413
|
use Email::MIME::CreateHTML; |
|
1
|
|
|
|
|
41350
|
|
|
1
|
|
|
|
|
40
|
|
15
|
1
|
|
|
1
|
|
415
|
use Catalyst::Controller::SimpleCAS::CSS::Simple; #<-- hack/workaround CSS::Simple busted on CPAN |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
25
|
|
16
|
1
|
|
|
1
|
|
4
|
use String::Random; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
29
|
|
17
|
1
|
|
|
1
|
|
4
|
use JSON; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
6
|
|
18
|
|
|
|
|
|
|
|
19
|
1
|
|
|
1
|
|
445
|
use Catalyst::Controller::SimpleCAS::MimeUriResolver; |
|
1
|
|
|
|
|
26
|
|
|
1
|
|
|
|
|
156
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# FIXME - This is old and broken - file long gone from RapidApp ... |
22
|
|
|
|
|
|
|
my $ISOLATE_CSS_RULE = ''; #'@import "/static/rapidapp/css/CssIsolation.css";'; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Backend action for Ext.ux.RapidApp.Plugin.HtmlEditor.LoadHtmlFile |
25
|
|
|
|
|
|
|
sub transcode_html :Chained('base') :PathPart('texttranscode/transcode_html') { |
26
|
0
|
|
|
0
|
1
|
0
|
my ($self, $c) = @_; |
27
|
|
|
|
|
|
|
|
28
|
0
|
0
|
|
|
|
0
|
my $upload = $c->req->upload('Filedata') or die "no upload object"; |
29
|
|
|
|
|
|
|
|
30
|
0
|
|
|
|
|
0
|
my $src_text = $self->normaliaze_rich_content($c,$upload,$upload->filename); |
31
|
|
|
|
|
|
|
|
32
|
0
|
|
|
|
|
0
|
my $rct= $c->stash->{requestContentType}; |
33
|
0
|
0
|
0
|
|
|
0
|
if ($rct eq 'JSON' || $rct eq 'text/x-rapidapp-form-response') { |
34
|
0
|
|
|
|
|
0
|
$c->stash->{json}= { success => \1, content => $src_text }; |
35
|
0
|
|
|
|
|
0
|
return $c->forward('View::RapidApp::JSON'); |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# find out what encoding the user wants, defaulting to utf8 |
39
|
0
|
|
0
|
|
|
0
|
my $dest_encoding= ($c->req->params->{dest_encoding} || 'utf-8'); |
40
|
0
|
0
|
|
|
|
0
|
my $out_codec= Encode::find_encoding($dest_encoding) or die "Unsupported encoding: $dest_encoding"; |
41
|
0
|
|
|
|
|
0
|
my $dest_octets= $out_codec->encode($src_text); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# we need to set the charset here so that catalyst doesn't try to convert it further |
44
|
0
|
|
|
|
|
0
|
$c->res->content_type('text/html; charset='.$dest_encoding); |
45
|
0
|
|
|
|
|
0
|
return $c->res->body($dest_octets); |
46
|
1
|
|
|
1
|
|
5
|
} |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
6
|
|
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Backend action for Ext.ux.RapidApp.Plugin.HtmlEditor.SaveMhtml |
49
|
|
|
|
|
|
|
sub generate_mhtml_download :Chained('base') :PathPart('texttranscode/generate_mhtml_download') { |
50
|
0
|
|
|
0
|
1
|
|
my ($self, $c) = @_; |
51
|
0
|
0
|
|
|
|
|
die "No html content supplied" unless ($c->req->params->{html_enc}); |
52
|
0
|
|
|
|
|
|
my $html = decode_json($c->req->params->{html_enc})->{data}; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# 'filename' param is optional and probably not supplied |
55
|
0
|
|
|
|
|
|
$html = $self->normaliaze_rich_content($c,$html,$c->req->params->{filename}); |
56
|
|
|
|
|
|
|
|
57
|
0
|
|
0
|
|
|
|
my $filename = $self->get_strip_orig_filename(\$html) || 'content.mht'; |
58
|
0
|
|
|
|
|
|
$filename =~ s/\"/\'/g; #<-- convert any " characters |
59
|
0
|
|
|
|
|
|
my $disposition = 'attachment;filename="' . $filename . '"'; |
60
|
|
|
|
|
|
|
|
61
|
0
|
|
|
|
|
|
my $MIME = $self->html_to_mhtml($c,$html); |
62
|
|
|
|
|
|
|
|
63
|
0
|
|
|
|
|
|
$c->response->header( $_ => $MIME->header($_) ) for ($MIME->header_names); |
64
|
0
|
|
|
|
|
|
$c->response->header('Content-Disposition' => $disposition); |
65
|
0
|
|
|
|
|
|
return $c->res->body( $MIME->as_string ); |
66
|
1
|
|
|
1
|
|
466
|
} |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
3
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# extracts filename previously embedded by normaliaze_rich_content in html comment |
70
|
|
|
|
|
|
|
sub get_strip_orig_filename { |
71
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
72
|
0
|
|
|
|
|
|
my $htmlref = shift; |
73
|
0
|
0
|
|
|
|
|
return undef unless (ref $htmlref); |
74
|
|
|
|
|
|
|
|
75
|
0
|
|
|
|
|
|
$$htmlref =~ /(\/\*----ORIGINAL_FILENAME:(.+)----\*\/)/; |
76
|
0
|
0
|
|
|
|
|
my $comment = $1 or return undef; |
77
|
0
|
0
|
|
|
|
|
my $filename = $2 or return undef; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# strip comment: |
80
|
0
|
|
|
|
|
|
$$htmlref =~ s/\Q${comment}\E//; |
81
|
|
|
|
|
|
|
|
82
|
0
|
|
|
|
|
|
return $filename; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub html_to_mhtml { |
86
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
87
|
0
|
|
|
|
|
|
my $c = shift; |
88
|
0
|
|
|
|
|
|
my $html = shift; |
89
|
|
|
|
|
|
|
|
90
|
0
|
|
|
|
|
|
my $style = $self->parse_html_get_styles(\$html,1); |
91
|
|
|
|
|
|
|
|
92
|
0
|
0
|
|
|
|
|
if($style) { |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# FIXME - this is broken: |
95
|
|
|
|
|
|
|
# strip isolate css import rule: |
96
|
0
|
|
|
|
|
|
$style =~ s/\Q$ISOLATE_CSS_RULE\E//g; |
97
|
|
|
|
|
|
|
|
98
|
0
|
|
|
|
|
|
my $Css = Catalyst::Controller::SimpleCAS::CSS::Simple->new; |
99
|
0
|
|
|
|
|
|
$Css->read({ css => $style }); |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
#scream_color(BLACK.ON_RED,$Css->get_selectors); |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# undo the cas selector wrap applied during Load Html: |
104
|
0
|
|
|
|
|
|
foreach my $selector ($Css->get_selectors) { |
105
|
0
|
|
|
|
|
|
my $new_selector = $selector; |
106
|
0
|
0
|
|
|
|
|
if($selector =~ /^\#cas\-selector\-wrap\-\w+$/){ |
107
|
0
|
|
|
|
|
|
$new_selector = 'body'; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
else { |
110
|
0
|
|
|
|
|
|
my @parts = split(/\s+/,$selector); |
111
|
0
|
|
|
|
|
|
my $first = shift @parts; |
112
|
0
|
0
|
|
|
|
|
next unless ($first =~ /^\#cas\-selector\-wrap\-/); |
113
|
0
|
|
|
|
|
|
$new_selector = join(' ',@parts); |
114
|
|
|
|
|
|
|
} |
115
|
0
|
|
|
|
|
|
$Css->modify_selector({ |
116
|
|
|
|
|
|
|
selector => $selector, |
117
|
|
|
|
|
|
|
new_selector => $new_selector |
118
|
|
|
|
|
|
|
}); |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
0
|
|
|
|
|
|
$style = $Css->write; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# TODO/FIXME: remove RapidApp/TT dependency/entanglement |
125
|
0
|
|
|
|
|
|
$html = $c->template_render('templates/rapidapp/xhtml_document.tt',{ |
126
|
|
|
|
|
|
|
style => $style, |
127
|
|
|
|
|
|
|
body => $html |
128
|
|
|
|
|
|
|
}); |
129
|
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
my $UriResolver = Catalyst::Controller::SimpleCAS::MimeUriResolver->new({ |
131
|
|
|
|
|
|
|
Cas => $self, |
132
|
|
|
|
|
|
|
base => '' |
133
|
|
|
|
|
|
|
}); |
134
|
|
|
|
|
|
|
|
135
|
0
|
|
|
|
|
|
my $MIME = Email::MIME->create_html( |
136
|
|
|
|
|
|
|
header => [], |
137
|
|
|
|
|
|
|
body_attributes => { charset => 'UTF-8', encoding => 'quoted-printable' }, |
138
|
|
|
|
|
|
|
body => encode('UTF-8', $html), |
139
|
|
|
|
|
|
|
resolver => $UriResolver |
140
|
|
|
|
|
|
|
); |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Force wrap in a multipart/related |
143
|
0
|
0
|
|
|
|
|
return Email::MIME->create( |
144
|
|
|
|
|
|
|
attributes => { |
145
|
|
|
|
|
|
|
content_type => "multipart/related", |
146
|
|
|
|
|
|
|
disposition => "attachment" |
147
|
|
|
|
|
|
|
}, |
148
|
|
|
|
|
|
|
parts => [ $MIME ] |
149
|
|
|
|
|
|
|
) unless ($MIME->subparts); |
150
|
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
|
return $MIME; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub normaliaze_rich_content { |
156
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
157
|
0
|
|
|
|
|
|
my $c = shift; |
158
|
0
|
|
|
|
|
|
my $src_octets = shift; |
159
|
0
|
|
|
|
|
|
my $filename = shift; |
160
|
|
|
|
|
|
|
|
161
|
0
|
|
|
|
|
|
my $upload; |
162
|
0
|
0
|
|
|
|
|
if(ref($src_octets)) { |
163
|
0
|
|
|
|
|
|
$upload = $src_octets; |
164
|
0
|
|
|
|
|
|
$src_octets = $upload->slurp; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
0
|
|
|
|
|
|
my $content; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# Try to determine what text encoding the file content came from, and then detect if it |
170
|
|
|
|
|
|
|
# is MIME or HTML. |
171
|
|
|
|
|
|
|
# |
172
|
|
|
|
|
|
|
# Note that if the content came from a file upload/post an encode/decode phase happened |
173
|
|
|
|
|
|
|
# during the HTTP transfer of this file, but it should have been taken care of by Catalyst |
174
|
|
|
|
|
|
|
# and now we have the original file on disk in its native 8-bit encoding. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# If MIME (MTHML): |
177
|
|
|
|
|
|
|
my $MIME = try{ |
178
|
|
|
|
|
|
|
# This will frequently produce uninitialized value warnings from Email::Simple::Header, |
179
|
|
|
|
|
|
|
# and I haven't been able to figure out how to stop it |
180
|
0
|
|
|
0
|
|
|
Email::MIME->new($src_octets) |
181
|
0
|
|
|
|
|
|
}; |
182
|
0
|
0
|
0
|
|
|
|
if($MIME && $MIME->subparts) { |
183
|
0
|
|
|
|
|
|
$content = $self->convert_from_mhtml($c,$MIME); |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
# If HTML or binary: |
186
|
|
|
|
|
|
|
else { |
187
|
0
|
0
|
0
|
|
|
|
if(!$upload || $upload->type =~ /^text/){ |
188
|
0
|
|
0
|
|
|
|
my $src_encoding= encoding_from_html_document($src_octets) || 'utf-8'; |
189
|
0
|
0
|
|
|
|
|
my $in_codec= Encode::find_encoding($src_encoding) or die "Unsupported encoding: $src_encoding"; |
190
|
0
|
0
|
|
|
|
|
$content = (utf8::is_utf8($src_octets)) ? $src_octets : $in_codec->decode($src_octets); |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
# Binary |
193
|
|
|
|
|
|
|
else { |
194
|
0
|
0
|
|
|
|
|
my $checksum = $self->Store->add_content_file_mv($upload->tempname) or die "Failed to add content"; |
195
|
0
|
|
|
|
|
|
my $Content = $self->Content($checksum,$upload->filename); |
196
|
0
|
0
|
|
|
|
|
return $Content->imglink if ($Content->imglink); |
197
|
0
|
|
|
|
|
|
return $Content->filelink; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
# TODO: Detect other content types and add fallback logic |
201
|
|
|
|
|
|
|
|
202
|
0
|
|
|
|
|
|
$content = $self->parse_html_get_style_body(\$content); |
203
|
0
|
|
|
|
|
|
$self->convert_data_uri_scheme_links($c,\$content); |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# Use style tags just as a safe place to store the original filename |
206
|
|
|
|
|
|
|
# (switched to this after having issues with html comments) |
207
|
0
|
0
|
|
|
|
|
$content = '<style>/*----ORIGINAL_FILENAME:' . |
208
|
|
|
|
|
|
|
$filename . |
209
|
|
|
|
|
|
|
'----*/</style>' . "\n" . $content if ($filename); |
210
|
|
|
|
|
|
|
|
211
|
0
|
|
|
|
|
|
return $content; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub convert_from_mhtml { |
216
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
217
|
0
|
|
|
|
|
|
my $c = shift; |
218
|
0
|
|
|
|
|
|
my $MIME = shift; |
219
|
|
|
|
|
|
|
|
220
|
0
|
0
|
|
|
|
|
my ($SubPart) = $MIME->subparts or return; |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
## -- Check for and remove extra outer MIME wrapper (exists in actual MIME EMails): |
223
|
0
|
0
|
0
|
|
|
|
$MIME = $SubPart if ( |
224
|
|
|
|
|
|
|
$SubPart->content_type && |
225
|
|
|
|
|
|
|
$SubPart->content_type =~ /multipart\/related/ |
226
|
|
|
|
|
|
|
); |
227
|
|
|
|
|
|
|
## -- |
228
|
|
|
|
|
|
|
|
229
|
0
|
0
|
|
|
|
|
my ($MainPart) = $MIME->subparts or return; |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
## ------ |
232
|
|
|
|
|
|
|
## New: throw the kitchen sink at trying to figure out the charset/encoding |
233
|
|
|
|
|
|
|
## |
234
|
|
|
|
|
|
|
## This solves the long-standing problem where MHT files saved by Word 2010 |
235
|
|
|
|
|
|
|
## would load garbled. These files are encoded as 'UTF-16LE', and the system |
236
|
|
|
|
|
|
|
## is not able to realize this out of the box (I think because it lists the |
237
|
|
|
|
|
|
|
## the charset ambiguously as ' charset="unicode" ' in the Content-Type |
238
|
|
|
|
|
|
|
## MIME header, but I'm no expert on Unicode). Below we're basically trying |
239
|
|
|
|
|
|
|
## all of the functions of HTML::Encoding until we find one that gives us |
240
|
|
|
|
|
|
|
## an answer, and if we do get an answer, we apply it to the MIME object before |
241
|
|
|
|
|
|
|
## calling ->body_str() which will then use it to decode to text. |
242
|
|
|
|
|
|
|
## |
243
|
0
|
|
|
|
|
|
my $decoded = $MainPart->body; # <-- decodes from base64 (or whatever) to *bytes* |
244
|
|
|
|
|
|
|
|
245
|
0
|
|
0
|
|
|
|
my $char_set = |
246
|
|
|
|
|
|
|
HTML::Encoding::encoding_from_html_document ($decoded) || |
247
|
|
|
|
|
|
|
HTML::Encoding::encoding_from_byte_order_mark ($decoded) || |
248
|
|
|
|
|
|
|
HTML::Encoding::encoding_from_meta_element ($decoded) || |
249
|
|
|
|
|
|
|
HTML::Encoding::xml_declaration_from_octets ($decoded) || |
250
|
|
|
|
|
|
|
HTML::Encoding::encoding_from_first_chars ($decoded) || |
251
|
|
|
|
|
|
|
HTML::Encoding::encoding_from_xml_declaration ($decoded) || |
252
|
|
|
|
|
|
|
HTML::Encoding::encoding_from_content_type ($decoded) || |
253
|
|
|
|
|
|
|
HTML::Encoding::encoding_from_xml_document ($decoded); |
254
|
|
|
|
|
|
|
|
255
|
0
|
0
|
|
|
|
|
$MainPart->charset_set( $char_set ) if ($char_set); |
256
|
|
|
|
|
|
|
## ------ |
257
|
|
|
|
|
|
|
|
258
|
0
|
|
|
|
|
|
my $html = $MainPart->body_str; # <-- decodes to text using the character_set |
259
|
|
|
|
|
|
|
|
260
|
0
|
|
0
|
|
|
|
my $base_path = $self->parse_html_base_href(\$html) || $self->get_mime_part_base_path($MainPart); |
261
|
|
|
|
|
|
|
|
262
|
0
|
|
|
|
|
|
my %ndx = (); |
263
|
|
|
|
|
|
|
$MIME->walk_parts(sub{ |
264
|
0
|
|
|
0
|
|
|
my $Part = shift; |
265
|
0
|
0
|
0
|
|
|
|
return if ($Part == $MIME || $Part == $MainPart); #<-- ignore the outer and main/body parts |
266
|
|
|
|
|
|
|
|
267
|
0
|
|
|
|
|
|
my $content_id = $Part->header('Content-ID'); |
268
|
0
|
0
|
|
|
|
|
if ($content_id) { |
269
|
0
|
|
|
|
|
|
$ndx{'cid:' . $content_id} = $Part; |
270
|
0
|
|
|
|
|
|
$content_id =~ s/^\<//; |
271
|
0
|
|
|
|
|
|
$content_id =~ s/\>$//; |
272
|
0
|
|
|
|
|
|
$ndx{'cid:' . $content_id} = $Part; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
|
275
|
0
|
|
|
|
|
|
my $content_location = $Part->header('Content-Location'); |
276
|
0
|
0
|
|
|
|
|
if($content_location) { |
277
|
0
|
|
|
|
|
|
$ndx{$content_location} = $Part; |
278
|
0
|
0
|
|
|
|
|
if($base_path) { |
279
|
0
|
|
|
|
|
|
$content_location =~ s/^\Q$base_path\E//; |
280
|
0
|
|
|
|
|
|
$ndx{$content_location} = $Part; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
0
|
|
|
|
|
|
}); |
284
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
$self->convert_mhtml_links_parts($c,\$html,\%ndx); |
286
|
0
|
|
|
|
|
|
return $html; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
# Try to extract the 'body' from html to prevent causing DOM/parsing issues on the client side |
290
|
|
|
|
|
|
|
sub parse_html_get_style_body { |
291
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
292
|
0
|
|
|
|
|
|
my $htmlref = shift; |
293
|
|
|
|
|
|
|
|
294
|
0
|
0
|
|
|
|
|
my $body = $self->parse_html_get_body($htmlref) or return $$htmlref; |
295
|
0
|
|
|
|
|
|
my $style = $self->parse_html_get_styles($htmlref); |
296
|
|
|
|
|
|
|
|
297
|
0
|
|
|
|
|
|
my $auto_css_pre = 'cas-selector-wrap-'; |
298
|
0
|
|
|
|
|
|
my $auto_css_id = $auto_css_pre . String::Random->new->randregex('[a-z0-9]{8}'); |
299
|
|
|
|
|
|
|
|
300
|
0
|
0
|
|
|
|
|
if($style) { |
301
|
0
|
|
|
|
|
|
my $Css = Catalyst::Controller::SimpleCAS::CSS::Simple->new; |
302
|
0
|
|
|
|
|
|
$Css->read({ css => $style }); |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
#scream_color(BLACK.ON_RED,$Css->get_selectors); |
305
|
|
|
|
|
|
|
|
306
|
0
|
|
|
|
|
|
foreach my $selector ($Css->get_selectors) { |
307
|
0
|
|
|
|
|
|
my @parts = split(/\s+/,$selector); |
308
|
|
|
|
|
|
|
# strip selector wrap from previous content processing (when the user imports + |
309
|
|
|
|
|
|
|
# exports + imports multiple times) |
310
|
0
|
0
|
|
|
|
|
shift @parts if ($parts[0] =~ /^\#${auto_css_pre}/); |
311
|
0
|
|
|
|
|
|
unshift @parts, '#' . $auto_css_id; |
312
|
0
|
0
|
|
|
|
|
pop @parts if (lc($selector) eq 'body'); #<-- any 'body' selectors are replaced by the new div wrap below |
313
|
|
|
|
|
|
|
|
314
|
0
|
|
|
|
|
|
$Css->modify_selector({ |
315
|
|
|
|
|
|
|
selector => $selector, |
316
|
|
|
|
|
|
|
new_selector => join(' ',@parts) |
317
|
|
|
|
|
|
|
}); |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
0
|
|
|
|
|
|
$style = $Css->write; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
|
323
|
0
|
0
|
|
|
|
|
if ($style) { |
324
|
|
|
|
|
|
|
# minify: |
325
|
0
|
|
|
|
|
|
$style =~ s/\r?\n/ /gm; |
326
|
0
|
|
|
|
|
|
$style =~ s/\s+/ /gm; |
327
|
0
|
|
|
|
|
|
$style = "\n<style type=\"text/css\">\n$style\n</style>"; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
0
|
|
0
|
|
|
|
$style ||= ''; |
331
|
0
|
|
|
|
|
|
$style = "\n" . '<style type="text/css">' . "\n" . |
332
|
|
|
|
|
|
|
" $ISOLATE_CSS_RULE\n" . |
333
|
|
|
|
|
|
|
'</style>' . $style . "\n"; |
334
|
|
|
|
|
|
|
|
335
|
0
|
|
|
|
|
|
return '<div class="isolate" id="' . $auto_css_id . '">' . "\n" . |
336
|
|
|
|
|
|
|
$body . "\n" . |
337
|
|
|
|
|
|
|
'</div>' . "\n$style"; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# Try to extract the 'body' from html to prevent causing DOM/parsing issues on the client side |
342
|
|
|
|
|
|
|
# Also strip html comments |
343
|
|
|
|
|
|
|
sub parse_html_get_body { |
344
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
345
|
0
|
|
|
|
|
|
my $htmlref = shift; |
346
|
0
|
|
|
|
|
|
my $parser = HTML::TokeParser::Simple->new($htmlref); |
347
|
0
|
|
|
|
|
|
my $in_body = 0; |
348
|
0
|
|
|
|
|
|
my $inner = ''; |
349
|
0
|
|
|
|
|
|
while (my $tag = $parser->get_token) { |
350
|
0
|
0
|
0
|
|
|
|
last if ($in_body && $tag->is_end_tag('body')); |
351
|
0
|
0
|
0
|
|
|
|
$inner .= $tag->as_is if ($in_body && !$tag->is_comment); |
352
|
0
|
0
|
|
|
|
|
$in_body = 1 if ($tag->is_start_tag('body')); |
353
|
|
|
|
|
|
|
}; |
354
|
0
|
0
|
|
|
|
|
return undef if ($inner eq ''); |
355
|
0
|
|
|
|
|
|
return $inner; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
sub parse_html_get_styles { |
359
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
360
|
0
|
|
|
|
|
|
my $htmlref = shift; |
361
|
0
|
|
|
|
|
|
my $strip = shift; |
362
|
0
|
|
|
|
|
|
my $parser = HTML::TokeParser::Simple->new($htmlref); |
363
|
0
|
|
|
|
|
|
my $in_style = 0; |
364
|
0
|
|
|
|
|
|
my $styles = ''; |
365
|
0
|
|
|
|
|
|
my $newhtml = ''; |
366
|
0
|
|
|
|
|
|
while (my $tag = $parser->get_token) { |
367
|
0
|
0
|
|
|
|
|
if ($tag->is_end_tag('style')) { |
368
|
0
|
|
|
|
|
|
$in_style = 0; |
369
|
0
|
|
|
|
|
|
next; |
370
|
|
|
|
|
|
|
} |
371
|
0
|
0
|
0
|
|
|
|
$styles .= $tag->as_is and next if ($in_style); |
372
|
0
|
0
|
|
|
|
|
if ($tag->is_start_tag('style')) { |
373
|
0
|
|
|
|
|
|
$in_style = 1; |
374
|
0
|
|
|
|
|
|
next; |
375
|
|
|
|
|
|
|
} |
376
|
0
|
0
|
0
|
|
|
|
$newhtml .= $tag->as_is if($strip && !$tag->is_tag('style')); |
377
|
|
|
|
|
|
|
}; |
378
|
0
|
0
|
|
|
|
|
return undef if ($styles eq ''); |
379
|
|
|
|
|
|
|
|
380
|
0
|
0
|
|
|
|
|
$$htmlref = $newhtml if ($strip); |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# Pull out html comment characters, ignored in css, but can interfere with CSS::Simple (rare cases) |
383
|
0
|
|
|
|
|
|
$styles =~ s/\<\!\-\-//g; |
384
|
0
|
|
|
|
|
|
$styles =~ s/\-\-\>//g; |
385
|
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
|
return $styles; |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# Extracts the base file path from the 'base' tag of the MHTML content |
392
|
|
|
|
|
|
|
sub parse_html_base_href { |
393
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
394
|
0
|
|
|
|
|
|
my $htmlref = shift; |
395
|
0
|
|
|
|
|
|
my $parser = HTML::TokeParser::Simple->new($htmlref); |
396
|
0
|
|
|
|
|
|
while (my $tag = $parser->get_tag) { |
397
|
0
|
0
|
|
|
|
|
if($tag->is_tag('base')){ |
398
|
0
|
0
|
|
|
|
|
my $url = $tag->get_attr('href') or next; |
399
|
0
|
|
|
|
|
|
return $url; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
}; |
402
|
0
|
|
|
|
|
|
return undef; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# alternative method to identify a base path from a Mime Part |
406
|
|
|
|
|
|
|
sub get_mime_part_base_path { |
407
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
408
|
0
|
|
|
|
|
|
my $Part = shift; |
409
|
|
|
|
|
|
|
|
410
|
0
|
0
|
|
|
|
|
my $content_location = $Part->header('Content-Location') or return undef; |
411
|
0
|
|
|
|
|
|
my @parts = split(/\//,$content_location); |
412
|
0
|
|
|
|
|
|
my $filename = pop @parts; |
413
|
0
|
|
|
|
|
|
my $path = join('/',@parts) . '/'; |
414
|
|
|
|
|
|
|
|
415
|
0
|
|
|
|
|
|
return $path; |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
sub convert_mhtml_links_parts { |
420
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
421
|
0
|
|
|
|
|
|
my $c = shift; |
422
|
0
|
|
|
|
|
|
my $htmlref = shift; |
423
|
0
|
|
|
|
|
|
my $part_ndx = shift; |
424
|
|
|
|
|
|
|
|
425
|
0
|
0
|
|
|
|
|
die "convert_mhtml_links_parts(): Invalid arguments!!" unless (ref $part_ndx eq 'HASH'); |
426
|
|
|
|
|
|
|
|
427
|
0
|
|
|
|
|
|
my $parser = HTML::TokeParser::Simple->new($htmlref); |
428
|
|
|
|
|
|
|
|
429
|
0
|
|
|
|
|
|
my $substitutions = {}; |
430
|
|
|
|
|
|
|
|
431
|
0
|
|
|
|
|
|
while (my $tag = $parser->get_tag) { |
432
|
0
|
0
|
|
|
|
|
next if($tag->is_tag('base')); #<-- skip the 'base' tag which we parsed earlier |
433
|
0
|
|
|
|
|
|
for my $attr (qw(src href)){ |
434
|
0
|
0
|
|
|
|
|
my $url = $tag->get_attr($attr) or next; |
435
|
0
|
0
|
|
|
|
|
my $Part = $part_ndx->{$url} or next; |
436
|
0
|
0
|
|
|
|
|
my $cas_url = $self->mime_part_to_cas_url($c,$Part) or next; |
437
|
|
|
|
|
|
|
|
438
|
0
|
|
|
|
|
|
my $as_is = $tag->as_is; |
439
|
0
|
|
|
|
|
|
$tag->set_attr( $attr => $cas_url ); |
440
|
0
|
|
|
|
|
|
$substitutions->{$as_is} = $tag->as_is; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
|
444
|
0
|
|
|
|
|
|
foreach my $find (keys %$substitutions) { |
445
|
0
|
|
|
|
|
|
my $replace = $substitutions->{$find}; |
446
|
0
|
|
|
|
|
|
$$htmlref =~ s/\Q$find\E/$replace/gm; |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
# See http://en.wikipedia.org/wiki/Data_URI_scheme |
453
|
|
|
|
|
|
|
sub convert_data_uri_scheme_links { |
454
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
455
|
0
|
|
|
|
|
|
my $c = shift; |
456
|
0
|
|
|
|
|
|
my $htmlref = shift; |
457
|
|
|
|
|
|
|
|
458
|
0
|
|
|
|
|
|
my $parser = HTML::TokeParser::Simple->new($htmlref); |
459
|
|
|
|
|
|
|
|
460
|
0
|
|
|
|
|
|
my $substitutions = {}; |
461
|
|
|
|
|
|
|
|
462
|
0
|
|
|
|
|
|
while (my $tag = $parser->get_tag) { |
463
|
|
|
|
|
|
|
|
464
|
0
|
|
|
|
|
|
my $attr; |
465
|
0
|
0
|
|
|
|
|
if($tag->is_tag('img')) { |
|
|
0
|
|
|
|
|
|
466
|
0
|
|
|
|
|
|
$attr = 'src'; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
elsif($tag->is_tag('a')) { |
469
|
0
|
|
|
|
|
|
$attr = 'href'; |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
else { |
472
|
0
|
|
|
|
|
|
next; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
0
|
0
|
|
|
|
|
my $url = $tag->get_attr($attr) or next; |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# Support the special case where the src value is literal base64 data: |
478
|
0
|
0
|
|
|
|
|
if ($url =~ /^data:/) { |
479
|
0
|
|
|
|
|
|
my $newurl = $self->embedded_src_data_to_url($c,$url); |
480
|
0
|
0
|
|
|
|
|
$substitutions->{$url} = $newurl if ($newurl); |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
|
484
|
0
|
|
|
|
|
|
foreach my $find (keys %$substitutions) { |
485
|
0
|
|
|
|
|
|
my $replace = $substitutions->{$find}; |
486
|
0
|
|
|
|
|
|
$$htmlref =~ s/\Q$find\E/$replace/gm; |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
sub embedded_src_data_to_url { |
491
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
492
|
0
|
|
|
|
|
|
my $c = shift; |
493
|
0
|
|
|
|
|
|
my $url = shift; |
494
|
|
|
|
|
|
|
|
495
|
0
|
|
|
|
|
|
my ($pre,$content_type,$encoding,$base64_data) = split(/[\:\;\,]/,$url); |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
# we only know how to handle base64 currently: |
498
|
0
|
0
|
|
|
|
|
return undef unless (lc($encoding) eq 'base64'); |
499
|
|
|
|
|
|
|
|
500
|
0
|
|
|
0
|
|
|
my $checksum = try{$self->Store->add_content_base64($base64_data)} |
501
|
0
|
0
|
|
|
|
|
or return undef; |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# This is RapidApp-specific |
504
|
0
|
0
|
0
|
|
|
|
my $pfx = $c->can('mount_url') ? $c->mount_url || '' : ''; |
505
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
|
return join('/',$pfx, |
507
|
|
|
|
|
|
|
$self->action_namespace($c), |
508
|
|
|
|
|
|
|
'fetch_content', $checksum |
509
|
|
|
|
|
|
|
); |
510
|
|
|
|
|
|
|
} |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
sub mime_part_to_cas_url { |
513
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
514
|
0
|
|
|
|
|
|
my $c = shift; |
515
|
0
|
|
|
|
|
|
my $Part = shift; |
516
|
|
|
|
|
|
|
|
517
|
0
|
|
|
|
|
|
my $data = $Part->body; |
518
|
0
|
|
|
|
|
|
my $filename = $Part->filename(1); |
519
|
0
|
0
|
|
|
|
|
my $checksum = $self->Store->add_content($data) or return undef; |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
# This is RapidApp-specific |
522
|
0
|
0
|
0
|
|
|
|
my $pfx = $c->can('mount_url') ? $c->mount_url || '' : ''; |
523
|
|
|
|
|
|
|
|
524
|
0
|
|
|
|
|
|
return join('/',$pfx, |
525
|
|
|
|
|
|
|
$self->action_namespace($c), |
526
|
|
|
|
|
|
|
'fetch_content', $checksum, $filename |
527
|
|
|
|
|
|
|
); |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
1; |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
__END__ |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head1 NAME |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Catalyst::Controller::SimpleCAS::Role::TextTranscode - Addl MHTML methods for SimpleCAS |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=head1 SYNOPSIS |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
use Catalyst::Controller::SimpleCAS; |
541
|
|
|
|
|
|
|
... |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
=head1 DESCRIPTION |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
This is a Role which adds extra methods and functionality to L<Catalyst::Controller::SimpleCAS>. |
546
|
|
|
|
|
|
|
This role is automatically loaded into the main controller class. The reason that this exists and |
547
|
|
|
|
|
|
|
is structured this way is for historical reasons and will likely be refactored later. |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=head1 PUBLIC ACTIONS |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=head2 transcode_html (texttranscode/transcode_html) |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=head2 generate_mhtml_download (texttranscode/generate_mhtml_download) |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=head1 METHODS |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=head2 convert_data_uri_scheme_links |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=head2 convert_from_mhtml |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=head2 convert_mhtml_links_parts |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
=head2 embedded_src_data_to_url |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=head2 get_mime_part_base_path |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=head2 get_strip_orig_filename |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
=head2 html_to_mhtml |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=head2 mime_part_to_cas_url |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
=head2 normaliaze_rich_content |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
=head2 parse_html_base_href |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
=head2 parse_html_get_body |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
=head2 parse_html_get_style_body |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
=head2 parse_html_get_styles |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
=head1 SEE ALSO |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=over |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=item * |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
L<Catalyst::Controller::SimpleCAS> |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=back |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=head1 AUTHOR |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
Henry Van Styn <vanstyn@cpan.org> |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
This software is copyright (c) 2014 by IntelliTree Solutions llc. |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
603
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
=cut |