| blib/lib/Text/Textile2MarkdownStandalone.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 156 | 204 | 76.4 |
| branch | 30 | 50 | 60.0 |
| condition | 9 | 16 | 56.2 |
| subroutine | 14 | 14 | 100.0 |
| pod | 4 | 6 | 66.6 |
| total | 213 | 290 | 73.4 |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | package Text::Textile2MarkdownStandalone; | ||||||
| 2 | 2 | 2 | 610436 | use 5.008001; | |||
| 2 | 10 | ||||||
| 3 | 2 | 2 | 14 | use strict; | |||
| 2 | 4 | ||||||
| 2 | 83 | ||||||
| 4 | 2 | 2 | 12 | use warnings; | |||
| 2 | 4 | ||||||
| 2 | 164 | ||||||
| 5 | |||||||
| 6 | 2 | 2 | 13 | use Carp; | |||
| 2 | 6 | ||||||
| 2 | 9233 | ||||||
| 7 | |||||||
| 8 | our $VERSION = "0.11"; | ||||||
| 9 | |||||||
| 10 | sub new { | ||||||
| 11 | 3 | 3 | 1 | 763 | my ($class, %opt) = @_; | ||
| 12 | return bless { | ||||||
| 13 | input_file => $opt{input_file} // "", | ||||||
| 14 | 3 | 100 | 49 | output_file => $opt{output_file} // "", | |||
| 100 | |||||||
| 15 | }, $class; | ||||||
| 16 | } | ||||||
| 17 | |||||||
| 18 | sub input_file { | ||||||
| 19 | 5 | 5 | 1 | 447 | my ($self, $file) = @_; | ||
| 20 | 5 | 100 | 16 | $self->{input_file} = $file if $file; | |||
| 21 | 5 | 25 | return $self->{input_file}; | ||||
| 22 | } | ||||||
| 23 | |||||||
| 24 | sub output_file { | ||||||
| 25 | 7 | 7 | 1 | 20 | my ($self, $file) = @_; | ||
| 26 | 7 | 100 | 21 | $self->{output_file} = $file if $file; | |||
| 27 | 7 | 33 | return $self->{output_file}; | ||||
| 28 | } | ||||||
| 29 | |||||||
| 30 | sub convert { | ||||||
| 31 | 3 | 3 | 1 | 9 | my ($self) = @_; | ||
| 32 | 3 | 10 | my $text = $self->_read_file($self->input_file); | ||||
| 33 | 3 | 13 | my $markdown = $self->textile_2_markdown($text); | ||||
| 34 | 3 | 100 | 11 | if ($self->output_file) { | |||
| 35 | 2 | 4 | $self->_save_file($self->output_file, $markdown); | ||||
| 36 | } | ||||||
| 37 | else { | ||||||
| 38 | 1 | 6 | return $markdown; | ||||
| 39 | } | ||||||
| 40 | } | ||||||
| 41 | |||||||
| 42 | sub _read_file { | ||||||
| 43 | 3 | 3 | 7 | my ($self, $input_file) = @_; | |||
| 44 | 3 | 50 | 164 | open(my $fh, "<:encoding(utf8)", $input_file) || die "cannot open file ". $input_file; | |||
| 45 | 3 | 317 | my @line = <$fh>; | ||||
| 46 | 3 | 382 | close($fh); | ||||
| 47 | 3 | 45 | my $string = join("", @line); | ||||
| 48 | 3 | 34 | return $string; | ||||
| 49 | } | ||||||
| 50 | |||||||
| 51 | sub _save_file { | ||||||
| 52 | 2 | 2 | 6 | my ($self, $output_file, $string) = @_; | |||
| 53 | 2 | 50 | 5 | unless ($string) { | |||
| 54 | 0 | 0 | croak "notfound string $string ."; | ||||
| 55 | } | ||||||
| 56 | 2 | 50 | 6 | unless ($output_file) { | |||
| 57 | 0 | 0 | croak "notfound output_file $output_file ."; | ||||
| 58 | } | ||||||
| 59 | 2 | 50 | 402 | open (my $fh, ">:encoding(utf8)", $output_file) || die "cannot open file ".$output_file; | |||
| 60 | 2 | 144 | binmode($fh, ":utf8"); | ||||
| 61 | 2 | 205 | print $fh $string; | ||||
| 62 | 2 | 95 | close($fh); | ||||
| 63 | } | ||||||
| 64 | |||||||
| 65 | sub textile_2_markdown { | ||||||
| 66 | 3 | 3 | 0 | 9 | my ($self, $text) = @_; | ||
| 67 | |||||||
| 68 | # Protect URLs completely first - execute before other conversions | ||||||
| 69 | 3 | 5 | my @urls; | ||||
| 70 | my @url_positions; | ||||||
| 71 | 3 | 6 | my $counter = 0; | ||||
| 72 | |||||||
| 73 | # Detect URLs and replace them with placeholders | ||||||
| 74 | 3 | 37 | while ($text =~ m{(https?://[^\s"<>\(\))\]]+)}g) { | ||||
| 75 | 4 | 15 | my $url = $1; | ||||
| 76 | 4 | 11 | my $placeholder = "URL_PLACEHOLDER_${counter}"; | ||||
| 77 | 4 | 15 | my $pos = pos($text) - length($url); | ||||
| 78 | |||||||
| 79 | 4 | 11 | push @urls, $url; | ||||
| 80 | 4 | 9 | push @url_positions, [$pos, $placeholder]; | ||||
| 81 | 4 | 27 | $counter++; | ||||
| 82 | } | ||||||
| 83 | |||||||
| 84 | # Replace with placeholders (process from end to avoid offset issues) | ||||||
| 85 | 3 | 8 | foreach my $url_info (reverse @url_positions) { | ||||
| 86 | 4 | 11 | my ($pos, $placeholder) = @$url_info; | ||||
| 87 | 4 | 11 | my $url_length = length($urls[$counter - 1]); | ||||
| 88 | 4 | 33 | substr($text, $pos, $url_length) = $placeholder; | ||||
| 89 | 4 | 9 | $counter--; | ||||
| 90 | } | ||||||
| 91 | |||||||
| 92 | # Process nested ordered lists | ||||||
| 93 | 3 | 13 | $text = $self->_convert_list_number($text); | ||||
| 94 | |||||||
| 95 | # Process bulleted lists | ||||||
| 96 | 3 | 229 | $text =~ s/^(\s*)\*\s+(.+)$/$1* $2/gm; | ||||
| 97 | 3 | 48 | $text =~ s/^(\s*)\*\*\s+(.+)$/$1 * $2/gm; | ||||
| 98 | 3 | 11 | $text =~ s/^(\s*)\*\*\*\s+(.+)$/$1 * $2/gm; | ||||
| 99 | |||||||
| 100 | # Convert headings with correct depth mapping | ||||||
| 101 | 3 | 40 | $text =~ s/^\s*h1\.\s+(.+)$/# $1/gm; | ||||
| 102 | 3 | 38 | $text =~ s/^\s*h2\.\s+(.+)$/## $1/gm; | ||||
| 103 | 3 | 117 | $text =~ s/^\s*h3\.\s+(.+)$/### $1/gm; | ||||
| 104 | 3 | 78 | $text =~ s/^\s*h4\.\s+(.+)$/#### $1/gm; | ||||
| 105 | 3 | 99 | $text =~ s/^\s*h5\.\s+(.+)$/##### $1/gm; | ||||
| 106 | 3 | 11 | $text =~ s/^\s*h6\.\s+(.+)$/###### $1/gm; | ||||
| 107 | |||||||
| 108 | # Convert single emphasis to double (**text**) | ||||||
| 109 | 3 | 100 | $text =~ s/\*([^\*\n]+)\*/\*\*$1\*\*/g; | ||||
| 110 | |||||||
| 111 | # Convert strikethrough (excluding URLs) | ||||||
| 112 | 3 | 54 | $text =~ s/-([^-\n]+)-/~~$1~~/g; | ||||
| 113 | |||||||
| 114 | # Remove paragraph markers | ||||||
| 115 | 3 | 42 | $text =~ s/^p\.\s*(.+)$/ $1\n\n/gm; | ||||
| 116 | |||||||
| 117 | # Convert horizontal rules | ||||||
| 118 | 3 | 24 | $text =~ s/^-{3,}$/---/gm; | ||||
| 119 | |||||||
| 120 | # Process text color markup | ||||||
| 121 | 3 | 17 | $text =~ s/%\{color:(.*?)\}(.*?)%/**$2**/g; | ||||
| 122 | |||||||
| 123 | # Blockquote conversion | ||||||
| 124 | 3 | 43 | $text =~ s/^bq\.\s+(.+)$/> $1/gm; | ||||
| 125 | |||||||
| 126 | # Convert links | ||||||
| 127 | 3 | 25 | $text =~ s/"([^"]+)":([^\s]+)/[$1]($2)/g; | ||||
| 128 | |||||||
| 129 | # Convert images | ||||||
| 130 | 3 | 29 | $text =~ s/!([^!(]+)\(([^!)]+)\)!//g; | ||||
| 131 | |||||||
| 132 | # Convert inline code | ||||||
| 133 | 3 | 26 | $text =~ s/@([^@]+)@/`$1`/g; | ||||
| 134 | |||||||
| 135 | # Collapse block processing | ||||||
| 136 | 3 | 11 | $text =~ s/\{\{collapse\s*(.*?)\}\}/ | ||||
| 137 | 0 | 0 | my $content = $1; | ||||
| 138 | 0 | 0 | " \n 詳細情報<\/summary>\n\n$content\n<\/details>" |
||||
| 139 | /gse; | ||||||
| 140 | |||||||
| 141 | # Convert code blocks | ||||||
| 142 | 3 | 8 | $text =~ s/(.*?)<\/pre>/```\n$1\n```/gs; |
||||
| 143 | 3 | 10 | $text =~ s/^pre\.\s*\n(.*?)(?=\n\n|\z)/```\n$1\n```/gms; | ||||
| 144 | 3 | 69 | $text =~ s/^bc\.*\s*\n(.*?)(?=\n\n|\z|\n[^\s]+)/```\n$1\n```/gms; | ||||
| 145 | |||||||
| 146 | # Improved table conversion | ||||||
| 147 | 3 | 31 | $text = $self->_convert_textile_tables_improved($text); | ||||
| 148 | |||||||
| 149 | # Internal link conversion | ||||||
| 150 | 3 | 39 | $text =~ s/\[\[([^|]+)\|([^\]]+)\]\]/[$2]($1)/g; | ||||
| 151 | 3 | 10 | $text =~ s/\[\[([^\]]+)\]\]/[$1]($1)/g; | ||||
| 152 | |||||||
| 153 | # Email address handling | ||||||
| 154 | 3 | 8 | $text =~ s/([a-zA-Z0-9._%+-]+)\@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/$1\@$2/g; | ||||
| 155 | |||||||
| 156 | # Line break processing | ||||||
| 157 | 3 | 11 | $text =~ s/ /\n\n/gi; |
||||
| 158 | |||||||
| 159 | # Restore URL placeholders | ||||||
| 160 | 3 | 6 | $counter = 0; | ||||
| 161 | 3 | 9 | foreach my $url (@urls) { | ||||
| 162 | 4 | 10 | my $placeholder = "URL_PLACEHOLDER_${counter}"; | ||||
| 163 | 4 | 161 | $text =~ s/$placeholder/$url/g; | ||||
| 164 | 4 | 17 | $counter++; | ||||
| 165 | } | ||||||
| 166 | |||||||
| 167 | # Remove consecutive blank lines | ||||||
| 168 | 3 | 39 | $text =~ s/\n{3,}/\n\n/g; | ||||
| 169 | |||||||
| 170 | 3 | 8 | my $after_string = $text; | ||||
| 171 | 3 | 17 | return $after_string; | ||||
| 172 | } | ||||||
| 173 | |||||||
| 174 | sub _convert_list_number { | ||||||
| 175 | 3 | 3 | 9 | my ($self, $text) = @_; | |||
| 176 | |||||||
| 177 | 3 | 6 | my @counters; | ||||
| 178 | my @result; | ||||||
| 179 | 3 | 97 | my @line = split("\n", $text); | ||||
| 180 | 3 | 13 | for my $l (@line) { | ||||
| 181 | 209 | 392 | chomp $l; | ||||
| 182 | 209 | 100 | 562 | if ($l =~ /^(#+)\s*(.*)/) { | |||
| 183 | 39 | 128 | my $level = length($1); | ||||
| 184 | 39 | 76 | my $text = $2; | ||||
| 185 | # Trim deeper levels | ||||||
| 186 | 39 | 68 | splice @counters, $level; | ||||
| 187 | # Initialize or increment the counter for the current level | ||||||
| 188 | 39 | 100 | 85 | if (!defined $counters[$level-1]) { | |||
| 189 | 15 | 37 | $counters[$level-1] = 1; | ||||
| 190 | } else { | ||||||
| 191 | 24 | 38 | $counters[$level-1]++; | ||||
| 192 | } | ||||||
| 193 | # Indent by (4*level - 1) spaces | ||||||
| 194 | 39 | 86 | my $indent = ' ' x (4 * $level - 1); | ||||
| 195 | 39 | 130 | push @result, "$indent$counters[$level-1]. $text"; | ||||
| 196 | } else { | ||||||
| 197 | 170 | 265 | @counters = (); | ||||
| 198 | 170 | 393 | push @result, $l; | ||||
| 199 | } | ||||||
| 200 | } | ||||||
| 201 | |||||||
| 202 | 3 | 85 | return join("\n", @result); | ||||
| 203 | } | ||||||
| 204 | |||||||
| 205 | sub _convert_textile_tables_improved { | ||||||
| 206 | 3 | 3 | 17 | my ($self, $text) = @_; | |||
| 207 | 3 | 92 | my @lines = split(/\n/, $text); | ||||
| 208 | 3 | 7 | my @result; | ||||
| 209 | 3 | 6 | my $in_table = 0; | ||||
| 210 | 3 | 6 | my $header_detected = 0; | ||||
| 211 | 3 | 6 | my @table_rows = (); | ||||
| 212 | 3 | 8 | my $current_cell = ""; | ||||
| 213 | 3 | 14 | my $processing_multiline_cell = 0; | ||||
| 214 | |||||||
| 215 | 3 | 14 | for (my $i = 0; $i < scalar @lines; $i++) { | ||||
| 216 | 190 | 343 | my $line = $lines[$i]; | ||||
| 217 | |||||||
| 218 | # Detect table start line (starts with '|') | ||||||
| 219 | 190 | 100 | 100 | 719 | if (!$in_table && $line =~ /^\|/) { | ||
| 220 | # Insert blank line before table if previous line is not blank | ||||||
| 221 | 4 | 50 | 33 | 36 | if ($i > 0 && $lines[$i-1] !~ /^\s*$/) { | ||
| 222 | 0 | 0 | push @result, ""; | ||||
| 223 | } | ||||||
| 224 | |||||||
| 225 | 4 | 9 | $in_table = 1; | ||||
| 226 | 4 | 9 | @table_rows = (); | ||||
| 227 | } | ||||||
| 228 | |||||||
| 229 | # When processing a multiline cell | ||||||
| 230 | 190 | 50 | 435 | if ($processing_multiline_cell) { | |||
| 100 | |||||||
| 231 | # Detect next cell boundary or end of line | ||||||
| 232 | 0 | 0 | 0 | 0 | if ($line =~ /^\|/ || $line =~ /^$/) { | ||
| 233 | 0 | 0 | $processing_multiline_cell = 0; | ||||
| 234 | 0 | 0 | push @{$table_rows[-1]}, $current_cell; | ||||
| 0 | 0 | ||||||
| 235 | 0 | 0 | $current_cell = ""; | ||||
| 236 | |||||||
| 237 | # When a new row starts, process normally | ||||||
| 238 | 0 | 0 | 0 | if ($line =~ /^\|/) { | |||
| 239 | # Remove leading '|' | ||||||
| 240 | 0 | 0 | $line =~ s/^\|//g; | ||||
| 241 | 0 | 0 | my @cells = split(/\|/, $line); | ||||
| 242 | 0 | 0 | push @table_rows, []; | ||||
| 243 | |||||||
| 244 | # Process each cell | ||||||
| 245 | 0 | 0 | foreach my $cell (@cells) { | ||||
| 246 | # If last cell ends with ' ', enter multiline mode |
||||||
| 247 | 0 | 0 | 0 | if ($cell =~ / $/) { |
|||
| 248 | 0 | 0 | $current_cell = $cell; | ||||
| 249 | 0 | 0 | $processing_multiline_cell = 1; | ||||
| 250 | } else { | ||||||
| 251 | # Detect header cell and process | ||||||
| 252 | 0 | 0 | 0 | if ($cell =~ /^_\.(.*)$/) { | |||
| 253 | 0 | 0 | $header_detected = 1; | ||||
| 254 | 0 | 0 | push @{$table_rows[-1]}, $1; | ||||
| 0 | 0 | ||||||
| 255 | } else { | ||||||
| 256 | 0 | 0 | push @{$table_rows[-1]}, $cell; | ||||
| 0 | 0 | ||||||
| 257 | } | ||||||
| 258 | } | ||||||
| 259 | } | ||||||
| 260 | } else { | ||||||
| 261 | # On blank line, end table processing | ||||||
| 262 | 0 | 0 | $in_table = 0; | ||||
| 263 | 0 | 0 | $self->output_table(\@result, \@table_rows); | ||||
| 264 | 0 | 0 | @table_rows = (); | ||||
| 265 | 0 | 0 | push @result, $line; | ||||
| 266 | } | ||||||
| 267 | } else { | ||||||
| 268 | # Add text to current cell during multiline processing | ||||||
| 269 | 0 | 0 | $current_cell .= " " . $line; | ||||
| 270 | } | ||||||
| 271 | } | ||||||
| 272 | # Normal row processing (no ' ') |
||||||
| 273 | elsif ($line =~ /^\|/) { | ||||||
| 274 | 14 | 50 | 96 | if (!$in_table) { | |||
| 275 | 0 | 0 | $in_table = 1; | ||||
| 276 | 0 | 0 | @table_rows = (); | ||||
| 277 | } | ||||||
| 278 | |||||||
| 279 | # Check for ' ' |
||||||
| 280 | 14 | 50 | 39 | if ($line =~ / /) { |
|||
| 281 | # Process cells before and after ' ' |
||||||
| 282 | 0 | 0 | my @parts = split(/ /, $line, 2); |
||||
| 283 | 0 | 0 | my @cells = split(/\|/, $parts[0]); | ||||
| 284 | |||||||
| 285 | # Add new row | ||||||
| 286 | 0 | 0 | push @table_rows, []; | ||||
| 287 | |||||||
| 288 | # Process normal cells | ||||||
| 289 | 0 | 0 | for (my $j = 0; $j < scalar(@cells) - 1; $j++) { | ||||
| 290 | 0 | 0 | my $cell = $cells[$j]; | ||||
| 291 | # Detect header cell and process | ||||||
| 292 | 0 | 0 | 0 | if ($cell =~ /^_\.(.*)$/) { | |||
| 293 | 0 | 0 | $header_detected = 1; | ||||
| 294 | 0 | 0 | push @{$table_rows[-1]}, $1; | ||||
| 0 | 0 | ||||||
| 295 | } else { | ||||||
| 296 | 0 | 0 | push @{$table_rows[-1]}, $cell; | ||||
| 0 | 0 | ||||||
| 297 | } | ||||||
| 298 | } | ||||||
| 299 | |||||||
| 300 | # Process cell containing ' ' |
||||||
| 301 | 0 | 0 | $current_cell = $cells[-1] . " " . $parts[1]; |
||||
| 302 | 0 | 0 | $current_cell =~ s/ / /g; |
||||
| 303 | 0 | 0 | push @{$table_rows[-1]}, $current_cell; | ||||
| 0 | 0 | ||||||
| 304 | } else { | ||||||
| 305 | # Normal row processing | ||||||
| 306 | 14 | 96 | $line =~ s/\|$//g; | ||||
| 307 | 14 | 66 | my @cells = split(/\|/, $line); | ||||
| 308 | |||||||
| 309 | # Add new row | ||||||
| 310 | 14 | 47 | push @table_rows, []; | ||||
| 311 | |||||||
| 312 | # Process each cell | ||||||
| 313 | 14 | 61 | foreach my $cell (@cells) { | ||||
| 314 | # Detect header cell and process | ||||||
| 315 | 50 | 100 | 140 | if ($cell =~ /^_\.(.*)$/) { | |||
| 316 | 10 | 18 | $header_detected = 1; | ||||
| 317 | 10 | 16 | push @{$table_rows[-1]}, $1; | ||||
| 10 | 59 | ||||||
| 318 | } else { | ||||||
| 319 | 40 | 67 | push @{$table_rows[-1]}, $cell; | ||||
| 40 | 113 | ||||||
| 320 | } | ||||||
| 321 | } | ||||||
| 322 | } | ||||||
| 323 | } else { | ||||||
| 324 | # When encountering a non-table line | ||||||
| 325 | 176 | 100 | 369 | if ($in_table) { | |||
| 326 | 4 | 7 | $in_table = 0; | ||||
| 327 | 4 | 21 | $self->output_table(\@result, \@table_rows); | ||||
| 328 | 4 | 15 | @table_rows = (); | ||||
| 329 | |||||||
| 330 | # Insert blank line after table if next line is not blank | ||||||
| 331 | 4 | 100 | 24 | if ($line !~ /^\s*$/) { | |||
| 332 | 2 | 23 | push @result, ""; | ||||
| 333 | } | ||||||
| 334 | } | ||||||
| 335 | 176 | 483 | push @result, $line; | ||||
| 336 | } | ||||||
| 337 | } | ||||||
| 338 | |||||||
| 339 | # Handle end-of-file table closure | ||||||
| 340 | 3 | 50 | 33 | 11 | if ($in_table && @table_rows) { | ||
| 341 | 0 | 0 | $self->output_table(\@result, \@table_rows); | ||||
| 342 | 0 | 0 | push @result, ""; | ||||
| 343 | } | ||||||
| 344 | |||||||
| 345 | 3 | 90 | return join("\n", @result); | ||||
| 346 | } | ||||||
| 347 | |||||||
| 348 | |||||||
| 349 | sub output_table { | ||||||
| 350 | 4 | 4 | 0 | 12 | my ($self, $result, $table_rows) = @_; | ||
| 351 | |||||||
| 352 | 4 | 50 | 11 | if (@$table_rows) { | |||
| 353 | # Process header row | ||||||
| 354 | 4 | 8 | my $first_row = shift @$table_rows; | ||||
| 355 | 4 | 19 | my $header_row = "| " . join(" | ", @$first_row) . " |"; | ||||
| 356 | 4 | 9 | push @$result, $header_row; | ||||
| 357 | |||||||
| 358 | # Add separator row | ||||||
| 359 | 4 | 8 | my $separator = "|"; | ||||
| 360 | 4 | 9 | foreach my $cell (@$first_row) { | ||||
| 361 | 14 | 29 | $separator .= " --- |"; | ||||
| 362 | } | ||||||
| 363 | 4 | 9 | push @$result, $separator; | ||||
| 364 | |||||||
| 365 | # Process data rows (convert ' ' to space) |
||||||
| 366 | 4 | 9 | foreach my $row (@$table_rows) { | ||||
| 367 | 10 | 21 | my @processed_cells = map { s/ / /g; $_ } @$row; |
||||
| 36 | 65 | ||||||
| 36 | 83 | ||||||
| 368 | 10 | 49 | push @$result, "| " . join(" | ", @processed_cells) . " |"; | ||||
| 369 | } | ||||||
| 370 | } | ||||||
| 371 | } | ||||||
| 372 | |||||||
| 373 | |||||||
| 374 | 1; | ||||||
| 375 | __END__ |