| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # Web::DataService::Text | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # This module is responsible for putting data responses into either | 
| 5 |  |  |  |  |  |  | # tab-separated text or comma-separated text format.  It is used when | 
| 6 |  |  |  |  |  |  | # the user selects any of the following three format strings: | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | # csv	comma-separated text | 
| 9 |  |  |  |  |  |  | # tsv	tab-separated text | 
| 10 |  |  |  |  |  |  | # txt	comma-separated text, to be shown directly in a browser tab | 
| 11 |  |  |  |  |  |  | # | 
| 12 |  |  |  |  |  |  | # Author: Michael McClennen | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 1 |  |  | 1 |  | 9 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 44 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | package Web::DataService::Plugin::Text; | 
| 17 |  |  |  |  |  |  |  | 
| 18 | 1 |  |  | 1 |  | 6 | use Encode; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 75 |  | 
| 19 | 1 |  |  | 1 |  | 6 | use Scalar::Util qw(reftype); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 43 |  | 
| 20 | 1 |  |  | 1 |  | 6 | use Carp qw(croak); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 1766 |  | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | # emit_header ( request, field_list ) | 
| 25 |  |  |  |  |  |  | # | 
| 26 |  |  |  |  |  |  | # Generate any initial text that is necessary for a text format response.  This | 
| 27 |  |  |  |  |  |  | # will be formatted according to the format suffix specified in the request | 
| 28 |  |  |  |  |  |  | # (comma-separated or tab-separated). | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | sub emit_header { | 
| 31 |  |  |  |  |  |  |  | 
| 32 | 0 |  |  | 0 | 0 |  | my ($class, $request, $field_list) = @_; | 
| 33 |  |  |  |  |  |  |  | 
| 34 | 0 |  |  |  |  |  | my $output = ''; | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | # If the user has directed that the header be suppressed, just return | 
| 37 |  |  |  |  |  |  | # the empty string. | 
| 38 |  |  |  |  |  |  |  | 
| 39 | 0 | 0 |  |  |  |  | return $output unless $request->display_header; | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | # If the user has specified that the source of this data be shown, add | 
| 42 |  |  |  |  |  |  | # some header lines to convey this. | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 0 | 0 |  |  |  |  | if ( $request->display_datainfo ) | 
| 45 |  |  |  |  |  |  | { | 
| 46 | 0 |  |  |  |  |  | my $info = $request->datainfo; | 
| 47 |  |  |  |  |  |  |  | 
| 48 | 0 |  |  |  |  |  | foreach my $key ( $request->datainfo_keys ) | 
| 49 |  |  |  |  |  |  | { | 
| 50 | 0 | 0 |  |  |  |  | next unless $info->{$key}; | 
| 51 | 0 |  |  |  |  |  | my $label = generate_label($key); | 
| 52 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, $label, $info->{$key}); | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  |  | 
| 55 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, "Parameters:"); | 
| 56 |  |  |  |  |  |  |  | 
| 57 | 0 |  |  |  |  |  | my @display = $request->params_for_display; | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 0 |  |  |  |  |  | while ( @display ) | 
| 60 |  |  |  |  |  |  | { | 
| 61 | 0 |  |  |  |  |  | my $param = shift @display; | 
| 62 | 0 |  |  |  |  |  | my $value = shift @display; | 
| 63 |  |  |  |  |  |  |  | 
| 64 | 0 | 0 | 0 |  |  |  | next unless defined $param && $param ne ''; | 
| 65 | 0 |  | 0 |  |  |  | $value //= ''; | 
| 66 |  |  |  |  |  |  |  | 
| 67 | 0 | 0 |  |  |  |  | if ( ref $value eq 'ARRAY' ) | 
| 68 |  |  |  |  |  |  | { | 
| 69 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, '', $param, @$value); | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | else | 
| 73 |  |  |  |  |  |  | { | 
| 74 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, '', $param, $value); | 
| 75 |  |  |  |  |  |  | } | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | # If there have been any extra datainfo fields set, list them now. | 
| 79 |  |  |  |  |  |  |  | 
| 80 | 0 | 0 |  |  |  |  | if ( my @extra = $request->extra_datainfo_keys ) | 
| 81 |  |  |  |  |  |  | { | 
| 82 | 0 |  |  |  |  |  | foreach my $key ( @extra ) | 
| 83 |  |  |  |  |  |  | { | 
| 84 | 0 | 0 |  |  |  |  | next unless defined $key; | 
| 85 | 0 |  |  |  |  |  | my $value = $request->extra_datainfo($key); | 
| 86 | 0 | 0 |  |  |  |  | next unless defined $value; | 
| 87 |  |  |  |  |  |  |  | 
| 88 | 0 |  | 0 |  |  |  | my $title = $request->extra_datainfo_title($key) || $key; | 
| 89 |  |  |  |  |  |  |  | 
| 90 | 0 | 0 | 0 |  |  |  | if ( ref $value && reftype $value eq 'HASH' ) | 
|  |  | 0 | 0 |  |  |  |  | 
| 91 |  |  |  |  |  |  | { | 
| 92 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, $title); | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 0 |  |  |  |  |  | my @keys; | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 0 | 0 | 0 |  |  |  | if ( defined $value->{_keys} && ref $value->{_keys} eq 'ARRAY' ) | 
| 97 |  |  |  |  |  |  | { | 
| 98 | 0 |  |  |  |  |  | @keys = @{$value->{_keys}}; | 
|  | 0 |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | } | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | else | 
| 102 |  |  |  |  |  |  | { | 
| 103 | 0 |  |  |  |  |  | @keys = keys %$value; | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 | 0 |  |  |  |  |  | foreach my $k ( @keys ) | 
| 107 |  |  |  |  |  |  | { | 
| 108 | 0 | 0 |  |  |  |  | if ( ref $value->{$k} eq 'ARRAY' ) | 
| 109 |  |  |  |  |  |  | { | 
| 110 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, '', @{$value->{$k}}); | 
|  | 0 |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | } | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | else | 
| 114 |  |  |  |  |  |  | { | 
| 115 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, '', $value->{$k}); | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  | } | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | elsif ( ref $value && reftype $value eq 'ARRAY' ) | 
| 121 |  |  |  |  |  |  | { | 
| 122 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, $title); | 
| 123 |  |  |  |  |  |  |  | 
| 124 | 0 |  |  |  |  |  | foreach my $row ( @$value ) | 
| 125 |  |  |  |  |  |  | { | 
| 126 | 0 | 0 |  |  |  |  | if ( ref $row eq 'ARRAY' ) | 
| 127 |  |  |  |  |  |  | { | 
| 128 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, '', @$row); | 
| 129 |  |  |  |  |  |  | } | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | else | 
| 132 |  |  |  |  |  |  | { | 
| 133 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, '', $row); | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  | } | 
| 136 |  |  |  |  |  |  | } | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | else | 
| 139 |  |  |  |  |  |  | { | 
| 140 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, $title, $value); | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | # If the user has directed that result counts are to be shown, and if any | 
| 147 |  |  |  |  |  |  | # are available to show, then add those at the very top. | 
| 148 |  |  |  |  |  |  |  | 
| 149 | 0 | 0 |  |  |  |  | if ( $request->display_counts ) | 
| 150 |  |  |  |  |  |  | { | 
| 151 | 0 |  |  |  |  |  | my $counts = $request->result_counts; | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, "Elapsed Time", sprintf("%.3g", $request->{elapsed})); | 
| 154 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, "Records Found", $counts->{found}); | 
| 155 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, "Records Returned", $counts->{returned}); | 
| 156 |  |  |  |  |  |  | $output .= $class->emit_line($request, "Record Offset", $counts->{offset}) | 
| 157 | 0 | 0 | 0 |  |  |  | if defined $counts->{offset} && $counts->{offset} > 0; | 
| 158 |  |  |  |  |  |  | } | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | # If any warnings were generated on this request, add them in next. | 
| 161 |  |  |  |  |  |  |  | 
| 162 | 0 | 0 |  |  |  |  | if ( my @msgs = $request->warnings ) | 
| 163 |  |  |  |  |  |  | { | 
| 164 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, "Warning:", $_) foreach @msgs; | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | # If we have summary data to output, do so now. | 
| 168 |  |  |  |  |  |  |  | 
| 169 | 0 | 0 | 0 |  |  |  | if ( $request->{summary_data} && $request->{summary_field_list} ) | 
| 170 |  |  |  |  |  |  | { | 
| 171 | 0 |  |  |  |  |  | my @summary_fields = map { $_->{name} } @{$request->{summary_field_list}}; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 172 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, "Summary:"); | 
| 173 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, @summary_fields); | 
| 174 | 0 |  |  |  |  |  | $output .= $class->emit_record($request, $request->{summary_data}, $request->{summary_field_list}); | 
| 175 |  |  |  |  |  |  | } | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | # If any header material was generated, add a line to introduce the start | 
| 178 |  |  |  |  |  |  | # of the actual data. | 
| 179 |  |  |  |  |  |  |  | 
| 180 | 0 | 0 |  |  |  |  | if ( $output ne '' ) | 
| 181 |  |  |  |  |  |  | { | 
| 182 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, "Records:"); | 
| 183 |  |  |  |  |  |  | } | 
| 184 |  |  |  |  |  |  |  | 
| 185 |  |  |  |  |  |  | # Now, if any output fields were specified for this request, list them in | 
| 186 |  |  |  |  |  |  | # a header line. | 
| 187 |  |  |  |  |  |  |  | 
| 188 | 0 | 0 | 0 |  |  |  | if ( ref $field_list eq 'ARRAY' && @$field_list ) | 
| 189 |  |  |  |  |  |  | { | 
| 190 | 0 |  |  |  |  |  | my @fields = map { $_->{name} } @$field_list; | 
|  | 0 |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  |  | 
| 192 | 0 |  |  |  |  |  | $output .= $class->emit_line($request, @fields); | 
| 193 |  |  |  |  |  |  | } | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  | # Return the text that we have generated. | 
| 196 |  |  |  |  |  |  |  | 
| 197 | 0 |  |  |  |  |  | return $output; | 
| 198 |  |  |  |  |  |  | } | 
| 199 |  |  |  |  |  |  |  | 
| 200 |  |  |  |  |  |  |  | 
| 201 |  |  |  |  |  |  | # generate_label ( key ) | 
| 202 |  |  |  |  |  |  | # | 
| 203 |  |  |  |  |  |  | # Turn a field identifier (key) into a text label by turning underscores into | 
| 204 |  |  |  |  |  |  | # spaces and capitalizing words. | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | sub generate_label { | 
| 207 |  |  |  |  |  |  |  | 
| 208 | 0 |  |  | 0 | 0 |  | my ($key) = @_; | 
| 209 |  |  |  |  |  |  |  | 
| 210 | 0 |  |  |  |  |  | my @components = split(/_/, $key); | 
| 211 | 0 |  |  |  |  |  | foreach ( @components ) { s/^url$/URL/ } | 
|  | 0 |  |  |  |  |  |  | 
| 212 | 0 |  |  |  |  |  | my $label = join(' ', map { ucfirst } @components); | 
|  | 0 |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  |  | 
| 214 | 0 |  |  |  |  |  | return $label; | 
| 215 |  |  |  |  |  |  | } | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  |  | 
| 218 |  |  |  |  |  |  | # emit_empty ( ) | 
| 219 |  |  |  |  |  |  | # | 
| 220 |  |  |  |  |  |  | # Return the string (if any) to output in lieu of an empty result set. | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | sub emit_empty { | 
| 223 |  |  |  |  |  |  |  | 
| 224 | 0 |  |  | 0 | 0 |  | my ($class, $request) = @_; | 
| 225 |  |  |  |  |  |  |  | 
| 226 | 0 |  |  |  |  |  | return $class->emit_line($request, "THIS REQUEST RETURNED NO RECORDS"); | 
| 227 |  |  |  |  |  |  | } | 
| 228 |  |  |  |  |  |  |  | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | # emit_footer ( request ) | 
| 231 |  |  |  |  |  |  | # | 
| 232 |  |  |  |  |  |  | # None of the formats handled by this module involve any text after the last record | 
| 233 |  |  |  |  |  |  | # is output, so we just return the empty string. | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | sub emit_footer { | 
| 236 |  |  |  |  |  |  |  | 
| 237 | 0 |  |  | 0 | 0 |  | return ''; | 
| 238 |  |  |  |  |  |  | } | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | # emit_record (request, record, field_list) | 
| 242 |  |  |  |  |  |  | # | 
| 243 |  |  |  |  |  |  | # Return a text line expressing a single record, according to the format | 
| 244 |  |  |  |  |  |  | # specified in the request (comma-separated or tab-separated) and the | 
| 245 |  |  |  |  |  |  | # given list of output field specifications. | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | sub emit_record { | 
| 248 |  |  |  |  |  |  |  | 
| 249 | 0 |  |  | 0 | 0 |  | my ($class, $request, $record, $field_list) = @_; | 
| 250 |  |  |  |  |  |  |  | 
| 251 |  |  |  |  |  |  | # If no output fields were specified, we return the empty string. | 
| 252 |  |  |  |  |  |  |  | 
| 253 | 0 | 0 |  |  |  |  | return '' unless ref $field_list eq 'ARRAY'; | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | # Otherwise, generate the list of values for the current line.  For each output | 
| 256 |  |  |  |  |  |  | # field, we take either the explicitly specified value or the value of the | 
| 257 |  |  |  |  |  |  | # specified field from the record. | 
| 258 |  |  |  |  |  |  |  | 
| 259 | 0 |  |  |  |  |  | my @values; | 
| 260 |  |  |  |  |  |  |  | 
| 261 | 0 |  |  |  |  |  | foreach my $f ( @$field_list ) | 
| 262 |  |  |  |  |  |  | { | 
| 263 | 0 |  |  |  |  |  | my $v = ''; | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | # First figure out what each value should be | 
| 266 |  |  |  |  |  |  |  | 
| 267 | 0 | 0 | 0 |  |  |  | if ( defined $f->{value} ) | 
|  |  | 0 |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | { | 
| 269 | 0 |  |  |  |  |  | $v = $f->{value}; | 
| 270 |  |  |  |  |  |  | } | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | elsif ( defined $f->{field} && defined $record->{$f->{field}} ) | 
| 273 |  |  |  |  |  |  | { | 
| 274 | 0 |  |  |  |  |  | $v = $record->{$f->{field}}; | 
| 275 |  |  |  |  |  |  | } | 
| 276 |  |  |  |  |  |  |  | 
| 277 |  |  |  |  |  |  | # Cancel out the value if this field has the 'if_field' or 'not_field' | 
| 278 |  |  |  |  |  |  | # attribute and the corresponding condition is true. | 
| 279 |  |  |  |  |  |  |  | 
| 280 | 0 | 0 | 0 |  |  |  | $v = '' if $f->{if_field} and not $record->{$f->{if_field}}; | 
| 281 | 0 | 0 | 0 |  |  |  | $v = '' if $f->{not_field} and $record->{$f->{not_field}}; | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | # Cancel out any field with a 'dedup' attribute if its value is the same | 
| 284 |  |  |  |  |  |  | # as the value of the field indicated by the attribute. | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | $v = '' if $f->{dedup} and defined $record->{$f->{field}} and defined $record->{$f->{dedup}} | 
| 287 | 0 | 0 | 0 |  |  |  | and $record->{$f->{field}} eq $record->{$f->{dedup}}; | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  | # If the value is an array, join it into a string.  If no joining | 
| 290 |  |  |  |  |  |  | # string was specified, use a comma. | 
| 291 |  |  |  |  |  |  |  | 
| 292 | 0 | 0 |  |  |  |  | if ( ref $v eq 'ARRAY' ) | 
| 293 |  |  |  |  |  |  | { | 
| 294 | 0 |  | 0 |  |  |  | my $join = $f->{text_join} // q{, }; | 
| 295 | 0 |  |  |  |  |  | $v = join($join, @$v); | 
| 296 |  |  |  |  |  |  | } | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | # Now add the value to the list. | 
| 299 |  |  |  |  |  |  |  | 
| 300 | 0 |  |  |  |  |  | push @values, $v; | 
| 301 |  |  |  |  |  |  | } | 
| 302 |  |  |  |  |  |  |  | 
| 303 | 0 |  |  |  |  |  | return $class->emit_line($request, @values); | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  |  | 
| 307 |  |  |  |  |  |  | # emit_line ( request, values... ) | 
| 308 |  |  |  |  |  |  | # | 
| 309 |  |  |  |  |  |  | # Generate an output line containing the given values. | 
| 310 |  |  |  |  |  |  |  | 
| 311 |  |  |  |  |  |  | sub emit_line { | 
| 312 |  |  |  |  |  |  |  | 
| 313 | 0 |  |  | 0 | 0 |  | my $class = shift; | 
| 314 | 0 |  |  |  |  |  | my $request = shift; | 
| 315 |  |  |  |  |  |  |  | 
| 316 | 0 |  |  |  |  |  | my $linebreak = $request->linebreak; | 
| 317 |  |  |  |  |  |  |  | 
| 318 | 0 | 0 |  |  |  |  | if ( $request->output_format eq 'tsv' ) | 
| 319 |  |  |  |  |  |  | { | 
| 320 | 0 |  |  |  |  |  | return join("\t", map { tsv_clean($_) } @_) . $linebreak; | 
|  | 0 |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | } | 
| 322 |  |  |  |  |  |  |  | 
| 323 |  |  |  |  |  |  | else | 
| 324 |  |  |  |  |  |  | { | 
| 325 | 0 |  |  |  |  |  | return join(',', map { csv_clean($_) } @_) . $linebreak; | 
|  | 0 |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | } | 
| 327 |  |  |  |  |  |  | } | 
| 328 |  |  |  |  |  |  |  | 
| 329 |  |  |  |  |  |  |  | 
| 330 |  |  |  |  |  |  | my (%TXTESCAPE) = ( '"' => '""', "'" => "''", "\t" => '\t', "\n" => '\n', | 
| 331 |  |  |  |  |  |  | "\r" => '\r' );	#' | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | # csv_clean ( string, quoted ) | 
| 334 |  |  |  |  |  |  | # | 
| 335 |  |  |  |  |  |  | # Given a string value, return an equivalent string value that will be valid | 
| 336 |  |  |  |  |  |  | # as part of a csv-format result.  If 'quoted' is true, then all fields will | 
| 337 |  |  |  |  |  |  | # be quoted.  Otherwise, only those which contain commas or quotes will be. | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | sub csv_clean { | 
| 340 |  |  |  |  |  |  |  | 
| 341 | 0 |  |  | 0 | 0 |  | my ($string) = @_; | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | # Return an empty string unless the value is defined. | 
| 344 |  |  |  |  |  |  |  | 
| 345 | 0 | 0 |  |  |  |  | return '""' unless defined $string; | 
| 346 |  |  |  |  |  |  |  | 
| 347 |  |  |  |  |  |  | # Do a quick check for okay characters.  If there's nothing exotic, just | 
| 348 |  |  |  |  |  |  | # return the quoted value. | 
| 349 |  |  |  |  |  |  |  | 
| 350 | 0 | 0 |  |  |  |  | return '"' . $string . '"' unless $string =~ /[^a-zA-Z0-9 _.;:<>-]/; | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | # Otherwise, we need to do some longer processing. | 
| 353 |  |  |  |  |  |  |  | 
| 354 |  |  |  |  |  |  | # Turn any numeric character references into actual Unicode characters. | 
| 355 |  |  |  |  |  |  | # The database does contain some of these. | 
| 356 |  |  |  |  |  |  |  | 
| 357 | 0 |  |  |  |  |  | $string =~ s/&\#(\d)+;/pack("U", $1)/eg; | 
|  | 0 |  |  |  |  |  |  | 
| 358 |  |  |  |  |  |  |  | 
| 359 |  |  |  |  |  |  | # Next, double all quotes and textify whitespace control characters | 
| 360 |  |  |  |  |  |  |  | 
| 361 | 0 |  |  |  |  |  | $string =~ s/("|\n|\r)/$TXTESCAPE{$1}/ge; | 
|  | 0 |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | # Finally, delete all other control characters (they shouldn't be in the | 
| 364 |  |  |  |  |  |  | # database in the first place, but unfortunately some rows do contain | 
| 365 |  |  |  |  |  |  | # them). | 
| 366 |  |  |  |  |  |  |  | 
| 367 | 0 |  |  |  |  |  | $string =~ s/[\0-\037\177]//g; | 
| 368 |  |  |  |  |  |  |  | 
| 369 | 0 |  |  |  |  |  | return '"' . $string . '"'; | 
| 370 |  |  |  |  |  |  | } | 
| 371 |  |  |  |  |  |  |  | 
| 372 |  |  |  |  |  |  |  | 
| 373 |  |  |  |  |  |  | # tsv_clean ( string ) | 
| 374 |  |  |  |  |  |  | # | 
| 375 |  |  |  |  |  |  | # Given a string value, return an equivalent string value that will be valid | 
| 376 |  |  |  |  |  |  | # as part of a tsv-format result.  If 'quoted' is true, then all fields will | 
| 377 |  |  |  |  |  |  | # be quoted.  Otherwise, only those which contain commas or quotes will be. | 
| 378 |  |  |  |  |  |  |  | 
| 379 |  |  |  |  |  |  | sub tsv_clean { | 
| 380 |  |  |  |  |  |  |  | 
| 381 | 0 |  |  | 0 | 0 |  | my ($string, $quoted) = @_; | 
| 382 |  |  |  |  |  |  |  | 
| 383 |  |  |  |  |  |  | # Return an empty string unless the value is defined. | 
| 384 |  |  |  |  |  |  |  | 
| 385 | 0 | 0 |  |  |  |  | return '' unless defined $string; | 
| 386 |  |  |  |  |  |  |  | 
| 387 |  |  |  |  |  |  | # Do a quick check for okay characters.  If there's nothing exotic, just | 
| 388 |  |  |  |  |  |  | # return the value as-is. | 
| 389 |  |  |  |  |  |  |  | 
| 390 | 0 | 0 |  |  |  |  | return $string unless $string =~ /^[a-zA-Z0-9 _.,;:<>-]/; | 
| 391 |  |  |  |  |  |  |  | 
| 392 |  |  |  |  |  |  | # Otherwise, we need to do some longer processing. | 
| 393 |  |  |  |  |  |  |  | 
| 394 |  |  |  |  |  |  | # Turn any numeric character references into actual Unicode characters. | 
| 395 |  |  |  |  |  |  | # The database does contain some of these. | 
| 396 |  |  |  |  |  |  |  | 
| 397 | 0 |  |  |  |  |  | $string =~ s/&\#(\d)+;/pack("U", $1)/eg; | 
|  | 0 |  |  |  |  |  |  | 
| 398 |  |  |  |  |  |  |  | 
| 399 |  |  |  |  |  |  | # Next, textify whitespace control characters | 
| 400 |  |  |  |  |  |  |  | 
| 401 | 0 |  |  |  |  |  | $string =~ s/(\n|\t|\r)/$TXTESCAPE{$1}/ge; | 
|  | 0 |  |  |  |  |  |  | 
| 402 |  |  |  |  |  |  |  | 
| 403 |  |  |  |  |  |  | # Finally, delete all other control characters (they shouldn't be in the | 
| 404 |  |  |  |  |  |  | # database in the first place, but unfortunately some rows do contain | 
| 405 |  |  |  |  |  |  | # them). | 
| 406 |  |  |  |  |  |  |  | 
| 407 | 0 |  |  |  |  |  | $string =~ s/[\0-\037\177]//g; | 
| 408 |  |  |  |  |  |  |  | 
| 409 | 0 |  |  |  |  |  | return $string; | 
| 410 |  |  |  |  |  |  | } | 
| 411 |  |  |  |  |  |  |  | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | 1; |