File Coverage

blib/lib/Net/Gemini.pm
Criterion Covered Total %
statement 172 199 86.4
branch 72 98 73.4
condition 22 25 88.0
subroutine 31 31 100.0
pod 14 14 100.0
total 311 367 84.7


line stmt bran cond sub pod time code
1             # -*- Perl -*-
2             #
3             # a Gemini protocol client
4             #
5             # "The conjunction of Jupiter with one of the stars of Gemini, which
6             # 'we ourselves have seen' (1.6.343b30) has been dated in recent years
7             # to December 337 BC."
8             # -- Malcolm Wilson. Structure and Method in Aristotle's Meteorologica.
9              
10             # NOTE this silently accepts URI with userinfo; those probably
11             # should be failed?
12             #
13             # KLUGE this may break if the URI module ever gets URI/gemini.pm
14             package URI::gemini {
15 44     44   2032304 use URI;
  44         413298  
  44         2023  
16 44     44   10212 use parent 'URI::_server';
  44         8158  
  44         360  
17 118     118   14889 sub default_port { 1965 }
18 19     19   76 sub userinfo { return undef } # gemini has no userinfo
19 19     19   5915289 sub secure { 1 }
20              
21             sub canonical {
22 122     122   534817 my $self = shift;
23 122         604 my $other = $self->SUPER::canonical;
24 122         5670 $self->SUPER::userinfo(undef); # gemini has no userinfo
25              
26 122   66     29310 my $slash_path =
27             defined( $other->authority )
28             && !length( $other->path )
29             && !defined( $other->query );
30              
31 122 100       5697 if ($slash_path) {
32 1 50       4 $other = $other->clone if $other == $self;
33 1         24 $other->path("/");
34             }
35 122         397 $other;
36             }
37             }
38              
39             package Net::Gemini;
40             our $VERSION = '0.11';
41 44     44   420859 use strict;
  44         83  
  44         1527  
42 44     44   184 use warnings;
  44         87  
  44         2269  
43 44     44   25272 use Digest::SHA 'sha256_hex';
  44         159261  
  44         5285  
44 44     44   13670 use Encode ();
  44         404819  
  44         2007  
45 44     44   328 use Exporter 'import';
  44         79  
  44         1908  
46 44     44   16864 use IO::Socket::IP;
  44         486805  
  44         478  
47 44     44   55431 use IO::Socket::SSL;
  44         1965227  
  44         496  
48 44     44   10063 use Net::SSLeay;
  44         97  
  44         2386  
49 44     44   24858 use Parse::MIME 'parse_mime_type';
  44         71813  
  44         126489  
50              
51             our @EXPORT_OK = qw(gemini_request);
52              
53             sub _DEFAULT_BUFSIZE () { 4096 }
54             sub _DEFAULT_MAX_CONTENT () { 2097152 }
55             sub _DEFAULT_REDIRECTS () { 5 }
56             sub _DEFAULT_REDIRECT_SLEEP () { 1 }
57              
58 19     19 1 1343 sub code { $_[0]{_code} } # 0..6 response code
59              
60             sub content {
61 3     3 1 86 $_[0]{_content};
62             } # NOTE only after certain calls and codes
63 18     18 1 245 sub error { $_[0]{_error} } # error message for 0 code
64 17     17 1 204 sub host { $_[0]{_host} }
65 17     17 1 561 sub ip { $_[0]{_ip} }
66 33     33 1 318 sub meta { $_[0]{_meta} }
67 3     3 1 101 sub mime { $_[0]{_mime} } # NOTE only after certain calls and codes
68 17     17 1 289 sub port { $_[0]{_port} }
69 17     17 1 799 sub socket { $_[0]{_socket} }
70              
71             sub status {
72 18     18 1 16751 $_[0]{_status};
73             } # two digit '1x', '2x', ... response code
74 19     19 1 162 sub uri { $_[0]{_uri} }
75              
76             # see VERIFICATION below; the caller should supply a custom callback.
77             # the default is thus "Trust On Almost Any Use" (TOAAU) or similar to
78             # what gg(1) of gmid does
79 84     84   998 sub _verify_ssl { 1 }
80              
81             # minimal method to get a resource (see also gemini_request)
82             sub get {
83 156     156 1 71700 my ( $class, $source, %param ) = @_;
84 156         522 my %obj;
85 156 100       632 unless ( defined $source ) {
86 19         95 @obj{qw(_code _error)} = ( 0, "source is not defined" );
87 19         171 goto BLESSING;
88             }
89              
90 137         3790 $obj{_uri} = URI->new($source);
91 137 100       73270 unless ( $obj{_uri}->scheme eq 'gemini' ) {
92 19         3116 @obj{qw(_code _error)} = ( 0, "could not parse '$source'" );
93 19         133 goto BLESSING;
94             }
95 118         7010 @obj{qw/_host _port/} = ( $obj{_uri}->host, $obj{_uri}->port );
96              
97 118         15167 my $yuri = $obj{_uri}->canonical;
98 118 100       1505 if ( length $yuri > 1024 ) {
99 19         608 @obj{qw(_code _error)} = ( 0, "URI is too long" );
100 19         133 goto BLESSING;
101             }
102              
103             # VERIFICATION is based on the following though much remains up to
104             # the caller to manage
105             # gemini://makeworld.space/gemlog/2020-07-03-tofu-rec.gmi
106             # gemini://alexschroeder.ch/page/2020-07-20%20Does%20a%20Gemini%20certificate%20need%20a%20Common%20Name%20matching%20the%20domain%3F
107             eval {
108             $obj{_socket} = IO::Socket::IP->new(
109             ( exists $param{family} ? ( Domain => $param{family} ) : () ),
110             PeerAddr => $obj{_host},
111             PeerPort => $obj{_port},
112 99 50       2704 Proto => 'tcp'
    100          
113             ) or die $!;
114 80         123639 $obj{_ip} = $obj{_socket}->peerhost;
115             IO::Socket::SSL->start_SSL(
116             $obj{_socket},
117             SSL_hostname => $obj{_host}, # SNI
118             ( $param{tofu} ? ( SSL_verifycn_scheme => 'none' ) : () ),
119             SSL_verify_callback => sub {
120 96     96   4664312 my ( $ok, $ctx_store, $certname, $error, $cert, $depth ) = @_;
121 96 50       1108 if ( $depth != 0 ) {
122 0 0       0 return 1 if $param{tofu};
123 0         0 return $ok;
124             }
125             my $digest = ( $param{verify_ssl} || \&_verify_ssl )->(
126             { host => $obj{_host},
127             port => $obj{_port},
128             cert => $cert, # warning, memory address!
129             # compatible with certID function of amfora
130             digest =>
131             uc( sha256_hex( Net::SSLeay::X509_get_X509_PUBKEY($cert) ) ),
132             ip => $obj{_ip},
133 96   100     8890 notBefore => Net::SSLeay::P_ASN1_TIME_get_isotime(
134             Net::SSLeay::X509_get_notBefore($cert)
135             ),
136             notAfter => Net::SSLeay::P_ASN1_TIME_get_isotime(
137             Net::SSLeay::X509_get_notAfter($cert)
138             ),
139             okay => $ok,
140             }
141             );
142             },
143 80 100       14706 ( exists $param{ssl} ? %{ $param{ssl} } : () ),
  65 100       4468  
    50          
144             ) or die $!;
145 79         444223 1;
146 99 100       880 } or do {
147 19         39311 @obj{qw(_code _error)} = ( 0, "IO::Socket::SSL failed: $@" );
148 19         266 goto BLESSING;
149             };
150              
151 79         858 binmode $obj{_socket}, ':raw';
152              
153 79         1159 my $n = syswrite $obj{_socket}, "$yuri\r\n";
154 79 50       23374 unless ( defined $n ) {
155 0         0 @obj{qw(_code _error)} = ( 0, "send URI failed: $!" );
156 0         0 goto BLESSING;
157             }
158              
159             # get the STATUS SPACE header response (and, probably, more)
160 79         1727 $obj{_buf} = '';
161 79         205 while (1) {
162             my $n = sysread $obj{_socket}, my $buf,
163 155   100     1810 $param{bufsize} || _DEFAULT_BUFSIZE;
164 155 50       37130343 unless ( defined $n ) {
165 0         0 @obj{qw(_code _error)} = ( 0, "recv response failed: $!" );
166 0         0 goto BLESSING;
167             }
168 155 50       784 if ( $n == 0 ) {
169 0         0 @obj{qw(_code _error)} = ( 0, "recv EOF" );
170 0         0 goto BLESSING;
171             }
172 155         724 $obj{_buf} .= $buf;
173 155 100       694 last if length $obj{_buf} >= 3;
174             }
175             # NOTE this is sloppy; there are fewer "full two digit status codes"
176             # defined in the appendix, e.g. only 10, 11, 20, 30, 31, 40, ...
177             # on the other hand, this supports any new extensions to the
178             # existing numbers
179 79 100       2467 if ( $obj{_buf} =~ m/^(([1-6])[0-9])[ ]/ ) {
180 61         1180 @obj{qw(_status _code)} = ( $1, $2 );
181 61         370 substr $obj{_buf}, 0, 3, '';
182             } else {
183             @obj{qw(_code _error)} = (
184             0,
185             "invalid response " . sprintf "%vx",
186             substr $obj{_buf},
187 18         1260 0, 3
188             );
189 18         288 goto BLESSING;
190             }
191              
192             # META -- at most 1024 characters, followed by \r\n. the loop is in
193             # the event the server is being naughty and trickling bytes in one
194             # by one (probably you will want a timeout somewhere, or an async
195             # version of this code)
196 61   100     775 my $bufsize = $param{bufsize} || _DEFAULT_BUFSIZE;
197 61         312 while (1) {
198 18569 100       51054 if ( $obj{_buf} =~ m/^(.{0,1024}?)\r\n/ ) {
199 43         558 $obj{_meta} = $1;
200 43         269 my $len = length $obj{_meta};
201 43 100       372 if ( $len == 0 ) {
202             # special case mentioned in the specification
203 7 50       71 $obj{_meta} = 'text/gemini;charset=utf-8' if $obj{_code} == 2;
204             } else {
205             eval {
206             $obj{_meta} =
207 36         1638 Encode::decode( 'UTF-8', $obj{_meta}, Encode::FB_CROAK );
208 36         6991 1;
209 36 50       253 } or do {
210 0         0 @obj{qw(_code _error)} = ( 0, "failed to decode meta: $@" );
211 0         0 goto BLESSING;
212             };
213             # another special case (RFC 2045 says that these things
214             # are not case sensitive, hence the (?i) despite the
215             # gemini specification saying "text/")
216 36 100 66     1030 if ( $obj{_code} == 2
      100        
217             and $obj{_meta} =~ m{^(?i)text/}
218             and $obj{_meta} !~ m/(?i)charset=/ ) {
219 19         262 $obj{_meta} .= ';charset=utf-8';
220             }
221             }
222 43         235 substr $obj{_buf}, 0, $len + 2, ''; # +2 for the \r\n
223 43         108 last;
224             } else {
225 18526         28852 my $len = length $obj{_buf};
226 18526 100       37942 if ( $len > 1024 ) {
227 18         666 @obj{qw(_code _error)} = ( 0, "meta is too long" );
228 18         468 goto BLESSING;
229             }
230 18508         26506 my $buf;
231 18508         55653 my $n = sysread $obj{_socket}, $buf, $bufsize;
232 18508 50       1988397 unless ( defined $n ) {
233 0         0 @obj{qw(_code _error)} = ( 0, "recv response failed: $!" );
234 0         0 goto BLESSING;
235             }
236 18508 50       39004 if ( $n == 0 ) {
237 0         0 @obj{qw(_code _error)} = ( 0, "recv EOF" );
238 0         0 goto BLESSING;
239             }
240 18508         39104 $obj{_buf} .= $buf;
241             }
242             }
243              
244             BLESSING:
245 155 100 100     1908 close $obj{_socket} if defined $obj{_socket} and $obj{_code} != 2;
246 155         28568 bless( \%obj, $class ), $obj{_code};
247             }
248              
249             # utility function that handles redirects and various means of content
250             # collection
251             sub gemini_request {
252 7     7 1 154 my ( $source, %options ) = @_;
253             $options{max_redirects} = _DEFAULT_REDIRECTS
254 7 100       76 unless exists $options{max_redirects};
255             $options{redirect_delay} = _DEFAULT_REDIRECT_SLEEP
256 7 100       52 unless exists $options{redirect_delay};
257             $options{max_size} = _DEFAULT_MAX_CONTENT
258 7 100       55 unless exists $options{max_size};
259              
260 7         35 my ( $gem, $code );
261 7         22 my $redirects = 0;
262             REQUEST:
263             ( $gem, $code ) = Net::Gemini->get( $source,
264 15 100       244 ( exists $options{param} ? %{ $options{param} } : () ) );
  1         8  
265 15 100 66     163 if ( $code == 2 ) {
    100          
266 5         13 my $len = length $gem->{_buf};
267 5   100     38 my $bufsize = $options{bufsize} || _DEFAULT_BUFSIZE;
268             # this can make uninit noise for a meta of ";" which might be
269             # worth an upstream patch?
270 5         48 $gem->{_mime} = [ parse_mime_type( $gem->meta ) ];
271 5 100       442 if ( exists $options{content_callback} ) {
272 1 50       5 if ($len) {
273 1 50       28 $options{content_callback}->( $gem->{_buf}, $len, $gem )
274             or goto CLEANUP;
275             }
276 0         0 while (1) {
277 0         0 my $buf;
278 0         0 $len = sysread $gem->{_socket}, $buf, $bufsize;
279 0 0       0 if ( !defined $len ) {
    0          
280 0         0 die "sysread failed: $!\n";
281             } elsif ( $len == 0 ) {
282 0         0 last;
283             }
284 0 0       0 $options{content_callback}->( $buf, $len, $gem ) or goto CLEANUP;
285             }
286             } else {
287 4 100       13 if ($len) {
288 3 50       11 if ( $len > $options{max_size} ) {
289 0         0 $gem->{_content} = substr $gem->{_buf}, 0, $options{max_size};
290 0         0 @{$gem}{qw(_code _error)} = ( 0, 'max_size' );
  0         0  
291 0         0 goto CLEANUP;
292             }
293 3         18 $gem->{_content} = $gem->{_buf};
294 3         11 $options{max_size} -= $len;
295             }
296 4         7 while (1) {
297 7         16 my $buf;
298 7         35 $len = sysread $gem->{_socket}, $buf, $bufsize;
299 7 50       742 if ( !defined $len ) {
    100          
300 0         0 die "sysread failed: $!\n";
301             } elsif ( $len == 0 ) {
302 3         22 last;
303             }
304 4 100       14 if ( $len > $options{max_size} ) {
305 1         6 $gem->{_content} .= substr $buf, 0, $options{max_size};
306 1         3 @{$gem}{qw(_code _error)} = ( 0, 'max_size' );
  1         27  
307 1         34 goto CLEANUP;
308             }
309 3         37 $gem->{_content} .= $buf;
310 3         9 $options{max_size} -= $len;
311             }
312             }
313             } elsif ( $code == 3 and ++$redirects <= $options{max_redirects} ) {
314             # a '31' permanent redirect should result in us not requesting
315             # the old URL again, but that would require more code here for
316             # something that is probably rare
317 8         23 my $new = $gem->{_meta};
318 8         167 $source = URI->new_abs( $new, $gem->{_uri} );
319 8         5759931 select( undef, undef, undef, $options{redirect_delay} );
320 8         355 goto REQUEST;
321             }
322             CLEANUP:
323 7         1898 undef $gem->{_buf};
324 7         34 close $gem->{_socket};
325 7         1798 return $gem, $code;
326             }
327              
328             # drain what remains (if anything) via a callback interface. assumes
329             # that a ->get call has been made
330             sub getmore {
331 26     26 1 206 my ( $self, $callback, %param ) = @_;
332              
333 26         287 my $len = length $self->{_buf};
334 26 100       134 if ($len) {
335 25 50       95 $callback->( $self->{_buf}, $len ) or return;
336 25         1098 undef $self->{_buf};
337             }
338              
339 26   100     380 my $bufsize = $param{bufsize} || _DEFAULT_BUFSIZE;
340 26         50 while (1) {
341 26         54 my $buf;
342 26         214 $len = sysread $self->{_socket}, $buf, $bufsize;
343 26 50       6832 if ( !defined $len ) {
    100          
344 0         0 die "sysread failed: $!\n";
345             } elsif ( $len == 0 ) {
346 25         74 last;
347             }
348 1 50       6 $callback->( $buf, $len ) or return;
349             }
350 25         257 close $self->{_socket};
351             }
352              
353             1;
354             __END__