line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Logfile::EPrints::Mapping::DSpace; |
2
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
33
|
use strict; |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
208
|
|
4
|
6
|
|
|
6
|
|
30
|
use warnings; |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
1893
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
Logfile::EPrints::Mapping::DSpace - Map DSpace logs to requests |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 SYNOPSIS |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
See L<Logfile::EPrints>. |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
use Logfile::EPrints; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my $parser = Logfile::EPrints::Parser->new( |
17
|
|
|
|
|
|
|
handler => Logfile::EPrints::Mapping::DSpace->new( |
18
|
|
|
|
|
|
|
identifier => 'oai:dspace:', |
19
|
|
|
|
|
|
|
handler => MyHandler->new |
20
|
|
|
|
|
|
|
)); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
$parser->parse_fh( $fh ); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 CAVEATS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
This module only supports abstract and fulltext. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
DSpace uses the exact same URL layout for communities as it does papers, so there's no way to distinguish them from just the log files. Community hits therefore come out as 'abstract' hits. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=cut |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Constructor.
#
# Takes the class name followed by key/value pairs, which become the
# fields of the new object:
#
#   identifier - required; prefix prepended to "<repo>/<item>" when an
#                identifier is built for a hit
#   handler    - object whose abstract()/fulltext() methods are called
#                by hit()
#
# Croaks (reported from the caller's perspective) when no identifier
# key is supplied. Returns the blessed hash reference.
sub new
{
    my( $class, %self ) = @_;

    unless( exists $self{identifier} )
    {
        # This module does not load Carp itself; require it on demand so
        # the intended message is raised rather than
        # "Undefined subroutine &Carp::croak called".
        require Carp;
        Carp::croak(__PACKAGE__." requires identifier argument");
    }

    return bless \%self, $class;
}
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Classify one parsed log entry and pass it on to the downstream handler.
#
# $hit must provide page() and raw(). Depending on the requested page:
#
#   - no page at all      : a warning containing $hit->raw is emitted
#   - .../handle/N/N      : $hit->{identifier} is set and the hit is
#                           passed to $self->{handler}->abstract
#                           (abstract or community page)
#   - .../bitstream/N/N/N/: $hit->{identifier} is set and the hit is
#                           passed to $self->{handler}->fulltext
#   - anything else       : ignored
#
# Other DSpace requests are deliberately not mapped, for example:
#   Browse    /dspace/browse-date?top=2160%2F161
#   Items-by  /dspace/items-by-author?author=Pearce%2C+Jake&order=date
#   Feedback  /dspace/feedback?fromPage=http%3A%2F%2Fcadair.aber.ac.uk%2Fdspace%2Fbrowse-title%3Fstarts_with%3DI
#   static    /dspace/image/arrow.gif, styles.css.jsp, robots.txt, utils.js
sub hit {
    my( $self, $hit ) = @_;

    my $page = $hit->page;

    unless( defined $page ) {
        return warn "Hmm, error parsing hit - no page request found in: ".$hit->raw."\n";
    }

    # Abstract or community page
    if( $page =~ /\/handle\/(\d+)\/(\d+)$/ ) {
        my( $repo, $item ) = ( $1, $2 );
        $hit->{identifier} = $self->_identifier( $repo, $item );
        return $self->{handler}->abstract( $hit );
    }

    # Bitstream /dspace/bitstream/2160/229/1/Holocene+environments+faynan.pdf
    # Kept as the final statement (no explicit return) so the value handed
    # back for unrecognised pages is the outcome of this last match.
    if( $page =~ /\/bitstream\/(\d+)\/(\d+)\/(\d+)\// ) {
        my( $repo, $item ) = ( $1, $2 );
        $hit->{identifier} = $self->_identifier( $repo, $item );
        $self->{handler}->fulltext( $hit );
    }
}
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Build the identifier string for an item: the object's configured
# identifier prefix followed by "<repo>/<item>".
#
#   $repo - first numeric path segment captured from the request
#   $item - second numeric path segment captured from the request
sub _identifier {
    my $self = shift;
    my( $repo, $item ) = @_;

    return "$self->{identifier}$repo/$item";
}
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
1; |