|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 typedef struct perl_tokenizer {  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_tokenizer base;  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SV *coderef;                 /* the perl tokenizer is a coderef that takes  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                     a string and returns a cursor coderef */  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } perl_tokenizer;  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 typedef struct perl_tokenizer_cursor {  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_tokenizer_cursor base;  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SV *coderef;                 /* ref to the closure that returns terms */  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     char *pToken;                /* storage for a copy of the last token */  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int nTokenAllocated;         /* space allocated to pToken buffer */  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* members below are only used if the input string is in utf8 */  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     const char *pInput;          /* input we are tokenizing */  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     const char *lastByteOffset;  /* offset into pInput */  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int lastCharOffset;          /* char offset corresponding to lastByteOffset */  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 } perl_tokenizer_cursor;  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /*  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** Create a new tokenizer instance.  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** Will be called whenever a FTS3 table is created with  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 **   CREATE .. USING fts3( ... , tokenize=perl qualified::function::name)  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** where qualified::function::name is a fully qualified perl function  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
25
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static int perl_tokenizer_Create(  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int argc, const char * const *argv,  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_tokenizer **ppTokenizer  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ){  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dTHX;  | 
| 
30
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dSP;  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int n_retval;  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SV *retval;  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer *t;  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
35
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (!argc) {  | 
| 
36
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return SQLITE_ERROR;  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     t = (perl_tokenizer *) sqlite3_malloc(sizeof(*t));  | 
| 
40
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if( t==NULL ) return SQLITE_NOMEM;  | 
| 
41
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     memset(t, 0, sizeof(*t));  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ENTER;  | 
| 
44
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SAVETMPS;  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* call the qualified::function::name */  | 
| 
47
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     PUSHMARK(SP);  | 
| 
48
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     PUTBACK;  | 
| 
49
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     n_retval = call_pv(argv[0], G_SCALAR);  | 
| 
50
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SPAGAIN;  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* store a copy of the returned coderef into the tokenizer structure */  | 
| 
53
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (n_retval != 1) {  | 
| 
54
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         warn("tokenizer_Create returned %d arguments", n_retval);  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
56
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     retval = POPs;  | 
| 
57
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     t->coderef   = newSVsv(retval);  | 
| 
58
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     *ppTokenizer = &t->base;  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     PUTBACK;  | 
| 
61
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     FREETMPS;  | 
| 
62
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     LEAVE;  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return SQLITE_OK;  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /*  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** Destroy a tokenizer  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
70
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static int perl_tokenizer_Destroy(sqlite3_tokenizer *pTokenizer){  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dTHX;  | 
| 
72
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer *t = (perl_tokenizer *) pTokenizer;  | 
| 
73
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sv_free(t->coderef);  | 
| 
74
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_free(t);  | 
| 
75
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return SQLITE_OK;  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /*  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** Prepare to begin tokenizing a particular string.  The input  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** string to be tokenized is supposed to be pInput[0..nBytes-1] ..  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** except that nBytes passed by fts3 is -1 (don't know why) !  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** This is passed to the tokenizer instance, which then returns a  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** closure implementing the cursor (so the cursor is again a coderef).  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
85
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static int perl_tokenizer_Open(  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     const char *pInput, int nBytes,      /* Input buffer */  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ){  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dTHX;  | 
| 
91
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dSP;  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dMY_CXT;  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     U32 flags;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SV *perl_string;  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int n_retval;  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer *t = (perl_tokenizer *)pTokenizer;  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* allocate and initialize the cursor struct */  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer_cursor *c;  | 
| 
101
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     c = (perl_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));  | 
| 
102
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     memset(c, 0, sizeof(*c));  | 
| 
103
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     *ppCursor = &c->base;  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* flags for creating the Perl SV containing the input string */  | 
| 
106
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     flags = SVs_TEMP; /* will call sv_2mortal */  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* special handling if working with utf8 strings */  | 
| 
109
 | 
68
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (MY_CXT.last_dbh_is_unicode) {  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         /* data to keep track of byte offsets */  | 
| 
112
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         c->lastByteOffset = c->pInput = pInput;  | 
| 
113
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         c->lastCharOffset = 0;  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         /* string passed to Perl needs to be flagged as utf8 */  | 
| 
116
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         flags |= SVf_UTF8;  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ENTER;  | 
| 
120
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SAVETMPS;  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* build a Perl copy of the input string */  | 
| 
123
 | 
68
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */  | 
| 
124
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         nBytes = strlen(pInput);  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
126
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_string = newSVpvn_flags(pInput, nBytes, flags);  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* call the tokenizer coderef */  | 
| 
129
 | 
68
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     PUSHMARK(SP);  | 
| 
130
 | 
68
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     XPUSHs(perl_string);  | 
| 
131
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     PUTBACK;  | 
| 
132
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     n_retval = call_sv(t->coderef, G_SCALAR);  | 
| 
133
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SPAGAIN;  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* store the cursor coderef returned by the tokenizer */  | 
| 
136
 | 
68
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (n_retval != 1) {  | 
| 
137
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         warn("tokenizer returned %d arguments", n_retval);  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
139
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     c->coderef = newSVsv(POPs);  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
141
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     PUTBACK;  | 
| 
142
 | 
68
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     FREETMPS;  | 
| 
143
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     LEAVE;  | 
| 
144
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return SQLITE_OK;  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /*  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** Close a tokenization cursor previously opened by a call to  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** perl_tokenizer_Open() above.  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
151
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static int perl_tokenizer_Close(sqlite3_tokenizer_cursor *pCursor){  | 
| 
152
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor;  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dTHX;  | 
| 
155
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sv_free(c->coderef);  | 
| 
156
 | 
68
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (c->pToken) sqlite3_free(c->pToken);  | 
| 
157
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_free(c);  | 
| 
158
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return SQLITE_OK;  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /*  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** Extract the next token from a tokenization cursor.  The cursor must  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** have been opened by a prior call to perl_tokenizer_Open().  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
165
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static int perl_tokenizer_Next(  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by perl_tokenizer_Open */  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     const char **ppToken,               /* OUT: *ppToken is the token text */  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int *pnBytes,                       /* OUT: Number of bytes in token */  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int *piStartOffset,                 /* OUT: Starting offset of token */  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int *piEndOffset,                   /* OUT: Ending offset of token */  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int *piPosition                     /* OUT: Position integer of token */  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ){  | 
| 
173
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer_cursor *c = (perl_tokenizer_cursor *) pCursor;  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int result;  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int n_retval;  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     char *token;  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     char *byteOffset;  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     STRLEN n_a; /* this is required for older perls < 5.8.8 */  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     I32 hop;  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dTHX;  | 
| 
182
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dSP;  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ENTER;  | 
| 
185
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SAVETMPS;  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* call the cursor */  | 
| 
188
 | 
148
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     PUSHMARK(SP);  | 
| 
189
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     PUTBACK;  | 
| 
190
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     n_retval = call_sv(c->coderef, G_ARRAY);  | 
| 
191
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     SPAGAIN;  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* if we get back an empty list, there is no more token */  | 
| 
194
 | 
148
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (n_retval == 0) {  | 
| 
195
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         result = SQLITE_DONE;  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* otherwise, get token details from the return list */  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
199
 | 
128
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if (n_retval != 5) {  | 
| 
200
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             warn("tokenizer cursor returned %d arguments", n_retval);  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
202
 | 
128
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         *piPosition    = POPi;  | 
| 
203
 | 
128
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         *piEndOffset   = POPi;  | 
| 
204
 | 
128
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         *piStartOffset = POPi;  | 
| 
205
 | 
128
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         *pnBytes       = POPi;  | 
| 
206
 | 
128
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         token          = POPpx;  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
208
 | 
128
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if (c->pInput) { /* if working with utf8 data */  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             /* recompute *pnBytes in bytes, not in chars */  | 
| 
211
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             *pnBytes = strlen(token);  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             /* recompute start/end offsets in bytes, not in chars */  | 
| 
214
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             hop            = *piStartOffset - c->lastCharOffset;  | 
| 
215
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             byteOffset     = (char*)utf8_hop((U8*)c->lastByteOffset, hop);  | 
| 
216
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             hop            = *piEndOffset - *piStartOffset;  | 
| 
217
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             *piStartOffset = byteOffset - c->pInput;  | 
| 
218
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             byteOffset     = (char*)utf8_hop((U8*)byteOffset, hop);  | 
| 
219
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             *piEndOffset   = byteOffset - c->pInput;  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             /* remember where we are for next round */  | 
| 
222
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             c->lastCharOffset = *piEndOffset,  | 
| 
223
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             c->lastByteOffset = byteOffset;  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         /* make sure we have enough storage for copying the token */  | 
| 
227
 | 
128
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if (*pnBytes > c->nTokenAllocated ){  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             char *pNew;  | 
| 
229
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             c->nTokenAllocated = *pnBytes + 20;  | 
| 
230
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);  | 
| 
231
 | 
68
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if( !pNew ) return SQLITE_NOMEM;  | 
| 
232
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             c->pToken = pNew;  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         /* need to copy the token into the C cursor before perl frees that  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            memory */  | 
| 
237
 | 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         memcpy(c->pToken, token, *pnBytes);  | 
| 
238
 | 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         *ppToken  = c->pToken;  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         result = SQLITE_OK;  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
243
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     PUTBACK;  | 
| 
244
 | 
148
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     FREETMPS;  | 
| 
245
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     LEAVE;  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
247
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return result;  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /*  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** The set of routines that implement the perl tokenizer  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sqlite3_tokenizer_module perl_tokenizer_Module = {  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     0,  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer_Create,  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer_Destroy,  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer_Open,  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer_Close,  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perl_tokenizer_Next  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /*  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ** Register the perl tokenizer with FTS3  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
265
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh)  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
267
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     D_imp_dbh(dbh);  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     int rc;  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_stmt *pStmt;  | 
| 
271
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     const char zSql[] = "SELECT fts3_tokenizer(?, ?)";  | 
| 
272
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_tokenizer_module *p = &perl_tokenizer_Module;  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
248
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (!DBIc_ACTIVE(imp_dbh)) {  | 
| 
275
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         sqlite_error(dbh, -2, "attempt to register fts3 tokenizer on inactive database handle");  | 
| 
276
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return FALSE;  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #if SQLITE_VERSION_NUMBER >= 3012000  | 
| 
280
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     rc = sqlite3_db_config(imp_dbh->db, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0);  | 
| 
281
 | 
248
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if( rc!=SQLITE_OK ){  | 
| 
282
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return rc;  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
286
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     rc = sqlite3_prepare_v2(imp_dbh->db, zSql, -1, &pStmt, 0);  | 
| 
287
 | 
248
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if( rc!=SQLITE_OK ){  | 
| 
288
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return rc;  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
291
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_bind_text(pStmt, 1, "perl", -1, SQLITE_STATIC);  | 
| 
292
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);  | 
| 
293
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sqlite3_step(pStmt);  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
295
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return sqlite3_finalize(pStmt);  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  |