11import sys
2- import ffilib
32import array
3+ import ffilib
4+ import uctypes
45
# Handle to the 8-bit code unit build of PCRE2; every binding below is
# resolved against this library.
pcre2 = ffilib.open("libpcre2-8")

# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
#           uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
#           pcre2_compile_context *ccontext);
pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp")

# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
#           PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options,
#           pcre2_match_data *match_data, pcre2_match_context *mcontext);
# The type strings must not contain padding: the symbol name is looked up
# verbatim and each character of the signature is a type code.
pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp")

# int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
#           void *where);
pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip")

# PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p")

# pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code,
#           pcre2_general_context *gcontext);
pcre2_match_data_create_from_pattern = pcre2.func(
    "p", "pcre2_match_data_create_from_pattern_8", "Pp"
)
2230
# Compile-option bits, using the PCRE2 values from pcre2.h.
IGNORECASE = I = 0x8  # PCRE2_CASELESS
MULTILINE = M = 0x400  # PCRE2_MULTILINE
DOTALL = S = 0x20  # PCRE2_DOTALL
# PCRE2_EXTENDED is 0x80. The legacy PCRE value 8 is PCRE2_CASELESS in
# PCRE2, which would make VERBOSE silently behave like IGNORECASE.
VERBOSE = X = 0x80
PCRE2_ANCHORED = 0x80000000

# Real value in pcre2.h is 0xFFFFFFFF for 32bit and
# 0xFFFFFFFFFFFFFFFF for 64bit, which is equivalent
# to -1
PCRE2_ZERO_TERMINATED = -1

# TODO. Note that Python3 has unicode by default
ASCII = A = 0
UNICODE = U = 0

PCRE2_INFO_CAPTURECOUNT = 4
3447
3548
3649class PCREMatch :
@@ -67,19 +80,26 @@ def __init__(self, compiled_ptn):
def search(self, s, pos=0, endpos=-1, _flags=0):
    """Run the compiled pattern against ``s`` starting at offset ``pos``.

    ``endpos`` is not supported and must be left at -1. ``_flags`` is
    passed to pcre2_match() as its options argument.

    Returns a PCREMatch built from the subject, the number of groups
    (capture count + 1) and a copy of the offset vector, or None when
    there is no match.

    Raises ValueError when pcre2_match() reports an error other than
    "no match" / "bad start offset".
    """
    assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
    buf = array.array("i", [0])
    pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf)
    cap_count = buf[0]
    match_data = pcre2_match_data_create_from_pattern(self.obj, None)
    try:
        num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None)
        # -1 is PCRE2_ERROR_NOMATCH; -33 is PCRE2_ERROR_BADOFFSET, i.e.
        # pos lies past the end of the subject, where nothing can match.
        if num == -1 or num == -33:
            return None
        if num < 0:
            # Any other negative code is a real matching error; the
            # offset vector was not filled in, so it must not be read.
            raise ValueError("pcre2_match failed with error %d" % num)
        ov_ptr = pcre2_get_ovector_pointer(match_data)
        # pcre2_get_ovector_pointer returns a pointer to PCRE2_SIZE values,
        # which are of type size_t. Use ULONG as the element type to
        # support both 32bit and 64bit.
        ov_buf = uctypes.bytearray_at(
            ov_ptr, uctypes.sizeof({"field": 0 | uctypes.ULONG}) * (cap_count + 1) * 2
        )
        # bytes() takes a private copy, so the offsets stay valid after
        # the match data block that owns the ovector is released below.
        ov = array.array("L", bytes(ov_buf))
    finally:
        # The match data block is allocated on every call and is no longer
        # needed once the offsets are copied; release it on every exit
        # path so repeated searches do not leak native memory.
        pcre2.func("v", "pcre2_match_data_free_8", "p")(match_data)
    # We don't care how many matching subexpressions we got, we
    # care only about total # of capturing ones (including empty)
    return PCREMatch(s, cap_count + 1, ov)
80100
def match(self, s, pos=0, endpos=-1):
    """Search ``s`` with PCRE2_ANCHORED passed as the match flags.

    Takes the same ``pos`` / ``endpos`` arguments as search() and returns
    whatever search() returns.
    """
    anchored_flags = PCRE2_ANCHORED
    return self.search(s, pos, endpos, anchored_flags)
83103
84104 def sub (self , repl , s , count = 0 ):
85105 if not callable (repl ):
@@ -141,9 +161,9 @@ def findall(self, s):
141161
142162
def compile(pattern, flags=0):
    """Compile ``pattern`` with PCRE2 and wrap the result in a PCREPattern.

    ``flags`` is passed to pcre2_compile() as its options argument. The
    pattern length is given as PCRE2_ZERO_TERMINATED.

    Raises AssertionError when pcre2_compile() returns a null pointer; the
    message carries the PCRE2 error code and the offset into the pattern.
    """
    # pcre2_compile() writes through both pointers, so they must be
    # writable buffers of the right width: "int" for the error code and
    # PCRE2_SIZE (size_t, 8 bytes on 64bit) for the error offset.
    errcode = array.array("i", [0])
    erroffset = array.array("L", [0])
    regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None)
    assert regex, "pcre2_compile error %d at offset %d" % (errcode[0], erroffset[0])
    return PCREPattern(regex)
149169
@@ -154,7 +174,7 @@ def search(pattern, string, flags=0):
154174
155175
def match(pattern, string, flags=0):
    """Compile ``pattern`` with PCRE2_ANCHORED OR-ed into ``flags`` and
    return the result of calling search() on ``string``."""
    anchored_flags = flags | PCRE2_ANCHORED
    return compile(pattern, anchored_flags).search(string)
159179
160180
0 commit comments