11import sys
22import ffilib
33import array
4+ import uctypes
45
# Handle to the 8-bit code unit build of PCRE2. All bindings below use the
# "_8"-suffixed symbol names of that build.
pcre2 = ffilib.open("libpcre2-8")

# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
#       uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
#       pcre2_compile_context *ccontext);
pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp")

# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
#       PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options,
#       pcre2_match_data *match_data, pcre2_match_context *mcontext);
# The return type, symbol name and argument signature must not contain
# stray whitespace: they are matched literally by the FFI layer.
pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp")

# int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
#       void *where);
pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip")

# PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p")

# pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code,
#       pcre2_general_context *gcontext);
pcre2_match_data_create_from_pattern = pcre2.func(
    "p", "pcre2_match_data_create_from_pattern_8", "Pp"
)
2230
# PCRE2_SIZE is a size_t. ULONG is used as its stand-in so that the width
# follows the platform on both 32-bit and 64-bit builds.
PCRE2_SIZE_SIZE = uctypes.sizeof(
    {"field": 0 | uctypes.ULONG}
)
PCRE2_SIZE_TYPE = "L"

# pcre2.h defines this as 0xFFFFFFFF (32-bit) or 0xFFFFFFFFFFFFFFFF
# (64-bit), i.e. all bits set, which is the same bit pattern as -1.
PCRE2_ZERO_TERMINATED = -1


# Python-style flag names (long and short spellings share one value).
IGNORECASE = 0x00000008
I = IGNORECASE
MULTILINE = 0x00000400
M = MULTILINE
DOTALL = 0x00000020
S = DOTALL
VERBOSE = 0x00000080
X = VERBOSE
PCRE2_ANCHORED = 0x80000000

# TODO. Note that Python3 has unicode by default
ASCII = 0
A = ASCII
UNICODE = 0
U = UNICODE

# "what" selector passed to pcre2_pattern_info to ask for the number of
# capture groups.
PCRE2_INFO_CAPTURECOUNT = 0x4
3453
3554
3655class PCREMatch :
@@ -67,19 +86,23 @@ def __init__(self, compiled_ptn):
def search(self, s, pos=0, endpos=-1, _flags=0):
    """Run the compiled pattern against ``s`` and return the match, if any.

    Args:
        s: subject handed to ``pcre2_match`` together with ``len(s)``.
        pos: start offset handed to ``pcre2_match``.
        endpos: only the default ``-1`` is supported (asserted below).
        _flags: extra PCRE2 match options, e.g. ``PCRE2_ANCHORED``.

    Returns:
        A ``PCREMatch`` built from the subject, the number of groups
        (capture groups + 1) and the offset vector, or ``None`` when
        ``pcre2_match`` reports a negative status.
    """
    assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
    # One-int out-buffer that receives the pattern's capture group count.
    buf = array.array("i", [0])
    pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf)
    cap_count = buf[0]
    # NOTE(review): the match data block created here is never released in
    # this method; no binding for a release function is visible in this
    # file -- confirm whether one should be added.
    match_data = pcre2_match_data_create_from_pattern(self.obj, None)
    num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None)
    if num < 0:
        # -1 means "no match"; every other negative value is an error
        # status. In both cases the offset vector does not describe a
        # match, so it must not be turned into a PCREMatch.
        return None
    # pcre2_get_ovector_pointer returns a pointer to PCRE2_SIZE values:
    # one (start, end) pair per group, group 0 being the whole match.
    ov_ptr = pcre2_get_ovector_pointer(match_data)
    ov_buf = uctypes.bytearray_at(ov_ptr, PCRE2_SIZE_SIZE * (cap_count + 1) * 2)
    ov = array.array(PCRE2_SIZE_TYPE, ov_buf)
    # We don't care how many matching subexpressions we got, we
    # care only about total # of capturing ones (including empty)
    return PCREMatch(s, cap_count + 1, ov)
80103
def match(self, s, pos=0, endpos=-1):
    """Like ``search``, but with the ``PCRE2_ANCHORED`` option passed as flags."""
    anchored = PCRE2_ANCHORED
    result = self.search(s, pos, endpos, anchored)
    return result
83106
84107 def sub (self , repl , s , count = 0 ):
85108 if not callable (repl ):
@@ -141,9 +164,9 @@ def findall(self, s):
141164
142165
def compile(pattern, flags=0):
    """Compile ``pattern`` with PCRE2 and wrap it in a ``PCREPattern``.

    Args:
        pattern: pattern text, passed to ``pcre2_compile`` as a
            zero-terminated string.
        flags: PCRE2 compile options (e.g. ``IGNORECASE | MULTILINE``).

    Returns:
        A ``PCREPattern`` wrapping the compiled pattern handle.

    Raises:
        AssertionError: if ``pcre2_compile`` returns a null handle.
    """
    # Out-parameters of pcre2_compile: `int *errorcode` and
    # `PCRE2_SIZE *erroroffset`. They must be writable and as wide as the
    # C types; PCRE2_SIZE is a size_t, so a 4-byte buffer is too small on
    # 64-bit targets.
    errcode = array.array("i", [0])
    erroffset = array.array(PCRE2_SIZE_TYPE, [0])
    regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None)
    assert regex, "pcre2_compile failed: error %d at offset %d" % (errcode[0], erroffset[0])
    return PCREPattern(regex)
149172
@@ -154,7 +177,7 @@ def search(pattern, string, flags=0):
154177
155178
def match(pattern, string, flags=0):
    """Compile ``pattern`` with ``PCRE2_ANCHORED`` added to ``flags`` and search ``string``."""
    anchored_flags = flags | PCRE2_ANCHORED
    return compile(pattern, anchored_flags).search(string)
159182
160183
0 commit comments