@@ -311,7 +311,9 @@ fn matchPattern(patt: []const RegOp, sets: []const CharSet, haystack: []const u8
311311 dispatch : while (this_patt .len != 0 ) {
312312 if (i == haystack .len ) {
313313 switch (this_patt [0 ]) {
314- .word_break , .not_word_break = > {},
314+ .word_break , .not_word_break = > {
315+ if (haystack .len == 0 ) return null ;
316+ },
315317 .optional ,
316318 .star ,
317319 .lazy_optional ,
@@ -333,11 +335,18 @@ fn matchPattern(patt: []const RegOp, sets: []const CharSet, haystack: []const u8
333335 return null ;
334336 }
335337 },
338+ .some = > | how_many | {
339+ if (how_many == 0 ) {
340+ this_patt = nextPattern (this_patt );
341+ continue :dispatch ;
342+ } else {
343+ return null ;
344+ }
345+ },
336346 .begin ,
337347 .plus ,
338348 .lazy_plus ,
339349 .eager_plus ,
340- .some ,
341350 .dot ,
342351 .class ,
343352 .not_class ,
@@ -465,7 +474,7 @@ fn matchStar(patt: []const RegOp, sets: []const CharSet, haystack: []const u8, i
465474 return OpMatch { .i = i , .j = next_patt };
466475 } else {
467476 // We're not done, back off a bit
468- i -= 1 ; // Haystack always has len > 0
477+ i -= 1 ; // Haystack always has len > 0 when this is reached.
469478 }
470479 }
471480
@@ -476,6 +485,9 @@ fn matchStar(patt: []const RegOp, sets: []const CharSet, haystack: []const u8, i
476485 // TODO this logic is wrong because /our/ pattern might not match at every point in the string.
477486 // fix that later (I have something in mind here: a mask storing an intersection of every real
478487 // match for us, with a potential match for the next guy)
488+ //
489+ // Theory: if we switch to matchLazyStar here, starting from the beginning, we always get what
490+ // we came for, but pay the minimum amount for it.
479491 i = if (i == i_in ) i_in else (i - 1 );
480492 while (true ) {
481493 const try_next = matchPattern (next_patt , sets , haystack , i );
@@ -712,7 +724,7 @@ fn matchUpToInner(
712724fn matchEagerUpTo (patt : []const RegOp , sets : []const CharSet , haystack : []const u8 , i_in : usize ) OpMatch {
713725 const this_patt = thisPattern (patt [1.. ]);
714726 const first_match = matchPattern (this_patt , sets , haystack , i_in );
715- if (first_match == null ) return OpMatch { .i = 0 , .j = nextPattern (patt ) };
727+ if (first_match == null ) return OpMatch { .i = i_in , .j = nextPattern (patt ) };
716728 // Keep it up
717729 var latest_match : OpMatch = first_match .? ;
718730 var count = patt [0 ].eager_up_to - 1 ;
@@ -748,6 +760,9 @@ fn matchGroup(patt: []const RegOp, sets: []const CharSet, haystack: []const u8,
748760 } // There's at least one alt.
749761 // Is there another pattern to check?
750762 if (next_patt .len == 0 ) {
763+ // NOTE: This is where we might implement 'leftmost longest'. Current
764+ // behavior short-circuits on the first match, but we can keep trying
765+ // and keep the longest of any matches.
751766 const our_match = matchAlt (inner_patt , sets , haystack , i );
752767 if (our_match ) | m | {
753768 // Strip the remaining matches, may as well use empty next_patt
@@ -1449,9 +1464,9 @@ fn compileRegex(RegexT: type, in: []const u8) ?RegexT {
14491464 if (in [i ] == '}' ) { // {,N}
14501465 j += prefixModifier (patt , j , RegOp { .up_to = c1 }) catch
14511466 {
1452- bad_string = true ;
1453- break :dispatch ;
1454- };
1467+ bad_string = true ;
1468+ break :dispatch ;
1469+ };
14551470 continue :dispatch ;
14561471 } else {
14571472 bad_string = true ;
@@ -2034,6 +2049,7 @@ fn printPatternInternal(patt: []const RegOp) ?u8 {
20342049 },
20352050 .some ,
20362051 .up_to ,
2052+ .eager_up_to ,
20372053 = > | op | {
20382054 std .debug .print ("{s} {d}" , .{ @tagName (patt [j ]), op });
20392055 },
@@ -2425,3 +2441,68 @@ test "Uppercase Greek" {
test "M of N multibyte" {
    // Four multibyte `λ` follow "ab"; the pattern bounds `λ` with {3,5}.
    const pattern = "abλ{3,5}";
    const haystack = "abλλλλ";
    try testMatchEnd(pattern, haystack);
}
test "zero length match on zero length haystack" {
    // `.*` against "" must produce a match whose start and end are both 0.
    const star_regex = Regex.compile(".*").?;
    const maybe_match = star_regex.match("");
    try expect(maybe_match != null);
    const found = maybe_match.?;
    try expectEqual(0, found.start);
    try expectEqual(0, found.end);
}
test "zero length optional match on zero length haystack" {
    // `.?` against "" must produce a match whose start and end are both 0.
    const optional_regex = Regex.compile(".?").?;
    const maybe_match = optional_regex.match("");
    try expect(maybe_match != null);
    const found = maybe_match.?;
    try expectEqual(0, found.start);
    try expectEqual(0, found.end);
}
test "zero length bookended optional match on zero length haystack" {
    // Same expectation as the unanchored case, with `^` and `$` around `.?`:
    // the match on "" has start and end both 0.
    const anchored_regex = Regex.compile("^.?$").?;
    const maybe_match = anchored_regex.match("");
    try expect(maybe_match != null);
    const found = maybe_match.?;
    try expectEqual(0, found.start);
    try expectEqual(0, found.end);
}
test "mandatory a fails on zero length haystack" {
    // A literal `a` is required, so matching against "" must yield null.
    const literal_a = Regex.compile("a").?;
    const result = literal_a.match("");
    try expectEqual(null, result);
}
test "some == 0 is an optional for termination" {
    // The haystack is a single letter, so the {0,19} tail is reached at the
    // end of input and has to be satisfied with zero repetitions.
    const pattern = "^[A-Za-z][0-9A-Za-z]{0,19}$";
    const haystack = "x";
    try testMatchAll(pattern, haystack);
}
test "mvzr compile in releasesafe mode" {
    // Compilation of a plain literal must succeed, and the compiled regex
    // must match that same literal.
    const regex = compile("test") orelse return error.TestUnexpectedResult;
    try std.testing.expect(regex.isMatch("test"));
}
test "mvzr is not longest-leftmost" {
    // `(foo|foobar)` on "foobar" is expected to yield "foo", the first
    // alternative, rather than the longer "foobar".
    const fubar = compile("(foo|foobar)").?;
    // A missing match must fail the test. Guarding the assertion with an
    // `if` capture would let a null result pass without checking anything.
    const is_foo = fubar.match("foobar") orelse return error.TestUnexpectedResult;
    try expectEqualStrings("foo", is_foo.slice);
}
test "do not make this test any longer" {
    // Demonstrates exponential growth caused by `?`; keep the pattern at
    // this size.
    const pattern = "a?a?a?a?a?a?a?aaaaaaa";
    const haystack = "aaaaaaa";
    try testMatchAll(pattern, haystack);
}
test "rewrites coin address at the end" {
    const haystack = "Send the boguscoins to 7YWHMfk9JZe123123123123123\n ";
    const coin_regex = compile("7[a-zA-Z0-9]{25,34}+").?;
    try expect(coin_regex.isMatch(haystack));
    // Unwrapping asserts that match() finds it too; the match itself is
    // not inspected further.
    _ = coin_regex.match(haystack).?;
}
test "word boundary with zero length haystack" {
    // Courtesy apvanzanten: https://github.com/mnemnion/mvzr/pull/8
    // The pattern is a lone word-boundary escape (`\b`, written "\\b" in a
    // Zig string literal). It must not match an empty haystack.
    try testFail("\\b", "");
}
0 commit comments