Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 8dc5f9f

Browse files
maj_20250823-04:20
1 parent df4fe57 commit 8dc5f9f

File tree

4 files changed

+96
-13
lines changed

4 files changed

+96
-13
lines changed

‎README.md‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,5 +151,5 @@ buf = [_]u8{0} ** 3;
151151
* •upgrade 2025-06-26.<BR />
152152
&nbsp;&nbsp;&nbsp;&larr;&nbsp;Simplification and control mvzr and pcre-posix<BR />
153153

154-
* •upgrade 2025-08-03.<BR />
155-
&nbsp;&nbsp;&nbsp;&larr;&nbsp;zig version 0.15.dev<BR />
154+
* •upgrade 2025-08-22.<BR />
155+
&nbsp;&nbsp;&nbsp;&larr;&nbsp;zig version 0.15.1<BR />

‎library/regex/mvzr.zig‎

Lines changed: 88 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,9 @@ fn matchPattern(patt: []const RegOp, sets: []const CharSet, haystack: []const u8
311311
dispatch: while (this_patt.len != 0) {
312312
if (i == haystack.len) {
313313
switch (this_patt[0]) {
314-
.word_break, .not_word_break => {},
314+
.word_break, .not_word_break => {
315+
if (haystack.len == 0) return null;
316+
},
315317
.optional,
316318
.star,
317319
.lazy_optional,
@@ -333,11 +335,18 @@ fn matchPattern(patt: []const RegOp, sets: []const CharSet, haystack: []const u8
333335
return null;
334336
}
335337
},
338+
.some => |how_many| {
339+
if (how_many == 0) {
340+
this_patt = nextPattern(this_patt);
341+
continue :dispatch;
342+
} else {
343+
return null;
344+
}
345+
},
336346
.begin,
337347
.plus,
338348
.lazy_plus,
339349
.eager_plus,
340-
.some,
341350
.dot,
342351
.class,
343352
.not_class,
@@ -465,7 +474,7 @@ fn matchStar(patt: []const RegOp, sets: []const CharSet, haystack: []const u8, i
465474
return OpMatch{ .i = i, .j = next_patt };
466475
} else {
467476
// We're not done, back off a bit
468-
i -= 1; // Haystack always has len > 0
477+
i -= 1; // Haystack always has len > 0 when this is reached.
469478
}
470479
}
471480

@@ -476,6 +485,9 @@ fn matchStar(patt: []const RegOp, sets: []const CharSet, haystack: []const u8, i
476485
// TODO this logic is wrong because /our/ pattern might not match at every point in the string.
477486
// fix that later (I have something in mind here: a mask storing an intersection of every real
478487
// match for us, with a potential match for the next guy)
488+
//
489+
// Theory: if we switch to matchLazyStar here, starting from the beginning, we always get what
490+
// we came for, but pay the minimum amount for it.
479491
i = if (i == i_in) i_in else (i - 1);
480492
while (true) {
481493
const try_next = matchPattern(next_patt, sets, haystack, i);
@@ -712,7 +724,7 @@ fn matchUpToInner(
712724
fn matchEagerUpTo(patt: []const RegOp, sets: []const CharSet, haystack: []const u8, i_in: usize) OpMatch {
713725
const this_patt = thisPattern(patt[1..]);
714726
const first_match = matchPattern(this_patt, sets, haystack, i_in);
715-
if (first_match == null) return OpMatch{ .i = 0, .j = nextPattern(patt) };
727+
if (first_match == null) return OpMatch{ .i = i_in, .j = nextPattern(patt) };
716728
// Keep it up
717729
var latest_match: OpMatch = first_match.?;
718730
var count = patt[0].eager_up_to - 1;
@@ -748,6 +760,9 @@ fn matchGroup(patt: []const RegOp, sets: []const CharSet, haystack: []const u8,
748760
} // There's at least one alt.
749761
// Is there another pattern to check?
750762
if (next_patt.len == 0) {
763+
// NOTE: This is where we might implement 'leftmost longest'. Current
764+
// behavior short-circuits on the first match, but we can keep trying
765+
// and keep the longest of any matches.
751766
const our_match = matchAlt(inner_patt, sets, haystack, i);
752767
if (our_match) |m| {
753768
// Strip the remaining matches, may as well use empty next_patt
@@ -1449,9 +1464,9 @@ fn compileRegex(RegexT: type, in: []const u8) ?RegexT {
14491464
if (in[i] == '}') { // {,N}
14501465
j += prefixModifier(patt, j, RegOp{ .up_to = c1 }) catch
14511466
{
1452-
bad_string = true;
1453-
break :dispatch;
1454-
};
1467+
bad_string = true;
1468+
break :dispatch;
1469+
};
14551470
continue :dispatch;
14561471
} else {
14571472
bad_string = true;
@@ -2034,6 +2049,7 @@ fn printPatternInternal(patt: []const RegOp) ?u8 {
20342049
},
20352050
.some,
20362051
.up_to,
2052+
.eager_up_to,
20372053
=> |op| {
20382054
std.debug.print("{s} {d}", .{ @tagName(patt[j]), op });
20392055
},
@@ -2425,3 +2441,68 @@ test "Uppercase Greek" {
24252441
test "M of N multibyte" {
24262442
try testMatchEnd("abλ{3,5}", "abλλλλ");
24272443
}
2444+
2445+
test "zero length match on zero length haystack" {
2446+
const regex = Regex.compile(".*");
2447+
const the_match = regex.?.match("");
2448+
try expect(the_match != null);
2449+
try expectEqual(0, the_match.?.start);
2450+
try expectEqual(0, the_match.?.end);
2451+
}
2452+
2453+
test "zero length optional match on zero length haystack" {
2454+
const regex = Regex.compile(".?");
2455+
const the_match = regex.?.match("");
2456+
try expect(the_match != null);
2457+
try expectEqual(0, the_match.?.start);
2458+
try expectEqual(0, the_match.?.end);
2459+
}
2460+
2461+
test "zero length bookended optional match on zero length haystack" {
2462+
const regex = Regex.compile("^.?$");
2463+
const the_match = regex.?.match("");
2464+
try expect(the_match != null);
2465+
try expectEqual(0, the_match.?.start);
2466+
try expectEqual(0, the_match.?.end);
2467+
}
2468+
2469+
test "mandatory a fails on zero length haystack" {
2470+
const regex = Regex.compile("a").?;
2471+
try expectEqual(null, regex.match(""));
2472+
}
2473+
2474+
test "some == 0 is an optional for termination" {
2475+
try testMatchAll("^[A-Za-z][0-9A-Za-z]{0,19}$", "x");
2476+
}
2477+
2478+
test "mvzr compile in releasesafe mode" {
2479+
const hi = compile("test");
2480+
try std.testing.expect(hi != null);
2481+
try std.testing.expect(hi.?.isMatch("test"));
2482+
}
2483+
2484+
test "mvzr is not longest-leftmost" {
2485+
const fubar = compile("(foo|foobar)").?;
2486+
const the_match = fubar.match("foobar");
2487+
if (the_match) |is_foo| {
2488+
try expectEqualStrings("foo", is_foo.slice);
2489+
}
2490+
}
2491+
2492+
test "do not make this test any longer" {
2493+
// Proof of exponential growth due to ?
2494+
try testMatchAll("a?a?a?a?a?a?a?aaaaaaa", "aaaaaaa");
2495+
}
2496+
2497+
test "rewrites coin address at the end" {
2498+
const bogus_coin = compile("7[a-zA-Z0-9]{25,34}+").?;
2499+
const bogo_string = "Send the boguscoins to 7YWHMfk9JZe123123123123123\n";
2500+
try expect(bogus_coin.isMatch(bogo_string));
2501+
const a_match = bogus_coin.match(bogo_string).?;
2502+
_ = a_match;
2503+
}
2504+
2505+
test "word boundary with zero length haystack" {
2506+
// Courtesy apvanzanten: https://github.com/mnemnion/mvzr/pull/8
2507+
try testFail("\\b", "");
2508+
}

‎src-zig/build.zig.zon‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.{
22
.name = .as400jplpc,
33
.version = "0.0.0",
4-
.minimum_zig_version = "0.14.0",
4+
.minimum_zig_version = "0.15.1",
55

66
.dependencies = .{
77
.library = .{

‎src-zig/buildtestmvzr.zig‎

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@ pub fn build(b: *std.Build) void {
2222
// Building the executable
2323
const Prog = b.addExecutable(.{
2424
.name = "testmvzr",
25-
.root_source_file = b.path("./testmvzr.zig"),
26-
.target = target,
27-
.optimize = optimize,
25+
.root_module = b.createModule(.{
26+
.root_source_file = b.path("./testmvzr.zig"),
27+
.target = target,
28+
.optimize = optimize,
29+
})
2830
});
2931

3032
// Resolve the 'library' dependency.

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /