I'm building my own scheduler and I took the first step by creating a cron expression parser.
To test it I used this expression to cover each case. (I guess the 52W is wrong but this doesn't matter yet - validation comes later).
"14,18,3-39/3,52 0/5 14,18,3-39,52W ? JAN,MAR,SEP MON-WED,FRI#3 2002-2010"
Core
It starts with the tokenizer that has only one method with a loop that builds each token. It requires special handling of extensions like W
, L
or #
.
class CronExpressionTokenizer
{
private static readonly Dictionary<char, TokenType> TokenTypes = new Dictionary<char, TokenType>
{
[' '] = TokenType.FieldSeparator,
[','] = TokenType.ListItemSeparator,
['-'] = TokenType.RangeSeparator,
['/'] = TokenType.StepSeparator,
['*'] = TokenType.Blank,
['?'] = TokenType.Blank,
['L'] = TokenType.Extension,
['W'] = TokenType.Extension,
['#'] = TokenType.Extension,
};
public static IEnumerable<Token> Tokenize(string text)
{
var position = (int?)null;
var value = new StringBuilder();
var lastTokenType = TokenType.None;
var updateLastTokenType = new Func<Token, Token>(t => { lastTokenType = t.Type; return t; });
for (var i = 0; i < text.Length; i++)
{
var c = text[i];
var tokenType = TokenType.None;
if (TokenTypes.TryGetValue(c, out tokenType))
{
// Special extension handling.
var isNotExtension =
// Parsed as extension...
tokenType == TokenType.Extension &&
// but a "W" not after a value or field-separator.
(c == 'W' && (lastTokenType != TokenType.Value || lastTokenType == TokenType.FieldSeparator));
if (isNotExtension)
{
lastTokenType = TokenType.Value;
position = position ?? i;
value.Append(c);
continue;
}
if (value.Any())
{
yield return updateLastTokenType(new Token
{
Type = TokenType.Value,
Position = position.Value,
Value = value.ToString()
});
lastTokenType = TokenType.Value;
position = i + 1;
value = new StringBuilder();
}
yield return updateLastTokenType(new Token
{
Type = tokenType,
Position = i,
Value = c.ToString()
});
continue;
}
else
{
lastTokenType = TokenType.Value;
position = position ?? i;
value.Append(c);
}
}
if (value.Any())
{
yield return new Token
{
Type = TokenType.Value,
Position = position.Value,
Value = value.ToString()
};
}
}
}
The result for the test expression is:
Type Position Value
Value 0 14
ListItemSeparator 2 ,
Value 3 18
ListItemSeparator 5 ,
Value 6 3
RangeSeparator 7 -
Value 8 39
StepSeparator 10 /
Value 11 3
ListItemSeparator 12 ,
Value 13 52
FieldSeparator 15
Value 16 0
StepSeparator 17 /
Value 18 5
FieldSeparator 19
Value 20 14
ListItemSeparator 22 ,
Value 23 18
ListItemSeparator 25 ,
Value 26 3
RangeSeparator 27 -
Value 28 39
ListItemSeparator 30 ,
Value 31 52
Extension 33 W
FieldSeparator 34
Value 34 JAN
Blank 35 ?
FieldSeparator 36
ListItemSeparator 40 ,
Value 41 MAR
ListItemSeparator 44 ,
Value 45 SEP
FieldSeparator 48
Value 49 MON
RangeSeparator 52 -
Value 53 WED
ListItemSeparator 56 ,
Value 57 FRI
Extension 60 #
Value 61 3
FieldSeparator 62
Value 63 2002
RangeSeparator 67 -
Value 68 2010
Then the CronExpressionParser
comes in play. It groups all tokens into fields TokenGroup
and creates Subexpression
s. It also parses names of months and days of week. This for names and ordinal extensions I use three dictionaries.
class CronExpressionParser
{
private static readonly IReadOnlyDictionary<string, int> DaysOfWeek = new[]
{
"SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"
}
.Select((dayOfWeek, index) => new { dayOfWeek, index }).ToDictionary(x => x.dayOfWeek, x => x.index + 1, StringComparer.OrdinalIgnoreCase);
private static readonly IReadOnlyDictionary<string, int> Months = new[]
{
"JAN", "FEB", "MAR", "APR", "MAI", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
}
.Select((month, index) => new { month, index }).ToDictionary(x => x.month, x => x.index + 1, StringComparer.OrdinalIgnoreCase);
private static readonly IReadOnlyDictionary<string, CronExtension> Extensions = new Dictionary<string, CronExtension>(StringComparer.OrdinalIgnoreCase)
{
["1"] = CronExtension.First,
["2"] = CronExtension.Second,
["3"] = CronExtension.Third,
["4"] = CronExtension.Fourth,
["5"] = CronExtension.Fifth,
["L"] = CronExtension.Last,
["W"] = CronExtension.Weekday,
};
public static IEnumerable<Subexpression> Parse(IEnumerable<Token> tokens)
{
var tokenGroups = GroupTokens(tokens);
foreach (var tokenGroup in tokenGroups)
{
var subexpression = ParseSubexpression(tokenGroup);
yield return subexpression;
}
}
private static IEnumerable<IGrouping<Type, Token>> GroupTokens(IEnumerable<Token> tokens)
{
var subexpressionTypes = new Queue<Type>(new[]
{
typeof(Second),
typeof(Minute),
typeof(Hour),
typeof(DayOfMonth),
typeof(Month),
typeof(DayOfWeek),
typeof(Year)
});
var field = new List<Token>();
foreach (var token in tokens)
{
if (token.Type == TokenType.FieldSeparator)
{
yield return new TokenGrouping(subexpressionTypes.Dequeue(), field);
field = new List<Token>();
continue;
}
field.Add(token);
}
yield return new TokenGrouping(subexpressionTypes.Dequeue(), field);
}
private static Subexpression ParseSubexpression(IGrouping<Type, Token> tokenGroup)
{
var ranges = new List<CronRange>();
var range = (CronRange?)null;
var lastTokenType = TokenType.None;
foreach (var token in tokenGroup)
{
if (token.Type == TokenType.Value)
{
var value = 0;
var valueParsed =
int.TryParse(token.Value, out value) ||
DaysOfWeek.TryGetValue(token.Value, out value) ||
Months.TryGetValue(token.Value, out value);
if (!valueParsed)
{
throw new Exception($"Invalid value \"{token.Value}\" at {token.Position}.");
}
range = range ?? CronRange.Empty.SetMin(value).SetMax(value);
if (lastTokenType == TokenType.ListItemSeparator)
{
ranges.Add(range.Value);
range = CronRange.Empty.SetMin(value).SetMax(value);
}
if (lastTokenType == TokenType.RangeSeparator)
{
range = range.SetMax(value);
}
if (lastTokenType == TokenType.StepSeparator)
{
range = range.SetStep(value);
}
}
var canUpdateExtension = new[] { lastTokenType, token.Type }.Any(t => t == TokenType.Extension) && token.Value != "#";
if (canUpdateExtension)
{
ranges.Add(range.SetExtension(Extensions[token.Value]));
range = null;
}
lastTokenType = token.Type;
}
if (range.HasValue)
{
ranges.Add(range.Value);
}
return (Subexpression)Activator.CreateInstance(tokenGroup.Key, ranges);
}
}
I store all values as CronRange
. If it's a single value then the min and max are equal. Some values can have a step or an extension.
[DebuggerDisplay("{DebuggerDisplay,nq}")]
struct CronRange
{
public CronRange(int min, int max, int step, CronExtension extension)
{
Min = min;
Max = max;
Step = step;
Extension = extension;
}
public static CronRange Empty => new CronRange();
public int Min { get; }
public int Max { get; }
public int Step { get; }
public CronExtension Extension { get; }
private string DebuggerDisplay => ToString();
public override string ToString() => $"Min = {Min} Max = {Max} Step = {Step} Extension = {Extension}";
}
Auxiliary types
The Token
and two enums for their types and entensions:
enum TokenType
{
None,
Blank,
FieldSeparator,
ListItemSeparator,
RangeSeparator,
StepSeparator,
Value,
Extension,
}
enum CronExtension
{
None,
First,
Second,
Third,
Fourth,
Fifth,
Last,
Weekday,
}
class Token
{
public TokenType Type { get; set; }
public int Position { get; set; }
public string Value { get; set; }
}
The TokenGrouping
for each field:
class TokenGrouping : IGrouping<Type, Token>
{
private readonly IEnumerable<Token> _tokens;
public TokenGrouping(Type key, IEnumerable<Token> tokens)
{
Key = key;
_tokens = tokens;
}
public Type Key { get; }
public IEnumerator<Token> GetEnumerator() => _tokens.GetEnumerator();
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
For future use I define a couple of classes for each field. They're empty but later they will check the DateTime
and validate if the TokenGroup
has valid values.
[DebuggerDisplay("{DebuggerDisplay,nq}")]
abstract
class Subexpression
{
protected Subexpression(IEnumerable<CronRange> ranges)
{
Ranges = ranges;
}
protected IEnumerable<CronRange> Ranges { get; }
private string DebuggerDisplay => ToString();
public override string ToString() => $"Ranges = [{string.Join(", ", Ranges.Select(r => $"{{{r}}}"))}]";
}
class Second : Subexpression { public Second(IEnumerable<CronRange> ranges) : base(ranges) { } }
class Minute : Subexpression { public Minute(IEnumerable<CronRange> ranges) : base(ranges) { } }
class Hour : Subexpression { public Hour(IEnumerable<CronRange> ranges) : base(ranges) { } }
class DayOfMonth : Subexpression { public DayOfMonth(IEnumerable<CronRange> ranges) : base(ranges) { } }
class Month : Subexpression { public Month(IEnumerable<CronRange> ranges) : base(ranges) { } }
class DayOfWeek : Subexpression { public DayOfWeek(IEnumerable<CronRange> ranges) : base(ranges) { } }
class Year : Subexpression { public Year(IEnumerable<CronRange> ranges) : base(ranges) { } }
Lastly a few helper extensions:
static class RangeExtensions
{
public static CronRange SetMin(this CronRange range, int min)
{
return new CronRange(min, range.Max, range.Step, range.Extension);
}
public static CronRange SetMax(this CronRange range, int max)
{
return new CronRange(range.Min, max, range.Step, range.Extension);
}
public static CronRange SetStep(this CronRange range, int step)
{
return new CronRange(range.Min, range.Max, step, range.Extension);
}
public static CronRange SetExtension(this CronRange range, CronExtension extension)
{
return new CronRange(range.Min, range.Max, range.Step, extension);
}
public static CronRange SetMin(this CronRange? range, int min) => range.Value.SetMin(min);
public static CronRange SetMax(this CronRange? range, int max) => range.Value.SetMax(max);
public static CronRange SetStep(this CronRange? range, int step) => range.Value.SetStep(step);
public static CronRange SetExtension(this CronRange? range, CronExtension extension) => range.Value.SetExtension(extension);
}
static class StringBuilderExtensions
{
public static bool Any(this StringBuilder stringBuilder)
{
return stringBuilder.Length > 0;
}
}
Result
This is what I get:
Ranges = [{Min = 14 Max = 14 Step = 0 Extension = None}, {Min = 18 Max = 18 Step = 0 Extension = None}, {Min = 3 Max = 39 Step = 3 Extension = None}, {Min = 52 Max = 52 Step = 0 Extension = None}]
Ranges = [{Min = 0 Max = 0 Step = 5 Extension = None}]
Ranges = [{Min = 14 Max = 14 Step = 0 Extension = None}, {Min = 18 Max = 18 Step = 0 Extension = None}, {Min = 3 Max = 39 Step = 0 Extension = None}, {Min = 52 Max = 52 Step = 0 Extension = Weekday}]
Ranges = []
Ranges = [{Min = 1 Max = 1 Step = 0 Extension = None}, {Min = 3 Max = 3 Step = 0 Extension = None}, {Min = 9 Max = 9 Step = 0 Extension = None}]
Ranges = [{Min = 2 Max = 4 Step = 0 Extension = None}, {Min = 6 Max = 6 Step = 0 Extension = Third}]
Ranges = [{Min = 2002 Max = 2010 Step = 0 Extension = None}]
2 Answers 2
There's not a single access modifier on any type, did you mean for them all to be internal
? If so, specify that explicitly.
var position = (int?)null;
Really? In cases like this (casting null
) don't use var
, just specify the type.
var updateLastTokenType = new Func<Token, Token>(t => { lastTokenType = t.Type; return t; });
For your use case, this function is not helpful. You update lastTokenType
manually as many times as you update it with this function, and one of the updates you do with this function is undone by a manual update. Just swallow the bullet and replace this with manual updates instead.
private static readonly IReadOnlyDictionary<string, int> Months = new[] { "JAN", "FEB", "MAR", "APR", "MAI", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC" } .Select((month, index) => new { month, index }).ToDictionary(x => x.month, x => x.index + 1, StringComparer.OrdinalIgnoreCase);
Is MAI
spelled like that? Not MAY
?
Also, with what this and the property above it do, I would consider wrapping them in a function call that will do that conversion, that's a lot of code to have in an initializer.
private static IEnumerable<IGrouping<Type, Token>> GroupTokens(IEnumerable<Token> tokens)
You should be returning IEnumerable<TokenGrouping>
there, since you are returning objects of that type anyway.
public static CronRange SetMin(this CronRange? range, int min) => range.Value.SetMin(min); public static CronRange SetMax(this CronRange? range, int max) => range.Value.SetMax(max); public static CronRange SetStep(this CronRange? range, int step) => range.Value.SetStep(step); public static CronRange SetExtension(this CronRange? range, CronExtension extension) => range.Value.SetExtension(extension);
What happens when I provide null
here? Easy fix: range?.Value.Set...
Overall, excellent work here. It's always nice to see your questions as they tend to be very good code-wise to begin with, the worst I can find here is a few nitpicks. :)
-
1\$\begingroup\$ It can also be written like
var position = default(int?)
. \$\endgroup\$Xiaoy312– Xiaoy3122016年12月30日 20:23:54 +00:00Commented Dec 30, 2016 at 20:23 -
\$\begingroup\$ The
SetProperty
extension methods for nullable shouldn't be using the?.
operator, as it hides the underlying issue if any. \$\endgroup\$Xiaoy312– Xiaoy3122016年12月30日 21:30:47 +00:00Commented Dec 30, 2016 at 21:30
private static readonly IReadOnlyDictionary<string, int> DaysOfWeek = /*...*/ private static readonly IReadOnlyDictionary<string, int> Months = /*...*/
You can use the CultureInfo.DateTimeFormat
to extract the name of weekdays and months.
// So, you won't defaulting "May" into french.
private static readonly IReadOnlyDictionary<string, int> DaysOfWeek =
CultureInfo.InvariantCulture.DateTimeFormat.AbbreviatedDayNames
.Select((x, i) => new { Index = i, Value = x })
.ToDictionary(x => x.Value.ToUpper(), x => x.Index + 1, StringComparer.OrdinalIgnoreCase);
private static readonly IReadOnlyDictionary<string, int> Months =
CultureInfo.InvariantCulture.DateTimeFormat.AbbreviatedMonthNames
.Where(x => x != string.Empty) // the array has 13 elements
.Select((x, i) => new { Index = i, Value = x })
.ToDictionary(x => x.Value.ToUpper(), x => x.Index + 1, StringComparer.OrdinalIgnoreCase);
range = range ?? CronRange.Empty.SetMin(value).SetMax(value); if (lastTokenType == TokenType.ListItemSeparator) { ranges.Add(range.Value); range = CronRange.Empty.SetMin(value).SetMax(value); } if (lastTokenType == TokenType.RangeSeparator) { range = range.SetMax(value); } if (lastTokenType == TokenType.StepSeparator) { range = range.SetStep(value); }
At first glance, it looked like some kind of awkward fluent syntax. Then, I realised you need immutability.
You should simply expose the properties as get-only from an interface which CronRange
implements, and upgrade CronRange
into a class
.
public interface ICronRange
{
int Min { get; }
int Max { get; }
int Step { get; }
CronExtension Extension { get; }
}
[DebuggerDisplay("{DebuggerDisplay,nq}")]
internal class CronRange : ICronRange
{
public int Min { get; set; }
public int Max { get; set; }
public int Step { get; set; }
public CronExtension Extension { get; set; }
// remove the ctor and use the default one
public override string ToString() => $"Min = {Min} Max = {Max} Step = {Step} Extension = {Extension}";
private string DebuggerDisplay => ToString();
}
Substitute CronRange
for ICronRange
anywhere it is used in Subexpression
and its derived classes.
And, just use range.property = ...
directly:
private static Subexpression ParseSubexpression(IGrouping<Type, Token> tokenGroup)
{
//var range = (CronRange?)null;
var range = default(CronRange);
//range = range ?? CronRange.Empty.SetMin(value).SetMax(value);
range = range ?? new CronRange{ min = value, max = value };
//ranges.Add(range.SetExtension(Extensions[token.Value]));
range.Extension = token.Value;
ranges.Add(range)
range = null;
}
-
\$\begingroup\$ These are very clever techniques, especially the one with the interface trick for faking an immutable type publicly. I have to admit the
default
looks much better now. \$\endgroup\$t3chb0t– t3chb0t2016年12月30日 22:11:14 +00:00Commented Dec 30, 2016 at 22:11