@@ -22,18 +22,10 @@ public class EdgeNGramTests : TokenizerAssertionBase<EdgeNGramTests>
 
 	public override ITokenizer Initializer => new EdgeNGramTokenizer
 	{
-		MaxGram = 2,
-		MinGram = 1,
-		TokenChars = new[] { TokenChar.Digit, TokenChar.Letter }
+		MaxGram = 2, MinGram = 1, TokenChars = new[] { TokenChar.Digit, TokenChar.Letter }
 	};
 
-	public override object Json => new
-	{
-		min_gram = 1,
-		max_gram = 2,
-		token_chars = new[] { "digit", "letter" },
-		type = "edge_ngram"
-	};
+	public override object Json => new { min_gram = 1, max_gram = 2, token_chars = new[] { "digit", "letter" }, type = "edge_ngram" };
 
 	public override string Name => "endgen";
 }
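As a sanity check on the edge_ngram configuration above, the tokenizer can be exercised through the Analyze API once it is registered in index settings. A minimal sketch, assuming an `ElasticClient` named `client` and an illustrative index "my-index" that carries the "endgen" tokenizer; with `min_gram = 1` and `max_gram = 2`, the text "ab1" should produce the tokens "a" and "ab":

```csharp
// Sketch only: `client` and "my-index" are assumptions, not part of this change.
var analyzeResponse = client.Indices.Analyze(a => a
	.Index("my-index")   // index where the "endgen" tokenizer is registered
	.Tokenizer("endgen") // the tokenizer defined by EdgeNGramTests
	.Text("ab1")         // expect edge n-grams of length 1-2: "a", "ab"
);
```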
@@ -50,10 +42,7 @@ public class EdgeNGramCustomTokenCharsTests : TokenizerAssertionBase<EdgeNGramCu
 
 	public override ITokenizer Initializer => new EdgeNGramTokenizer
 	{
-		MaxGram = 2,
-		MinGram = 1,
-		TokenChars = new[] { TokenChar.Custom },
-		CustomTokenChars = "+-_"
+		MaxGram = 2, MinGram = 1, TokenChars = new[] { TokenChar.Custom }, CustomTokenChars = "+-_"
 	};
 
 	public override object Json => new
@@ -62,7 +51,7 @@ public class EdgeNGramCustomTokenCharsTests : TokenizerAssertionBase<EdgeNGramCu
 		max_gram = 2,
 		token_chars = new[] { "custom" },
 		custom_token_chars = "+-_",
-		type = "edge_ngram"
+		type = "edge_ngram"
 	};
 
 	public override string Name => "endgen_custom";
@@ -78,18 +67,10 @@ public class NGramTests : TokenizerAssertionBase<NGramTests>
 
 	public override ITokenizer Initializer => new NGramTokenizer
 	{
-		MaxGram = 2,
-		MinGram = 1,
-		TokenChars = new[] { TokenChar.Digit, TokenChar.Letter }
+		MaxGram = 2, MinGram = 1, TokenChars = new[] { TokenChar.Digit, TokenChar.Letter }
 	};
 
-	public override object Json => new
-	{
-		min_gram = 1,
-		max_gram = 2,
-		token_chars = new[] { "digit", "letter" },
-		type = "ngram"
-	};
+	public override object Json => new { min_gram = 1, max_gram = 2, token_chars = new[] { "digit", "letter" }, type = "ngram" };
 
 	public override string Name => "ng";
 }
@@ -106,10 +87,7 @@ public class NGramCustomTokenCharsTests : TokenizerAssertionBase<NGramCustomToke
 
 	public override ITokenizer Initializer => new NGramTokenizer
 	{
-		MaxGram = 2,
-		MinGram = 1,
-		TokenChars = new[] { TokenChar.Custom },
-		CustomTokenChars = "+-_"
+		MaxGram = 2, MinGram = 1, TokenChars = new[] { TokenChar.Custom }, CustomTokenChars = "+-_"
 	};
 
 	public override object Json => new
@@ -164,16 +142,9 @@ public class IcuTests : TokenizerAssertionBase<IcuTests>
 		.RuleFiles(RuleFiles)
 	);
 
-	public override ITokenizer Initializer => new IcuTokenizer
-	{
-		RuleFiles = RuleFiles,
-	};
+	public override ITokenizer Initializer => new IcuTokenizer { RuleFiles = RuleFiles, };
 
-	public override object Json => new
-	{
-		rule_files = RuleFiles,
-		type = "icu_tokenizer"
-	};
+	public override object Json => new { rule_files = RuleFiles, type = "icu_tokenizer" };
 
 	public override string Name => "icu";
 }
@@ -198,7 +169,7 @@ public class KuromojiTests : TokenizerAssertionBase<KuromojiTests>
 		DiscardPunctuation = true,
 		NBestExamples = Example,
 		NBestCost = 1000,
-		UserDictionaryRules = new[] { Inline }
+		UserDictionaryRules = new[] { Inline }
 	};
 
 	public override object Json => new
@@ -208,7 +179,7 @@ public class KuromojiTests : TokenizerAssertionBase<KuromojiTests>
 		nbest_cost = 1000,
 		nbest_examples = Example,
 		type = "kuromoji_tokenizer",
-		user_dictionary_rules = new[] { Inline }
+		user_dictionary_rules = new[] { Inline }
 	};
 
 	public override string Name => "kuro";
@@ -228,18 +199,9 @@ public class KuromojiDiscardCompoundTokenTests : TokenizerAssertionBase<Kuromoji
 		.DiscardCompoundToken()
 	);
 
-	public override ITokenizer Initializer => new KuromojiTokenizer
-	{
-		Mode = KuromojiTokenizationMode.Search,
-		DiscardCompoundToken = true,
-	};
+	public override ITokenizer Initializer => new KuromojiTokenizer { Mode = KuromojiTokenizationMode.Search, DiscardCompoundToken = true, };
 
-	public override object Json => new
-	{
-		discard_compound_token = true,
-		mode = "search",
-		type = "kuromoji_tokenizer",
-	};
+	public override object Json => new { discard_compound_token = true, mode = "search", type = "kuromoji_tokenizer", };
 
 	public override string Name => "kuro_discard_compound_token";
 }
@@ -252,11 +214,7 @@ public class UaxTests : TokenizerAssertionBase<UaxTests>
 
 	public override ITokenizer Initializer => new UaxEmailUrlTokenizer { MaxTokenLength = 12 };
 
-	public override object Json => new
-	{
-		max_token_length = 12,
-		type = "uax_url_email"
-	};
+	public override object Json => new { max_token_length = 12, type = "uax_url_email" };
 
 	public override string Name => "uax";
 }
@@ -269,20 +227,9 @@ public class PatternTests : TokenizerAssertionBase<PatternTests>
 		.Pattern(@"\W+")
 	);
 
-	public override ITokenizer Initializer => new PatternTokenizer
-	{
-		Flags = "CASE_INSENSITIVE",
-		Group = 1,
-		Pattern = @"\W+"
-	};
+	public override ITokenizer Initializer => new PatternTokenizer { Flags = "CASE_INSENSITIVE", Group = 1, Pattern = @"\W+" };
 
-	public override object Json => new
-	{
-		pattern = @"\W+",
-		flags = "CASE_INSENSITIVE",
-		group = 1,
-		type = "pattern"
-	};
+	public override object Json => new { pattern = @"\W+", flags = "CASE_INSENSITIVE", group = 1, type = "pattern" };
 
 	public override string Name => "pat";
 }
@@ -312,10 +259,7 @@ public class NoriTests : TokenizerAssertionBase<NoriTests>
 		.DecompoundMode(NoriDecompoundMode.Mixed)
 	);
 
-	public override ITokenizer Initializer => new NoriTokenizer
-	{
-		DecompoundMode = NoriDecompoundMode.Mixed
-	};
+	public override ITokenizer Initializer => new NoriTokenizer { DecompoundMode = NoriDecompoundMode.Mixed };
 
 	public override object Json => new { type = "nori_tokenizer", decompound_mode = "mixed" };
 	public override string Name => "nori";
@@ -331,16 +275,14 @@ public class NoriWithUserDictionaryTests : TokenizerAssertionBase<NoriWithUserDi
 
 	public override ITokenizer Initializer => new NoriTokenizer
 	{
-		DecompoundMode = NoriDecompoundMode.Mixed,
-		UserDictionaryRules = new[] { "c++", "C샤프", "세종", "세종시 세종 시" }
+		DecompoundMode = NoriDecompoundMode.Mixed, UserDictionaryRules = new[] { "c++", "C샤프", "세종", "세종시 세종 시" }
 	};
 
 	public override object Json => new
 	{
-		type = "nori_tokenizer",
-		decompound_mode = "mixed",
-		user_dictionary_rules = new[] { "c++", "C샤프", "세종", "세종시 세종 시" }
+		type = "nori_tokenizer", decompound_mode = "mixed", user_dictionary_rules = new[] { "c++", "C샤프", "세종", "세종시 세종 시" }
 	};
+
 	public override string Name => "nori_userdictionary";
 }
 
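A brief note on the `user_dictionary_rules` values exercised above: per the Elasticsearch Nori documentation, each rule is a surface form optionally followed by the segments it should decompose into. A self-contained sketch restating the test data:

```csharp
// Single-segment rules such as "c++" keep the token whole; the last rule tells
// Nori to decompose the compound "세종시" into the segments "세종" + "시".
var rules = new[] { "c++", "C샤프", "세종", "세종시 세종 시" };
```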
@@ -353,16 +295,9 @@ public class CharGroupTests : TokenizerAssertionBase<CharGroupTests>
 		.TokenizeOnCharacters(_chars)
 	);
 
-	public override ITokenizer Initializer => new CharGroupTokenizer
-	{
-		TokenizeOnCharacters = _chars
-	};
+	public override ITokenizer Initializer => new CharGroupTokenizer { TokenizeOnCharacters = _chars };
 
-	public override object Json => new
-	{
-		tokenize_on_chars = _chars,
-		type = "char_group"
-	};
+	public override object Json => new { tokenize_on_chars = _chars, type = "char_group" };
 
 	public override string Name => "char_group";
 }
@@ -377,18 +312,9 @@ public class CharGroupMaxTokenLengthTests : TokenizerAssertionBase<CharGroupMaxT
 		.MaxTokenLength(255)
 	);
 
-	public override ITokenizer Initializer => new CharGroupTokenizer
-	{
-		TokenizeOnCharacters = _chars,
-		MaxTokenLength = 255
-	};
+	public override ITokenizer Initializer => new CharGroupTokenizer { TokenizeOnCharacters = _chars, MaxTokenLength = 255 };
 
-	public override object Json => new
-	{
-		tokenize_on_chars = _chars,
-		type = "char_group",
-		max_token_length = 255
-	};
+	public override object Json => new { tokenize_on_chars = _chars, type = "char_group", max_token_length = 255 };
 
 	public override string Name => "char_group_max_token_length";
 }
@@ -400,13 +326,38 @@ public class DiscardPunctuationTests : TokenizerAssertionBase<DiscardPunctuation
 		.DiscardPunctuation()
 	);
 
-	public override ITokenizer Initializer => new NoriTokenizer
-	{
-		DiscardPunctuation = true
-	};
+	public override ITokenizer Initializer => new NoriTokenizer { DiscardPunctuation = true };
 
 	public override object Json => new { type = "nori_tokenizer", discard_punctuation = true };
 	public override string Name => "nori-discard";
 }
+
+[SkipVersion("<7.7.0", "simple_pattern experimental until 7.7.0")]
+public class SimplePatternTests : TokenizerAssertionBase<SimplePatternTests>
+{
+	public override FuncTokenizer Fluent => (n, t) => t.SimplePattern(n, e => e
+		.Pattern(@"\W+")
+	);
+
+	public override ITokenizer Initializer => new SimplePatternTokenizer { Pattern = @"\W+" };
+
+	public override object Json => new { pattern = @"\W+", type = "simple_pattern" };
+
+	public override string Name => "simple-pattern";
+}
+
+[SkipVersion("<7.7.0", "simple_pattern_split experimental until 7.7.0")]
+public class SimplePatternSplitTests : TokenizerAssertionBase<SimplePatternSplitTests>
+{
+	public override FuncTokenizer Fluent => (n, t) => t.SimplePatternSplit(n, e => e
+		.Pattern(@"\W+")
+	);
+
+	public override ITokenizer Initializer => new SimplePatternSplitTokenizer { Pattern = @"\W+" };
+
+	public override object Json => new { pattern = @"\W+", type = "simple_pattern_split" };
+
+	public override string Name => "simple-pattern-split";
+}
 }
 }
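For completeness, a hedged sketch of how one of the new tokenizers would be registered through the same fluent API these tests exercise; `client` and the index name "tokenizer-demo" are assumptions, not part of this change:

```csharp
// Registers the simple_pattern tokenizer from SimplePatternTests in the
// analysis settings of a new index (mirrors the test's Fluent definition).
var createResponse = client.Indices.Create("tokenizer-demo", c => c
	.Settings(s => s
		.Analysis(an => an
			.Tokenizers(t => t
				.SimplePattern("simple-pattern", sp => sp
					.Pattern(@"\W+")
				)
			)
		)
	)
);
```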