Jump to content
Wikipedia The Free Encyclopedia

User:Trappist the monk/CS1 maint: Unrecognized language

From Wikipedia, the free encyclopedia

This is a crude AWB custom module that might be used to find and fix previously identified spelling errors in the value assigned to |language=.

Custom module

[edit ]
/// <summary>
/// AWB custom-module entry point: tidies the value of <c>|language=</c> inside CS1 citation
/// templates ({{cite ...}} / {{citation}}) and replaces previously identified misspellings
/// or non-language values using a lookup table.
/// </summary>
/// <param name="ArticleText">Wikitext of the page being processed.</param>
/// <param name="ArticleTitle">Title of the page (not used by this module).</param>
/// <param name="wikiNamespace">Namespace number of the page (not used by this module).</param>
/// <param name="Summary">Edit summary; always starts with "CS1 fixes; " and gets a fragment
/// appended for each kind of fix that was actually applied.</param>
/// <param name="Skip">Always set to <c>false</c> by this module.</param>
/// <returns>The article text with the fixes applied.</returns>
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    Skip = false;
    Summary = "CS1 fixes; ";        // must be assigned up front; fragments are appended conditionally at the end
    bool changes_made = false;      // set inside the match evaluator, which cannot touch the out parameter Summary
    bool ndash = false;             // true once a {{spaced ndash}} / {{snd}} template has been replaced
    string pattern;                 // local variable to hold regex pattern for reuse

    // Matches the name part of a CS1 template: "cite <type>", "citation", or a bare "cite" followed by a pipe.
    string IS_CS1 = @"(?:[Cc]ite[_ ](?=(?:(?:AV|av) [Mm]edia(?: notes)?)|article|blog|book|conference|document|(?:DVD|dvd)(?: notes)?|encyclopa?edia|interview|journal|letter|[Mm]agazine|(?:news(?!group|paper))|paper|podcast|press release|sign|speech|techreport|thesis|video|web)|[Cc]itation|[Cc]ite(?=\s*\|))";

    //---------------------------< M I S S P E L L I N G   D I C T I O N A R Y >----------------------------------
    // This is a crude dictionary of misspellings. For each item, the first is the misspelling, the second is
    // the correct spelling (an empty string removes the value). The dictionary can also be used to remove
    // 'qualifiers' (e.g. Portuguese (Brazil) to Portuguese, because Portuguese (Brazil) is not an ISO 639-1
    // language). Keys MUST be lower case only: the code lower-cases every language value before it searches
    // the dictionary, so a key containing an upper-case letter can never match.
    Dictionary<string, string> spelling_map = new Dictionary<string, string>();
    spelling_map.Add("albainian", "Albanian");
    spelling_map.Add("al", "Albanian");
    spelling_map.Add("albania", "Albanian");
    spelling_map.Add("alemán", "German");
    spelling_map.Add("american english", "");
    spelling_map.Add("american folklore society", "");
    spelling_map.Add("angol", "");
    spelling_map.Add("arabic (kuwait)", "Arabic");
    spelling_map.Add("arcs", "");
    spelling_map.Add("argentina", "Spanish");
    spelling_map.Add("australian", "");
    spelling_map.Add("austrian", "German");
    spelling_map.Add("austrian german", "German");
    spelling_map.Add("austrian-german", "German");
    spelling_map.Add("azerbajani", "Azerbaijani");
    spelling_map.Add("azerbaycani", "Azerbaijani");
    spelling_map.Add("azerbaijan", "Azerbaijani");
    spelling_map.Add("azeri", "Azerbaijani");
    spelling_map.Add("bahasa", "");                 // bahasa = 'language'
    spelling_map.Add("bahasa indonesia", "Indonesian");
    spelling_map.Add("bahasa indonesian", "Indonesian");
    spelling_map.Add("bahasa inggris", "");         // English
    spelling_map.Add("bahasa malaysia", "Malaysian");
    spelling_map.Add("bangla", "Bengali");
    spelling_map.Add("bbc", "");
    spelling_map.Add("belarussian", "Belarusian");
    spelling_map.Add("belorussian", "Belarusian");
    spelling_map.Add("book", "");
    spelling_map.Add("braille", "");                // writing system, not a language
    spelling_map.Add("brazilian", "Portuguese");
    spelling_map.Add("brazilian portuguese", "Portuguese");
    spelling_map.Add("canadian english", "");
    spelling_map.Add("canadian french", "French");
    spelling_map.Add("castellano", "Spanish");
    spelling_map.Add("castellà", "Spanish");
    spelling_map.Add("castilan", "Spanish");
    spelling_map.Add("castillan", "Spanish");
    spelling_map.Add("castilian", "Spanish");
    spelling_map.Add("castilian spanish", "Spanish");
    spelling_map.Add("castillian", "Spanish");
    spelling_map.Add("castillian (spanish)", "Spanish");
    spelling_map.Add("catalano", "Catalan");
    spelling_map.Add("catalán", "Catalan");
    spelling_map.Add("català", "Catalan");
    spelling_map.Add("china", "Chinese");
    spelling_map.Add("china times", "");
    spelling_map.Add("chinese simp.", "Chinese");
    spelling_map.Add("chinese.", "Chinese");
    spelling_map.Add("chinese (simplified han)", "Chinese");
    spelling_map.Add("chinese(traditional)", "Chinese");
    spelling_map.Add("chinese (traditional)", "Chinese");
    spelling_map.Add("chinewe", "Chinese");
    spelling_map.Add("classical chinese", "Chinese");
    spelling_map.Add("cn", "Chinese");
    spelling_map.Add("costa rica", "Spanish");
    spelling_map.Add("cricinfo", "");
    spelling_map.Add("croation", "Croatian");
    spelling_map.Add("cyrillic", "");
    spelling_map.Add("cz", "cs");
    spelling_map.Add("czec", "Czech");
    spelling_map.Add("czecg", "Czech");
    spelling_map.Add("danis", "Danish");
    spelling_map.Add("deutsch", "German");
    spelling_map.Add("dhivehi", "Divehi");
    spelling_map.Add("dansih", "Danish");
    spelling_map.Add("dansk", "Danish");
    spelling_map.Add("denmark", "Danish");
    spelling_map.Add("dk", "Danish");
    spelling_map.Add("du", "Dutch");
    spelling_map.Add("duth", "Dutch");
    spelling_map.Add("duthc", "Dutch");
    spelling_map.Add("en_au", "");
    spelling_map.Add("en-au", "");
    spelling_map.Add("en-gb", "");
    spelling_map.Add("en-us", "");
    spelling_map.Add("eng", "");
    spelling_map.Add("eng.", "");
    spelling_map.Add("engl", "");
    spelling_map.Add("engliah", "");
    spelling_map.Add("englisch", "");
    spelling_map.Add("englis", "");
    spelling_map.Add("english", "");
    spelling_map.Add("english edition", "");
    spelling_map.Add("english (us)", "");           // key was "english (US)", which could never match a lower-cased value
    spelling_map.Add("english, u.k.", "");
    spelling_map.Add("englısh", "");                // i without a dot: ı
    spelling_map.Add("english translation", "");
    spelling_map.Add("english trans. by k. k. dixit", "|others=trans. by K. K. Dixit");
    spelling_map.Add("english (american)", "");
    spelling_map.Add("english (british styled)", "");
    spelling_map.Add("english (british-style pakistani english)", "");
    spelling_map.Add("english (pakistan)", "");
    spelling_map.Add("eng;ish", "");
    spelling_map.Add("erbian", "Serbian");
    spelling_map.Add("español", "Spanish");
    spelling_map.Add("espanol", "Spanish");
    spelling_map.Add("espanhol", "Spanish");
    spelling_map.Add("estonia", "Estonian");
    spelling_map.Add("euskara", "Basque");
    spelling_map.Add("faeroese", "Faroese");
    spelling_map.Add("færøysk", "Faroese");
    spelling_map.Add("farsi", "Persian");
    spelling_map.Add("fgerman", "German");
    spelling_map.Add("finis", "Finnish");
    spelling_map.Add("finish", "Finnish");
    spelling_map.Add("finnisg", "Finnish");
    spelling_map.Add("foreword", "");
    spelling_map.Add("francais", "French");
    spelling_map.Add("français", "French");
    spelling_map.Add("france", "French");
    spelling_map.Add("francés", "French");
    spelling_map.Add("fre", "French");
    spelling_map.Add("frenc", "French");
    spelling_map.Add("frence", "French");
    spelling_map.Add("frencg", "French");
    spelling_map.Add("french (abstract)", "French");
    spelling_map.Add("gaeilge", "Irish");
    spelling_map.Add("gaeilge, [ga]", "Irish");
    spelling_map.Add("gallego", "Galician");
    spelling_map.Add("ge", "Georgian");
    spelling_map.Add("ger", "German");
    spelling_map.Add("geraman", "German");
    spelling_map.Add("germaan", "German");
    spelling_map.Add("germany", "German");
    spelling_map.Add("germană", "German");
    spelling_map.Add("german (swiss)", "German");
    spelling_map.Add("german-", "German");
    spelling_map.Add("germna", "German");
    spelling_map.Add("gernan", "German");
    spelling_map.Add("greece", "Greek");
    spelling_map.Add("greenlandic", "Kalaallisut");
    spelling_map.Add("hangul", "Korean");
    spelling_map.Add("hn", "Spanish");
    spelling_map.Add("honduran spanish", "Spanish");
    spelling_map.Add("hungary", "Hungarian");
    spelling_map.Add("imgartists.com", "");
    spelling_map.Add("indonesia", "Indonesian");
    spelling_map.Add("inglês", "");
    spelling_map.Add("inglés", "");
    spelling_map.Add("ingles", "");
    spelling_map.Add("islandic", "Icelandic");
    spelling_map.Add("israel", "Hebrew");
    spelling_map.Add("irsaeli", "Hebrew");
    spelling_map.Add("israeli", "Hebrew");
    spelling_map.Add("ilalian", "Italian");
    spelling_map.Add("italiain", "Italian");
    spelling_map.Add("italic", "Italian");
    spelling_map.Add("italics", "Italian");
    spelling_map.Add("italien", "Italian");
    spelling_map.Add("italian/milanese dialect", "Italian");
    spelling_map.Add("italin", "Italian");
    spelling_map.Add("italina", "Italian");
    spelling_map.Add("italiano", "Italian");
    spelling_map.Add("italy", "Italian");
    spelling_map.Add("itunes", "");
    spelling_map.Add("japanaese", "Japanese");
    spelling_map.Add("japaneses", "Japanese");
    spelling_map.Add("japanese)", "Japanese");
    spelling_map.Add("japonês", "Japanese");
    spelling_map.Add("japones", "Japanese");
    spelling_map.Add("japonese", "Japanese");
    spelling_map.Add("javanesse", "Javanese");
    spelling_map.Add("javascript", "");
    spelling_map.Add("jp", "ja");
    spelling_map.Add("jpn", "ja");
    spelling_map.Add("jspanese", "Japanese");
    spelling_map.Add("kannaḍa", "Kannada");
    spelling_map.Add("kiswahili", "Swahili");
    spelling_map.Add("koeran", "Korean");
    spelling_map.Add("koṅkaṇī", "Konkani");
    spelling_map.Add("korea", "Korean");
    spelling_map.Add("koreai", "Korean");
    spelling_map.Add("koream", "Korean");
    spelling_map.Add("korean=", "Korean");
    spelling_map.Add("koren", "Korean");
    spelling_map.Add("language", "");
    spelling_map.Add("lat", "Latin");               // not Latvian
    spelling_map.Add("latin (original citation)", "Latin");
    spelling_map.Add("latín", "Latin");
    spelling_map.Add("lecture", "");
    spelling_map.Add("legalese", "");
    spelling_map.Add("lietuvių k.", "Lithuanian");
    spelling_map.Add("lithusanian", "Lithuanian");
    spelling_map.Add("magyar", "Hungarian");
    spelling_map.Add("malayalam)", "Malayalam");
    spelling_map.Add("mandarin", "Chinese");
    spelling_map.Add("mandarin chinese", "Chinese");
    spelling_map.Add("many", "");
    spelling_map.Add("manuscript latin", "Latin");
    spelling_map.Add("marāṭhī", "Marathi");
    spelling_map.Add("mexican", "Spanish");
    spelling_map.Add("mexico city", "Spanish");
    spelling_map.Add("mixed", "");
    spelling_map.Add("modern russian", "Russian");
    spelling_map.Add("mongol", "Mongolian");
    spelling_map.Add("multiple", "");
    spelling_map.Add("multiplelanguages", "");
    spelling_map.Add("multiple languages", "");
    spelling_map.Add("mx", "Spanish");
    spelling_map.Add("nepal bhasa", "Newar");       // 639-3 new
    spelling_map.Add("netherlands", "Dutch");
    spelling_map.Add("norge", "Norwegian");
    spelling_map.Add("norsk", "Norwegian");
    spelling_map.Add("norsk (bokmål)", "Norwegian Bokmål");
    spelling_map.Add("northern sámi", "Northern Sami");
    spelling_map.Add("norway", "Norwegian");
    spelling_map.Add("norwegain", "Norwegian");
    spelling_map.Add("norwegian", "Norwegian");     // because of bug in module, since fixed
    spelling_map.Add("norwegian bokmal", "Norwegian Bokmål");
    spelling_map.Add("norwegian nynorsk", "Norwegian Nynorsk");
    spelling_map.Add("norweigen", "Norwegian");
    spelling_map.Add("norweigian", "Norwegian");
    spelling_map.Add("norwergian", "Norwegian");
    spelling_map.Add("norwgian", "Norwegian");
    spelling_map.Add("pay-per-view", "");
    spelling_map.Add("pay=per-view", "");
    spelling_map.Add("pay-per=view", "");
    spelling_map.Add("persian (farsi)", "Persian");
    spelling_map.Add("pdf", "");
    spelling_map.Add("pol", "Polish");
    spelling_map.Add("polis", "Polish");
    spelling_map.Add("polish2", "Polish");
    spelling_map.Add("polishi", "Polish");
    spelling_map.Add("polsih", "Polish");
    spelling_map.Add("portguês", "Portuguese");
    spelling_map.Add("portughese", "Portuguese");
    spelling_map.Add("portugues", "Portuguese");
    spelling_map.Add("portugués", "Portuguese");
    spelling_map.Add("portufuês", "Portuguese");
    spelling_map.Add("português", "Portuguese");
    spelling_map.Add("portugugese", "Portuguese");
    spelling_map.Add("portuagese", "Portuguese");
    spelling_map.Add("porutguese", "Portuguese");
    spelling_map.Add("portuguese (brasil)", "Portuguese");
    spelling_map.Add("portuguese (brazil)", "Portuguese");
    spelling_map.Add("potuguese", "Portuguese");
    spelling_map.Add("projekt records", "");
    spelling_map.Add("requires subscription", "");
    spelling_map.Add("romanina", "Romanian");
    spelling_map.Add("română", "Romanian");
    spelling_map.Add("rurkish", "Turkish");
    spelling_map.Add("ruassian", "Russian");
    spelling_map.Add("rus", "Russian");
    spelling_map.Add("rus.", "Russian");
    spelling_map.Add("russia", "Russian");
    spelling_map.Add("russian (translated)", "Russian");
    spelling_map.Add("sanish", "Spanish");
    spelling_map.Add("saṃskṛta", "Sanskrit");
    spelling_map.Add("sbrj", "");
    spelling_map.Add("self-published", "");
    spelling_map.Add("serbia", "Serbian");
    spelling_map.Add("serbian cyrillic", "Serbian");
    spelling_map.Add("serbian [translated]", "Serbian");
    spelling_map.Add("serbocroatian", "Serbo-Croatian");
    spelling_map.Add("serb-croatian", "Serbo-Croatian");
    spelling_map.Add("serbo - croatian", "Serbo-Croatian");   // only whitespace
    spelling_map.Add("several", "");
    spelling_map.Add("shift jis", "ja");
    spelling_map.Add("([[shift jis]])", "ja");
    spelling_map.Add("shqip", "Albanian");
    spelling_map.Add("singapore", "");
    spelling_map.Add("sinhalese", "Sinhala");
    spelling_map.Add("simplified chinese", "Chinese");
    spelling_map.Add("slovakian", "Slovak");
    spelling_map.Add("slovene", "Slovenian");
    spelling_map.Add("slovene, with a summary in english", "Slovenian");
    spelling_map.Add("slovene [slovene biographical encyclopedia]", "Slovenian");
    spelling_map.Add("slovene [slovene biographical lexicon]", "Slovenian");
    spelling_map.Add("slovenia", "Slovenian");
    spelling_map.Add("slovenian language", "Slovenian");
    spelling_map.Add("somalian", "Somali");
    spelling_map.Add("spain", "Spanish");
    spelling_map.Add("spainsh", "Spanish");
    spelling_map.Add("spanihs", "Spanish");
    spelling_map.Add("spanis", "Spanish");
    spelling_map.Add("spansih", "Spanish");
    spelling_map.Add("spanishh", "Spanish");
    spelling_map.Add("spanish=", "Spanish");
    spelling_map.Add("spanish (appendix only)", "Spanish");
    spelling_map.Add("spanish; castilian", "Spanish");
    spelling_map.Add("\"spanish (argentina)\"", "Spanish");
    spelling_map.Add("spanish (castilian)", "Spanish");
    spelling_map.Add("spanish, español", "Spanish");
    spelling_map.Add("spanish.", "Spanish");
    spelling_map.Add("suomi", "Finnish");
    spelling_map.Add("surabaya", "Indonesian");
    spelling_map.Add("svensk", "Swedish");
    spelling_map.Add("svenska", "Swedish");
    spelling_map.Add("swe", "Swedish");
    spelling_map.Add("sweden", "Swedish");
    spelling_map.Add("swedieh", "Swedish");
    spelling_map.Add("swedis", "Swedish");
    spelling_map.Add("swedish)", "Swedish");
    spelling_map.Add("swedisy", "Swedish");
    spelling_map.Add("swiss german", "German");
    spelling_map.Add("taiwanese", "Chinese");
    spelling_map.Add("telgu", "Telugu");
    spelling_map.Add("traditional chinese", "Chinese");
    spelling_map.Add("traditional han chinese", "Chinese");
    spelling_map.Add("translated", "");
    spelling_map.Add("(translated)", "");
    spelling_map.Add("tu", "Turkish");
    spelling_map.Add("tuekish", "Turkish");
    spelling_map.Add("turjish", "Turkish");
    spelling_map.Add("turish", "Turkish");
    spelling_map.Add("turkihs", "Turkish");
    spelling_map.Add("turkis", "Turkish");
    spelling_map.Add("turksh", "Turkish");
    spelling_map.Add("turksih", "Turkish");
    spelling_map.Add("turkşsh", "Turkish");
    spelling_map.Add("türkçe", "Turkish");
    spelling_map.Add("ua", "uk");
    spelling_map.Add("ucalgary", "");
    spelling_map.Add("unidentified", "");
    spelling_map.Add("ukraian", "Ukrainian");
    spelling_map.Add("ukrainan", "Ukrainian");
    spelling_map.Add("uk english", "");
    spelling_map.Add("urkish", "Turkish");
    spelling_map.Add("us", "");
    spelling_map.Add("us english", "");
    spelling_map.Add("valenciano", "Valencian");
    spelling_map.Add("various", "");
    spelling_map.Add("vietnamise", "Vietnamese");
    spelling_map.Add("vn", "Vietnamese");
    spelling_map.Add("weeds", "");
    spelling_map.Add("wessa alien plants", "");
    spelling_map.Add("west frisian", "Western Frisian");
    spelling_map.Add("zh-hans", "Chinese");
    spelling_map.Add("zh=hans", "Chinese");
    spelling_map.Add("македонски", "Macedonian");
    spelling_map.Add("-", "");

    //---------------------------< M I S C   F I X E S >----------------------------------------------------------
    // Replace {{spaced ndash}} / {{snd}} templates with ' – ' inside CS1 templates.
    // A single Replace pass handles only one occurrence per CS1 template (each match has to start at the
    // template's opening braces), hence the loop. The loop test and the replacement deliberately use the
    // SAME pattern: every pass removes at least one dash template, so the loop is guaranteed to terminate.
    pattern = @"(\{\{\s*" + IS_CS1 + @"[^\{\}]*)\{\{\s*(?:spaced\s*ndash|snd)\s*\}\}\s*";
    while (Regex.IsMatch(ArticleText, pattern))
    {
        ArticleText = Regex.Replace(ArticleText, pattern, "$1 – ");
        ndash = true;
    }

    // Replace {{xx icon}} templates with xx within CS1 templates (same loop-until-stable approach as above).
    pattern = @"(\{\{\s*" + IS_CS1 + @"[^\{\}]*)\{\{\s*([a-z]{2})\s*icon\s*\}\}";
    while (Regex.IsMatch(ArticleText, pattern))
    {
        ArticleText = Regex.Replace(ArticleText, pattern, "$1$2");
    }

    // When |language=<punctuation><language>, remove leading punctuation
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+\|\s*language\s*=\s*)[\-\.,;–—]+\s*([^\|\}]+)", "$1$2");

    // When |language=In <language>, remove 'In ' (space is required)
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+\|\s*language\s*=\s*)[Ii]n ([^\|\}]+)", "$1$2");

    // When |language='''<language>''', remove bold wikimarkup
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+\|\s*language\s*=\s*)'''([a-zA-Z\s\-]+)'''", "$1$2");
    // When |language=''<language>'', remove italic wikimarkup
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+\|\s*language\s*=\s*)''([a-zA-Z\s\-]+)''", "$1$2");

    // DATES
    // When |language=<language name> where <language name> is a mdy date, remove the whole parameter
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+)\|\s*language\s*=\s*[a-zA-Z]+\s*\d\d?,\s*\d{4}", "$1");
    // When |language=<language name> where <language name> is a dmy date, remove the whole parameter
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+)\|\s*language\s*=\s*\d\d?\s*[a-zA-Z]+\s*\d{4}", "$1");
    // When |language=<language name> where <language name> is a my date, remove the whole parameter
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+)\|\s*language\s*=\s*[a-zA-Z]+\s*\d{4}", "$1");
    // When |language=<language name> where <language name> is numeric or y-m-d style date, remove the whole parameter
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+)\|\s*language\s*=\s*\d[\d\s\-]*", "$1");

    // WIKILINKS: Remove simple wikilinks from |language parameters because they prevent proper categorization
    // Replace [[Text]] or ([[text]]) with Text
    pattern = @"(\{\{\s*" + IS_CS1 + @"[^\}]*\|\s*language\s*=\s*)\(?\[\[([A-Za-zá\s]+)\]\]\)?";
    ArticleText = Regex.Replace(ArticleText, pattern, "$1$2");
    // WIKILINKS: Remove complex wikilinks from |language parameters because they prevent proper categorization
    // Replace [[Article|Text]] or ([[Article|Text]]) with Text
    pattern = @"(\{\{\s*" + IS_CS1 + @"[^\}]*\|\s*language\s*=\s*)\(?\[\[[A-Za-zá\s\(\)]+\|([A-Za-zá\s]+)\]\]\)?";
    ArticleText = Regex.Replace(ArticleText, pattern, "$1$2");
    // WIKILINKS: Remove complex wikilinks in the form [[xxx{{!}}xxx]] from |language parameters
    // Replace [[Article{{!}}Text]] with Text
    pattern = @"(\{\{\s*" + IS_CS1 + @"[^\}]*\|\s*language\s*=\s*)\[\[[A-Za-z\s\(\)]+\{\{!\}\}([A-Za-z\s]+)\]\]";
    ArticleText = Regex.Replace(ArticleText, pattern, "$1$2");

    // When |language=<language name> language(s), remove the trailing word 'language(s)'
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+\|\s*language\s*=\s*)([a-zA-Z\s\-]+) languages?", "$1$2");
    // When |language={{xx icon, without closing }}, keep only the two-letter code
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+\|\s*language\s*=\s*)\{\{([a-zA-Z]{2})\s*icon", "$1$2");

    // SIMPLIFIED CHINESE
    // When |language=简体中文
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+\|\s*language\s*=\s*)简体中文", "$1Chinese");
    // THAI
    // When |language=ไทย (the language's own name for Thai)
    ArticleText = Regex.Replace(ArticleText, @"({{\s*" + IS_CS1 + @"[^}]+\|\s*language\s*=\s*)ไทย", "$1Thai");

    //---------------------------< M I S S P E L L I N G S >------------------------------------------------------
    // MISSPELLINGS: Fix misspellings in |language=<value> where <value> is a key of spelling_map.
    // Group 1 is everything up to and including '|language='; group 2 is the raw value.
    pattern = @"({{\s*" + IS_CS1 + @"[^}]*\|\s*language\s*=\s*)([^\|\}]*)";
    ArticleText = Regex.Replace(ArticleText, pattern,
        delegate(Match match)
        {
            string new_spelling;
            // Invariant lower-casing keeps the lookup independent of the machine's current culture.
            string key = match.Groups[2].Value.Trim().ToLowerInvariant();
            if (!spelling_map.TryGetValue(key, out new_spelling))
            {
                return match.Groups[0].Value;       // value not in the dictionary (presumed correct): leave as is
            }
            changes_made = true;                    // Summary is an out parameter and can't be captured here
            return match.Groups[1].Value + new_spelling;
        });

    if (changes_made)
    {
        Summary += " |language= spelling;";
    }
    if (ndash)
    {
        Summary += " remove {{spaced ndash}};";
    }
    return ArticleText;
}

AltStyle によって変換されたページ (->オリジナル) /