User:Trappist the monk/CS1 maint: Unrecognized language
Appearance
From Wikipedia, the free encyclopedia
This is a crude AWB custom module that can be used to find and fix previously identified spelling errors in the value assigned to |language=.
Custom module
/// <summary>
/// AWB custom-module entry point: cleans up the value of |language= inside
/// CS1/CS2 citation templates ({{cite xxx}}, {{citation}}).
/// </summary>
/// <remarks>
/// The work happens in two phases:
/// 1. A fixed sequence of regex rewrites that strip markup, dates, wikilinks and
///    other noise from the |language= value.
/// 2. A dictionary lookup that maps known misspellings / non-language values to
///    the correct language name (or to the empty string).
/// The order of the rewrites matters: later rules and the dictionary lookup see
/// the output of the earlier ones.
/// </remarks>
/// <param name="ArticleText">Wikitext of the page being processed.</param>
/// <param name="ArticleTitle">Page title; not used by this module.</param>
/// <param name="wikiNamespace">Namespace number; not used by this module.</param>
/// <param name="Summary">
/// Edit summary. Always starts with "CS1 fixes; "; a note is appended when a
/// dictionary correction was made and when a {{spaced ndash}} was replaced.
/// </param>
/// <param name="Skip">Always set to false by this module.</param>
/// <returns>The (possibly modified) wikitext.</returns>
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    Skip = false;
    Summary = "CS1 fixes; ";        // must be assigned here; optional notes are appended at the end

    bool changes_made = false;      // set by the dictionary pass; the delegate cannot write the out parameter Summary

    // Matches the template name of a CS1/CS2 citation template (without the opening braces).
    string IS_CS1 = @"(?:[Cc]ite[_ ](?=(?:(?:AV|av) [Mm]edia(?: notes)?)|article|blog|book|conference|document|(?:DVD|dvd)(?: notes)?|encyclopa?edia|interview|journal|letter|[Mm]agazine|(?:news(?!group|paper))|paper|podcast|press release|sign|speech|techreport|thesis|video|web)|[Cc]itation|[Cc]ite(?=\s*\|))";

    // "{{ cite xxx" -- start of a CS1 template.
    string cs1Open = @"\{\{\s*" + IS_CS1;

    // Group 1 = everything from "{{cite xxx" up to and including "|language=" (and following whitespace).
    // [^}]* (not [^}]+) so that |language= is also found when it is the very first parameter,
    // e.g. {{citation|language=...}}.
    string upToValue = "(" + cs1Open + @"[^}]*\|\s*language\s*=\s*)";

    // Group 1 = everything from "{{cite xxx" up to but NOT including "|language="; used by the
    // rules that delete the whole parameter.
    string beforeParam = "(" + cs1Open + @"[^}]*)\|\s*language\s*=\s*";

    //---------------------------< M I S C   F I X E S >----------------------------------------------------------

    // Replace {{spaced ndash}} / {{snd}} inside CS1 templates with ' – '.
    // One pass only fixes the first occurrence per template, so repeat until nothing changes.
    string beforeNdash = ArticleText;
    ArticleText = ReplaceUntilStable(ArticleText,
        "(" + cs1Open + @"[^\{\}]*)\{\{\s*(?:spaced\s*ndash|snd)\s*\}\}\s*", "$1 – ");
    bool ndash = ArticleText != beforeNdash;

    // Replace {{xx icon}} templates with xx inside CS1 templates (same repeat-until-stable logic).
    ArticleText = ReplaceUntilStable(ArticleText,
        "(" + cs1Open + @"[^\{\}]*)\{\{\s*([a-z]{2})\s*icon\s*\}\}", "$1$2");

    // |language=<punctuation><language>: remove the leading punctuation
    ArticleText = Regex.Replace(ArticleText, upToValue + @"[\-\.,;–—]+\s*([^\|\}]+)", "$1$2");

    // |language=In <language>: remove 'In ' (the space is required)
    ArticleText = Regex.Replace(ArticleText, upToValue + @"[Ii]n ([^\|\}]+)", "$1$2");

    // |language='''<language>''': remove bold wikimarkup
    ArticleText = Regex.Replace(ArticleText, upToValue + @"'''([a-zA-Z\s\-]+)'''", "$1$2");

    // |language=''<language>'': remove italic wikimarkup
    ArticleText = Regex.Replace(ArticleText, upToValue + @"''([a-zA-Z\s\-]+)''", "$1$2");

    // DATES: when the value is a date rather than a language, remove the parameter.
    // mdy date
    ArticleText = Regex.Replace(ArticleText, beforeParam + @"[a-zA-Z]+\s*\d\d?,\s*\d{4}", "$1");
    // dmy date
    ArticleText = Regex.Replace(ArticleText, beforeParam + @"\d\d?\s*[a-zA-Z]+\s*\d{4}", "$1");
    // my date
    ArticleText = Regex.Replace(ArticleText, beforeParam + @"[a-zA-Z]+\s*\d{4}", "$1");
    // numeric or y-m-d style date
    ArticleText = Regex.Replace(ArticleText, beforeParam + @"\d[\d\s\-]*", "$1");

    // WIKILINKS: removed from |language= because they prevent proper categorization.
    // [[Text]] or ([[Text]]) -> Text
    ArticleText = Regex.Replace(ArticleText, upToValue + @"\(?\[\[([A-Za-zá\s]+)\]\]\)?", "$1$2");
    // [[Article|Text]] or ([[Article|Text]]) -> Text
    ArticleText = Regex.Replace(ArticleText, upToValue + @"\(?\[\[[A-Za-zá\s\(\)]+\|([A-Za-zá\s]+)\]\]\)?", "$1$2");
    // [[Article{{!}}Text]] -> Text
    ArticleText = Regex.Replace(ArticleText, upToValue + @"\[\[[A-Za-z\s\(\)]+\{\{!\}\}([A-Za-z\s]+)\]\]", "$1$2");

    // |language=<language name> language(s): remove the trailing word 'language(s)'
    ArticleText = Regex.Replace(ArticleText, upToValue + @"([a-zA-Z\s\-]+) languages?", "$1$2");

    // |language={{xx icon  (no closing }}): keep only xx
    ArticleText = Regex.Replace(ArticleText, upToValue + @"\{\{([a-zA-Z]{2})\s*icon", "$1$2");

    // SIMPLIFIED CHINESE: |language=简体中文
    ArticleText = Regex.Replace(ArticleText, upToValue + "简体中文", "$1Chinese");

    // THAI: |language=ไทย
    ArticleText = Regex.Replace(ArticleText, upToValue + "ไทย", "$1Thai");

    //---------------------------< M I S S P E L L I N G S >------------------------------------------------------

    // Look up the (trimmed, lower-cased) value in the dictionary; values that are not
    // in the dictionary are presumed correct and left untouched.
    Dictionary<string, string> spelling_map = BuildSpellingMap();

    ArticleText = Regex.Replace(ArticleText, upToValue + @"([^\|\}]*)",
        delegate(Match match)
        {
            string rawValue = match.Groups[2].Value;
            string corrected;

            // Invariant lower-casing so the lookup does not depend on the machine's culture.
            if (!spelling_map.TryGetValue(rawValue.Trim().ToLowerInvariant(), out corrected))
            {
                return match.Value;     // not a known misspelling: return the raw text
            }

            changes_made = true;

            // Keep whatever whitespace/newline followed the old value so that the
            // layout of multi-line templates is not collapsed.
            string trailingWhitespace = rawValue.Substring(rawValue.TrimEnd().Length);
            return match.Groups[1].Value + corrected + trailingWhitespace;
        });

    if (changes_made)
    {
        Summary += " |language= spelling;";
    }
    if (ndash)
    {
        Summary += " remove {{spaced ndash}};";
    }

    return ArticleText;
}

/// <summary>
/// Applies a regex replacement repeatedly until the text no longer changes.
/// Used for rules where one pass can only fix the first occurrence inside each template.
/// </summary>
private static string ReplaceUntilStable(string text, string pattern, string replacement)
{
    string previous;
    do
    {
        previous = text;
        text = Regex.Replace(previous, pattern, replacement);
    }
    while (text != previous);

    return text;
}

/// <summary>
/// Builds the crude dictionary of misspellings: key = wrong value, value = replacement.
/// The dictionary is also used to drop qualifiers (e.g. "portuguese (brazil)" -> "Portuguese")
/// and to blank values that are not languages at all (replacement is the empty string).
/// Keys MUST be lower case because the caller lower-cases the value before the lookup.
/// </summary>
private static Dictionary<string, string> BuildSpellingMap()
{
    Dictionary<string, string> spelling_map = new Dictionary<string, string>();

    spelling_map.Add("albainian", "Albanian");
    spelling_map.Add("al", "Albanian");
    spelling_map.Add("albania", "Albanian");
    spelling_map.Add("alemán", "German");
    spelling_map.Add("american english", "");
    spelling_map.Add("american folklore society", "");
    spelling_map.Add("angol", "");
    spelling_map.Add("arabic (kuwait)", "Arabic");
    spelling_map.Add("arcs", "");
    spelling_map.Add("argentina", "Spanish");
    spelling_map.Add("australian", "");
    spelling_map.Add("austrian", "German");
    spelling_map.Add("austrian german", "German");
    spelling_map.Add("austrian-german", "German");
    spelling_map.Add("azerbajani", "Azerbaijani");
    spelling_map.Add("azerbaycani", "Azerbaijani");
    spelling_map.Add("azerbaijan", "Azerbaijani");
    spelling_map.Add("azeri", "Azerbaijani");
    spelling_map.Add("bahasa", "");                         // bahasa = 'language'
    spelling_map.Add("bahasa indonesia", "Indonesian");
    spelling_map.Add("bahasa indonesian", "Indonesian");
    spelling_map.Add("bahasa inggris", "");                 // English
    spelling_map.Add("bahasa malaysia", "Malaysian");
    spelling_map.Add("bangla", "Bengali");
    spelling_map.Add("bbc", "");
    spelling_map.Add("belarussian", "Belarusian");
    spelling_map.Add("belorussian", "Belarusian");
    spelling_map.Add("book", "");
    spelling_map.Add("braille", "");                        // writing system, not a language
    spelling_map.Add("brazilian", "Portuguese");
    spelling_map.Add("brazilian portuguese", "Portuguese");
    spelling_map.Add("canadian english", "");
    spelling_map.Add("canadian french", "French");
    spelling_map.Add("castellano", "Spanish");
    spelling_map.Add("castellà", "Spanish");
    spelling_map.Add("castilan", "Spanish");
    spelling_map.Add("castillan", "Spanish");
    spelling_map.Add("castilian", "Spanish");
    spelling_map.Add("castilian spanish", "Spanish");
    spelling_map.Add("castillian", "Spanish");
    spelling_map.Add("castillian (spanish)", "Spanish");
    spelling_map.Add("catalano", "Catalan");
    spelling_map.Add("catalán", "Catalan");
    spelling_map.Add("català", "Catalan");
    spelling_map.Add("china", "Chinese");
    spelling_map.Add("china times", "");
    spelling_map.Add("chinese simp.", "Chinese");
    spelling_map.Add("chinese.", "Chinese");
    spelling_map.Add("chinese (simplified han)", "Chinese");
    spelling_map.Add("chinese(traditional)", "Chinese");
    spelling_map.Add("chinese (traditional)", "Chinese");
    spelling_map.Add("chinewe", "Chinese");
    spelling_map.Add("classical chinese", "Chinese");
    spelling_map.Add("cn", "Chinese");
    spelling_map.Add("costa rica", "Spanish");
    spelling_map.Add("cricinfo", "");
    spelling_map.Add("croation", "Croatian");
    spelling_map.Add("cyrillic", "");
    spelling_map.Add("cz", "cs");
    spelling_map.Add("czec", "Czech");
    spelling_map.Add("czecg", "Czech");
    spelling_map.Add("danis", "Danish");
    spelling_map.Add("deutsch", "German");
    spelling_map.Add("dhivehi", "Divehi");
    spelling_map.Add("dansih", "Danish");
    spelling_map.Add("dansk", "Danish");
    spelling_map.Add("denmark", "Danish");
    spelling_map.Add("dk", "Danish");
    spelling_map.Add("du", "Dutch");
    spelling_map.Add("duth", "Dutch");
    spelling_map.Add("duthc", "Dutch");
    spelling_map.Add("en_au", "");
    spelling_map.Add("en-au", "");
    spelling_map.Add("en-gb", "");
    spelling_map.Add("en-us", "");
    spelling_map.Add("eng", "");
    spelling_map.Add("eng.", "");
    spelling_map.Add("engl", "");
    spelling_map.Add("engliah", "");
    spelling_map.Add("englisch", "");
    spelling_map.Add("englis", "");
    spelling_map.Add("english", "");
    spelling_map.Add("english edition", "");
    spelling_map.Add("english (us)", "");                   // key must be lower case to ever match
    spelling_map.Add("english, u.k.", "");
    spelling_map.Add("englısh", "");                        // i without a dot: ı
    spelling_map.Add("english translation", "");
    spelling_map.Add("english trans. by k. k. dixit", "|others=trans. by K. K. Dixit");
    spelling_map.Add("english (american)", "");
    spelling_map.Add("english (british styled)", "");
    spelling_map.Add("english (british-style pakistani english)", "");
    spelling_map.Add("english (pakistan)", "");
    spelling_map.Add("eng;ish", "");
    spelling_map.Add("erbian", "Serbian");
    spelling_map.Add("español", "Spanish");
    spelling_map.Add("espanol", "Spanish");
    spelling_map.Add("espanhol", "Spanish");
    spelling_map.Add("estonia", "Estonian");
    spelling_map.Add("euskara", "Basque");
    spelling_map.Add("faeroese", "Faroese");
    spelling_map.Add("færøysk", "Faroese");
    spelling_map.Add("farsi", "Persian");
    spelling_map.Add("fgerman", "German");
    spelling_map.Add("finis", "Finnish");
    spelling_map.Add("finish", "Finnish");
    spelling_map.Add("finnisg", "Finnish");
    spelling_map.Add("foreword", "");
    spelling_map.Add("francais", "French");
    spelling_map.Add("français", "French");
    spelling_map.Add("france", "French");
    spelling_map.Add("francés", "French");
    spelling_map.Add("fre", "French");
    spelling_map.Add("frenc", "French");
    spelling_map.Add("frence", "French");
    spelling_map.Add("frencg", "French");
    spelling_map.Add("french (abstract)", "French");
    spelling_map.Add("gaeilge", "Irish");
    spelling_map.Add("gaeilge, [ga]", "Irish");
    spelling_map.Add("gallego", "Galician");
    spelling_map.Add("ge", "Georgian");
    spelling_map.Add("ger", "German");
    spelling_map.Add("geraman", "German");
    spelling_map.Add("germaan", "German");
    spelling_map.Add("germany", "German");
    spelling_map.Add("germană", "German");
    spelling_map.Add("german (swiss)", "German");
    spelling_map.Add("german-", "German");
    spelling_map.Add("germna", "German");
    spelling_map.Add("gernan", "German");
    spelling_map.Add("greece", "Greek");
    spelling_map.Add("greenlandic", "Kalaallisut");
    spelling_map.Add("hangul", "Korean");
    spelling_map.Add("hn", "Spanish");
    spelling_map.Add("honduran spanish", "Spanish");
    spelling_map.Add("hungary", "Hungarian");
    spelling_map.Add("imgartists.com", "");
    spelling_map.Add("indonesia", "Indonesian");
    spelling_map.Add("inglês", "");
    spelling_map.Add("inglés", "");
    spelling_map.Add("ingles", "");
    spelling_map.Add("islandic", "Icelandic");
    spelling_map.Add("israel", "Hebrew");
    spelling_map.Add("irsaeli", "Hebrew");
    spelling_map.Add("israeli", "Hebrew");
    spelling_map.Add("ilalian", "Italian");
    spelling_map.Add("italiain", "Italian");
    spelling_map.Add("italic", "Italian");
    spelling_map.Add("italics", "Italian");
    spelling_map.Add("italien", "Italian");
    spelling_map.Add("italian/milanese dialect", "Italian");
    spelling_map.Add("italin", "Italian");
    spelling_map.Add("italina", "Italian");
    spelling_map.Add("italiano", "Italian");
    spelling_map.Add("italy", "Italian");
    spelling_map.Add("itunes", "");
    spelling_map.Add("japanaese", "Japanese");
    spelling_map.Add("japaneses", "Japanese");
    spelling_map.Add("japanese)", "Japanese");
    spelling_map.Add("japonês", "Japanese");
    spelling_map.Add("japones", "Japanese");
    spelling_map.Add("japonese", "Japanese");
    spelling_map.Add("javanesse", "Javanese");
    spelling_map.Add("javascript", "");
    spelling_map.Add("jp", "ja");
    spelling_map.Add("jpn", "ja");
    spelling_map.Add("jspanese", "Japanese");
    spelling_map.Add("kannaḍa", "Kannada");
    spelling_map.Add("kiswahili", "Swahili");
    spelling_map.Add("koeran", "Korean");
    spelling_map.Add("koṅkaṇī", "Konkani");
    spelling_map.Add("korea", "Korean");
    spelling_map.Add("koreai", "Korean");
    spelling_map.Add("koream", "Korean");
    spelling_map.Add("korean=", "Korean");
    spelling_map.Add("koren", "Korean");
    spelling_map.Add("language", "");
    spelling_map.Add("lat", "Latin");                       // not Latvian
    spelling_map.Add("latin (original citation)", "Latin");
    spelling_map.Add("latín", "Latin");
    spelling_map.Add("lecture", "");
    spelling_map.Add("legalese", "");
    spelling_map.Add("lietuvių k.", "Lithuanian");
    spelling_map.Add("lithusanian", "Lithuanian");
    spelling_map.Add("magyar", "Hungarian");
    spelling_map.Add("malayalam)", "Malayalam");
    spelling_map.Add("mandarin", "Chinese");
    spelling_map.Add("mandarin chinese", "Chinese");
    spelling_map.Add("many", "");
    spelling_map.Add("manuscript latin", "Latin");
    spelling_map.Add("marāṭhī", "Marathi");
    spelling_map.Add("mexican", "Spanish");
    spelling_map.Add("mexico city", "Spanish");
    spelling_map.Add("mixed", "");
    spelling_map.Add("modern russian", "Russian");
    spelling_map.Add("mongol", "Mongolian");
    spelling_map.Add("multiple", "");
    spelling_map.Add("multiplelanguages", "");
    spelling_map.Add("multiple languages", "");
    spelling_map.Add("mx", "Spanish");
    spelling_map.Add("nepal bhasa", "Newar");               // 639-3 new
    spelling_map.Add("netherlands", "Dutch");
    spelling_map.Add("norge", "Norwegian");
    spelling_map.Add("norsk", "Norwegian");
    spelling_map.Add("norsk (bokmål)", "Norwegian Bokmål");
    spelling_map.Add("northern sámi", "Northern Sami");
    spelling_map.Add("norway", "Norwegian");
    spelling_map.Add("norwegain", "Norwegian");
    spelling_map.Add("norwegian", "Norwegian");             // because of bug in module, since fixed
    spelling_map.Add("norwegian bokmal", "Norwegian Bokmål");
    spelling_map.Add("norwegian nynorsk", "Norwegian Nynorsk");
    spelling_map.Add("norweigen", "Norwegian");
    spelling_map.Add("norweigian", "Norwegian");
    spelling_map.Add("norwergian", "Norwegian");
    spelling_map.Add("norwgian", "Norwegian");
    spelling_map.Add("pay-per-view", "");
    spelling_map.Add("pay=per-view", "");
    spelling_map.Add("pay-per=view", "");
    spelling_map.Add("persian (farsi)", "Persian");
    spelling_map.Add("pdf", "");
    spelling_map.Add("pol", "Polish");
    spelling_map.Add("polis", "Polish");
    spelling_map.Add("polish2", "Polish");
    spelling_map.Add("polishi", "Polish");
    spelling_map.Add("polsih", "Polish");
    spelling_map.Add("portguês", "Portuguese");
    spelling_map.Add("portughese", "Portuguese");
    spelling_map.Add("portugues", "Portuguese");
    spelling_map.Add("portugués", "Portuguese");
    spelling_map.Add("portufuês", "Portuguese");
    spelling_map.Add("português", "Portuguese");
    spelling_map.Add("portugugese", "Portuguese");
    spelling_map.Add("portuagese", "Portuguese");
    spelling_map.Add("porutguese", "Portuguese");
    spelling_map.Add("portuguese (brasil)", "Portuguese");
    spelling_map.Add("portuguese (brazil)", "Portuguese");
    spelling_map.Add("potuguese", "Portuguese");
    spelling_map.Add("projekt records", "");
    spelling_map.Add("requires subscription", "");
    spelling_map.Add("romanina", "Romanian");
    spelling_map.Add("română", "Romanian");
    spelling_map.Add("rurkish", "Turkish");
    spelling_map.Add("ruassian", "Russian");
    spelling_map.Add("rus", "Russian");
    spelling_map.Add("rus.", "Russian");
    spelling_map.Add("russia", "Russian");
    spelling_map.Add("russian (translated)", "Russian");
    spelling_map.Add("sanish", "Spanish");
    spelling_map.Add("saṃskṛta", "Sanskrit");
    spelling_map.Add("sbrj", "");
    spelling_map.Add("self-published", "");
    spelling_map.Add("serbia", "Serbian");
    spelling_map.Add("serbian cyrillic", "Serbian");
    spelling_map.Add("serbian [translated]", "Serbian");
    spelling_map.Add("serbocroatian", "Serbo-Croatian");
    spelling_map.Add("serb-croatian", "Serbo-Croatian");
    spelling_map.Add("serbo - croatian", "Serbo-Croatian"); // only whitespace
    spelling_map.Add("several", "");
    spelling_map.Add("shift jis", "ja");
    spelling_map.Add("([[shift jis]])", "ja");
    spelling_map.Add("shqip", "Albanian");
    spelling_map.Add("singapore", "");
    spelling_map.Add("sinhalese", "Sinhala");
    spelling_map.Add("simplified chinese", "Chinese");
    spelling_map.Add("slovakian", "Slovak");
    spelling_map.Add("slovene", "Slovenian");
    spelling_map.Add("slovene, with a summary in english", "Slovenian");
    spelling_map.Add("slovene [slovene biographical encyclopedia]", "Slovenian");
    spelling_map.Add("slovene [slovene biographical lexicon]", "Slovenian");
    spelling_map.Add("slovenia", "Slovenian");
    spelling_map.Add("slovenian language", "Slovenian");
    spelling_map.Add("somalian", "Somali");
    spelling_map.Add("spain", "Spanish");
    spelling_map.Add("spainsh", "Spanish");
    spelling_map.Add("spanihs", "Spanish");
    spelling_map.Add("spanis", "Spanish");
    spelling_map.Add("spansih", "Spanish");
    spelling_map.Add("spanishh", "Spanish");
    spelling_map.Add("spanish=", "Spanish");
    spelling_map.Add("spanish (appendix only)", "Spanish");
    spelling_map.Add("spanish; castilian", "Spanish");
    spelling_map.Add("\"spanish (argentina)\"", "Spanish");
    spelling_map.Add("spanish (castilian)", "Spanish");
    spelling_map.Add("spanish, español", "Spanish");
    spelling_map.Add("spanish.", "Spanish");
    spelling_map.Add("suomi", "Finnish");
    spelling_map.Add("surabaya", "Indonesian");
    spelling_map.Add("svensk", "Swedish");
    spelling_map.Add("svenska", "Swedish");
    spelling_map.Add("swe", "Swedish");
    spelling_map.Add("sweden", "Swedish");
    spelling_map.Add("swedieh", "Swedish");
    spelling_map.Add("swedis", "Swedish");
    spelling_map.Add("swedish)", "Swedish");
    spelling_map.Add("swedisy", "Swedish");
    spelling_map.Add("swiss german", "German");
    spelling_map.Add("taiwanese", "Chinese");
    spelling_map.Add("telgu", "Telugu");
    spelling_map.Add("traditional chinese", "Chinese");
    spelling_map.Add("traditional han chinese", "Chinese");
    spelling_map.Add("translated", "");
    spelling_map.Add("(translated)", "");
    spelling_map.Add("tu", "Turkish");
    spelling_map.Add("tuekish", "Turkish");
    spelling_map.Add("turjish", "Turkish");
    spelling_map.Add("turish", "Turkish");
    spelling_map.Add("turkihs", "Turkish");
    spelling_map.Add("turkis", "Turkish");
    spelling_map.Add("turksh", "Turkish");
    spelling_map.Add("turksih", "Turkish");
    spelling_map.Add("turkşsh", "Turkish");
    spelling_map.Add("türkçe", "Turkish");
    spelling_map.Add("ua", "uk");
    spelling_map.Add("ucalgary", "");
    spelling_map.Add("unidentified", "");
    spelling_map.Add("ukraian", "Ukrainian");
    spelling_map.Add("ukrainan", "Ukrainian");
    spelling_map.Add("uk english", "");
    spelling_map.Add("urkish", "Turkish");
    spelling_map.Add("us", "");
    spelling_map.Add("us english", "");
    spelling_map.Add("valenciano", "Valencian");
    spelling_map.Add("various", "");
    spelling_map.Add("vietnamise", "Vietnamese");
    spelling_map.Add("vn", "Vietnamese");
    spelling_map.Add("weeds", "");
    spelling_map.Add("wessa alien plants", "");
    spelling_map.Add("west frisian", "Western Frisian");
    spelling_map.Add("zh-hans", "Chinese");
    spelling_map.Add("zh=hans", "Chinese");
    spelling_map.Add("македонски", "Macedonian");
    spelling_map.Add("简体中文", "Chinese");                 // simplified chinese
    spelling_map.Add("-", "");

    return spelling_map;
}