Module:Wikt-lang/data

Module documentation[view] [edit] [history] [purge]
Language templates
Language names (ISO 639)
{{ISO 639 name }}
Interwiki links
{{Wikt-lang }} Module:Wikt-lang Module:Wikt-lang/data {{Interlanguage link }}
Foreign-language text
{{Lang }} {{Langx }} Module:Lang {{Transliteration }} {{Title language }}
Other
{{IPA }}
v t e
The redirects table in Module:Wikt-lang/data maps each Wikipedia language code to the corresponding code used on the English Wiktionary. wikt:Wiktionary:Language treatment records this relationship for ISO codes. For instance, bs (Bosnian), hr (Croatian), sr (Serbian), cnr (Montenegrin), and kjv (Kajkavian) are all placed under the header for sh (Serbo-Croatian) in Wiktionary entries; see, for example, wikt:kaj#Serbo-Croatian, the entry for the word that Kajkavian is named after. The subsumed codes should still be used for language tagging on Wikipedia.
The above documentation is transcluded from Module:Wikt-lang/data/doc. (edit | history)
Editors can experiment in this module's sandbox (edit | diff) and testcases (create) pages.
Subpages of this module.
 -- Shorthand used throughout this module to build a string from a code point.
 local U = mw.ustring.char

 -- Diacritics, from the [[Combining Diacritical Marks]] block.
 -- Each must be declared with "local <name>": written as one word
 -- ("localgrave") the statement assigns an unrelated global and leaves the
 -- name that the language table refers to undefined.
 local grave = U(0x300)
 local acute = U(0x301)
 local circumflex = U(0x302)
 local tilde = U(0x303)
 local macron = U(0x304)
 local breve = U(0x306)
 local dot = U(0x307) -- dot above
 local diaeresis = U(0x308)
 local double_acute = U(0x30B)
 local caron = U(0x30C)
 local double_grave = U(0x30F)
 local invbreve = U(0x311) -- inverted breve
 local dot_below = U(0x323)
 local undertie = U(0x35C) -- double breve below

 --[[

 	This is a table of Wiktionary language codes with data belonging to them.
 	Name is the "canonical name" used on Wiktionary.
 	Article is the Wikipedia article.
 	Script is the ISO 15924 code.

 	Other fields that appear on entries below:
 	  type            "reconstructed" or "appendix"
 	  direction       "rtl" on the Arabic entries
 	  replacements    text substitutions, described by the comments on the
 	                  individual entries
 	  Wikipedia_code  present on several entries marked "Incorrect tag";
 	                  presumably the tag Wikipedia should use instead
 	                  (TODO confirm against the consuming module)
 	  Wikipedia_name  present on the Proto-Hellenic entries; presumably the
 	                  name Wikipedia uses (TODO confirm)
 ]]
 local data = {
 ["languages"] = {
 ["aaq"] = {
 	name = "Penobscot",
 },
 ["ab"] = {
 	name = "Abkhaz",
 },
 ["abe"] = {
 	name = "Abenaki",
 },
 ["alg-pro"] = {
 	name = "Proto-Algonquian",
 	article = "Proto-Algonquian language",
 },
 ["ang"] = {
 	name = "Old English",
 	-- A plain string, like the article field of every other entry; this one
 	-- was wrapped in a one-element table.
 	-- NOTE(review): confirm the consuming module expects a string here.
 	article = "Old English",
 	-- Remove macrons, acutes, and overdots.
 	replacements = {
 		decompose = true,
 		from = {"[" .. macron .. acute .. dot .. "]"},
 	},
 },
 -- The six Arabic entries below carry identical replacement tables:
 --   * alif with wasla (U+0671) is replaced by a plain alif (U+0627);
 --   * tatweel, fathatan, dammatan, kasratan, fatha, damma, kasra, shadda,
 --     sukun, and the superscript (dagger) alif are removed.
 ["ar"] = {
 	name = "Arabic",
 	article = "Arabic language",
 	direction = "rtl", -- Should be in the script data module.
 	replacements = {
 		[U(0x0671)] = U(0x0627),
 		["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
 			.. U(0x064E) .. U(0x064F) .. U(0x0650)
 			.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
 	},
 },
 ["ara"] = {
 	name = "Arabic",
 	article = "Arabic language",
 	direction = "rtl", -- Should be in the script data module.
 	replacements = {
 		[U(0x0671)] = U(0x0627),
 		["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
 			.. U(0x064E) .. U(0x064F) .. U(0x0650)
 			.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
 	},
 },
 ["arb"] = {
 	name = "Modern Standard Arabic",
 	article = "Modern Standard Arabic",
 	direction = "rtl", -- Should be in the script data module.
 	replacements = {
 		[U(0x0671)] = U(0x0627),
 		["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
 			.. U(0x064E) .. U(0x064F) .. U(0x0650)
 			.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
 	},
 },
 ["apc"] = {
 	name = "North Levantine Arabic",
 	article = "North Levantine Arabic",
 	direction = "rtl", -- Should be in the script data module.
 	replacements = {
 		[U(0x0671)] = U(0x0627),
 		["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
 			.. U(0x064E) .. U(0x064F) .. U(0x0650)
 			.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
 	},
 },
 ["ajp"] = {
 	name = "South Levantine Arabic",
 	article = "South Levantine Arabic",
 	direction = "rtl", -- Should be in the script data module.
 	replacements = {
 		[U(0x0671)] = U(0x0627),
 		["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
 			.. U(0x064E) .. U(0x064F) .. U(0x0650)
 			.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
 	},
 },
 ["arz"] = {
 	name = "Egyptian Arabic",
 	article = "Egyptian Arabic",
 	direction = "rtl", -- Should be in the script data module.
 	replacements = {
 		[U(0x0671)] = U(0x0627),
 		["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
 			.. U(0x064E) .. U(0x064F) .. U(0x0650)
 			.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
 	},
 },
 ["av"] = {
 	name = "Avar",
 },
 ["be"] = {
 	article = "Belarusian language",
 	-- The combining acute accent is replaced by the empty string.
 	replacements = {
 		[acute] = "",
 	},
 },
 ["bn"] = {
 	name = "Bengali",
 	article = "Bengali language",
 },
 ["bua"] = {
 	name = "Buryat",
 },
 ["cel-pro"] = { -- Incorrect tag
 	name = "Proto-Celtic",
 	Wikipedia_code = "cel-x-proto",
 },
 ["cel-x-proto"] = {
 	name = "Proto-Celtic",
 },
 ["cel-bry-pro"] = { -- Incorrect tag
 	name = "Proto-Brythonic",
 	article = "Common Brittonic",
 	type = "reconstructed",
 },
 ["com"] = {
 	name = "Comanche",
 	article = "Comanche language",
 },
 ["cu"] = {
 	name = "Old Church Slavonic",
 	article = "Old Church Slavonic",
 },
 ["de"] = {
 	name = "German",
 	article = "German language",
 },
 ["en"] = {
 	name = "English",
 	article = "English language",
 },
 ["es"] = {
 	name = "Spanish",
 	article = "Spanish language",
 },
 ["egy"] = {
 	name = "Egyptian",
 },
 ["evn"] = {
 	name = "Evenki",
 	article = "Evenki language",
 },
 ["fr"] = {
 	name = "French",
 	article = "French language",
 },
 ["frm"] = {
 	name = "Middle French",
 	article = "Middle French",
 },
 ["frp"] = {
 	name = "Franco-Provençal",
 },
 ["ff"] = {
 	name = "Fula",
 },
 ["gem-pro"] = { -- Incorrect tag
 	name = "Proto-Germanic",
 	article = "Proto-Germanic language",
 	type = "reconstructed",
 	replacements = {},
 	Wikipedia_code = "gem-x-proto",
 },
 ["gem-x-proto"] = {
 	name = "Proto-Germanic",
 	article = "Proto-Germanic language",
 	type = "reconstructed",
 	replacements = {},
 },
 ["gml"] = {
 	name = "Middle Low German",
 },
 ["gmw-ecg"] = {
 	name = "East Central German",
 },
 ["gmw-x-proto"] = {
 	name = "Proto-West Germanic",
 	article = "Proto-West Germanic language",
 	type = "reconstructed",
 	replacements = {},
 },
 ["gmq-x-gut"] = {
 	name = "Gutnish",
 	article = "Gutnish",
 },
 ["goh"] = {
 	-- Pattern covers macron, circumflex, and diaeresis; no "to" list is
 	-- given, as in the other diacritic-removing entries.
 	replacements = {
 		decompose = true,
 		from = {
 			"[" .. macron .. circumflex .. diaeresis .. "]",
 		},
 	},
 },
 ["got"] = {
 	name = "Gothic",
 	article = "Gothic language",
 	replacements = {
 		-- Latin letters are mapped to Gothic ones, since people will not
 		-- want to copy and paste Gothic letters in.
 		["[AÁaáĀā]"] = "𐌰",
 		["[Bb]"] = "𐌱",
 		["[Gg]"] = "𐌲",
 		["[Dd]"] = "𐌳",
 		["[EeĒē]"] = "𐌴",
 		["[Qq]"] = "𐌵",
 		["[Zz]"] = "𐌶",
 		["[Hh]"] = "𐌷",
 		["[Þþ]"] = "𐌸",
 		["[IiÍí]"] = "𐌹",
 		["[Kk]"] = "𐌺",
 		["[Ll]"] = "𐌻",
 		["[Mm]"] = "𐌼",
 		["[Nn]"] = "𐌽",
 		["[Jj]"] = "𐌾",
 		["[UuÚúŪū]"] = "𐌿",
 		["[Pp]"] = "𐍀",
 		["[Rr]"] = "𐍂",
 		["[Ss]"] = "𐍃",
 		["[Tt]"] = "𐍄",
 		["[WwYy]"] = "𐍅",
 		["[Ff]"] = "𐍆",
 		["[Xx]"] = "𐍇",
 		["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
 		["[OoŌō]"] = "𐍉",
 	},
 },
 ["gsw"] = {
 	name = "Alemannic German",
 },
 ["grc"] = {
 	name = "Ancient Greek",
 	article = "Ancient Greek",
 	replacements = {
 		decompose = true,
 		from = {
 			-- Replace variant letterforms with standard ones.
 			-- The variants are given as code points: written as literal
 			-- characters they had been folded to the standard letters, which
 			-- made six of the seven substitutions no-ops and turned the
 			-- seventh into "final sigma -> sigma".
 			U(0x3D0), -- beta symbol
 			U(0x3F5), -- lunate epsilon symbol
 			U(0x3D1), -- theta symbol
 			U(0x3F0), -- kappa symbol
 			U(0x3F1), -- rho symbol
 			U(0x3F2), -- lunate sigma symbol
 			U(0x3D5), -- phi symbol
 			-- Remove macrons, breves, and underties (no "to" counterpart).
 			"[" .. macron .. breve .. undertie .. "]",
 		},
 		to = {
 			"β", "ε", "θ", "κ", "ρ", "σ", "φ",
 		},
 	},
 },
 ["grk-pro"] = { -- Incorrect tag
 	name = "Proto-Hellenic",
 	Wikipedia_name = "Proto-Greek",
 	article = "Proto-Greek language",
 	type = "reconstructed",
 	replacements = {},
 	Wikipedia_code = "grk-x-proto",
 },
 ["grk-x-proto"] = {
 	name = "Proto-Hellenic",
 	Wikipedia_name = "Proto-Greek",
 	article = "Proto-Greek language",
 	type = "reconstructed",
 	replacements = {},
 },
 ["grt"] = {
 	name = "Garo",
 },
 ["ha"] = {
 	name = "Hausa",
 	-- Remove grave, circumflex, macron, acute, and tilde.
 	replacements = {
 		decompose = true,
 		from = {"[" .. grave .. circumflex .. macron .. acute .. tilde .. "]"},
 	},
 },
 ["hi"] = {
 	name = "Hindi",
 	article = "Hindi",
 },
 ["ilo"] = {
 	name = "Ilocano",
 	article = "Ilocano language",
 },
 ["ine-bsl-pro"] = {
 	name = "Proto-Balto-Slavic",
 	article = "Proto-Balto-Slavic language",
 	type = "reconstructed",
 },
 ["ine-pro"] = { -- Incorrect tag
 	name = "Proto-Indo-European",
 	article = "Proto-Indo-European language",
 	type = "reconstructed",
 	replacements = {},
 	Wikipedia_code = "ine-x-proto",
 },
 ["ine-x-proto"] = {
 	name = "Proto-Indo-European",
 	article = "Proto-Indo-European language",
 	type = "reconstructed",
 	replacements = {},
 },
 ["ja"] = {
 	name = "Japanese",
 	article = "Japanese language",
 },
 ["jbo"] = { -- Lojban
 	type = "appendix",
 },
 ["ket"] = {
 	name = "Ket",
 	article = "Ket language",
 },
 ["ksk"] = {
 	name = "Kansa",
 	article = "Kansa language",
 },
 ["la"] = {
 	name = "Latin",
 	article = "Latin",
 	-- Pattern covers macron, breve, and diaeresis.
 	replacements = {
 		decompose = true,
 		from = {"[" .. macron .. breve .. diaeresis .. "]"},
 	},
 },
 ["lt"] = {
 	name = "Lithuanian",
 	-- Remove acute, tilde, and grave.
 	replacements = {
 		decompose = true,
 		from = {"[" .. acute .. tilde .. grave .. "]"},
 	},
 },
 ["mkh-mvi"] = {
 	name = "Middle Vietnamese",
 },
 ["moe"] = {
 	name = "Cree",
 },
 ["mul"] = {
 	name = "Translingual",
 	article = "",
 },
 ["nci"] = {
 	name = "Classical Nahuatl",
 	article = "Classical Nahuatl",
 	replacements = {
 		decompose = true,
 		-- Remove macrons, acutes, circumflexes, graves, and saltillo;
 		-- see [[Saltillo (linguistics)]].
 		from = {"[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]"},
 	},
 },
 ["nds-de"] = {
 	name = "German Low German",
 },
 ["non"] = {
 	name = "Old Norse",
 },
 ["non-x-proto"] = {
 	name = "Proto-Norse",
 },
 ["odt"] = {
 	name = "Old Dutch",
 },
 ["oge"] = {
 	name = "Old Georgian",
 },
 ["oj"] = {
 	name = "Ojibwe",
 },
 ["orv"] = {
 	name = "Old East Slavic",
 	article = "Old East Slavic",
 	-- The character U+0484 is replaced by the empty string.
 	replacements = {
 		[U(0x484)] = "",
 	},
 },
 ["osp"] = {
 	name = "Old Spanish",
 },
 ["osx"] = {
 	name = "Old Saxon",
 },
 ["gug"] = {
 	name = "Paraguayan Guaraní",
 },
 ["pt"] = {
 	name = "Portuguese",
 	article = "Portuguese language",
 	-- ["scripts"] = { "Latn" },
 },
 ["pa"] = {
 	name = "Punjabi",
 	article = "Punjabi language",
 },
 ["pgl"] = {
 	name = "Primitive Irish",
 	article = "Primitive Irish",
 },
 ["pis"] = {
 	name = "Pijin",
 	article = "Pijin language",
 },
 ["poz-x-poly-proto"] = {
 	name = "Proto-Nuclear Polynesian",
 	article = "Proto-Polynesian language",
 	type = "reconstructed",
 },
 ["rap"] = {
 	name = "Rapa Nui",
 	article = "Rapa Nui language",
 },
 ["ru"] = {
 	name = "Russian",
 	article = "Russian language",
 	-- The combining acute accent is replaced by the empty string.
 	replacements = {
 		[acute] = "",
 	},
 },
 ["rw"] = {
 	name = "Rwanda-Rundi",
 },
 ["se"] = {
 	replacements = {
 		-- A listed consonant, an apostrophe, and the same consonant again
 		-- become the doubled consonant without the apostrophe.
 		["([đflmnŋrsšŧv])'%1"] = "%1%1",
 	},
 },
 ["sem-pro"] = {
 	name = "Proto-Semitic",
 	article = "Proto-Semitic",
 	type = "reconstructed",
 },
 ["sh"] = {
 	article = "Serbo-Croatian language",
 	replacements = {
 		decompose = true,
 		-- A listed Latin or Cyrillic letter followed by one of the listed
 		-- diacritics is replaced by the bare letter (capture %1).
 		from = {
 			"([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave
 				.. grave .. invbreve .. acute .. macron .. tilde .. "]",
 		},
 		to = {"%1"},
 	},
 },
 ["sl"] = {
 	name = "Slovene",
 	replacements = {
 		decompose = true,
 		-- Remove tonal orthography.
 		from = {
 			"ł",
 			"[" .. grave .. acute .. macron .. double_grave .. invbreve .. circumflex .. dot_below .. "]",
 		},
 		to = {"l"},
 	},
 },
 -- Both Proto-Slavic entries carry the same replacement table: accented
 -- vowels map to the bare vowel, the listed combining marks are removed, and
 -- the breve-marked i and u map to the Cyrillic soft and hard signs.
 ["sla-pro"] = { -- Incorrect tag
 	name = "Proto-Slavic", -- also Common Slavic
 	type = "reconstructed",
 	replacements = {
 		["[ÀÁÃĀȀȂ]"] = "A",
 		["[àáãāȁȃ]"] = "a",
 		["[ÈÉẼĒȄȆ]"] = "E",
 		["[èéẽēȅȇ]"] = "e",
 		["[ÌÍĨĪȈȊ]"] = "I",
 		["[ìíĩīȉȋ]"] = "i",
 		["[ÒÓÕŌȌȎŐ]"] = "O",
 		["[òóõōȍȏő]"] = "o",
 		["[ÙÚŨŪȔȖŰ]"] = "U",
 		["[ùúũūȕȗű]"] = "u",
 		["[ỲÝỸȲ]"] = "Y",
 		["[ỳýỹȳ]"] = "y",
 		["Ǭ"] = "Ǫ",
 		["ǭ"] = "ǫ",
 		["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
 		["ĭ"] = "ь",
 		["ŭ"] = "ъ",
 	},
 	Wikipedia_code = "sla-x-proto",
 },
 ["sla-x-proto"] = {
 	name = "Proto-Slavic", -- also Common Slavic
 	type = "reconstructed",
 	replacements = {
 		["[ÀÁÃĀȀȂ]"] = "A",
 		["[àáãāȁȃ]"] = "a",
 		["[ÈÉẼĒȄȆ]"] = "E",
 		["[èéẽēȅȇ]"] = "e",
 		["[ÌÍĨĪȈȊ]"] = "I",
 		["[ìíĩīȉȋ]"] = "i",
 		["[ÒÓÕŌȌȎŐ]"] = "O",
 		["[òóõōȍȏő]"] = "o",
 		["[ÙÚŨŪȔȖŰ]"] = "U",
 		["[ùúũūȕȗű]"] = "u",
 		["[ỲÝỸȲ]"] = "Y",
 		["[ỳýỹȳ]"] = "y",
 		["Ǭ"] = "Ǫ",
 		["ǭ"] = "ǫ",
 		["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
 		["ĭ"] = "ь",
 		["ŭ"] = "ъ",
 	},
 },
 ["tts"] = {
 	name = "Isan", -- also "Northeastern Thai"
 	article = "Isan language",
 },
 ["tzo"] = {
 	name = "Tzotzil",
 	article = "Tzotzil language",
 },
 ["ug"] = {
 	name = "Uyghur", -- also less commonly "Uighur"
 	article = "Uyghur language",
 },
 ["uk"] = {
 	article = "Ukrainian language",
 	-- The combining acute accent is replaced by the empty string.
 	replacements = {
 		[acute] = "",
 	},
 },
 ["ur"] = {
 	name = "Urdu",
 	article = "Urdu",
 },
 ["xcl"] = {
 	name = "Old Armenian",
 	article = "Classical Armenian",
 	replacements = {
 		-- The four listed Armenian marks are removed.
 		["[՞՜՛՟]"] = "",
 		-- Expand the single-character ligature U+0587 into its two letters.
 		-- The key is given as a code point: written as a literal it had been
 		-- folded into the same two letters as the value, making this
 		-- substitution a no-op.
 		[U(0x587)] = "եւ",
 	},
 },
 ["xgf"] = {
 	name = "Tongva", -- not ISO name "Gabrielino-Fernandeño"
 	article = "Tongva language",
 	-- Apostrophe, backtick, and ʔ are all replaced by ʼ.
 	replacements = {
 		["['`ʔ]"] = "ʼ",
 	},
 },
 ["xlu"] = {
 	name = "Luwian", -- not ISO name "Cuneiform Luwian"
 	article = "Cuneiform Luwian",
 },
 ["xpq"] = {
 	name = "Mohegan-Pequot",
 },
 ["xxt"] = {
 	name = "Tambora",
 	article = "Tambora language",
 },
 ["xvn"] = {
 	name = "Vandalic",
 	article = "Vandalic language",
 },
 ["yua"] = {
 	name = "Yucatec Maya",
 	article = "Yucatec Maya language",
 },
 ["zh"] = {
 	name = "Chinese",
 	article = "Chinese language",
 	-- ["scripts"] = { "Hani" },
 },
 ["zle-ort"] = {
 	name = "Old Ruthenian",
 	article = "Old Ruthenian",
 	-- The combining acute accent is replaced by the empty string.
 	replacements = {
 		[acute] = "",
 	},
 },
 },

 -- Maps Wikipedia language codes, as used in {{lang}} (the keys, such as
 -- "gem"), to the equivalent Wiktionary code (the values, such as "gem-pro").
 -- Subtags are not currently supported.
 ["redirects"] = {
 	["aae"] = "sq",
 	["aiq"] = "fa",
 	["aln"] = "sq",
 	["als"] = "sq",
 	["azb"] = "az",
 	["azj"] = "az",
 	["bgn"] = "bal",
 	["bs"] = "sh",
 	["bxr"] = "bua",
 	["ciw"] = "oj",
 	["cnr"] = "sh",
 	["fil"] = "tl",
 	["fuf"] = "ff",
 	["gem"] = "gem-pro", -- Not correct, but is commonly used.
 	["hak"] = "zh",
 	["hbo"] = "he",
 	["hr"] = "sh",
 	["ine"] = "ine-pro", -- Not correct, but might be commonly used.
 	["kjv"] = "sh",
 	["nan"] = "zh",
 	["prs"] = "fa",
 	["rn"] = "rw",
 	["sli"] = "gmw-ecg",
 	["sr"] = "sh",
 	["src"] = "sc",
 	["sro"] = "sc",
 	["tw"] = "ak",
 	["wae"] = "gsw",
 	["wep"] = "nds-de",
 	["xno"] = "fro",
 	["yue"] = "zh",
 },
 }

 -- Export the data table as this module's value. The keyword must be
 -- separated from the variable: "returndata" is a bare identifier, which is
 -- not a valid Lua statement.
 return data
Retrieved from "https://en.wikipedia.org/w/index.php?title=Module:Wikt-lang/data&oldid=1316726834"