Module:Sandbox/AbstractWikipedia/Functions
Appearance
From Meta, a Wikimedia project coordination wiki
This is the functions module of the Abstract Wikipedia template-renderer prototype.
You can define here new functions to be used in the template language. A function invoked directly in a template's slot should return a lexeme
as defined in the lexemes module.
Some remarks:
- You can rely on the Wikidata module to conveniently fetch items and lexemes from Wikidata.
- The TemplateEvaluator module allows you to define sub-templates as functions (see the QuantifiedNoun function below as an example).
- You can define language-specific functions and implementations in language submodules, e.g. Module:Sandbox/AbstractWikipedia/Functions/en for English.
Current language-specific implementations:
The above documentation is transcluded from Module:Sandbox/AbstractWikipedia/Functions/doc. (edit | history)
Editors can experiment in this module’s sandbox (create | mirror) and testcases (create) pages.
Please add categories to the /doc subpage. Subpages of this module.
Editors can experiment in this module’s sandbox (create | mirror) and testcases (create) pages.
Please add categories to the /doc subpage. Subpages of this module.
local p = {}

local l = require("Module:Sandbox/AbstractWikipedia/Lexemes")
local te = require("Module:Sandbox/AbstractWikipedia/TemplateEvaluator")
local wd = require("Module:Sandbox/AbstractWikipedia/Wikidata")

-- It is assumed that the global variable "language" holds the language code
-- used for rendering. This assignment is intentionally global.
language = "he"

-- The following is a list of language-agnostic functions to be supported by the
-- template language. Language-specific implementations can be found in the
-- Module:Sandbox/AbstractWikipedia/Functions/xx where xx is the language code.

-- Construct a lexeme from a cardinal number.
-- @param number  the cardinal; assumed to be a string
-- @return a "numeral" lexeme carrying a "number" feature ("singular" when the
--         value is 1, "plural" otherwise) and a single form
function p.Cardinal(number)
    local result = l.newLexeme(number, "numeral")
    -- simple logic, only works for some languages
    if tonumber(number) == 1 then
        result.addFeature("number", "singular")
    else
        result.addFeature("number", "plural")
    end
    result.addForm(tostring(number), {})
    return result
end

-- General facility to convert text to a lexeme.
-- @param text       the text to wrap
-- @param text_type  optional lexeme type; defaults to "text"
-- @return the lexeme created by l.newLexeme
function p.TemplateText(text, text_type)
    text_type = text_type or "text"
    return l.newLexeme(text, text_type)
end

-- Helper function to expand a list of Q-id features and add them to a form
-- or lexeme (anything exposing an addFeature(category, feature) function).
-- @param form        the form or lexeme receiving the features
-- @param wdFeatures  sequence of feature ids, each expanded through
--                    wd.expandFeature into category/feature pairs
local function addFeaturesToForm(form, wdFeatures)
    for _, wdFeature in ipairs(wdFeatures) do
        local features_to_add = wd.expandFeature(wdFeature)
        for category, feature in pairs(features_to_add) do
            form.addFeature(category, feature)
        end
    end
end

-- Fetch the demonym data from an item, and construct an appropriate lexeme.
-- Raises an error when wd.getDemonyms returns an empty list.
-- @param q_id  id of the item whose demonyms are requested
-- @return an "adjective" lexeme built from the demonym forms, or the result of
--         p.Lexeme when the first demonym links to a lexeme
function p.Demonym(q_id)
    local demonyms = wd.getDemonyms(q_id)
    if #demonyms == 0 then
        error("No demonyms for "..q_id.." in language "..language)
        -- Use some fallback here?
    end
    -- Demonyms can be either specified by giving all their forms or by linking
    -- to a lexeme
    if demonyms[1].lexeme then
        return p.Lexeme(demonyms[1].lexeme)
    end
    local lexeme = l.newLexeme(demonyms[1].label, "adjective")
    for _, demonym in ipairs(demonyms) do
        local form = lexeme.addForm(demonym.label)
        addFeaturesToForm(form, demonym.features)
    end
    lexeme.log()
    return lexeme
end

-- Function to transform lexemes from Wikidata to the internal representation.
-- The extra arguments are Q-ids of features which should act as extra
-- constraints.
-- @param lexeme_id  id of the Wikidata lexeme to ingest
-- @param ...        feature ids added to the lexeme as constraints
-- @return the internal lexeme, with one form per Wikidata form that has a
--         representation in the language code returned by wd.getLemma
function p.Lexeme(lexeme_id, ...)
    local wdLexeme = mw.wikibase.getEntity(lexeme_id)
    local lemma, used_language = wd.getLemma(wdLexeme, lexeme_id)
    local lexeme = l.newLexeme(lemma, wd.getPOS(wdLexeme))
    local grammatical_gender = wd.getGrammaticalGender(lexeme_id)
    if grammatical_gender then
        lexeme.addFeature("gender", grammatical_gender)
    end
    -- More statements may need to be fetched here

    -- Add any extra constraints passed as arguments. The varargs are captured
    -- explicitly rather than through the deprecated implicit "arg" table.
    addFeaturesToForm(lexeme, {...})

    local forms = wdLexeme:getForms()
    for _, wdForm in ipairs(forms) do -- ingest forms
        -- We only want forms of one language code
        local spelling = wdForm:getRepresentation(used_language)
        if spelling then
            local form = lexeme.addForm(spelling, {})
            local wdFeatures = wdForm:getGrammaticalFeatures()
            addFeaturesToForm(form, wdFeatures)
        end
    end
    lexeme.log()
    return lexeme
end

-- Creates a compound lexeme out of two lexemes (typically nouns), which
-- should be the gendered versions of the same lemma (e.g. German Arzt/Ärztin).
-- Raises an error when the two lexemes differ in language code or
-- part-of-speech.
-- @param masculine_lexeme_id  id of the masculine Wikidata lexeme
-- @param feminine_lexeme_id   id of the feminine Wikidata lexeme
-- @param ...                  feature ids added to the lexeme as constraints
-- @return a lexeme whose lemma is "<masculine>/<feminine>" and which holds the
--         forms of both source lexemes
function p.GenderedLexeme(masculine_lexeme_id, feminine_lexeme_id, ...)
    local mascLexeme = mw.wikibase.getEntity(masculine_lexeme_id)
    local femLexeme = mw.wikibase.getEntity(feminine_lexeme_id)
    local mascLemma, used_language = wd.getLemma(mascLexeme, masculine_lexeme_id)
    local femLemma, used_language2 = wd.getLemma(femLexeme, feminine_lexeme_id)
    if used_language ~= used_language2 then
        error("Lexemes "..mascLemma.." and "..femLemma.." don't use the same language code")
    end
    local pos = wd.getPOS(mascLexeme)
    if pos ~= wd.getPOS(femLexeme) then
        error("Lexemes "..mascLemma.." and "..femLemma.." don't have the same part-of-speech")
    end
    local lexeme = l.newLexeme(mascLemma.."/"..femLemma, pos)

    -- Add any extra constraints passed as arguments. The varargs are captured
    -- explicitly rather than through the deprecated implicit "arg" table.
    addFeaturesToForm(lexeme, {...})

    -- NOTE(review): pairs() visits the two entries in an unspecified order, so
    -- the relative order of masculine and feminine forms is not guaranteed.
    for gender, wdLexeme in pairs{ masculine = mascLexeme, feminine = femLexeme } do
        local forms = wdLexeme:getForms()
        for _, wdForm in ipairs(forms) do -- ingest forms
            -- We only want forms of one language code
            local spelling = wdForm:getRepresentation(used_language)
            if spelling then
                local form = lexeme.addForm(spelling)
                local wdFeatures = wdForm:getGrammaticalFeatures()
                addFeaturesToForm(form, wdFeatures)
                -- Fall back to the source lexeme's gender when the form does
                -- not already carry a gender feature
                if not form.getFeatureIndex("gender") then
                    form.addFeature("gender", gender)
                end
            end
        end
    end
    lexeme.log()
    return lexeme
end

-- Constructs a lexeme corresponding to a person.
-- This populates the grammatical gender according to the social gender
-- and adds grammatical number "singular".
-- @param q_id  id of the item describing the person
-- @return a "noun" lexeme whose lemma is the item's label
function p.Person(q_id)
    local name = wd.getLabel(q_id)
    local result = l.newLexeme(name, "noun")
    result.addFeature("number", "singular")
    local gender = wd.getHumanGender(q_id)
    -- Handling of non-binary gender is language dependent and would have to
    -- be done in a language-specific implementation.
    if gender == "masculine" or gender == "feminine" then
        result.addFeature("gender", gender)
    end
    if wd.isDead(q_id) then
        -- We add a past tense feature for lexemes of dead people, as they are
        -- normally spoken about in the past tense. This can be exposed to the
        -- verb by using the "tsubj" relation.
        result.addFeature("nominal_tense", "past")
    end
    result.log()
    return result
end

-- Fetches the label of an entity.
-- To allow reverse look-up of lexemes from items, I have used the
-- "literal translation" property (P2441) qualified by "lexeme sense" (P7018)
-- to point to language-specific lexemes. See discussion in:
-- https://phabricator.wikimedia.org/T320263#8341702
-- These lexemes are stored in the gendered.unspecified.lexeme field (if no
-- gendered pairs are given in "male form of label" (P3321) or
-- "female form of label" (P2521)).
-- @param q_id  id of the item whose label is requested
-- @return a lexeme built from the linked lexeme(s) when available, otherwise
--         a "noun" lexeme built from the plain label(s)
function p.Label(q_id)
    -- We disable the following check, since it requires an expensive call
    --[[
    if wd.isHuman(q_id) then
        return p.Person(q_id)
    end
    ]]--
    local gendered = wd.getGenderedLabels(q_id)
    if gendered.male.lexeme and gendered.female.lexeme then
        return p.GenderedLexeme(gendered.male.lexeme, gendered.female.lexeme)
    elseif gendered.male.lexeme or gendered.female.lexeme then
        return p.Lexeme(gendered.male.lexeme or gendered.female.lexeme)
    elseif gendered.unspecified.lexeme then
        return p.Lexeme(gendered.unspecified.lexeme)
    end

    -- No lexeme is linked: fall back to the plain label, adding the gendered
    -- labels (if any) as forms.
    local lexeme = l.newLexeme(wd.getLabel(q_id), "noun")
    if gendered.male.label then
        lexeme.addForm(gendered.male.label, {"gender", "masculine"})
    end
    if gendered.female.label then
        lexeme.addForm(gendered.female.label, {"gender", "feminine"})
    end
    return lexeme
end

-- Example of the use of a sub-template as a function.
-- @param num   value bound to "num" in the sub-template
-- @param noun  value bound to "noun" in the sub-template
-- @return the result of te.evaluateTemplate
function p.QuantifiedNoun(num, noun)
    return te.evaluateTemplate("{nummod:Cardinal(num)} {root:noun}", { num = num, noun = noun })
end

-- Invokes either the Person function or the Pronoun function, depending on
-- pronominalize (which is compared against the string "true").
-- If neither a q_id nor pronominalize are set, it will return an empty dummy
-- noun.
-- NOTE(review): "functions" is not defined in this module; presumably it is a
-- global table holding the merged (including language-specific) function set,
-- which would also provide Pronoun -- confirm against the caller.
-- @param q_id           item id (a string starting with "Q" and digits), or unset
-- @param pronominalize  "true" to request a pronoun
function p.PersonOrPronoun(q_id, pronominalize)
    -- Guard against an unset q_id so that the documented fallbacks are
    -- reachable instead of raising on q_id:match
    if type(q_id) == "string" and q_id:match("^Q%d+") then
        if pronominalize == "true" then
            return functions.Pronoun(q_id)
        else
            return functions.Person(q_id)
        end
    elseif pronominalize == "true" then
        return functions.Pronoun()
    else
        return l.newLexeme("", "noun")
    end
end

-- Generates a generic date expression of the form day.month.year
-- Components that are missing or out of range are left out.
-- TODO: This should really use CLDR to get a language-specific expression
-- @param date  table with optional day, month and year fields
-- @return a "noun" lexeme whose lemma is the dot-separated date
function p.Date(date)
    local elements = {}
    -- tonumber yields nil for non-numeric input (e.g. an empty string), which
    -- must not be compared against a number
    local day = tonumber(date.day)
    if date.day and day and day > 0 then
        table.insert(elements, tostring(date.day))
    end
    local month = tonumber(date.month)
    if date.month and month and month > 0 and month <= 12 then
        table.insert(elements, tostring(date.month))
    end
    if date.year and tonumber(date.year) ~= 0 then
        table.insert(elements, tostring(date.year))
    end
    local result = table.concat(elements, '.')
    return l.newLexeme(result, "noun")
end

-- Generates a generic ordinal expression: the number followed by a dot.
-- TODO: This should really use CLDR to get a language-specific expression
-- @param number  the ordinal's numeric value
-- @return an "adjective" lexeme
function p.Ordinal(number)
    return l.newLexeme(tostring(number)..'.', "adjective")
end

return p