Module:Sandbox/AbstractWikipedia/Lexemes
Appearance
From Meta, a Wikimedia project coordination wiki
This is the lexemes module of the Abstract Wikipedia template-renderer prototype.
It defines the internal lexeme datatype and various methods to manipulate it. This type is based on the lexeme type discussed in the Abstract Wikipedia template proposal.
The handling of the lexemes' features is done through the UnifiableFeatures module.
The above documentation is transcluded from Module:Sandbox/AbstractWikipedia/Lexemes/doc. (edit | history)
Editors can experiment in this module’s sandbox (create | mirror) and testcases (create) pages.
Please add categories to the /doc subpage. Subpages of this module.
Editors can experiment in this module’s sandbox (create | mirror) and testcases (create) pages.
Please add categories to the /doc subpage. Subpages of this module.
local p = {}

local uf = require("Module:Sandbox/AbstractWikipedia/UnifiableFeatures")
local gf = require("Module:Sandbox/AbstractWikipedia/GrammaticalFeatures")

-- Creates a list of features from a given table of category-feature pairs.
-- The features themselves live in the UnifiableFeatures module; this list
-- only maps each category to the index returned by uf.createNewFeature.
-- @param new_features table of category -> feature pairs
-- @return a table of accessor functions (the "features API") closing over
--         the private category -> index map
local function featureList ( new_features )
	local features = {}
	local features_api = {}

	-- Verify that category hasn't been used already.
	-- Error level 3 reports the error at the caller of the API function
	-- (addFeature / setFeatureIndex) which invoked this check.
	local function verifyNewCategory ( category )
		if features[category] then
			error ("Category "..category.." already exists", 3)
		end
	end

	-- Adds a new feature of a certain category, returns the index
	function features_api.addFeature ( category, feature )
		verifyNewCategory(category)
		features[category] = uf.createNewFeature(feature)
		return features[category]
	end

	-- Returns the feature stored for the category, as resolved by
	-- uf.getFeature on the stored index.
	function features_api.getFeature(category)
		return uf.getFeature(features[category])
	end

	-- Returns the raw index stored for the category (nil if absent)
	function features_api.getFeatureIndex(category)
		return features[category]
	end

	-- Iterates over (category, index) pairs; order is unspecified
	function features_api.featureIterator()
		return pairs(features)
	end

	-- Returns the number of categories in the list
	function features_api.numFeatures()
		-- This could probably be stored in the table itself
		local count = 0
		for _ in pairs(features) do
			count = count + 1
		end
		return count
	end

	-- Sets a feature of a new category to an existing value (indexed by index)
	function features_api.setFeatureIndex ( category, index )
		verifyNewCategory(category)
		if (not uf.getFeature(index)) then
			-- tostring keeps the intended message even when index is nil
			error ("Index "..tostring(index).." points to inexistent feature.")
		end
		features[category] = index
	end

	-- for debugging purposes
	function features_api.listFeatures ()
		for category, index in pairs(features) do
			mw.log(category.." == "..tostring(uf.getFeature(index)))
		end
	end

	for category, feature in pairs(new_features) do
		features_api.addFeature(category, feature)
	end

	return features_api
end

-- Creates a form: a spelling together with its own feature list.
-- The feature-list functions are reachable directly on the form through
-- the __index metamethod; tostring(form) yields the spelling.
local function newForm ( spelling, new_features )
	local form = { spelling = spelling, features = featureList(new_features) }

	-- Named so that it does not shadow the built-in tostring
	local function formToString(self)
		return self.spelling
	end

	setmetatable(form, { __index = form.features, __tostring = formToString })

	-- for debugging purposes
	function form.log ( index )
		index = index or ""
		mw.log("Form "..index..": '"..form.spelling.."'")
		form.features.listFeatures()
	end

	return form
end

-- Creates a new lexeme.
-- @param lemma the lemma of the lexeme
-- @param part_of_speech stored in the `pos` field
-- @param new_features optional table of category -> feature pairs
-- @return the lexeme table; tostring(lexeme) yields the spelling of the
--         first form if there is one, otherwise the lemma
function p.newLexeme ( lemma, part_of_speech, new_features )
	local lexeme = {
		lemma = lemma,
		pos = part_of_speech,
		features = featureList (new_features or {}),
		forms = {},
	}

	-- Named so that it does not shadow the built-in tostring
	local function lexemeToString(self)
		if (#self.forms > 0) then
			return self.forms[1].spelling
		else
			return self.lemma
		end
	end

	-- feature functions are accessible at the lexeme level for convenience
	setmetatable(lexeme, { __index = lexeme.features, __tostring = lexemeToString })

	-- Adds a new form and returns it
	function lexeme.addForm ( spelling, form_features )
		form_features = form_features or {}
		local form = newForm (spelling, form_features)
		table.insert(lexeme.forms, form)
		return form
	end

	-- Clears all forms and optionally creates a new single form.
	-- This is handy when we want to overwrite the existing forms
	function lexeme.replaceByForm ( new_single_form )
		lexeme.forms = {}
		if new_single_form then
			lexeme.addForm(new_single_form)
		end
	end

	-- Sorts the forms according to gf.cannonical_order
	function lexeme.sortForms()
		local function compare_forms (form1, form2)
			for _, category_order in ipairs(gf.cannonical_order) do
				local category = category_order.category
				local rank1 = category_order[form1.getFeature(category)]
				local rank2 = category_order[form2.getFeature(category)]
				if rank1 ~= rank2 then
					-- A form with a rank sorts before a form without one
					if not rank2 then
						return true
					elseif not rank1 then
						return false
					else
						return rank1 < rank2
					end
				end
			end
			-- If all canonical features are equal, prefer the form with
			-- fewer features overall as smaller
			return (form1.numFeatures() < form2.numFeatures())
		end
		table.sort(lexeme.forms, compare_forms)
	end

	-- This function removes the forms which don't match the general lexeme
	-- features/constraints.
	-- Returns the number of forms which are kept
	function lexeme.filterForms()
		-- local: must not leak into the global environment
		local new_forms = {}
		-- Iterate on forms
		for _, form in ipairs(lexeme.forms) do
			local keep_form = true
			-- Iterate on lexeme constraints
			for category, index in lexeme.featureIterator() do
				-- Note that if the form lacks the category, it can be kept
				local form_feature_index = form.getFeatureIndex(category)
				-- In a more strict mode, we should require the form features
				-- to strictly subsume the constraints.
				if (form_feature_index and not uf.unifiable(form_feature_index, index)) then
					mw.log("Discard form "..form.spelling.." due to mismatch with feature '"..tostring(uf.getFeature(index)).."' of category "..category)
					keep_form = false
					break
				end
			end
			if keep_form then
				mw.log("Keeping form "..form.spelling)
				table.insert(new_forms, form)
			end
		end
		lexeme.forms = new_forms
		return #new_forms
	end

	-- for debugging purposes
	function lexeme.log ()
		-- Read the current field rather than the constructor argument, so
		-- the log agrees with what tostring(lexeme) falls back to
		mw.log("Lemma: "..lexeme.lemma.." ("..lexeme.pos..")")
		lexeme.features.listFeatures()
		-- forms is maintained as a sequence, so log it in order
		for index, form in ipairs(lexeme.forms) do
			form.log(index)
		end
	end

	return lexeme
end

-- Unify features of given categories in two lexemes.
-- @param category1 category to look up in lexeme1
-- @param lexeme1 first lexeme
-- @param lexeme2 second lexeme
-- @param category2 category to look up in lexeme2 (defaults to category1)
-- @return '' when both features were absent, the existing feature when only
--         one was present, otherwise the result of uf.unify
-- Raises an error (reported at the caller) if uf.unify returns nil.
function p.unifyFeatures ( category1, lexeme1, lexeme2, category2 )
	-- Unify the same category across both lexemes, if only one is provided
	category2 = category2 or category1
	local index1 = lexeme1.getFeatureIndex(category1)
	local index2 = lexeme2.getFeatureIndex(category2)

	if (not index1 and not index2) then
		-- unification of two empty features
		-- in order to unify them, we have to create a new feature
		index1 = lexeme1.addFeature(category1, '')
		lexeme2.setFeatureIndex(category2, index1)
		return ''
	elseif (not index2) then
		-- point lexeme2's feature to lexeme1's
		lexeme2.setFeatureIndex(category2, index1)
		return uf.getFeature(index1)
	elseif (not index1) then
		-- and conversely
		lexeme1.setFeatureIndex(category1, index2)
		return uf.getFeature(index2)
	else
		-- unify the two features
		local result = uf.unify(index1, index2)
		if (result == nil) then
			error ("Features "..tostring(uf.getFeature(index1)).." and "..tostring(uf.getFeature(index2)).." are not unifiable", 2)
		end
		return result
	end
end

-- Unify a feature of a given category of a lexeme with a new feature.
-- If the lexeme has no feature of that category, the feature is simply
-- added and returned; otherwise the result of uf.unifyWithFeature is
-- returned. Raises an error (reported at the caller) if that result is nil.
function p.unifyWithFeature ( category, lexeme, feature )
	local index = lexeme.getFeatureIndex(category)
	if (not index) then
		-- create feature
		lexeme.addFeature(category, feature)
		return feature
	end
	local result = uf.unifyWithFeature(index, feature)
	if (result == nil) then
		error ("Features "..tostring(uf.getFeature(index)).." and "..tostring(feature).." are not unifiable", 2)
	end
	return result
end

return p