Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7459d35

Browse files
Updated processing of Old Czech data.
1 parent: 0abec00; commit: 7459d35

File tree

3 files changed

+15
-6
lines changed

3 files changed

+15
-6
lines changed

‎udapi/block/ud/cs/addmwt.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ def multiword_analysis(self, node):
126126
# could be masculine or neuter. We pick Gender=Masc and Animacy=Anim
127127
# by default, unless the original token was annotated as Animacy=Inan
128128
# or Gender=Neut.
129-
m = re.match(r"^(na|o|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower())
129+
m = re.match(r"^(na|nade|o|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower())
130130
if m:
131131
node.misc['AddMwt'] = ''
132132
# Remove vocalization from 'přěde' (přěd něj) but keep it in 'skrze'

‎udapi/block/ud/cs/markfeatsbugs.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,8 @@ def process_node(self, node):
3737
'Gender': ['Neut'],
3838
'Number': ['Sing', 'Dual', 'Plur'],
3939
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
40-
'Foreign': ['Yes']
40+
'Foreign': ['Yes'],
41+
'Abbr': ['Yes']
4142
})
4243
elif node.feats['Gender'] == 'Masc':
4344
self.check_required_features(node, ['Animacy'])
@@ -46,13 +47,15 @@ def process_node(self, node):
4647
'Animacy': ['Anim', 'Inan'],
4748
'Number': ['Sing', 'Dual', 'Plur'],
4849
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
49-
'Foreign': ['Yes']})
50+
'Foreign': ['Yes'],
51+
'Abbr': ['Yes']})
5052
else:
5153
self.check_allowed_features(node, {
5254
'Gender': ['Masc', 'Fem', 'Neut'],
5355
'Number': ['Sing', 'Dual', 'Plur'],
5456
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
55-
'Foreign': ['Yes']})
57+
'Foreign': ['Yes'],
58+
'Abbr': ['Yes']})
5659
# PROPER NOUNS #########################################################
5760
elif node.upos == 'PROPN':
5861
self.check_required_features(node, ['Gender', 'Number', 'Case'])
@@ -64,14 +67,16 @@ def process_node(self, node):
6467
'Number': ['Sing', 'Dual', 'Plur'],
6568
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
6669
'NameType': ['Giv', 'Sur', 'Geo', 'Nat'],
67-
'Foreign': ['Yes']})
70+
'Foreign': ['Yes'],
71+
'Abbr': ['Yes']})
6872
else:
6973
self.check_allowed_features(node, {
7074
'Gender': ['Masc', 'Fem', 'Neut'],
7175
'Number': ['Sing', 'Dual', 'Plur'],
7276
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
7377
'NameType': ['Giv', 'Sur', 'Geo', 'Nat'],
74-
'Foreign': ['Yes']})
78+
'Foreign': ['Yes'],
79+
'Abbr': ['Yes']})
7580
# ADJECTIVES ###########################################################
7681
elif node.upos == 'ADJ':
7782
if node.feats['Poss'] == 'Yes': # possessive adjectives

‎udapi/block/ud/fixadvmodbyupos.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,8 @@ def process_node(self, node):
5151
elif node.udeprel == 'mark':
5252
if node.upos in ['PRON', 'DET']:
5353
node.deprel = 'nsubj' # it could be also obj, iobj, obl or nmod; just guessing what might be more probable
54+
elif node.upos == 'NOUN':
55+
node.deprel = 'obl'
5456
elif node.upos == 'INTJ':
5557
node.deprel = 'discourse'
5658
elif node.udeprel == 'cc':
@@ -71,6 +73,8 @@ def process_node(self, node):
7173
node.deprel = 'aux'
7274
elif node.upos == 'VERB':
7375
node.deprel = 'dep'
76+
elif node.upos == 'SCONJ':
77+
node.deprel = 'mark'
7478
elif node.upos == 'X':
7579
node.deprel = 'dep'
7680
elif node.udeprel == 'nummod':

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /