Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ac391d5

Browse files
Improve handling of negation detection in Slavic blocks
1 parent 7459d35 commit ac391d5

File tree

9 files changed

+285
-143
lines changed

9 files changed

+285
-143
lines changed

‎udapi/block/msf/phrase.py

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,31 @@ def process_node(self, node):
3030
'animacy':'PhraseAnimacy',
3131
'ords':'Phrase'
3232
}
33+
34+
# a dictionary where the key is the lemma of a negative particle and the value is a list of the lemmas of their possible children that have a 'fixed' relation
35+
# we do not want to include these negative particles in the phrase; these are expressions like "never", etc.
36+
negation_fixed = {
37+
# Belarusian
38+
'ні' : ['раз'],
39+
'ня' : ['толькі'],
40+
41+
# Upper Sorbian
42+
'nic' : ['naposledku'],
43+
44+
# Polish
45+
'nie' : ['mało'],
46+
47+
# Pomak
48+
'néma' : ['kak'],
49+
50+
# Slovenian
51+
'ne' : ['le'],
52+
53+
# Russian and Old East Slavic
54+
'не' : ['то', 'токмо'],
55+
'ни' : ['в', 'раз', 'шатко'],
56+
'нет' : ['нет']
57+
}
3358

3459
def write_node_info(self, node,
3560
tense = None,
@@ -51,12 +76,46 @@ def write_node_info(self, node,
5176
if val != None:
5277
node.misc[self.dictionary[key]] = val
5378

54-
def get_polarity(self, node, neg):
55-
if node.feats['Polarity'] != "":
56-
return node.feats['Polarity']
57-
if len(neg) == 0:
58-
return None
59-
return 'Neg'
79+
def has_fixed_children(self, node):
80+
"""
81+
Returns True if the node has children with the 'fixed' relation and the lemma of the first such child is listed in self.negation_fixed under the node's lemma.
82+
"""
83+
fixed_children = [x for x in node.children if x.udeprel == 'fixed']
84+
85+
if fixed_children:
86+
if fixed_children[0].lemma in self.negation_fixed.get(node.lemma, []):
87+
return True
88+
return False
89+
90+
def get_polarity(self, nodes):
91+
"""
92+
Returns 'Neg' if there is exactly one node with Polarity='Neg' among the given nodes.
93+
Returns an empty string if there are zero or more than one such nodes.
94+
"""
95+
neg_count = 0
96+
for node in nodes:
97+
if node.feats['Polarity'] == 'Neg':
98+
neg_count += 1
99+
100+
if neg_count == 1:
101+
return 'Neg'
102+
103+
# neg_count is either zero or greater than one; in both cases we return an empty string so that the PhrasePolarity attribute is not generated
104+
else:
105+
return ''
106+
107+
def get_negative_particles(self, nodes):
108+
"""
109+
Returns a list of all negative particles found among the children
110+
of the specified nodes, except for negative particles with fixed children specified in self.negation_fixed.
111+
"""
112+
neg_particles = []
113+
for node in nodes:
114+
neg = [x for x in node.children if x.upos == 'PART' and x.feats['Polarity'] == 'Neg' and x.udeprel == 'advmod' and not self.has_fixed_children(x)]
115+
if neg:
116+
neg_particles += neg
117+
return neg_particles
118+
60119

61120
def get_is_reflex(self,node,refl):
62121
if node.feats['Voice'] == 'Mid':
@@ -75,4 +134,4 @@ def get_voice(self,node,refl):
75134
if self.is_expl_pass(refl):
76135
return 'Pass'
77136
return voice
78-
137+

‎udapi/block/msf/slavic/conditional.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,16 @@ def process_node(self, node):
1919
# the conditional mood can be formed using the auxiliary verb or some conjunctions (such as 'aby, kdyby...' in Czech)
2020
# so x.udeprel == 'aux' can't be required, because the conjunctions would not satisfy it
2121

22-
if len(aux_cnd) > 0 and len(cop) == 0:
22+
if aux_cnd and not cop:
2323
aux = [x for x in node.children if x.udeprel == 'aux' or x.feats['Mood'] == 'Cnd'] # all auxiliary verbs and conjunctions with feats['Mood'] == 'Cnd'
2424
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
25-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
25+
26+
phrase_nodes = [node] + aux + refl
2627

27-
phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in refl] + [x.ord for x in neg]
28+
neg = self.get_negative_particles(phrase_nodes)
29+
phrase_nodes += neg
30+
31+
phrase_ords = [x.ord for x in phrase_nodes]
2832
phrase_ords.sort()
2933

3034
auxVerb = aux_cnd[0]
@@ -41,7 +45,7 @@ def process_node(self, node):
4145
form='Fin',
4246
aspect=node.feats['Aspect'],
4347
reflex=self.get_is_reflex(node,refl),
44-
polarity=self.get_polarity(node,neg),
48+
polarity=self.get_polarity(phrase_nodes),
4549
voice=self.get_voice(node, refl),
4650
ords=phrase_ords,
4751
gender=node.feats['Gender'],
@@ -53,15 +57,18 @@ def process_node(self, node):
5357
cop = [x for x in node.children if x.udeprel == 'cop' and (x.feats['VerbForm'] == 'Part' or x.feats['VerbForm'] == 'Fin')]
5458
aux_cnd = [x for x in node.children if x.feats['Mood'] == 'Cnd' or x.deprel=='aux:cnd']
5559

56-
if len(cop) > 0 and len(aux_cnd) > 0:
60+
if cop and aux_cnd:
5761
# there can be a copula with Mood='Cnd' (i.e. in Old East Slavonic); we don't want to count such copulas in phrase_ords twice, so the aux list requires x.udeprel != 'cop'
5862
aux = [x for x in node.children if (x.udeprel == 'aux' or x.feats['Mood'] == 'Cnd') and x.udeprel != 'cop']
59-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
6063
prep = [x for x in node.children if x.upos == 'ADP']
6164
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
6265

66+
phrase_nodes = [node] + aux + prep + refl + cop
67+
neg = self.get_negative_particles(phrase_nodes)
68+
phrase_nodes += neg
69+
6370
copVerb = cop[0]
64-
phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in cop] + [x.ord for x in neg] + [x.ord for x in prep] + [x.ord for x in refl]
71+
phrase_ords = [x.ord for x in phrase_nodes]
6572
phrase_ords.sort()
6673
self.write_node_info(node,
6774
aspect=copVerb.feats['Aspect'],
@@ -70,9 +77,9 @@ def process_node(self, node):
7077
mood='Cnd',
7178
form='Fin',
7279
voice=self.get_voice(copVerb, refl),
73-
polarity=self.get_polarity(copVerb,neg),
80+
polarity=self.get_polarity(phrase_nodes),
7481
reflex=self.get_is_reflex(node, refl),
7582
ords=phrase_ords,
7683
gender=copVerb.feats['Gender'],
7784
animacy=copVerb.feats['Animacy']
78-
)
85+
)

‎udapi/block/msf/slavic/converb.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@ def process_node(self, node):
1212
# condition node.upos == 'VERB' to prevent copulas from entering this branch
1313
if node.feats['VerbForm'] == 'Conv' and node.upos == 'VERB':
1414
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
15-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
16-
17-
phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg]
15+
16+
phrase_nodes = [node] + refl
17+
neg = self.get_negative_particles(phrase_nodes)
18+
phrase_nodes += neg
19+
20+
phrase_ords = [x.ord for x in phrase_nodes]
1821
phrase_ords.sort()
1922

2023
self.write_node_info(node,
@@ -23,7 +26,7 @@ def process_node(self, node):
2326
form='Conv',
2427
tense=node.feats['Tense'],
2528
aspect=node.feats['Aspect'],
26-
polarity=self.get_polarity(node,neg),
29+
polarity=self.get_polarity(phrase_nodes),
2730
reflex=self.get_is_reflex(node,refl),
2831
ords=phrase_ords,
2932
gender=node.feats['Gender'],
@@ -35,10 +38,13 @@ def process_node(self, node):
3538
elif node.upos == 'ADJ':
3639
aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['VerbForm'] == 'Conv']
3740

38-
if len(aux) > 0:
39-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
41+
if aux:
4042
auxVerb = aux[0]
41-
phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in neg]
43+
44+
phrase_nodes = [node] + aux
45+
neg = self.get_negative_particles(phrase_nodes)
46+
phrase_nodes += neg
47+
phrase_ords = [x.ord for x in phrase_nodes]
4248
phrase_ords.sort()
4349

4450
self.write_node_info(node,
@@ -47,7 +53,7 @@ def process_node(self, node):
4753
form='Conv',
4854
tense=auxVerb.feats['Tense'],
4955
aspect=node.feats['Aspect'],
50-
polarity=self.get_polarity(auxVerb,neg),
56+
polarity=self.get_polarity(phrase_nodes),
5157
ords=phrase_ords,
5258
gender=auxVerb.feats['Gender'],
5359
animacy=auxVerb.feats['Animacy'],
@@ -58,13 +64,16 @@ def process_node(self, node):
5864
else:
5965
cop = [x for x in node.children if x.udeprel == 'cop' and x.feats['VerbForm'] == 'Conv']
6066

61-
if len(cop) > 0:
67+
if cop:
6268
prep = [x for x in node.children if x.upos == 'ADP']
63-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
6469
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
6570

6671
copVerb = cop[0]
67-
phrase_ords = [node.ord] + [x.ord for x in cop] + [x.ord for x in prep] + [x.ord for x in neg] + [x.ord for x in refl]
72+
73+
phrase_nodes = [node] + cop + prep + refl
74+
neg = self.get_negative_particles(phrase_nodes)
75+
phrase_nodes += neg
76+
phrase_ords = [x.ord for x in phrase_nodes]
6877
phrase_ords.sort()
6978

7079

@@ -76,7 +85,7 @@ def process_node(self, node):
7685
gender=copVerb.feats['Gender'],
7786
animacy=copVerb.feats['Animacy'],
7887
form='Conv',
79-
polarity=self.get_polarity(node,neg),
88+
polarity=self.get_polarity(phrase_nodes),
8089
ords=phrase_ords,
8190
voice=self.get_voice(copVerb, refl)
8291
)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /