Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 55ff618

Browse files
Fixed semgrex/tokensregex to load annotators before executing; tests pass
1 parent 45d172b commit 55ff618

File tree

2 files changed

+46
-41
lines changed

2 files changed

+46
-41
lines changed

‎corenlp/client.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -203,15 +203,15 @@ def update(self, doc, annotators=None, properties=None):
203203

204204
def tokensregex(self, text, pattern, filter=False, to_words=False):
205205
matches = self.__regex('/tokensregex', text, pattern, filter)
206-
if not to_words:
207-
return matches
208-
return regex_matches_to_indexed_words(matches)
206+
if to_words:
207+
matches = regex_matches_to_indexed_words(matches)
208+
return matches
209209

210210
def semgrex(self, text, pattern, filter=False, to_words=False):
211211
matches = self.__regex('/semgrex', text, pattern, filter)
212-
if not to_words:
213-
return matches
214-
return regex_matches_to_indexed_words(matches)
212+
if to_words:
213+
matches = regex_matches_to_indexed_words(matches)
214+
return matches
215215

216216
def tregrex(self, text, pattern, filter=False):
217217
return self.__regex('/tregex', text, pattern, filter)
@@ -224,17 +224,28 @@ def __regex(self, path, text, pattern, filter):
224224
:param (bool) filter: option to filter sentences that contain matches, if false returns matches
225225
:return: request result
226226
"""
227-
r = requests.get(
228-
self.endpoint + path, params={
229-
'pattern': pattern,
230-
'filter': filter,
231-
}, data=text)
232-
output = r.text
227+
self.ensure_alive()
228+
229+
# HACK: For some stupid reason, CoreNLPServer will timeout if we
230+
# need to annotate something from scratch. So, we need to call
231+
# this to ensure that the _regex call doesn't timeout.
232+
self.annotate(text)
233+
233234
try:
234-
output = json.loads(r.text)
235-
except:
236-
pass
237-
return output
235+
r = requests.get(
236+
self.endpoint + path, params={
237+
'pattern': pattern,
238+
'filter': filter,
239+
}, data=text)
240+
r.raise_for_status()
241+
return json.loads(r.text)
242+
except requests.HTTPError as e:
243+
if r.text.startswith("Timeout"):
244+
raise TimeoutException(r.text)
245+
else:
246+
raise AnnotationException(r.text)
247+
except json.JSONDecodeError:
248+
raise AnnotationException(r.text)
238249

239250
def regex_matches_to_indexed_words(matches):
240251
"""Transforms tokensregex and semgrex matches to indexed words.

‎tests/test_client.py

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -33,39 +33,33 @@ def test_tokensregex():
3333
"sentences": [{
3434
"0": {
3535
"text": "Chris wrote a simple sentence",
36-
"begin": 1,
37-
"end": 6,
36+
"begin": 0,
37+
"end": 5,
3838
"1": {
3939
"text": "Chris",
40-
"begin": 1,
41-
"end": 2
40+
"begin": 0,
41+
"end": 1
4242
}},
4343
"length": 1
4444
},]}
4545

4646
def test_semgrex():
47-
with corenlp.CoreNLPClient(annotators='tokenize ssplit depparse'.split()) as client:
47+
with corenlp.CoreNLPClient(annotators='tokenize ssplit pos lemma ner depparse'.split()) as client:
4848
pattern = '{word:wrote} >nsubj {}=subject >dobj {}=object'
4949
matches = client.semgrex(TEXT, pattern, to_words=True)
50-
assert matches == {
51-
"sentences": [
50+
assert matches == [
5251
{
53-
"0": {
54-
"text": "wrote",
55-
"begin": 1,
56-
"end": 2,
57-
"$subject": {
58-
"text": "Chris",
59-
"begin": 0,
60-
"end": 1
61-
},
62-
"$object": {
63-
"text": "sentence",
64-
"begin": 4,
65-
"end": 5
66-
}
52+
"text": "wrote",
53+
"begin": 1,
54+
"end": 2,
55+
"$subject": {
56+
"text": "Chris",
57+
"begin": 0,
58+
"end": 1
59+
},
60+
"$object": {
61+
"text": "sentence",
62+
"begin": 4,
63+
"end": 5
6764
},
68-
"length": 1
69-
}
70-
]
71-
}
65+
"sentence": 0,}]

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /