Commit 55ff618

committed

Fixed semgrex/tokensregex to load annotators before executing; tests pass

1 parent 45d172b commit 55ff618 (Copy full SHA for 55ff618)

File tree

2 files changed

+46

-41

lines changed

corenlp
- client.py
tests
- test_client.py

2 files changed

+46

-41

lines changed

`‎corenlp/client.py`

Lines changed: 27 additions & 16 deletions

Original file line number	Diff line number	Diff line change
`@@ -203,15 +203,15 @@ def update(self, doc, annotators=None, properties=None):`
`203`	`203`
`204`	`204`	`def tokensregex(self, text, pattern, filter=False, to_words=False):`
`205`	`205`	`matches = self.__regex('/tokensregex', text, pattern, filter)`
`206`		`- if not to_words:`
`207`		`- return matches`
`208`		`- return regex_matches_to_indexed_words(matches)`
	`206`	`+ if to_words:`
	`207`	`+ matches = regex_matches_to_indexed_words(matches)`
	`208`	`+ return matches`
`209`	`209`
`210`	`210`	`def semgrex(self, text, pattern, filter=False, to_words=False):`
`211`	`211`	`matches = self.__regex('/semgrex', text, pattern, filter)`
`212`		`- if not to_words:`
`213`		`- return matches`
`214`		`- return regex_matches_to_indexed_words(matches)`
	`212`	`+ if to_words:`
	`213`	`+ matches = regex_matches_to_indexed_words(matches)`
	`214`	`+ return matches`
`215`	`215`
`216`	`216`	`def tregrex(self, text, pattern, filter=False):`
`217`	`217`	`return self.__regex('/tregex', text, pattern, filter)`
`@@ -224,17 +224,28 @@ def __regex(self, path, text, pattern, filter):`
`224`	`224`	`:param (bool) filter: option to filter sentences that contain matches, if false returns matches`
`225`	`225`	`:return: request result`
`226`	`226`	`"""`
`227`		`- r = requests.get(`
`228`		`- self.endpoint + path, params={`
`229`		`- 'pattern': pattern,`
`230`		`- 'filter': filter,`
`231`		`- }, data=text)`
`232`		`- output = r.text`
	`227`	`+ self.ensure_alive()`
	`228`	`+`
	`229`	`+ # HACK: For some stupid reason, CoreNLPServer will timeout if we`
	`230`	`+ # need to annotate something from scratch. So, we need to call`
	`231`	`+ # this to ensure that the _regex call doesn't timeout.`
	`232`	`+ self.annotate(text)`
	`233`	`+`
`233`	`234`	`try:`
`234`		`- output = json.loads(r.text)`
`235`		`- except:`
`236`		`- pass`
`237`		`- return output`
	`235`	`+ r = requests.get(`
	`236`	`+ self.endpoint + path, params={`
	`237`	`+ 'pattern': pattern,`
	`238`	`+ 'filter': filter,`
	`239`	`+ }, data=text)`
	`240`	`+ r.raise_for_status()`
	`241`	`+ return json.loads(r.text)`
	`242`	`+ except requests.HTTPError as e:`
	`243`	`+ if r.text.startswith("Timeout"):`
	`244`	`+ raise TimeoutException(r.text)`
	`245`	`+ else:`
	`246`	`+ raise AnnotationException(r.text)`
	`247`	`+ except json.JSONDecodeError:`
	`248`	`+ raise AnnotationException(r.text)`
`238`	`249`
`239`	`250`	`def regex_matches_to_indexed_words(matches):`
`240`	`251`	`"""Transforms tokensregex and semgrex matches to indexed words.`

`‎tests/test_client.py`

Lines changed: 19 additions & 25 deletions

Original file line number	Diff line number	Diff line change
`@@ -33,39 +33,33 @@ def test_tokensregex():`
`33`	`33`	`"sentences": [{`
`34`	`34`	`"0": {`
`35`	`35`	`"text": "Chris wrote a simple sentence",`
`36`		`- "begin": 1,`
`37`		`- "end": 6,`
	`36`	`+ "begin": 0,`
	`37`	`+ "end": 5,`
`38`	`38`	`"1": {`
`39`	`39`	`"text": "Chris",`
`40`		`- "begin": 1,`
`41`		`- "end": 2`
	`40`	`+ "begin": 0,`
	`41`	`+ "end": 1`
`42`	`42`	`}},`
`43`	`43`	`"length": 1`
`44`	`44`	`},]}`
`45`	`45`
`46`	`46`	`def test_semgrex():`
`47`		`- with corenlp.CoreNLPClient(annotators='tokenize ssplit depparse'.split()) as client:`
	`47`	`+ with corenlp.CoreNLPClient(annotators='tokenize ssplit pos lemma ner depparse'.split()) as client:`
`48`	`48`	`pattern = '{word:wrote} >nsubj {}=subject >dobj {}=object'`
`49`	`49`	`matches = client.semgrex(TEXT, pattern, to_words=True)`
`50`		`- assert matches == {`
`51`		`- "sentences": [`
	`50`	`+ assert matches == [`
`52`	`51`	`{`
`53`		`- "0": {`
`54`		`- "text": "wrote",`
`55`		`- "begin": 1,`
`56`		`- "end": 2,`
`57`		`- "$subject": {`
`58`		`- "text": "Chris",`
`59`		`- "begin": 0,`
`60`		`- "end": 1`
`61`		`- },`
`62`		`- "$object": {`
`63`		`- "text": "sentence",`
`64`		`- "begin": 4,`
`65`		`- "end": 5`
`66`		`- }`
	`52`	`+ "text": "wrote",`
	`53`	`+ "begin": 1,`
	`54`	`+ "end": 2,`
	`55`	`+ "$subject": {`
	`56`	`+ "text": "Chris",`
	`57`	`+ "begin": 0,`
	`58`	`+ "end": 1`
	`59`	`+ },`
	`60`	`+ "$object": {`
	`61`	`+ "text": "sentence",`
	`62`	`+ "begin": 4,`
	`63`	`+ "end": 5`
`67`	`64`	`},`
`68`		`- "length": 1`
`69`		`- }`
`70`		`- ]`
`71`		`- }`
	`65`	`+ "sentence": 0,}]`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 55ff618

File tree

2 files changed

2 files changed

`‎corenlp/client.py`

`‎tests/test_client.py`

0 commit comments