Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f5e0d8c

Browse files
Minor refactor of semgrex code and addition of tests
1 parent b5c00a4 commit f5e0d8c

File tree

2 files changed

+52
-12
lines changed

2 files changed

+52
-12
lines changed

corenlp/client.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -205,13 +205,13 @@ def tokensregex(self, text, pattern, filter=False, to_words=False):
205205
matches = self.__regex('/tokensregex', text, pattern, filter)
206206
if not to_words:
207207
return matches
208-
return self.regex_matches_to_indexed_words(matches)
208+
return regex_matches_to_indexed_words(matches)
209209

210210
def semgrex(self, text, pattern, filter=False, to_words=False):
211211
matches = self.__regex('/semgrex', text, pattern, filter)
212212
if not to_words:
213213
return matches
214-
return self.regex_matches_to_indexed_words(matches)
214+
return regex_matches_to_indexed_words(matches)
215215

216216
def tregrex(self, text, pattern, filter=False):
217217
return self.__regex('/tregex', text, pattern, filter)
@@ -236,15 +236,14 @@ def __regex(self, path, text, pattern, filter):
236236
pass
237237
return output
238238

239-
@staticmethod
240-
def regex_matches_to_indexed_words(matches):
241-
"""Transforms tokensregex and semgrex matches to indexed words.
242-
:param matches: unprocessed regex matches
243-
:return: flat array of indexed words
244-
"""
245-
words = [dict(v, **dict([('sentence', i)]))
246-
for i, s in enumerate(matches['sentences'])
247-
for k, v in s.items() if k != 'length']
248-
return words
239+
def regex_matches_to_indexed_words(matches):
240+
"""Transforms tokensregex and semgrex matches to indexed words.
241+
:param matches: unprocessed regex matches
242+
:return: flat array of indexed words
243+
"""
244+
words = [dict(v, **dict([('sentence', i)]))
245+
for i, s in enumerate(matches['sentences'])
246+
for k, v in s.items() if k != 'length']
247+
return words
249248

250249
__all__ = ["CoreNLPClient", "AnnotationException", "TimeoutException", "to_text"]

tests/test_client.py

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,3 +21,44 @@ def test_update():
2121
ann = client.annotate(TEXT)
2222
ann = client.update(ann)
2323
assert corenlp.to_text(ann.sentence[0]) == TEXT[:-1]
24+
25+
def test_tokensregex():
26+
with corenlp.CoreNLPClient(annotators='tokenize ssplit ner depparse'.split()) as client:
27+
# Example pattern from: https://nlp.stanford.edu/software/tokensregex.shtml
28+
text = 'Hello. Bob Ross was a famous painter. Goodbye.'
29+
pattern = '([ner: PERSON]+) /was|is/ /an?/ []{0,3} /painter|artist/'
30+
matches = client.tokensregex(text, pattern)
31+
assert matches == {
32+
"sentences": [{
33+
"length": 0
34+
},{
35+
"0": {
36+
"text": "Ross was a famous painter",
37+
"begin": 1,
38+
"end": 6,
39+
"1": {
40+
"text": "Ross",
41+
"begin": 1,
42+
"end": 2
43+
}},
44+
"length": 1
45+
},{
46+
"length": 0
47+
}]}
48+
49+
def test_semgrex():
50+
with corenlp.CoreNLPClient(annotators='tokenize ssplit depparse'.split()) as client:
51+
text = 'I ran.'
52+
pattern = '{} < {}'
53+
matches = client.semgrex(text, pattern, to_words=True)
54+
assert matches == [{
55+
"text": ".",
56+
"begin": 2,
57+
"end": 3,
58+
"sentence": 0
59+
},{
60+
"text": "I",
61+
"begin": 0,
62+
"end": 1,
63+
"sentence": 0
64+
}]

0 commit comments

Comments
(0)

Page converted by AltStyle (→ original) /