
Commit 09bbd1d

Arun Tejasvi Chaganty committed: Ported tests to simpler py.test framework
1 parent: 6755b5f

File tree: 1 file changed (+114, −115 lines)

tests/test_read.py

Lines changed: 114 additions & 115 deletions
@@ -8,121 +8,120 @@
 """
 
 import os
-import unittest
+from pytest import fixture
 from corenlp_protobuf import Document, Sentence, Token, DependencyGraph, CorefChain
 from corenlp_protobuf import parseFromDelimitedString, to_text
 
-class TestProtobuf(unittest.TestCase):
-    def setUp(self):
-        self.text = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n"
-        test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'test.dat')
-        with open(test_data, 'rb') as f:
-            self.buf = f.read()
-
-        self.doc = Document()
-        parseFromDelimitedString(self.doc, self.buf)
-
-    def test_parse_protobuf(self):
-        self.assertEqual(4239, self.doc.ByteSize(), "Could not read input correctly")
-
-    def test_document_text(self):
-        self.assertEqual(self.text, self.doc.text)
-
-    def test_sentences(self):
-        self.assertEqual(1, len(self.doc.sentence))
-
-        sentence = self.doc.sentence[0]
-        self.assertTrue(isinstance(sentence, Sentence))
-        self.assertEqual(67, sentence.characterOffsetEnd - sentence.characterOffsetBegin) # Sentence length
-        self.assertEqual('', sentence.text) # Note that the sentence text should actually be recovered from the tokens.
-        self.assertEqual(self.text[:-1], to_text(sentence)) # Note that the sentence text should actually be recovered from the tokens.
-
-    def test_tokens(self):
-        sentence = self.doc.sentence[0]
-        tokens = sentence.token
-        self.assertEqual(12, len(tokens))
-        self.assertTrue(isinstance(tokens[0], Token))
-
-        # Word
-        words = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP .".split()
-        words_ = [t.word for t in tokens]
-        self.assertEqual(words, words_)
-
-        # Lemma
-        lemmas = "Chris write a simple sentence that he parse with Stanford CoreNLP .".split()
-        lemmas_ = [t.lemma for t in tokens]
-        self.assertEqual(lemmas, lemmas_)
-
-        # POS
-        pos = "NNP VBD DT JJ NN IN PRP VBD IN NNP NNP .".split()
-        pos_ = [t.pos for t in tokens]
-        self.assertEqual(pos, pos_)
-
-        # NER
-        ner = "PERSON O O O O O O O O ORGANIZATION O O".split()
-        ner_ = [t.ner for t in tokens]
-        self.assertEqual(ner, ner_)
-
-        # character offsets
-        begin = [int(i) for i in "0 6 12 14 21 30 35 38 45 50 59 66".split()]
-        end = [int(i) for i in "5 11 13 20 29 34 37 44 49 58 66 67".split()]
-        begin_ = [t.beginChar for t in tokens]
-        end_ = [t.endChar for t in tokens]
-        self.assertEqual(begin, begin_)
-        self.assertEqual(end, end_)
-
-    def test_dependency_parse(self):
-        """
-        Extract the dependency parse from the annotation.
-        """
-        sentence = self.doc.sentence[0]
-
-        # You can choose from the following types of dependencies.
-        # In general, you'll want enhancedPlusPlus
-        self.assertTrue(sentence.basicDependencies.ByteSize() > 0)
-        self.assertTrue(sentence.enhancedDependencies.ByteSize() > 0)
-        self.assertTrue(sentence.enhancedPlusPlusDependencies.ByteSize() > 0)
-
-        tree = sentence.enhancedPlusPlusDependencies
-        self.assertTrue(isinstance(tree, DependencyGraph))
-        # Indices are 1-indexed with 0 being the "pseudo root"
-        self.assertEqual([2], tree.root) # 'wrote' is the root.
-        # There are as many nodes as there are tokens.
-        self.assertEqual(len(sentence.token), len(tree.node))
-
-        # Enhanced++ dependencies often contain additional edges and are
-        # not trees -- here, 'parsed' would also have an edge to
-        # 'sentence'
-        self.assertEqual(12, len(tree.edge))
-
-        # This edge goes from "wrote" to "Chris"
-        edge = tree.edge[0]
-        self.assertEqual(2, edge.source)
-        self.assertEqual(1, edge.target)
-        self.assertEqual("nsubj", edge.dep)
-
-    def test_coref_chain(self):
-        """
-        Extract the coreference chains from the annotation.
-        """
-        # Coreference chains span sentences and are stored in the
-        # document.
-        chains = self.doc.corefChain
-
-        # In this document there is 1 chain with Chris and he.
-        self.assertEqual(1, len(chains))
-        chain = chains[0]
-        self.assertTrue(isinstance(chain, CorefChain))
-        self.assertEqual(0, chain.mention[0].beginIndex) # Starts at token 0 == 'Chris'
-        self.assertEqual(1, chain.mention[0].endIndex)
-        self.assertEqual("MALE", chain.mention[0].gender)
-
-        self.assertEqual(6, chain.mention[1].beginIndex) # Starts at token 6 == 'he'
-        self.assertEqual(7, chain.mention[1].endIndex)
-        self.assertEqual("MALE", chain.mention[1].gender)
-
-        self.assertEqual(0, chain.representative) # The head of the chain is 'Chris'
-
-
-if __name__ == "__main__":
-    unittest.main()
+
+# The text that was annotated
+TEXT = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n"
+
+@fixture
+def doc_pb():
+    test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'test.dat')
+    with open(test_data, 'rb') as f:
+        buf = f.read()
+    doc = Document()
+    parseFromDelimitedString(doc, buf)
+    return doc
+
+def test_parse_protobuf(doc_pb):
+    assert doc_pb.ByteSize() == 4239
+
+def test_document_text(doc_pb):
+    assert doc_pb.text == TEXT
+
+def test_sentences(doc_pb):
+    assert len(doc_pb.sentence) == 1
+
+    sentence = doc_pb.sentence[0]
+    assert isinstance(sentence, Sentence)
+    assert sentence.characterOffsetEnd - sentence.characterOffsetBegin == 67 # Sentence length
+    assert sentence.text == '' # Note that the sentence text should actually be recovered from the tokens.
+    assert to_text(sentence) == TEXT[:-1] # Note that the sentence text should actually be recovered from the tokens.
+
+def test_tokens(doc_pb):
+    sentence = doc_pb.sentence[0]
+    tokens = sentence.token
+    assert len(tokens) == 12
+    assert isinstance(tokens[0], Token)
+
+    # Word
+    words = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP .".split()
+    words_ = [t.word for t in tokens]
+    assert words_ == words
+
+    # Lemma
+    lemmas = "Chris write a simple sentence that he parse with Stanford CoreNLP .".split()
+    lemmas_ = [t.lemma for t in tokens]
+    assert lemmas_ == lemmas
+
+    # POS
+    pos = "NNP VBD DT JJ NN IN PRP VBD IN NNP NNP .".split()
+    pos_ = [t.pos for t in tokens]
+    assert pos_ == pos
+
+    # NER
+    ner = "PERSON O O O O O O O O ORGANIZATION O O".split()
+    ner_ = [t.ner for t in tokens]
+    assert ner_ == ner
+
+    # character offsets
+    begin = [int(i) for i in "0 6 12 14 21 30 35 38 45 50 59 66".split()]
+    end = [int(i) for i in "5 11 13 20 29 34 37 44 49 58 66 67".split()]
+    begin_ = [t.beginChar for t in tokens]
+    end_ = [t.endChar for t in tokens]
+    assert begin_ == begin
+    assert end_ == end
+
+def test_dependency_parse(doc_pb):
+    """
+    Extract the dependency parse from the annotation.
+    """
+    sentence = doc_pb.sentence[0]
+
+    # You can choose from the following types of dependencies.
+    # In general, you'll want enhancedPlusPlus
+    assert sentence.basicDependencies.ByteSize() > 0
+    assert sentence.enhancedDependencies.ByteSize() > 0
+    assert sentence.enhancedPlusPlusDependencies.ByteSize() > 0
+
+    tree = sentence.enhancedPlusPlusDependencies
+    assert isinstance(tree, DependencyGraph)
+    # Indices are 1-indexed with 0 being the "pseudo root"
+    assert tree.root == [2] # 'wrote' is the root.
+    # There are as many nodes as there are tokens.
+    assert len(tree.node) == len(sentence.token)
+
+    # Enhanced++ dependencies often contain additional edges and are
+    # not trees -- here, 'parsed' would also have an edge to
+    # 'sentence'
+    assert len(tree.edge) == 12
+
+    # This edge goes from "wrote" to "Chris"
+    edge = tree.edge[0]
+    assert edge.source == 2
+    assert edge.target == 1
+    assert edge.dep == "nsubj"
+
+def test_coref_chain(doc_pb):
+    """
+    Extract the coreference chains from the annotation.
+    """
+    # Coreference chains span sentences and are stored in the
+    # document.
+    chains = doc_pb.corefChain
+
+    # In this document there is 1 chain with Chris and he.
+    assert len(chains) == 1
+    chain = chains[0]
+    assert isinstance(chain, CorefChain)
+    assert chain.mention[0].beginIndex == 0 # 'Chris'
+    assert chain.mention[0].endIndex == 1
+    assert chain.mention[0].gender == "MALE"
+
+    assert chain.mention[1].beginIndex == 6 # 'he'
+    assert chain.mention[1].endIndex == 7
+    assert chain.mention[1].gender == "MALE"
+
+    assert chain.representative == 0 # The head of the chain is 'Chris'
