99
1010import os
1111from pytest import fixture
12- from corenlp_protobuf import Document , Sentence , Token , DependencyGraph , CorefChain
12+ from corenlp_protobuf import Document , Sentence , Token , DependencyGraph ,\
13+ CorefChain
1314from corenlp_protobuf import parseFromDelimitedString , to_text
1415
1516
1617# Text that was annotated
1718TEXT = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n "
1819
@fixture
def doc_pb():
    """Load the serialized test document and parse it into a ``Document``.

    Reads ``data/test.dat`` from the directory containing this test module
    as raw bytes and parses the delimited buffer into a fresh ``Document``.

    Returns:
        The populated ``Document`` protobuf message.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    data_path = os.path.join(here, 'data', 'test.dat')
    # The payload is binary protobuf data, so read it as bytes.
    with open(data_path, 'rb') as handle:
        raw = handle.read()
    document = Document()
    parseFromDelimitedString(document, raw)
    return document
2730
def test_parse_protobuf(doc_pb):
    """Check that the parsed document reports the expected byte size."""
    expected_size = 4239
    assert doc_pb.ByteSize() == expected_size
3034
def test_document_text(doc_pb):
    """Check that the document text equals the annotated input ``TEXT``."""
    recovered = doc_pb.text
    assert recovered == TEXT
3338
def test_sentences(doc_pb):
    """Check the single sentence of the document: type, span and text."""
    sentences = doc_pb.sentence
    assert len(sentences) == 1

    first = sentences[0]
    assert isinstance(first, Sentence)
    # The character offsets must span the full sentence length.
    span = first.characterOffsetEnd - first.characterOffsetBegin
    assert span == 67
    # The ``text`` field is empty here; the sentence text should actually be
    # recovered from the tokens.
    assert first.text == ''
    assert to_text(first) == TEXT[:-1]
50+ 4251
4352def test_tokens (doc_pb ):
4453 sentence = doc_pb .sentence [0 ]
@@ -54,25 +63,26 @@ def test_tokens(doc_pb):
5463 # Lemma
5564 lemmas = "Chris write a simple sentence that he parse with Stanford CoreNLP ." .split ()
5665 lemmas_ = [t .lemma for t in tokens ]
57- assert lemmas_ == lemmas
66+ assert lemmas_ == lemmas
5867
5968 # POS
6069 pos = "NNP VBD DT JJ NN IN PRP VBD IN NNP NNP ." .split ()
6170 pos_ = [t .pos for t in tokens ]
62- assert pos_ == pos
71+ assert pos_ == pos
6372
6473 # NER
6574 ner = "PERSON O O O O O O O O ORGANIZATION O O" .split ()
6675 ner_ = [t .ner for t in tokens ]
67- assert ner_ == ner
76+ assert ner_ == ner
6877
6978 # character offsets
7079 begin = [int (i ) for i in "0 6 12 14 21 30 35 38 45 50 59 66" .split ()]
7180 end = [int (i ) for i in "5 11 13 20 29 34 37 44 49 58 66 67" .split ()]
7281 begin_ = [t .beginChar for t in tokens ]
7382 end_ = [t .endChar for t in tokens ]
74- assert begin_ == begin
75- assert end_ == end
83+ assert begin_ == begin
84+ assert end_ == end
85+ 7686
7787def test_dependency_parse (doc_pb ):
7888 """
@@ -89,7 +99,7 @@ def test_dependency_parse(doc_pb):
8999 tree = sentence .enhancedPlusPlusDependencies
90100 isinstance (tree , DependencyGraph )
91101 # Indices are 1-indexed with 0 being the "pseudo root"
92- assert tree .root # 'wrote' is the root. == [2]
102+ assert tree .root # 'wrote' is the root. == [2]
93103 # There are as many nodes as there are tokens.
94104 assert len (tree .node ) == len (sentence .token )
95105
@@ -104,6 +114,7 @@ def test_dependency_parse(doc_pb):
104114 assert edge .target == 1
105115 assert edge .dep == "nsubj"
106116
117+ 107118def test_coref_chain (doc_pb ):
108119 """
109120 Extract the coreference chains from the annotation.
@@ -113,15 +124,15 @@ def test_coref_chain(doc_pb):
113124 chains = doc_pb .corefChain
114125
115126 # In this document there is 1 chain with Chris and he.
116- assert len (chains ) == 1
127+ assert len (chains ) == 1
117128 chain = chains [0 ]
118129 assert isinstance (chain , CorefChain )
119- assert chain .mention [0 ].beginIndex == 0 # 'Chris'
130+ assert chain .mention [0 ].beginIndex == 0 # 'Chris'
120131 assert chain .mention [0 ].endIndex == 1
121132 assert chain .mention [0 ].gender == "MALE"
122133
123- assert chain .mention [1 ].beginIndex == 6 # 'he'
134+ assert chain .mention [1 ].beginIndex == 6 # 'he'
124135 assert chain .mention [1 ].endIndex == 7
125136 assert chain .mention [1 ].gender == "MALE"
126137
127- assert chain .representative == 0 # The head of the chain is 'Chris'
138+ assert chain .representative == 0 # Head of the chain is 'Chris'
0 commit comments