
Commit f9b295b

Author: Arun Tejasvi Chaganty
Parent: 09bbd1d

Fixed packaging and style issues

File tree: 4 files changed (+37, -22 lines)


MANIFEST.in

Lines changed: 4 additions & 2 deletions

@@ -1,5 +1,7 @@
 # Include the license file
-include LICENSE.txt
+include *.md
+include LICENSE
 
 # Include the data files
-recursive-include data *
+recursive-include corenlp_protobuf *.py
+recursive-include doc *.proto
corenlp_protobuf/__init__.py

Lines changed: 4 additions & 1 deletion

@@ -3,6 +3,7 @@
 from google.protobuf.internal.decoder import _DecodeVarint
 from .CoreNLP_pb2 import *
 
+
 def parseFromDelimitedString(obj, buf, offset=0):
     """
     Stanford CoreNLP uses the Java "writeDelimitedTo" function, which
@@ -15,9 +16,11 @@ def parseFromDelimitedString(obj, buf, offset=0):
     obj.ParseFromString(buf[offset+pos:offset+pos+size])
     return pos+size
 
+
 def to_text(sentence):
     """
-    Helper routine that converts a Sentence protobuf to a string from its tokens.
+    Helper routine that converts a Sentence protobuf to a string from
+    its tokens.
     """
     text = ""
     for i, tok in enumerate(sentence.token):
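
For context on how the two helpers fit together, here is a minimal usage sketch. It mirrors the doc_pb fixture in tests/test_read.py below; 'example.dat' is a placeholder for any file produced by CoreNLP's ProtobufAnnotationSerializer (Java writeDelimitedTo output):

from corenlp_protobuf import Document, parseFromDelimitedString, to_text

# 'example.dat' is a stand-in: a serialized Document preceded by a
# varint byte-length header, as written by Java's writeDelimitedTo.
with open('example.dat', 'rb') as f:
    buf = f.read()

doc = Document()
# Decodes the varint header, then calls ParseFromString on the payload;
# the return value is the number of bytes consumed.
parseFromDelimitedString(doc, buf)

for sentence in doc.sentence:
    # sentence.text is left empty by the serializer, so the surface
    # text has to be reassembled from the tokens.
    print(to_text(sentence))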

tests/test_read.py

Lines changed: 26 additions & 15 deletions

@@ -9,36 +9,45 @@
 
 import os
 from pytest import fixture
-from corenlp_protobuf import Document, Sentence, Token, DependencyGraph, CorefChain
+from corenlp_protobuf import Document, Sentence, Token, DependencyGraph,\
+    CorefChain
 from corenlp_protobuf import parseFromDelimitedString, to_text
 
 
 # Thext that was annotated
 TEXT = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n"
 
+
 @fixture
 def doc_pb():
-    test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'test.dat')
+    test_dir = os.path.dirname(os.path.abspath(__file__))
+    test_data = os.path.join(test_dir, 'data', 'test.dat')
     with open(test_data, 'rb') as f:
         buf = f.read()
     doc = Document()
     parseFromDelimitedString(doc, buf)
     return doc
 
+
 def test_parse_protobuf(doc_pb):
     assert doc_pb.ByteSize() == 4239
 
+
 def test_document_text(doc_pb):
     assert doc_pb.text == TEXT
 
+
 def test_sentences(doc_pb):
     assert len(doc_pb.sentence) == 1
 
     sentence = doc_pb.sentence[0]
     assert isinstance(sentence, Sentence)
-    assert sentence.characterOffsetEnd - sentence.characterOffsetBegin # Sentence length == 67
-    assert sentence.text == '' # Note that the sentence text should actually be recovered from the tokens.
-    assert to_text(sentence) == TEXT[:-1] # Note that the sentence text should actually be recovered from the tokens.
+    # check sentence length
+    assert sentence.characterOffsetEnd - sentence.characterOffsetBegin == 67
+    # Note that the sentence text should actually be recovered from the tokens.
+    assert sentence.text == ''
+    assert to_text(sentence) == TEXT[:-1]
+
 
 def test_tokens(doc_pb):
     sentence = doc_pb.sentence[0]
@@ -54,25 +63,26 @@ def test_tokens(doc_pb):
     # Lemma
     lemmas = "Chris write a simple sentence that he parse with Stanford CoreNLP .".split()
     lemmas_ = [t.lemma for t in tokens]
-    assert lemmas_ == lemmas
+    assert lemmas_ == lemmas
 
     # POS
     pos = "NNP VBD DT JJ NN IN PRP VBD IN NNP NNP .".split()
     pos_ = [t.pos for t in tokens]
-    assert pos_ == pos
+    assert pos_ == pos
 
     # NER
     ner = "PERSON O O O O O O O O ORGANIZATION O O".split()
     ner_ = [t.ner for t in tokens]
-    assert ner_ == ner
+    assert ner_ == ner
 
     # character offsets
     begin = [int(i) for i in "0 6 12 14 21 30 35 38 45 50 59 66".split()]
     end = [int(i) for i in "5 11 13 20 29 34 37 44 49 58 66 67".split()]
     begin_ = [t.beginChar for t in tokens]
     end_ = [t.endChar for t in tokens]
-    assert begin_ == begin
-    assert end_ == end
+    assert begin_ == begin
+    assert end_ == end
+
 
 def test_dependency_parse(doc_pb):
     """
@@ -89,7 +99,7 @@ def test_dependency_parse(doc_pb):
     tree = sentence.enhancedPlusPlusDependencies
     isinstance(tree, DependencyGraph)
     # Indices are 1-indexd with 0 being the "pseudo root"
-    assert tree.root # 'wrote' is the root. == [2]
+    assert tree.root  # 'wrote' is the root. == [2]
     # There are as many nodes as there are tokens.
     assert len(tree.node) == len(sentence.token)
 
@@ -104,6 +114,7 @@ def test_dependency_parse(doc_pb):
     assert edge.target == 1
     assert edge.dep == "nsubj"
 
+
 def test_coref_chain(doc_pb):
     """
     Extract the corefence chains from the annotation.
@@ -113,15 +124,15 @@ def test_coref_chain(doc_pb):
     chains = doc_pb.corefChain
 
     # In this document there is 1 chain with Chris and he.
-    assert len(chains) == 1
+    assert len(chains) == 1
     chain = chains[0]
     assert isinstance(chain, CorefChain)
-    assert chain.mention[0].beginIndex == 0 # 'Chris'
+    assert chain.mention[0].beginIndex == 0  # 'Chris'
     assert chain.mention[0].endIndex == 1
     assert chain.mention[0].gender == "MALE"
 
-    assert chain.mention[1].beginIndex == 6 # 'he'
+    assert chain.mention[1].beginIndex == 6  # 'he'
     assert chain.mention[1].endIndex == 7
     assert chain.mention[1].gender == "MALE"
 
-    assert chain.representative == 0 # The head of the chain is 'Chris'
+    assert chain.representative == 0  # Head of the chain is 'Chris'
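
Since parseFromDelimitedString returns how many bytes it consumed, a file holding several delimited documents back to back can be walked in a loop. A sketch (read_all_documents is a hypothetical helper, not part of this package):

from corenlp_protobuf import Document, parseFromDelimitedString

def read_all_documents(path):
    # Hypothetical helper: read every length-delimited Document
    # message from a single file, one after another.
    with open(path, 'rb') as f:
        buf = f.read()
    docs, offset = [], 0
    while offset < len(buf):
        doc = Document()
        # Slice so each call starts at the beginning of its message;
        # the return value covers the varint header plus the payload.
        offset += parseFromDelimitedString(doc, buf[offset:])
        docs.append(doc)
    return docs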

tox.ini

Lines changed: 3 additions & 4 deletions

@@ -11,22 +11,21 @@
 # and also to help confirm pull requests to this project.
 
 [tox]
-envlist = py{27,33,34}
+envlist = py{27,33,35}
 
 [testenv]
 basepython =
     py27: python2.7
     py33: python3.3
-    py34: python3.4
+    py35: python3.5
 deps =
     check-manifest
     readme_renderer
-    flake8
     pytest
+    protobuf
 commands =
     check-manifest --ignore tox.ini,tests*
     python setup.py check -m -r -s
-    flake8 .
     py.test tests
 [flake8]
 exclude = .tox,*.egg,build,data
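
With this configuration, running tox exercises the py27, py33, and py35 environments; a single environment can be selected with, e.g., tox -e py35. Note that flake8 is removed from the automated run, but the [flake8] section is kept, so flake8 . can still be invoked manually with the same exclude list.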
