Commit f9b295b

author

Arun Tejasvi Chaganty

committed

Fixed packaging and style issues

1 parent 09bbd1d commit f9b295b (Copy full SHA for f9b295b)

File tree

4 files changed

+37

-22

lines changed

MANIFEST.in
corenlp_protobuf
- __init__.py
tests
- test_read.py
tox.ini

4 files changed

+37

-22

lines changed

`‎MANIFEST.in‎`

Lines changed: 4 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,7 @@`
`1`	`1`	`# Include the license file`
`2`		`-include LICENSE.txt`
	`2`	`+include *.md`
	`3`	`+include LICENSE`
`3`	`4`
`4`	`5`	`# Include the data files`
`5`		`-recursive-include data *`
	`6`	`+recursive-include corenlp_protobuf *.py`
	`7`	`+recursive-include doc *.proto`

`‎corenlp_protobuf/__init__.py‎`

Lines changed: 4 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -3,6 +3,7 @@`
`3`	`3`	`from google.protobuf.internal.decoder import _DecodeVarint`
`4`	`4`	`from .CoreNLP_pb2 import *`
`5`	`5`
	`6`	`+`
`6`	`7`	`def parseFromDelimitedString(obj, buf, offset=0):`
`7`	`8`	`"""`
`8`	`9`	`Stanford CoreNLP uses the Java "writeDelimitedTo" function, which`
`@@ -15,9 +16,11 @@ def parseFromDelimitedString(obj, buf, offset=0):`
`15`	`16`	`obj.ParseFromString(buf[offset+pos:offset+pos+size])`
`16`	`17`	`return pos+size`
`17`	`18`
	`19`	`+`
`18`	`20`	`def to_text(sentence):`
`19`	`21`	`"""`
`20`		`- Helper routine that converts a Sentence protobuf to a string from its tokens.`
	`22`	`+ Helper routine that converts a Sentence protobuf to a string from`
	`23`	`+ its tokens.`
`21`	`24`	`"""`
`22`	`25`	`text = ""`
`23`	`26`	`for i, tok in enumerate(sentence.token):`

`‎tests/test_read.py‎`

Lines changed: 26 additions & 15 deletions

Original file line number	Diff line number	Diff line change
`@@ -9,36 +9,45 @@`
`9`	`9`
`10`	`10`	`import os`
`11`	`11`	`from pytest import fixture`
`12`		`-from corenlp_protobuf import Document, Sentence, Token, DependencyGraph, CorefChain`
	`12`	`+from corenlp_protobuf import Document, Sentence, Token, DependencyGraph,\`
	`13`	`+ CorefChain`
`13`	`14`	`from corenlp_protobuf import parseFromDelimitedString, to_text`
`14`	`15`
`15`	`16`
`16`	`17`	`# Thext that was annotated`
`17`	`18`	`TEXT = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n"`
`18`	`19`
	`20`	`+`
`19`	`21`	`@fixture`
`20`	`22`	`def doc_pb():`
`21`		`- test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'test.dat')`
	`23`	`+ test_dir = os.path.dirname(os.path.abspath(__file__))`
	`24`	`+ test_data = os.path.join(test_dir, 'data', 'test.dat')`
`22`	`25`	`with open(test_data, 'rb') as f:`
`23`	`26`	`buf = f.read()`
`24`	`27`	`doc = Document()`
`25`	`28`	`parseFromDelimitedString(doc, buf)`
`26`	`29`	`return doc`
`27`	`30`
	`31`	`+`
`28`	`32`	`def test_parse_protobuf(doc_pb):`
`29`	`33`	`assert doc_pb.ByteSize() == 4239`
`30`	`34`
	`35`	`+`
`31`	`36`	`def test_document_text(doc_pb):`
`32`	`37`	`assert doc_pb.text == TEXT`
`33`	`38`
	`39`	`+`
`34`	`40`	`def test_sentences(doc_pb):`
`35`	`41`	`assert len(doc_pb.sentence) == 1`
`36`	`42`
`37`	`43`	`sentence = doc_pb.sentence[0]`
`38`	`44`	`assert isinstance(sentence, Sentence)`
`39`		`- assert sentence.characterOffsetEnd - sentence.characterOffsetBegin # Sentence length == 67`
`40`		`- assert sentence.text == '' # Note that the sentence text should actually be recovered from the tokens.`
`41`		`- assert to_text(sentence) == TEXT[:-1] # Note that the sentence text should actually be recovered from the tokens.`
	`45`	`+ # check sentence length`
	`46`	`+ assert sentence.characterOffsetEnd - sentence.characterOffsetBegin == 67`
	`47`	`+ # Note that the sentence text should actually be recovered from the tokens.`
	`48`	`+ assert sentence.text == ''`
	`49`	`+ assert to_text(sentence) == TEXT[:-1]`
	`50`	`+`
`42`	`51`
`43`	`52`	`def test_tokens(doc_pb):`
`44`	`53`	`sentence = doc_pb.sentence[0]`
`@@ -54,25 +63,26 @@ def test_tokens(doc_pb):`
`54`	`63`	`# Lemma`
`55`	`64`	`lemmas = "Chris write a simple sentence that he parse with Stanford CoreNLP .".split()`
`56`	`65`	`lemmas_ = [t.lemma for t in tokens]`
`57`		`- assert lemmas_ == lemmas`
	`66`	`+ assert lemmas_ == lemmas`
`58`	`67`
`59`	`68`	`# POS`
`60`	`69`	`pos = "NNP VBD DT JJ NN IN PRP VBD IN NNP NNP .".split()`
`61`	`70`	`pos_ = [t.pos for t in tokens]`
`62`		`- assert pos_ == pos`
	`71`	`+ assert pos_ == pos`
`63`	`72`
`64`	`73`	`# NER`
`65`	`74`	`ner = "PERSON O O O O O O O O ORGANIZATION O O".split()`
`66`	`75`	`ner_ = [t.ner for t in tokens]`
`67`		`- assert ner_ == ner`
	`76`	`+ assert ner_ == ner`
`68`	`77`
`69`	`78`	`# character offsets`
`70`	`79`	`begin = [int(i) for i in "0 6 12 14 21 30 35 38 45 50 59 66".split()]`
`71`	`80`	`end = [int(i) for i in "5 11 13 20 29 34 37 44 49 58 66 67".split()]`
`72`	`81`	`begin_ = [t.beginChar for t in tokens]`
`73`	`82`	`end_ = [t.endChar for t in tokens]`
`74`		`- assert begin_ == begin`
`75`		`- assert end_ == end`
	`83`	`+ assert begin_ == begin`
	`84`	`+ assert end_ == end`
	`85`	`+`
`76`	`86`
`77`	`87`	`def test_dependency_parse(doc_pb):`
`78`	`88`	`"""`
`@@ -89,7 +99,7 @@ def test_dependency_parse(doc_pb):`
`89`	`99`	`tree = sentence.enhancedPlusPlusDependencies`
`90`	`100`	`isinstance(tree, DependencyGraph)`
`91`	`101`	`# Indices are 1-indexd with 0 being the "pseudo root"`
`92`		`- assert tree.root # 'wrote' is the root. == [2]`
	`102`	`+ assert tree.root # 'wrote' is the root. == [2]`
`93`	`103`	`# There are as many nodes as there are tokens.`
`94`	`104`	`assert len(tree.node) == len(sentence.token)`
`95`	`105`
`@@ -104,6 +114,7 @@ def test_dependency_parse(doc_pb):`
`104`	`114`	`assert edge.target == 1`
`105`	`115`	`assert edge.dep == "nsubj"`
`106`	`116`
	`117`	`+`
`107`	`118`	`def test_coref_chain(doc_pb):`
`108`	`119`	`"""`
`109`	`120`	`Extract the corefence chains from the annotation.`
`@@ -113,15 +124,15 @@ def test_coref_chain(doc_pb):`
`113`	`124`	`chains = doc_pb.corefChain`
`114`	`125`
`115`	`126`	`# In this document there is 1 chain with Chris and he.`
`116`		`- assert len(chains) == 1`
	`127`	`+ assert len(chains) == 1`
`117`	`128`	`chain = chains[0]`
`118`	`129`	`assert isinstance(chain, CorefChain)`
`119`		`- assert chain.mention[0].beginIndex == 0 # 'Chris'`
	`130`	`+ assert chain.mention[0].beginIndex == 0 # 'Chris'`
`120`	`131`	`assert chain.mention[0].endIndex == 1`
`121`	`132`	`assert chain.mention[0].gender == "MALE"`
`122`	`133`
`123`		`- assert chain.mention[1].beginIndex == 6 # 'he'`
	`134`	`+ assert chain.mention[1].beginIndex == 6 # 'he'`
`124`	`135`	`assert chain.mention[1].endIndex == 7`
`125`	`136`	`assert chain.mention[1].gender == "MALE"`
`126`	`137`
`127`		`- assert chain.representative == 0 # The head of the chain is 'Chris'`
	`138`	`+ assert chain.representative == 0 # Head of the chain is 'Chris'`

`‎tox.ini‎`

Lines changed: 3 additions & 4 deletions

Original file line number	Diff line number	Diff line change
`@@ -11,22 +11,21 @@`
`11`	`11`	`# and also to help confirm pull requests to this project.`
`12`	`12`
`13`	`13`	`[tox]`
`14`		`-envlist = py{27,33,34}`
	`14`	`+envlist = py{27,33,35}`
`15`	`15`
`16`	`16`	`[testenv]`
`17`	`17`	`basepython =`
`18`	`18`	`py27: python2.7`
`19`	`19`	`py33: python3.3`
`20`		`- py34: python3.4`
	`20`	`+ py35: python3.5`
`21`	`21`	`deps =`
`22`	`22`	`check-manifest`
`23`	`23`	`readme_renderer`
`24`		`- flake8`
`25`	`24`	`pytest`
	`25`	`+ protobuf`
`26`	`26`	`commands =`
`27`	`27`	`check-manifest --ignore tox.ini,tests*`
`28`	`28`	`python setup.py check -m -r -s`
`29`		`- flake8 .`
`30`	`29`	`py.test tests`
`31`	`30`	`[flake8]`
`32`	`31`	`exclude = .tox,*.egg,build,data`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit f9b295b

File tree

4 files changed

4 files changed

`‎MANIFEST.in‎`

`‎corenlp_protobuf/__init__.py‎`

`‎tests/test_read.py‎`

`‎tox.ini‎`

0 commit comments

File tree

4 files changed

4 files changed

‎MANIFEST.in‎

‎corenlp_protobuf/__init__.py‎

‎tests/test_read.py‎

‎tox.ini‎

0 commit comments

`‎MANIFEST.in‎`

`‎corenlp_protobuf/__init__.py‎`

`‎tests/test_read.py‎`

`‎tox.ini‎`