Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 620ac3b

Browse files
Updated corenlp_protobuf to include writing functions
1 parent 99340a9 commit 620ac3b

File tree

3 files changed

+27
-4
lines changed

3 files changed

+27
-4
lines changed

‎corenlp_protobuf/__init__.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from __future__ import absolute_import
22

3+
from io import BytesIO
4+
5+
from google.protobuf.internal.encoder import _EncodeVarint
36
from google.protobuf.internal.decoder import _DecodeVarint
47
from .CoreNLP_pb2 import *
58

6-
79
def parseFromDelimitedString(obj, buf, offset=0):
810
"""
911
Stanford CoreNLP uses the Java "writeDelimitedTo" function, which
@@ -16,6 +18,20 @@ def parseFromDelimitedString(obj, buf, offset=0):
1618
obj.ParseFromString(buf[offset+pos:offset+pos+size])
1719
return pos+size
1820

21+
def writeToDelimitedString(obj, stream=None):
22+
"""
23+
Stanford CoreNLP uses the Java "writeDelimitedTo" function, which
24+
writes the size (and offset) of the buffer before writing the object.
25+
This function handles writing this message in that delimited format to @stream (a new BytesIO is created when @stream is None).
26+
27+
@returns the stream that the delimited message was written to.
28+
"""
29+
if stream is None:
30+
stream = BytesIO()
31+
32+
_EncodeVarint(stream.write, obj.ByteSize())
33+
stream.write(obj.SerializeToString())
34+
return stream
1935

2036
def to_text(sentence):
2137
"""

‎setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
# Versions should comply with PEP440. For a discussion on single-sourcing
2424
# the version across setup.py and the project code, see
2525
# https://packaging.python.org/en/latest/single_source_version.html
26-
version='3.7.0',
26+
version='3.7.1',
2727

2828
description='Python bindings for Stanford CoreNLP protobufs',
2929
long_description=long_description,

‎tests/test_read.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pytest import fixture
1212
from corenlp_protobuf import Document, Sentence, Token, DependencyGraph,\
1313
CorefChain
14-
from corenlp_protobuf import parseFromDelimitedString, to_text
14+
from corenlp_protobuf import parseFromDelimitedString, writeToDelimitedString, to_text
1515

1616

1717
# Text that was annotated
@@ -28,10 +28,17 @@ def doc_pb():
2828
parseFromDelimitedString(doc, buf)
2929
return doc
3030

31-
3231
def test_parse_protobuf(doc_pb):
3332
assert doc_pb.ByteSize() == 4239
3433

34+
def test_write_protobuf(doc_pb):
35+
stream = writeToDelimitedString(doc_pb)
36+
buf = stream.getvalue()
37+
stream.close()
38+
39+
doc_pb_ = Document()
40+
parseFromDelimitedString(doc_pb_, buf)
41+
assert doc_pb == doc_pb_
3542

3643
def test_document_text(doc_pb):
3744
assert doc_pb.text == TEXT

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /