Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 1b85b3f

Browse files
committed
allow for differing output formats
1 parent b1f872a commit 1b85b3f

File tree

1 file changed

+21
-4
lines changed

1 file changed

+21
-4
lines changed

‎corenlp/client.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,13 +163,16 @@ def _request(self, buf, properties):
163163
else:
164164
raise AnnotationException(r.text)
165165

166-
def annotate(self, text, annotators=None, properties=None):
166+
def annotate(self, text, annotators=None, output_format=None, properties=None):
167167
"""Send a request to the CoreNLP server.
168168
169169
:param (str | unicode) text: raw text for the CoreNLPServer to parse
170+
:param (list | str) annotators: list of annotators to use
171+
:param (str) output_format: output type from server: serialized, json, text, conll, conllu, or xml
170172
:param (dict) properties: properties that the server expects
171173
:return: request result
172174
"""
175+
# set properties for server call
173176
if properties is None:
174177
properties = self.default_properties
175178
properties.update({
@@ -178,10 +181,24 @@ def annotate(self, text, annotators=None, properties=None):
178181
'outputFormat': 'serialized',
179182
'serializer': 'edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer'
180183
})
184+
elif "annotators" not in properties:
185+
properties.update({'annotators': ','.join(annotators or self.default_annotators)})
186+
# if an output_format is specified, use that to override
187+
if output_format is not None:
188+
properties["outputFormat"] = output_format
189+
# make the request
181190
r = self._request(text.encode('utf-8'), properties)
182-
doc = Document()
183-
parseFromDelimitedString(doc, r.content)
184-
return doc
191+
# customize what is returned based on outputFormat
192+
if properties["outputFormat"] == "serialized":
193+
doc = Document()
194+
parseFromDelimitedString(doc, r.content)
195+
return doc
196+
elif properties["outputFormat"] == "json":
197+
return r.json()
198+
elif properties["outputFormat"] in ["text", "conllu", "conll", "xml"]:
199+
return r.text
200+
else:
201+
return r
185202

186203
def update(self, doc, annotators=None, properties=None):
187204
if properties is None:

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /