@@ -50,7 +50,7 @@ class RobustService(object):
50
50
TIMEOUT = 15
51
51
52
52
def __init__ (self , start_cmd , stop_cmd , endpoint , stdout = sys .stdout ,
53
- stderr = sys .stderr ):
53
+ stderr = sys .stderr , be_quiet = False ):
54
54
self .start_cmd = start_cmd and shlex .split (start_cmd )
55
55
self .stop_cmd = stop_cmd and shlex .split (stop_cmd )
56
56
self .endpoint = endpoint
@@ -59,6 +59,7 @@ def __init__(self, start_cmd, stop_cmd, endpoint, stdout=sys.stdout,
59
59
60
60
self .server = None
61
61
self .is_active = False
62
+ self .be_quiet = be_quiet
62
63
63
64
def is_alive (self ):
64
65
try :
@@ -68,9 +69,10 @@ def is_alive(self):
68
69
69
70
def start(self):
    """Launch the server process described by ``self.start_cmd``.

    Does nothing when ``self.start_cmd`` is falsy. Otherwise the command is
    spawned with ``subprocess.Popen`` and the process handle is stored on
    ``self.server``.

    When ``self.be_quiet`` is true, both output streams of the child are
    discarded. Otherwise the child's stdout goes to ``self.stdout`` and its
    stderr goes to ``self.stderr``.
    """
    if not self.start_cmd:
        return

    if self.be_quiet:
        stdout = stderr = subprocess.DEVNULL
    else:
        # Keep the two streams separate: sending stdout to the stderr target
        # would silently ignore the ``stdout`` the caller configured.
        stdout, stderr = self.stdout, self.stderr

    self.server = subprocess.Popen(self.start_cmd,
                                   stdout=stdout,
                                   stderr=stderr)
74
76
75
77
def stop (self ):
76
78
if self .server :
@@ -121,35 +123,41 @@ class CoreNLPClient(RobustService):
121
123
"""
122
124
DEFAULT_ANNOTATORS = "tokenize ssplit lemma pos ner depparse" .split ()
123
125
DEFAULT_PROPERTIES = {}
126
+ DEFAULT_OUTPUT_FORMAT = "serialized"
124
127
125
128
def __init__(self, start_server=True,
             endpoint="http://localhost:9000",
             timeout=5000,
             threads=5,
             annotators=None,
             properties=None,
             output_format=None,
             stdout=sys.stdout,
             stderr=sys.stderr,
             memory="4G",
             be_quiet=True,
             ):
    """Configure the client and, optionally, the command that starts a server.

    Args:
        start_server: When true, build a ``java`` command line that runs
            ``edu.stanford.nlp.pipeline.StanfordCoreNLPServer`` from the
            jars under ``$CORENLP_HOME``. When false, no start command is
            configured.
        endpoint: URL of the server. When ``start_server`` is true it must
            have the form ``scheme://localhost:PORT``.
        timeout: Value passed to the server's ``-timeout`` flag.
        threads: Value passed to the server's ``-threads`` flag.
        annotators: Default annotators; falls back to a copy of
            ``DEFAULT_ANNOTATORS`` when falsy.
        properties: Default properties; falls back to a copy of
            ``DEFAULT_PROPERTIES`` when falsy.
        output_format: Default output format; falls back to
            ``DEFAULT_OUTPUT_FORMAT`` when falsy.
        stdout: Forwarded to the base-class constructor.
        stderr: Forwarded to the base-class constructor.
        memory: Value appended to the JVM ``-Xmx`` flag (e.g. ``"4G"``).
        be_quiet: Forwarded to the base-class constructor.

    Raises:
        AssertionError: If ``start_server`` is true and the endpoint host is
            not ``localhost`` or ``$CORENLP_HOME`` is not set.
        ValueError: If ``start_server`` is true and the endpoint's network
            location is not exactly ``host:port``.
    """
    if start_server:
        host, port = urlparse(endpoint).netloc.split(":")
        assert host == "localhost", "If starting a server, endpoint must be localhost"

        corenlp_home = os.getenv("CORENLP_HOME")
        assert corenlp_home is not None, "Please define $CORENLP_HOME where your CoreNLP Java checkout is"
        # The command string is later tokenised with shlex.split, so quote
        # the classpath properly instead of wrapping it in literal single
        # quotes, which breaks on paths that contain a quote character.
        start_cmd = "java -Xmx{memory} -cp {classpath} edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port {port} -timeout {timeout} -threads {threads}".format(
            classpath=shlex.quote(corenlp_home + "/*"),
            port=port,
            memory=memory,
            timeout=timeout,
            threads=threads)
        stop_cmd = None
    else:
        start_cmd = stop_cmd = None

    super(CoreNLPClient, self).__init__(start_cmd, stop_cmd, endpoint,
                                        stdout, stderr, be_quiet)
    # Copy the class-level defaults so that mutating one client's defaults
    # cannot leak into the class attributes shared by every other client.
    self.default_annotators = annotators or list(self.DEFAULT_ANNOTATORS)
    self.default_properties = properties or dict(self.DEFAULT_PROPERTIES)
    self.default_output_format = output_format or self.DEFAULT_OUTPUT_FORMAT
153
161
154
162
def _request (self , buf , properties ):
155
163
"""Send a request to the CoreNLP server.
@@ -195,7 +203,7 @@ def annotate(self, text, annotators=None, output_format=None, properties=None):
195
203
properties .update ({
196
204
'annotators' : ',' .join (annotators or self .default_annotators ),
197
205
'inputFormat' : 'text' ,
198
- 'outputFormat' : 'serialized' ,
206
+ 'outputFormat' : self . default_output_format ,
199
207
'serializer' : 'edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer'
200
208
})
201
209
elif "annotators" not in properties :
0 commit comments