@@ -142,14 +142,31 @@ def query_entities(self, queries, language=None, normalized_label_languages=['en
142142 # Extraction / tagging of labels in full text by Solr Text Tagger https://lucene.apache.org/solr/guide/7_4/the-tagger-handler.html
143143 #
144144
145- def dictionary_matcher (self , text , language = None , normalized_label_languages = ['en' ], limit = 10000 , tagger = 'all_labels_ss_tag' , normalized_entities = {}):
145+ def dictionary_matcher (self , text , language = None , normalized_label_languages = ['en' ], limit = 10000 , tagger = 'all_labels_ss_tag' , normalized_entities = {}, additional_result_fields = {} ):
146146
147- url = self .solr + self .solr_core + '/' + tagger + '?matchText=true&overlaps=NO_SUB&fl=id,type_ss,preferred_label_s,skos_prefLabel_ss,label_ss,skos_altLabel_ss&wt=json'
147+ url = self .solr + self .solr_core + '/' + tagger
148+ 149+ fields = [ 'id' ,
150+ 'type_ss' ,
151+ 'preferred_label_s' ,
152+ 'skos_prefLabel_ss' ,
153+ 'label_ss' ,
154+ 'skos_altLabel_ss' ,
155+ ]
156+ 157+ if additional_result_fields :
158+ fields .extend (additional_result_fields )
159+ 160+ params = { 'wt' : 'json' ,
161+ 'matchText' : 'true' ,
162+ 'overlaps' : 'NO_SUB' ,
163+ 'fl' : ',' .join (fields ),
164+ }
148165
149166 if limit :
150- url += '& tagsLimit=' + str (limit )
167+ params [ ' tagsLimit' ] = str (limit )
151168
152- r = requests .post (url , data = text .encode ('utf-8' ))
169+ r = requests .post (url , data = text .encode ('utf-8' ), params = params )
153170
154171 if self .verbose :
155172 print ("Entity linking / Solr Text Tagger result for tagger {}: {}" .format (tagger , r .text ))
@@ -190,7 +207,12 @@ def dictionary_matcher(self, text, language=None, normalized_label_languages=['e
190207 'type' : types ,
191208 }
192209
193- normalized_entities [entity ['id' ]] = {}
210+ if additional_result_fields :
211+ for field in additional_result_fields :
212+ if field in entity :
213+ result [field ] = entity [field ]
214+ 215+ normalized_entities [entity ['id' ]] = {}
194216 normalized_entities [entity ['id' ]]['result' ] = [result ]
195217
196218 return normalized_entities
@@ -199,7 +221,7 @@ def dictionary_matcher(self, text, language=None, normalized_label_languages=['e
199221 # get entities
200222 #
201223
202- def entities (self , queries = None , language = None , normalized_label_languages = ['en' ], text = None , limit = 10000 , taggers = ['all_labels_ss_tag' ]):
224+ def entities (self , queries = None , language = None , normalized_label_languages = ['en' ], text = None , limit = 10000 , taggers = ['all_labels_ss_tag' ], additional_result_fields = {} ):
203225
204226
205227 # if no entity queries are given, match entities against the dictionary of labels from thesauri, ontologies, databases and lists
@@ -212,7 +234,7 @@ def entities(self, queries=None, language=None, normalized_label_languages=['en'
212234 normalized_entities = {}
213235 for tagger in taggers :
214236 try :
215- normalized_entities = self .dictionary_matcher (text = text , language = language , normalized_label_languages = normalized_label_languages , limit = limit , normalized_entities = normalized_entities , tagger = tagger )
237+ normalized_entities = self .dictionary_matcher (text = text , language = language , normalized_label_languages = normalized_label_languages , limit = limit , normalized_entities = normalized_entities , tagger = tagger , additional_result_fields = additional_result_fields )
216238 except BaseException as e :
217239 sys .stderr .write ( "Exception using Solr Text Tagger {}: {}\n " .format (tagger , e ) )
218240
0 commit comments