Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ae0b05d

Browse files
committed
Optional adding and reading of additional metadata / properties / fields
1 parent 9778b0a commit ae0b05d

File tree

2 files changed

+35
-8
lines changed

2 files changed

+35
-8
lines changed

‎src/entity_linking/entity_linker.py‎

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,14 +142,31 @@ def query_entities(self, queries, language=None, normalized_label_languages=['en
142142
# Extraction / tagging of labels in full text by Solr Text Tagger https://lucene.apache.org/solr/guide/7_4/the-tagger-handler.html
143143
#
144144

145-
def dictionary_matcher(self, text, language=None, normalized_label_languages=['en'], limit=10000, tagger='all_labels_ss_tag', normalized_entities = {}):
145+
def dictionary_matcher(self, text, language=None, normalized_label_languages=['en'], limit=10000, tagger='all_labels_ss_tag', normalized_entities = {}, additional_result_fields={}):
146146

147-
url = self.solr + self.solr_core + '/' + tagger +'?matchText=true&overlaps=NO_SUB&fl=id,type_ss,preferred_label_s,skos_prefLabel_ss,label_ss,skos_altLabel_ss&wt=json'
147+
url = self.solr + self.solr_core + '/' + tagger
148+
149+
fields = [ 'id',
150+
'type_ss',
151+
'preferred_label_s',
152+
'skos_prefLabel_ss',
153+
'label_ss',
154+
'skos_altLabel_ss',
155+
]
156+
157+
if additional_result_fields:
158+
fields.extend(additional_result_fields)
159+
160+
params = { 'wt': 'json',
161+
'matchText': 'true',
162+
'overlaps': 'NO_SUB',
163+
'fl': ','.join(fields),
164+
}
148165

149166
if limit:
150-
url+='&tagsLimit='+ str(limit)
167+
params['tagsLimit'] = str(limit)
151168

152-
r = requests.post(url, data=text.encode('utf-8'))
169+
r = requests.post(url, data=text.encode('utf-8'), params=params)
153170

154171
if self.verbose:
155172
print ("Entity linking / Solr Text Tagger result for tagger {}: {}".format(tagger, r.text))
@@ -190,7 +207,12 @@ def dictionary_matcher(self, text, language=None, normalized_label_languages=['e
190207
'type': types,
191208
}
192209

193-
normalized_entities[entity['id']] = {}
210+
if additional_result_fields:
211+
for field in additional_result_fields:
212+
if field in entity:
213+
result[field] = entity[field]
214+
215+
normalized_entities[entity['id']] = {}
194216
normalized_entities[entity['id']]['result'] = [result]
195217

196218
return normalized_entities
@@ -199,7 +221,7 @@ def dictionary_matcher(self, text, language=None, normalized_label_languages=['e
199221
# get entities
200222
#
201223

202-
def entities(self, queries=None, language=None, normalized_label_languages=['en'], text = None, limit=10000, taggers=['all_labels_ss_tag']):
224+
def entities(self, queries=None, language=None, normalized_label_languages=['en'], text = None, limit=10000, taggers=['all_labels_ss_tag'], additional_result_fields={}):
203225

204226

205227
# if no entity queries are given, match entities from the dictionary of labels from thesauri, ontologies, databases and lists
@@ -212,7 +234,7 @@ def entities(self, queries=None, language=None, normalized_label_languages=['en'
212234
normalized_entities = {}
213235
for tagger in taggers:
214236
try:
215-
normalized_entities = self.dictionary_matcher(text=text, language=language, normalized_label_languages=normalized_label_languages, limit=limit, normalized_entities=normalized_entities, tagger=tagger)
237+
normalized_entities = self.dictionary_matcher(text=text, language=language, normalized_label_languages=normalized_label_languages, limit=limit, normalized_entities=normalized_entities, tagger=tagger, additional_result_fields=additional_result_fields)
216238
except BaseException as e:
217239
sys.stderr.write( "Exception using Solr Text Tagger {}: {}\n".format(tagger, e) )
218240

‎src/entity_manager/manager.py‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class Entity_Manager(object):
2222
connector.verbose = verbose
2323

2424

25-
def add(self, id, preferred_label=None, prefLabels=[], labels=[], types=[]):
25+
def add(self, id, preferred_label=None, prefLabels=[], labels=[], types=[], fields={}):
2626

2727
# all labels
2828
dictionary_labels = []
@@ -59,6 +59,11 @@ def add(self, id, preferred_label=None, prefLabels=[], labels=[], types=[]):
5959
data['label_txt'] = data['label_ss']
6060

6161
data['all_labels_ss'] = dictionary_labels
62+
63+
# add additional fields, if any
64+
if fields:
65+
for field in fields:
66+
data[field] = fields[field]
6267

6368
# post to Solr index of entities for Normalization and Entity Linking
6469
self.connector.solr = self.solr

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /