@@ -60,43 +60,22 @@ function overlap_join(db_collection::AbstractSimStringDB, features, τ, candidat
60
60
features = sort (features, by = i -> length (lookup_feature_set_by_size_feature (db_collection, candidate_size, i) ) )
61
61
62
62
# Count the occurrences of each feature
63
- candidate_match_counts = DefaultDict (0 )
64
-
63
+ candidate_match_counts = DefaultDict {String, Int} (0 )
65
64
feature_slice_index = query_feature_length - τ + 1
65
+ idx = query_feature_length - τ
66
+ focus_features = feature_slice_index < 0 ? (@view features[0 : end + feature_slice_index]) : (@view features[0 : idx])
66
67
67
- if feature_slice_index < 0
68
- focus_features = features[1 : end + feature_slice_index]
69
- else
70
- focus_features = features[1 : feature_slice_index]
71
- end
72
-
73
- for i in focus_features
68
+ @inbounds @views for i in focus_features
74
69
for s in lookup_feature_set_by_size_feature (db_collection, candidate_size, i)
75
70
candidate_match_counts[s] += 1
76
71
end
77
72
end
78
73
79
74
results = String[]
80
75
81
- # TODO : Return results in case of a perfect match??
82
- # if τ == 1
83
- # results = collect(keys(candidate_match_counts))
84
- # end
85
-
86
76
for (candidate, match_count) in candidate_match_counts
87
-
88
- for i in (query_feature_length - τ + 1 ) : query_feature_length - 1 # TODO : Verify
89
-
90
- if i < 0
91
- feature = features[end + i]
92
- elseif i == 0
93
- feature = features[i+ 1 ]
94
- else
95
- feature = features[i]
96
-
97
- end
98
-
99
- if candidate in lookup_feature_set_by_size_feature (db_collection, candidate_size, feature)
77
+ for i in (query_feature_length - τ + 1 ) : query_feature_length # TODO : Verify
78
+ if candidate in lookup_feature_set_by_size_feature (db_collection, candidate_size, features[i])
100
79
match_count += 1
101
80
end
102
81
@@ -106,11 +85,9 @@ function overlap_join(db_collection::AbstractSimStringDB, features, τ, candidat
106
85
end
107
86
108
87
remaining_count = query_feature_length - i - 1
109
-
110
88
if (match_count + remaining_count) < τ
111
89
break
112
90
end
113
-
114
91
end
115
92
end
116
93
return results
@@ -133,7 +110,7 @@ function search!(measure::AbstractSimilarityMeasure, db_collection::DictDB, quer
133
110
results = String[]
134
111
135
112
# Generate and return results from the potential candidate size pool
136
- for candidate_size in min_feature_size: max_feature_size
113
+ @inbounds for candidate_size in min_feature_size: max_feature_size
137
114
# Minimum overlap
138
115
τ = minimum_overlap (measure, length_of_features, candidate_size, α)
139
116
0 commit comments