Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 1eb2902

Browse files
authored
[DE-526] Search optimisation (#503)
* optimizeTopK in view * optimizeTopK in inverted indexes
1 parent a6867f9 commit 1eb2902

File tree

6 files changed

+80
-38
lines changed

6 files changed

+80
-38
lines changed

‎core/src/main/java/com/arangodb/entity/InvertedIndexEntity.java‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public final class InvertedIndexEntity {
4545
private Collection<InvertedIndexField> fields;
4646
private Boolean searchField;
4747
private Collection<StoredValue> storedValues;
48+
private Collection<String> optimizeTopK;
4849
private InvertedIndexPrimarySort primarySort;
4950
private String analyzer;
5051
private Set<AnalyzerFeature> features;
@@ -104,6 +105,10 @@ public Collection<StoredValue> getStoredValues() {
104105
return storedValues;
105106
}
106107

108+
/**
* Returns the {@code optimizeTopK} field of this inverted index entity.
*
* @return the collection of strings held in {@code optimizeTopK}; the field is declared without an
* initializer, so presumably this is {@code null} when nothing populated it (TODO confirm against
* the code that fills this entity)
*/
public Collection<String> getOptimizeTopK() {
109+
return optimizeTopK;
110+
}
111+
107112
/**
* @return the value of the {@code primarySort} field of this inverted index entity
*/
public InvertedIndexPrimarySort getPrimarySort() {
108113
return primarySort;
109114
}

‎core/src/main/java/com/arangodb/entity/arangosearch/ArangoSearchPropertiesEntity.java‎

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public final class ArangoSearchPropertiesEntity extends ViewEntity {
4141
private Collection<CollectionLink> links;
4242
private ArangoSearchCompression primarySortCompression;
4343
private Collection<StoredValue> storedValues;
44+
private Collection<String> optimizeTopK;
4445
private Boolean primarySortCache;
4546
private Boolean primaryKeyCache;
4647

@@ -121,6 +122,14 @@ public Collection<StoredValue> getStoredValues() {
121122
return storedValues;
122123
}
123124

125+
/**
* Returns the {@code optimizeTopK} property of this view entity.
*
126+
* @return A collection of strings defining optimized sort expressions.
127+
* @since ArangoDB 3.11, Enterprise Edition only
128+
*/
129+
public Collection<String> getOptimizeTopK() {
130+
return optimizeTopK;
131+
}
132+
124133
/**
* @return the value of the {@code primarySortCache} field of this view entity
*/
public Boolean getPrimarySortCache() {
125134
return primarySortCache;
126135
}

‎core/src/main/java/com/arangodb/model/InvertedIndexOptions.java‎

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public final class InvertedIndexOptions extends IndexOptions<InvertedIndexOption
3838
private Integer parallelism;
3939
private InvertedIndexPrimarySort primarySort;
4040
private final Collection<StoredValue> storedValues = new ArrayList<>();
41+
private final Collection<String> optimizeTopK = new ArrayList<>();
4142
private String analyzer;
4243
private final Set<AnalyzerFeature> features = new HashSet<>();
4344
private Boolean includeAllFields;
@@ -112,6 +113,20 @@ public InvertedIndexOptions storedValues(StoredValue... storedValues) {
112113
return this;
113114
}
114115

116+
/**
* @return the sort expressions collected so far through {@link #optimizeTopK(String...)}; this is the
* backing list of these options itself, not a copy
*/
public Collection<String> getOptimizeTopK() {
117+
return optimizeTopK;
118+
}
119+
120+
/**
* Appends the given sort expressions to the ones already held by these options. Calling this method
* repeatedly accumulates expressions; it does not replace earlier ones.
*
121+
* @param optimizeTopK An array of strings defining sort expressions that you want to optimize.
122+
* @return options
123+
* @since ArangoDB 3.11, Enterprise Edition only
124+
*/
125+
public InvertedIndexOptions optimizeTopK(String... optimizeTopK) {
126+
// The target list is a final field, so new entries are added to it rather than reassigning it.
Collections.addAll(this.optimizeTopK, optimizeTopK);
127+
return this;
128+
}
129+
115130
/**
* @return the value of the {@code analyzer} field of these options
*/
public String getAnalyzer() {
116131
return analyzer;
117132
}

‎core/src/main/java/com/arangodb/model/arangosearch/ArangoSearchCreateOptions.java‎

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public final class ArangoSearchCreateOptions {
4343
private Collection<PrimarySort> primarySorts;
4444
private ArangoSearchCompression primarySortCompression;
4545
private Collection<StoredValue> storedValues;
46+
private Collection<String> optimizeTopK;
4647
private Boolean primarySortCache;
4748
private Boolean primaryKeyCache;
4849

@@ -57,14 +58,11 @@ ArangoSearchCreateOptions name(final String name) {
5758
}
5859

5960
/**
60-
* @param consolidationIntervalMsec Wait at least this many milliseconds between committing index data changes
61-
* and making them visible to
62-
* queries (default: 60000, to disable use: 0). For the case where there are a
63-
* lot of inserts/updates, a
64-
* lower value, until commit, will cause the index not to account for them and
65-
* memory usage would
66-
* continue to grow. For the case where there are a few inserts/updates, a
67-
* higher value will impact
61+
* @param consolidationIntervalMsec Wait at least this many milliseconds between committing index data changes and
62+
* making them visible to queries (default: 60000, to disable use: 0). For the case
63+
* where there are a lot of inserts/updates, a lower value, until commit, will
64+
* cause the index not to account for them and memory usage would continue to grow.
65+
* For the case where there are a few inserts/updates, a higher value will impact
6866
* performance and waste disk space for each commit call without any added
6967
* benefits.
7068
* @return options
@@ -76,26 +74,19 @@ public ArangoSearchCreateOptions consolidationIntervalMsec(final Long consolidat
7674

7775
/**
7876
* @param commitIntervalMsec Wait at least this many milliseconds between committing view data store changes and
79-
* making documents visible to
80-
* queries (default: 1000, to disable use: 0). For the case where there are a lot of
81-
* inserts/updates, a lower value,
82-
* until commit, will cause the index not to account for them and memory usage would
83-
* continue to grow. For the case
84-
* where there are a few inserts/updates, a higher value will impact performance and
85-
* waste disk space for each
86-
* commit call without any added benefits. Background: For data retrieval ArangoSearch
87-
* views follow the concept of
88-
* "eventually-consistent", i.e. eventually all the data in ArangoDB will be matched by
89-
* corresponding query
90-
* expressions. The concept of ArangoSearch view "commit" operation is introduced to
91-
* control the upper-bound on the
92-
* time until document addition/removals are actually reflected by corresponding query
93-
* expressions. Once a "commit"
94-
* operation is complete all documents added/removed prior to the start of the "commit"
95-
* operation will be reflected
96-
* by queries invoked in subsequent ArangoDB transactions, in-progress ArangoDB
97-
* transactions will still continue to
98-
* return a repeatable-read state.
77+
* making documents visible to queries (default: 1000, to disable use: 0). For the case
78+
* where there are a lot of inserts/updates, a lower value, until commit, will cause the
79+
* index not to account for them and memory usage would continue to grow. For the case
80+
* where there are a few inserts/updates, a higher value will impact performance and waste
81+
* disk space for each commit call without any added benefits. Background: For data
82+
* retrieval ArangoSearch views follow the concept of "eventually-consistent", i.e.
83+
* eventually all the data in ArangoDB will be matched by corresponding query expressions.
84+
* The concept of ArangoSearch view "commit" operation is introduced to control the
85+
* upper-bound on the time until document addition/removals are actually reflected by
86+
* corresponding query expressions. Once a "commit" operation is complete all documents
87+
* added/removed prior to the start of the "commit" operation will be reflected by queries
88+
* invoked in subsequent ArangoDB transactions, in-progress ArangoDB transactions will
89+
* still continue to return a repeatable-read state.
9990
* @return options
10091
*/
10192
public ArangoSearchCreateOptions commitIntervalMsec(final Long commitIntervalMsec) {
@@ -105,14 +96,11 @@ public ArangoSearchCreateOptions commitIntervalMsec(final Long commitIntervalMse
10596

10697
/**
10798
* @param cleanupIntervalStep Wait at least this many commits between removing unused files in data directory
108-
* (default: 10, to
109-
* disable use: 0). For the case where the consolidation policies merge segments often
110-
* (i.e. a lot of
111-
* commit+consolidate), a lower value will cause a lot of disk space to be wasted. For
112-
* the case where the
113-
* consolidation policies rarely merge segments (i.e. few inserts/deletes), a higher
114-
* value will impact
115-
* performance without any added benefits.
99+
* (default: 10, to disable use: 0). For the case where the consolidation policies merge
100+
* segments often (i.e. a lot of commit+consolidate), a lower value will cause a lot of
101+
* disk space to be wasted. For the case where the consolidation policies rarely merge
102+
* segments (i.e. few inserts/deletes), a higher value will impact performance without
103+
* any added benefits.
116104
* @return options
117105
*/
118106
public ArangoSearchCreateOptions cleanupIntervalStep(final Long cleanupIntervalStep) {
@@ -164,6 +152,16 @@ public ArangoSearchCreateOptions storedValues(final StoredValue... storedValues)
164152
return this;
165153
}
166154

155+
/**
* Sets the sort expressions to optimize, replacing whatever an earlier call to this method assigned.
*
156+
* @param optimizeTopK An array of strings defining sort expressions that you want to optimize.
157+
* @return options
158+
* @since ArangoDB 3.11, Enterprise Edition only
159+
*/
160+
public ArangoSearchCreateOptions optimizeTopK(final String... optimizeTopK) {
161+
// Arrays.asList wraps the varargs array in a fixed-size list view instead of copying it.
this.optimizeTopK = Arrays.asList(optimizeTopK);
162+
return this;
163+
}
164+
167165
/**
168166
* @param primarySortCache If you enable this option, then the primary sort columns are always cached in memory.
169167
* This can improve the performance of queries that utilize the primary sort order.
@@ -231,6 +229,10 @@ public Collection<StoredValue> getStoredValues() {
231229
return storedValues;
232230
}
233231

232+
/**
* @return the list assigned by the most recent {@link #optimizeTopK(String...)} call; the field is
* declared without an initializer, so presumably this is {@code null} if that method was never
* called (TODO confirm no other code writes the field)
*/
public Collection<String> getOptimizeTopK() {
233+
return optimizeTopK;
234+
}
235+
234236
/**
* @return the value of the {@code primarySortCache} field of these options
*/
public Boolean getPrimarySortCache() {
235237
return primarySortCache;
236238
}

‎driver/src/test/java/com/arangodb/ArangoSearchTest.java‎

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import com.arangodb.entity.arangosearch.analyzer.*;
2828
import com.arangodb.model.InvertedIndexOptions;
2929
import com.arangodb.model.arangosearch.*;
30-
import com.arangodb.util.TestUtils;
3130
import org.junit.jupiter.api.BeforeAll;
3231
import org.junit.jupiter.params.ParameterizedTest;
3332
import org.junit.jupiter.params.provider.MethodSource;
@@ -424,7 +423,7 @@ private void createGetAndDeleteTypedAnalyzer(ArangoDatabase db, SearchAnalyzer a
424423
// getAnalyzers
425424
SearchAnalyzer foundAnalyzer =
426425
db.getSearchAnalyzers().stream().filter(it -> it.getName().equals(fullyQualifiedName))
427-
.findFirst().get();
426+
.findFirst().get();
428427
assertThat(foundAnalyzer).isEqualTo(analyzer);
429428

430429
// deleteAnalyzer
@@ -672,6 +671,8 @@ void arangoSearchOptions(ArangoDatabase db) {
672671
.primaryKeyCache(true);
673672
StoredValue storedValue = new StoredValue(Arrays.asList("a", "b"), ArangoSearchCompression.none, true);
674673
options.storedValues(storedValue);
674+
String[] optimizeTopK = new String[]{"BM25(@doc) DESC", "TFIDF(@doc) DESC"};
675+
options.optimizeTopK(optimizeTopK);
675676

676677
final ArangoSearch view = db.arangoSearch(viewName);
677678
view.create(options);
@@ -713,6 +714,11 @@ void arangoSearchOptions(ArangoDatabase db) {
713714
FieldLink nested = fieldLink.getNested().iterator().next();
714715
assertThat(nested.getName()).isEqualTo("f2");
715716
}
717+
718+
if (isEnterprise() && isAtLeastVersion(3, 11)) {
719+
assertThat(properties.getOptimizeTopK()).containsExactly(optimizeTopK);
720+
}
721+
716722
}
717723

718724
@ParameterizedTest(name = "{index}")

‎driver/src/test/java/com/arangodb/InvertedIndexTest.java‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ private InvertedIndexOptions createOptions(String analyzerName) {
9393
.cache(cache)
9494
)
9595
.storedValues(new StoredValue(Arrays.asList("f3", "f4"), ArangoSearchCompression.none, cache))
96+
.optimizeTopK("BM25(@doc) DESC", "TFIDF(@doc) DESC")
9697
.analyzer(analyzerName)
9798
.features(AnalyzerFeature.position, AnalyzerFeature.frequency)
9899
.includeAllFields(false)
@@ -144,6 +145,10 @@ private void assertCorrectIndexEntity(InvertedIndexEntity indexResult, InvertedI
144145
assertThat(indexResult.getWritebufferSizeMax()).isEqualTo(options.getWritebufferSizeMax());
145146
assertThat(indexResult.getCache()).isEqualTo(options.getCache());
146147
assertThat(indexResult.getPrimaryKeyCache()).isEqualTo(options.getPrimaryKeyCache());
148+
149+
if (isEnterprise() && isAtLeastVersion(3, 11)) {
150+
assertThat(indexResult.getOptimizeTopK()).containsExactlyElementsOf(options.getOptimizeTopK());
151+
}
147152
}
148153

149154
@ParameterizedTest(name = "{index}")

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /