Skip to content

Commit 96d8b71

Browse files
committed
NearestNeighborsAnalyzer
1 parent 0c0a746 commit 96d8b71

File tree

5 files changed

+162
-1
lines changed

5 files changed

+162
-1
lines changed

src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,6 @@ public enum AnalyzerType {
3737
geopoint,
3838
segmentation,
3939
collation,
40-
classification
40+
classification,
41+
nearest_neighbors
4142
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import com.arangodb.entity.arangosearch.AnalyzerType;
25+
26+
import java.util.Objects;
27+
28+
/**
29+
* An Analyzer capable of finding nearest neighbors of tokens in the input. It applies a user-provided supervised
30+
* fastText word embedding model to retrieve nearest neighbor tokens in the text. It is able to find neighbors of
31+
* individual tokens as well as entire input strings. For entire input strings, the Analyzer will return nearest
32+
* neighbors for each token within the input string.
33+
*
34+
* @author Michele Rastelli
35+
* @see <a href= "https://www.arangodb.com/docs/stable/analyzers.html#nearest_neighbors">API Documentation</a>
36+
* @since ArangoDB 3.10
37+
*/
38+
public class NearestNeighborsAnalyzer extends SearchAnalyzer {
39+
public NearestNeighborsAnalyzer() {
40+
setType(AnalyzerType.nearest_neighbors);
41+
}
42+
43+
private NearestNeighborsAnalyzerProperties properties;
44+
45+
public NearestNeighborsAnalyzerProperties getProperties() {
46+
return properties;
47+
}
48+
49+
public void setProperties(NearestNeighborsAnalyzerProperties properties) {
50+
this.properties = properties;
51+
}
52+
53+
@Override
54+
public boolean equals(Object o) {
55+
if (this == o) return true;
56+
if (o == null || getClass() != o.getClass()) return false;
57+
if (!super.equals(o)) return false;
58+
NearestNeighborsAnalyzer that = (NearestNeighborsAnalyzer) o;
59+
return Objects.equals(properties, that.properties);
60+
}
61+
62+
@Override
63+
public int hashCode() {
64+
return Objects.hash(super.hashCode(), properties);
65+
}
66+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import com.arangodb.velocypack.annotations.SerializedName;
25+
26+
import java.util.Objects;
27+
28+
/**
29+
* @author Michele Rastelli
30+
* @since ArangoDB 3.10
31+
*/
32+
public class NearestNeighborsAnalyzerProperties {
33+
34+
@SerializedName("model_location")
35+
private String modelLocation;
36+
37+
@SerializedName("top_k")
38+
private Integer topK;
39+
40+
41+
public String getModelLocation() {
42+
return modelLocation;
43+
}
44+
45+
public void setModelLocation(String modelLocation) {
46+
this.modelLocation = modelLocation;
47+
}
48+
49+
public Integer getTopK() {
50+
return topK;
51+
}
52+
53+
public void setTopK(Integer topK) {
54+
this.topK = topK;
55+
}
56+
57+
@Override
58+
public boolean equals(Object o) {
59+
if (this == o) return true;
60+
if (o == null || getClass() != o.getClass()) return false;
61+
NearestNeighborsAnalyzerProperties that = (NearestNeighborsAnalyzerProperties) o;
62+
return Objects.equals(modelLocation, that.modelLocation) && Objects.equals(topK, that.topK);
63+
}
64+
65+
@Override
66+
public int hashCode() {
67+
return Objects.hash(modelLocation, topK);
68+
}
69+
}

src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ public class VPackDeserializers {
104104
return context.deserialize(vpack, CollationAnalyzer.class);
105105
case classification:
106106
return context.deserialize(vpack, ClassificationAnalyzer.class);
107+
case nearest_neighbors:
108+
return context.deserialize(vpack, NearestNeighborsAnalyzer.class);
107109
default:
108110
throw new IllegalArgumentException("Unknown analyzer type: " + type);
109111
}

src/test/java/com/arangodb/ArangoSearchTest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,29 @@ void classificationAnalyzer(ArangoDatabase db) {
10111011
createGetAndDeleteTypedAnalyzer(db, analyzer);
10121012
}
10131013

1014+
@ParameterizedTest(name = "{index}")
1015+
@MethodSource("dbs")
1016+
void nearestNeighborsAnalyzer(ArangoDatabase db) {
1017+
assumeTrue(isAtLeastVersion(3, 10));
1018+
assumeTrue(isEnterprise());
1019+
1020+
NearestNeighborsAnalyzerProperties properties = new NearestNeighborsAnalyzerProperties();
1021+
properties.setModelLocation("/tmp/foo.bin");
1022+
properties.setTopK(2);
1023+
1024+
Set<AnalyzerFeature> features = new HashSet<>();
1025+
features.add(AnalyzerFeature.frequency);
1026+
features.add(AnalyzerFeature.norm);
1027+
features.add(AnalyzerFeature.position);
1028+
1029+
NearestNeighborsAnalyzer analyzer = new NearestNeighborsAnalyzer();
1030+
analyzer.setName("test-" + UUID.randomUUID());
1031+
analyzer.setProperties(properties);
1032+
analyzer.setFeatures(features);
1033+
1034+
createGetAndDeleteTypedAnalyzer(db, analyzer);
1035+
}
1036+
10141037
@ParameterizedTest(name = "{index}")
10151038
@MethodSource("dbs")
10161039
void offsetFeature(ArangoDatabase db) {

0 commit comments

Comments
 (0)