Skip to content

Commit 10ec28e

Browse files
committed
[DE-385] 3.10 analyzers (#458)
* ClassificationAnalyzer * added test ML model to docker containers * NearestNeighborsAnalyzer * MinHashAnalyzer * test fixes (cherry picked from commit 1900820)
1 parent 87566e2 commit 10ec28e

12 files changed

+510
-3
lines changed

docker/foo.bin

5.41 KB
Binary file not shown.

docker/start_db.sh

+7
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,13 @@ for a in ${COORDINATORS[*]} ; do
101101
curl -u root:test --insecure --fail "$SCHEME://$a/_api/version"
102102
done
103103

104+
echo ""
echo ""
echo "Copying test ML models into containers..."
# The name filter is quoted so the shell passes it to docker verbatim instead
# of attempting filename expansion on the "adb-.*" pattern.
for c in $(docker ps -a -q -f "name=adb-.*") ; do
    # Copy the test model into /tmp of each matching container.
    docker cp "$LOCATION/foo.bin" "$c:/tmp"
done
110+
104111
echo ""
105112
echo ""
106113
echo "Done, your deployment is reachable at: "

src/main/java/com/arangodb/entity/InvertedIndexField.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public class InvertedIndexField {
1818
private Boolean searchField;
1919
private Boolean trackListPositions;
2020
private final Set<AnalyzerFeature> features = new HashSet<>();
21-
private final Collection<InvertedIndexField> nested = new ArrayList<>();
21+
private Collection<InvertedIndexField> nested;
2222

2323
public String getName() {
2424
return name;
@@ -79,6 +79,7 @@ public Collection<InvertedIndexField> getNested() {
7979
}
8080

8181
public InvertedIndexField nested(InvertedIndexField... nested) {
82+
if(this.nested == null) this.nested = new ArrayList<>();
8283
Collections.addAll(this.nested, nested);
8384
return this;
8485
}

src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java

+16-1
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,20 @@
2424
* @author Michele Rastelli
2525
*/
2626
public enum AnalyzerType {
27-
identity, delimiter, stem, norm, ngram, text, pipeline, stopwords, aql, geojson, geopoint, segmentation, collation
27+
identity,
28+
delimiter,
29+
stem,
30+
norm,
31+
ngram,
32+
text,
33+
pipeline,
34+
stopwords,
35+
aql,
36+
geojson,
37+
geopoint,
38+
segmentation,
39+
collation,
40+
classification,
41+
nearest_neighbors,
42+
minhash
2843
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import com.arangodb.entity.arangosearch.AnalyzerType;
25+
26+
import java.util.Objects;
27+
28+
/**
29+
* An Analyzer capable of classifying tokens in the input text. It applies a user-provided supervised fastText word
30+
* embedding model to classify the input text. It is able to classify individual tokens as well as entire inputs.
31+
*
32+
* @author Michele Rastelli
33+
* @see <a href= "https://www.arangodb.com/docs/stable/analyzers.html#classification">API Documentation</a>
34+
* @since ArangoDB 3.10
35+
*/
36+
public class ClassificationAnalyzer extends SearchAnalyzer {
37+
public ClassificationAnalyzer() {
38+
setType(AnalyzerType.classification);
39+
}
40+
41+
private ClassificationAnalyzerProperties properties;
42+
43+
public ClassificationAnalyzerProperties getProperties() {
44+
return properties;
45+
}
46+
47+
public void setProperties(ClassificationAnalyzerProperties properties) {
48+
this.properties = properties;
49+
}
50+
51+
@Override
52+
public boolean equals(Object o) {
53+
if (this == o) return true;
54+
if (o == null || getClass() != o.getClass()) return false;
55+
if (!super.equals(o)) return false;
56+
ClassificationAnalyzer that = (ClassificationAnalyzer) o;
57+
return Objects.equals(properties, that.properties);
58+
}
59+
60+
@Override
61+
public int hashCode() {
62+
return Objects.hash(super.hashCode(), properties);
63+
}
64+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import com.fasterxml.jackson.annotation.JsonProperty;
25+
26+
import java.util.Objects;
27+
28+
/**
29+
* @author Michele Rastelli
30+
* @since ArangoDB 3.10
31+
*/
32+
public class ClassificationAnalyzerProperties {
33+
34+
@JsonProperty("model_location")
35+
private String modelLocation;
36+
37+
@JsonProperty("top_k")
38+
private Integer topK;
39+
40+
private Double threshold;
41+
42+
public String getModelLocation() {
43+
return modelLocation;
44+
}
45+
46+
public void setModelLocation(String modelLocation) {
47+
this.modelLocation = modelLocation;
48+
}
49+
50+
public Integer getTopK() {
51+
return topK;
52+
}
53+
54+
public void setTopK(Integer topK) {
55+
this.topK = topK;
56+
}
57+
58+
public Double getThreshold() {
59+
return threshold;
60+
}
61+
62+
public void setThreshold(Double threshold) {
63+
this.threshold = threshold;
64+
}
65+
66+
@Override
67+
public boolean equals(Object o) {
68+
if (this == o) return true;
69+
if (o == null || getClass() != o.getClass()) return false;
70+
ClassificationAnalyzerProperties that = (ClassificationAnalyzerProperties) o;
71+
return Objects.equals(modelLocation, that.modelLocation) && Objects.equals(topK, that.topK) && Objects.equals(threshold, that.threshold);
72+
}
73+
74+
@Override
75+
public int hashCode() {
76+
return Objects.hash(modelLocation, topK, threshold);
77+
}
78+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import com.arangodb.entity.arangosearch.AnalyzerType;
25+
26+
import java.util.Objects;
27+
28+
/**
29+
* An Analyzer that computes so called MinHash signatures using a locality-sensitive hash function. It applies an
30+
* Analyzer of your choice before the hashing, for example, to break up text into words.
31+
*
32+
* @author Michele Rastelli
33+
* @see <a href= "https://www.arangodb.com/docs/stable/analyzers.html#minhash">API Documentation</a>
34+
* @since ArangoDB 3.10
35+
*/
36+
public class MinHashAnalyzer extends SearchAnalyzer {
37+
public MinHashAnalyzer() {
38+
setType(AnalyzerType.minhash);
39+
}
40+
41+
private MinHashAnalyzerProperties properties;
42+
43+
public MinHashAnalyzerProperties getProperties() {
44+
return properties;
45+
}
46+
47+
public void setProperties(MinHashAnalyzerProperties properties) {
48+
this.properties = properties;
49+
}
50+
51+
@Override
52+
public boolean equals(Object o) {
53+
if (this == o) return true;
54+
if (o == null || getClass() != o.getClass()) return false;
55+
if (!super.equals(o)) return false;
56+
MinHashAnalyzer that = (MinHashAnalyzer) o;
57+
return Objects.equals(properties, that.properties);
58+
}
59+
60+
@Override
61+
public int hashCode() {
62+
return Objects.hash(super.hashCode(), properties);
63+
}
64+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import java.util.Objects;
25+
26+
/**
27+
* @author Michele Rastelli
28+
* @since ArangoDB 3.10
29+
*/
30+
public class MinHashAnalyzerProperties {
31+
32+
private SearchAnalyzer analyzer;
33+
private Integer numHashes;
34+
35+
public SearchAnalyzer getAnalyzer() {
36+
return analyzer;
37+
}
38+
39+
public void setAnalyzer(SearchAnalyzer analyzer) {
40+
this.analyzer = analyzer;
41+
}
42+
43+
public Integer getNumHashes() {
44+
return numHashes;
45+
}
46+
47+
public void setNumHashes(Integer numHashes) {
48+
this.numHashes = numHashes;
49+
}
50+
51+
@Override
52+
public boolean equals(Object o) {
53+
if (this == o) return true;
54+
if (o == null || getClass() != o.getClass()) return false;
55+
MinHashAnalyzerProperties that = (MinHashAnalyzerProperties) o;
56+
return Objects.equals(analyzer, that.analyzer) && Objects.equals(numHashes, that.numHashes);
57+
}
58+
59+
@Override
60+
public int hashCode() {
61+
return Objects.hash(analyzer, numHashes);
62+
}
63+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import com.arangodb.entity.arangosearch.AnalyzerType;
25+
26+
import java.util.Objects;
27+
28+
/**
29+
* An Analyzer capable of finding nearest neighbors of tokens in the input. It applies a user-provided supervised
30+
* fastText word embedding model to retrieve nearest neighbor tokens in the text. It is able to find neighbors of
31+
* individual tokens as well as entire input strings. For entire input strings, the Analyzer will return nearest
32+
* neighbors for each token within the input string.
33+
*
34+
* @author Michele Rastelli
35+
* @see <a href= "https://www.arangodb.com/docs/stable/analyzers.html#nearest_neighbors">API Documentation</a>
36+
* @since ArangoDB 3.10
37+
*/
38+
public class NearestNeighborsAnalyzer extends SearchAnalyzer {
39+
public NearestNeighborsAnalyzer() {
40+
setType(AnalyzerType.nearest_neighbors);
41+
}
42+
43+
private NearestNeighborsAnalyzerProperties properties;
44+
45+
public NearestNeighborsAnalyzerProperties getProperties() {
46+
return properties;
47+
}
48+
49+
public void setProperties(NearestNeighborsAnalyzerProperties properties) {
50+
this.properties = properties;
51+
}
52+
53+
@Override
54+
public boolean equals(Object o) {
55+
if (this == o) return true;
56+
if (o == null || getClass() != o.getClass()) return false;
57+
if (!super.equals(o)) return false;
58+
NearestNeighborsAnalyzer that = (NearestNeighborsAnalyzer) o;
59+
return Objects.equals(properties, that.properties);
60+
}
61+
62+
@Override
63+
public int hashCode() {
64+
return Objects.hash(super.hashCode(), properties);
65+
}
66+
}

0 commit comments

Comments
 (0)