elastic · mayya-sharipova · Nov 1, 2023 · Aug 22, 2023 · Aug 27, 2023 · Aug 27, 2023
diff --git a/docs/changelog/98916.yaml b/docs/changelog/98916.yaml
@@ -0,0 +1,5 @@
+pr: 98916
+summary: Make knn search a query
+area: Vector Search
+type: feature
+issues: []
diff --git a/docs/reference/query-dsl/knn-query.asciidoc b/docs/reference/query-dsl/knn-query.asciidoc
@@ -0,0 +1,223 @@
+[[query-dsl-knn-query]]
+=== Knn query
+++++
+<titleabbrev>Knn</titleabbrev>
+++++
+
+Finds the _k_ nearest vectors to a query vector, as measured by a similarity
+metric. The _knn_ query finds the nearest vectors through approximate search on
+indexed `dense_vector` fields. The preferred way to do approximate kNN search is
+through the <<knn-search,top level knn section>> of a search request. The _knn_ query is
+reserved for expert cases where it must be combined with other queries.
+
+[[knn-query-ex-request]]
+==== Example request
+
+[source,console]
+----
+PUT my-image-index
+{
+  "mappings": {
+    "properties": {
+       "image-vector": {
+        "type": "dense_vector",
+        "dims": 3,
+        "index": true,
+        "similarity": "l2_norm"
+      },
+      "file-type": {
+        "type": "keyword"
+      }
+    }
+  }
+}
+----
+
+
+. Index your data.
++
+[source,console]
+----
+POST my-image-index/_bulk?refresh=true
+{ "index": { "_id": "1" } }
+{ "image-vector": [1, 5, -20], "file-type": "jpg" }
+{ "index": { "_id": "2" } }
+{ "image-vector": [42, 8, -15], "file-type": "png" }
+{ "index": { "_id": "3" } }
+{ "image-vector": [15, 11, 23], "file-type": "jpg" }
+----
+//TEST[continued]
+
+. Run the search using the `knn` query, asking for the top 3 nearest vectors.
++
+[source,console]
+----
+POST my-image-index/_search
+{
+  "size" : 3,
+  "query" : {
+    "knn": {
+      "field": "image-vector",
+      "query_vector": [-5, 9, -12],
+      "num_candidates": 10
+    }
+  }
+}
+----
+//TEST[continued]
+
+NOTE: The `knn` query doesn't have a separate `k` parameter. As with other queries,
+`k` is defined by the `size` parameter of the search request. The `knn` query
+collects `num_candidates` results from each shard, then merges them to get
+the top `size` results.
+
+
+[[knn-query-top-level-parameters]]
+==== Top-level parameters for `knn`
+
+`field`::
++
+--
+(Required, string) The name of the vector field to search against. Must be a
+<<index-vectors-knn-search, `dense_vector` field with indexing enabled>>.
+--
+
+`query_vector`::
++
+--
+(Required, array of floats) Query vector. Must have the same number of dimensions
+as the vector field you are searching against.
+--
+
+`num_candidates`::
++
+--
+(Required, integer) The number of nearest neighbor candidates to consider per shard.
+Cannot exceed 10,000. {es} collects `num_candidates` results from each shard, then
+merges them to find the top results. Increasing `num_candidates` tends to improve the
+accuracy of the final results.
+--
+
+`filter`::
++
+--
+(Optional, query object) Query to filter the documents that can match.
+The kNN search will return the top documents that also match this filter.
+The value can be a single query or a list of queries. If `filter` is not provided,
+all documents are allowed to match.
+
+The filter is a pre-filter, meaning that it is applied **during** the approximate
+kNN search to ensure that `num_candidates` matching documents are returned.
+--
+
+`similarity`::
++
+--
+(Optional, float) The minimum similarity required for a document to be considered
+a match. The similarity value calculated relates to the raw
+<<dense-vector-similarity, `similarity`>> used, not the document score. The matched
+documents are then scored according to <<dense-vector-similarity, `similarity`>>
+and the provided `boost` is applied.
+--
+
+`boost`::
++
+--
+(Optional, float) Floating point number used to multiply the
+scores of matched documents. This value cannot be negative. Defaults to `1.0`.
+--
+
+`_name`::
++
+--
+(Optional, string) Name field to identify the query.
+--
+
+[[knn-query-filtering]]
+==== Pre-filters and post-filters in knn query
+
+There are two ways to filter documents that match a kNN query:
+
+. **pre-filtering** – the filter is applied during the approximate kNN search
+to ensure that `k` matching documents are returned.
+. **post-filtering** – the filter is applied after the approximate kNN search
+completes, which can result in fewer than `k` results, even when there are enough
+matching documents.
+
+Pre-filtering is supported through the `filter` parameter of the `knn` query.
+Also filters from <<filter-alias,aliases>> are applied as pre-filters.
+
+All other filters found in the Query DSL tree are applied as post-filters.
+For example, the `knn` query below finds the 3 documents with the nearest vectors
+(`num_candidates=3`) and combines them with a `term` filter that is applied as a
+post-filter. The final set of documents contains only the single document
+that passes the post-filter.
+
+
+[source,console]
+----
+POST my-image-index/_search
+{
+  "size" : 10,
+  "query" : {
+    "bool" : {
+      "must" : {
+        "knn": {
+          "field": "image-vector",
+          "query_vector": [-5, 9, -12],
+          "num_candidates": 3
+        }
+      },
+      "filter" : {
+        "term" : { "file-type" : "png" }
+      }
+    }
+  }
+}
+----
+//TEST[continued]
+
+[[knn-query-with-nested-query]]
+==== Knn query inside a nested query
+
+`knn` query can be used inside a nested query. The behaviour here is similar
+to <<nested-knn-search, top level nested kNN search>>:
+
+* kNN search over nested dense_vectors diversifies the top results over
+the top-level document
+* `filter` over the top-level document metadata is supported and acts as a
+post-filter
+* `filter` over `nested` field metadata is not supported
+
+A sample query can look like below:
+
+[source,js]
+----
+{
+  "query" : {
+    "nested" : {
+      "path" : "paragraph",
+      "query" : {
+        "knn": {
+          "query_vector": [
+            0.45,
+            45
+          ],
+          "field": "paragraph.vector",
+          "num_candidates": 2
+        }
+      }
+    }
+  }
+}
+----
+// NOTCONSOLE
+
+[[knn-query-aggregations]]
+==== Knn query with aggregations
+`knn` query calculates aggregations on `num_candidates` from each shard.
+Thus, the final results from aggregations contain
+`num_candidates * number_of_shards` documents. This is different from
+the <<knn-search,top level knn section>> where aggregations are
+calculated on the global top k nearest documents.
+
diff --git a/docs/reference/query-dsl/special-queries.asciidoc b/docs/reference/query-dsl/special-queries.asciidoc
@@ -17,6 +17,10 @@ or collection of documents.
 This query finds queries that are stored as documents that match with
 the specified document.
 
+<<query-dsl-knn-query,`knn` query>>::
+A query that finds the _k_ nearest vectors to a query
+vector, as measured by a similarity metric.
+
 <<query-dsl-rank-feature-query,`rank_feature` query>>::
 A query that computes scores based on the values of numeric features and is
 able to efficiently skip non-competitive hits.
@@ -43,6 +47,8 @@ include::mlt-query.asciidoc[]
 
 include::percolate-query.asciidoc[]
 
+include::knn-query.asciidoc[]
+
 include::rank-feature-query.asciidoc[]
 
 include::script-query.asciidoc[]

diff --git a/docs/reference/search/search-your-data/knn-search.asciidoc b/docs/reference/search/search-your-data/knn-search.asciidoc
@@ -43,7 +43,7 @@ based on a similarity metric, the better its match.
 {es} supports two methods for kNN search:
 
 * <<approximate-knn,Approximate kNN>> using the `knn` search
-option
+option or `knn` query
 
 * <<exact-knn,Exact, brute-force kNN>> using a `script_score` query with a
 vector function
@@ -129,7 +129,8 @@ POST image-index/_bulk?refresh=true
 //TEST[continued]
 //TEST[s/\.\.\.//]
 
-. Run the search using the <<search-api-knn, `knn` option>>.
+. Run the search using the <<search-api-knn, `knn` option>> or the
+<<query-dsl-knn-query,`knn` query>> (expert case).
 +
 [source,console]
 ----

diff --git a/...or/src/internalClusterTest/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java b/...or/src/internalClusterTest/java/org/elasticsearch/percolator/PercolatorQuerySearchIT.java
@@ -9,6 +9,7 @@
 
 import org.apache.lucene.search.join.ScoreMode;
 import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.MultiSearchResponse;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.action.support.WriteRequest;
@@ -22,10 +23,12 @@
 import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
 import org.elasticsearch.index.query.MultiMatchQueryBuilder;
 import org.elasticsearch.index.query.Operator;
+import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
 import org.elasticsearch.search.sort.SortOrder;
+import org.elasticsearch.search.vectors.KnnVectorQueryBuilder;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentFactory;
@@ -1295,4 +1298,34 @@ public void testWithWildcardFieldNames() throws Exception {
         ).get();
         assertEquals(1, response.getHits().getTotalHits().value);
     }
+
+    /** Verifies that indexing a {@code knn} query into a percolator field is rejected. */
+    public void testKnnQueryNotSupportedInPercolator() throws IOException {
+        String mappings = """
+            {
+              "properties": {
+                "my_query" : {
+                  "type" : "percolator"
+                },
+                "my_vector" : {
+                  "type" : "dense_vector",
+                  "dims" : 5,
+                  "index" : true,
+                  "similarity" : "l2_norm"
+                }
+
+              }
+            }
+            """;
+        indicesAdmin().prepareCreate("index1").setMapping(mappings).get();
+        ensureGreen();
+        QueryBuilder knnVectorQueryBuilder = new KnnVectorQueryBuilder("my_vector", new float[] { 1, 1, 1, 1, 1 }, 10, null);
+        IndexRequestBuilder indexRequestBuilder = client().prepareIndex("index1")
+            .setId("knn_query1")
+            .setSource(jsonBuilder().startObject().field("my_query", knnVectorQueryBuilder).endObject());
+        // Indexing the document parses the stored query, which is where the knn query is expected to be refused.
+        DocumentParsingException exception = expectThrows(DocumentParsingException.class, () -> indexRequestBuilder.get());
+        assertThat(exception.getMessage(), containsString("the [knn] query is unsupported inside a percolator query"));
+    }
+
 }
diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java
@@ -61,6 +61,7 @@
 import org.elasticsearch.index.query.QueryShardException;
 import org.elasticsearch.index.query.Rewriteable;
 import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.search.vectors.KnnVectorQueryBuilder;
 import org.elasticsearch.xcontent.XContentParser;
 
 import java.io.ByteArrayOutputStream;
@@ -438,6 +439,8 @@ static QueryBuilder parseQueryBuilder(DocumentParserContext context) {
                     throw new IllegalArgumentException("the [has_child] query is unsupported inside a percolator query");
                 } else if (queryName.equals("has_parent")) {
                     throw new IllegalArgumentException("the [has_parent] query is unsupported inside a percolator query");
+                } else if (queryName.equals(KnnVectorQueryBuilder.NAME)) {
+                    throw new IllegalArgumentException("the [knn] query is unsupported inside a percolator query");
                 }
             });
         } catch (IOException e) {