Skip to content

Commit 749fcdf

Browse files
[9.0] ESQL: Fix FieldAttribute name usage in InferNonNullAggConstraint (#128910) (#129267)
* ESQL: Fix FieldAttribute name usage in InferNonNullAggConstraint (#128910) * Fix InferNonNullAggConstraint with union types * Begin fixing LucenePushdownPredicates with union types * Introduce a dedicated wrapper record FieldName to be used where field names are really required. The fixes consist of using FieldAttribute.fieldName() instead of .name() or .field().name(). .name() can be some temporary string unrelated to the actual name of the Lucene index field, whereas .field().name() doesn't know about parent fields; .fieldName() gives the full field name (from the root of the document). The biggest offender of such misuse is SearchStats; make this always require a FieldName, not a String - and make FieldAttribute#fieldName handily return an instance of FieldName so users of SearchStats don't accidentally use the return value of FieldAttribute#name. (cherry picked from commit 0850bd7) # Conflicts: # x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java * [CI] Auto commit changes from spotless * Fix additional merge conflicts * [CI] Auto commit changes from spotless --------- Co-authored-by: elasticsearchmachine <[email protected]>
1 parent e649019 commit 749fcdf

File tree

19 files changed

+217
-114
lines changed

19 files changed

+217
-114
lines changed

docs/changelog/128910.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128910
2+
summary: Fix `FieldAttribute` name usage in `InferNonNullAggConstraint`
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,24 @@
2727

2828
/**
2929
* Attribute for an ES field.
30-
* To differentiate between the different type of fields this class offers:
31-
* - name - the fully qualified name (foo.bar.tar)
32-
* - path - the path pointing to the field name (foo.bar)
33-
* - parent - the immediate parent of the field; useful for figuring out the type of field (nested vs object)
34-
* - nestedParent - if nested, what's the parent (which might not be the immediate one)
30+
* This class offers:
31+
* - name - the name of the attribute, but not necessarily of the field.
32+
* - The raw EsField representing the field; for parent.child.grandchild this is just grandchild.
33+
* - parentName - the full path to the immediate parent of the field, e.g. parent.child (without .grandchild)
34+
*
35+
* To adequately represent e.g. union types, the name of the attribute can be altered because we may have multiple synthetic field
36+
* attributes that really belong to the same underlying field. For instance, if a multi-typed field is used both as {@code field::string}
37+
* and {@code field::ip}, we'll generate 2 field attributes called {@code $$field$converted_to$string} and {@code $$field$converted_to$ip}
38+
* but still referring to the same underlying field.
3539
*/
3640
public class FieldAttribute extends TypedAttribute {
3741

42+
/**
43+
* A field name, as found in the mapping. Includes the whole path from the root of the document.
44+
* Implemented as a wrapper around {@link String} to distinguish from the attribute name (which sometimes differs!) at compile time.
45+
*/
46+
public record FieldName(String string) {};
47+
3848
static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
3949
Attribute.class,
4050
"FieldAttribute",
@@ -43,6 +53,7 @@ public class FieldAttribute extends TypedAttribute {
4353

4454
private final String parentName;
4555
private final EsField field;
56+
protected FieldName lazyFieldName;
4657

4758
public FieldAttribute(Source source, String name, EsField field) {
4859
this(source, null, name, field);
@@ -184,15 +195,19 @@ public String parentName() {
184195
/**
185196
* The full name of the field in the index, including all parent fields. E.g. {@code parent.subfield.this_field}.
186197
*/
187-
public String fieldName() {
188-
// Before 8.15, the field name was the same as the attribute's name.
189-
// On later versions, the attribute can be renamed when creating synthetic attributes.
190-
// Because until 8.15, we couldn't set `synthetic` to true due to a bug, in that version such FieldAttributes are marked by their
191-
// name starting with `$$`.
192-
if ((synthetic() || name().startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX)) == false) {
193-
return name();
198+
public FieldName fieldName() {
199+
if (lazyFieldName == null) {
200+
// Before 8.15, the field name was the same as the attribute's name.
201+
// On later versions, the attribute can be renamed when creating synthetic attributes.
202+
// Because until 8.15, we couldn't set `synthetic` to true due to a bug, in that version such FieldAttributes are marked by
203+
// their
204+
// name starting with `$$`.
205+
if ((synthetic() || name().startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX)) == false) {
206+
lazyFieldName = new FieldName(name());
207+
}
208+
lazyFieldName = new FieldName(Strings.hasText(parentName) ? parentName + "." + field.getName() : field.getName());
194209
}
195-
return Strings.hasText(parentName) ? parentName + "." + field.getName() : field.getName();
210+
return lazyFieldName;
196211
}
197212

198213
public EsField.Exact getExactInfo() {

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ public String getWriteableName() {
117117
}
118118

119119
/**
120-
* Returns the field path
120+
* Returns the simple name, but not the full field path. The latter requires knowing the path of the parent field.
121121
*/
122122
public String getName() {
123123
return name;

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.elasticsearch.xpack.esql.core.expression.Attribute;
4242
import org.elasticsearch.xpack.esql.core.expression.Expression;
4343
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
44+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute.FieldName;
4445
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
4546
import org.elasticsearch.xpack.esql.core.expression.Literal;
4647
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
@@ -230,22 +231,22 @@ public static EsRelation relation() {
230231
public static class TestSearchStats implements SearchStats {
231232

232233
@Override
233-
public boolean exists(String field) {
234+
public boolean exists(FieldName field) {
234235
return true;
235236
}
236237

237238
@Override
238-
public boolean isIndexed(String field) {
239+
public boolean isIndexed(FieldName field) {
239240
return exists(field);
240241
}
241242

242243
@Override
243-
public boolean hasDocValues(String field) {
244+
public boolean hasDocValues(FieldName field) {
244245
return exists(field);
245246
}
246247

247248
@Override
248-
public boolean hasExactSubfield(String field) {
249+
public boolean hasExactSubfield(FieldName field) {
249250
return exists(field);
250251
}
251252

@@ -255,27 +256,27 @@ public long count() {
255256
}
256257

257258
@Override
258-
public long count(String field) {
259+
public long count(FieldName field) {
259260
return exists(field) ? -1 : 0;
260261
}
261262

262263
@Override
263-
public long count(String field, BytesRef value) {
264+
public long count(FieldName field, BytesRef value) {
264265
return exists(field) ? -1 : 0;
265266
}
266267

267268
@Override
268-
public byte[] min(String field, DataType dataType) {
269+
public byte[] min(FieldName field, DataType dataType) {
269270
return null;
270271
}
271272

272273
@Override
273-
public byte[] max(String field, DataType dataType) {
274+
public byte[] max(FieldName field, DataType dataType) {
274275
return null;
275276
}
276277

277278
@Override
278-
public boolean isSingleValue(String field) {
279+
public boolean isSingleValue(FieldName field) {
279280
return false;
280281
}
281282
}
@@ -326,23 +327,23 @@ private boolean isConfigationSet(Config config, String field) {
326327
}
327328

328329
@Override
329-
public boolean exists(String field) {
330-
return isConfigationSet(Config.EXISTS, field);
330+
public boolean exists(FieldName field) {
331+
return isConfigationSet(Config.EXISTS, field.string());
331332
}
332333

333334
@Override
334-
public boolean isIndexed(String field) {
335-
return isConfigationSet(Config.INDEXED, field);
335+
public boolean isIndexed(FieldName field) {
336+
return isConfigationSet(Config.INDEXED, field.string());
336337
}
337338

338339
@Override
339-
public boolean hasDocValues(String field) {
340-
return isConfigationSet(Config.DOC_VALUES, field);
340+
public boolean hasDocValues(FieldName field) {
341+
return isConfigationSet(Config.DOC_VALUES, field.string());
341342
}
342343

343344
@Override
344-
public boolean hasExactSubfield(String field) {
345-
return isConfigationSet(Config.EXACT_SUBFIELD, field);
345+
public boolean hasExactSubfield(FieldName field) {
346+
return isConfigationSet(Config.EXACT_SUBFIELD, field.string());
346347
}
347348

348349
@Override
@@ -452,8 +453,8 @@ private static SearchStats fieldMatchingExistOrMissing(boolean exists, String...
452453
private final Set<String> fields = Set.of(names);
453454

454455
@Override
455-
public boolean exists(String field) {
456-
return fields.contains(field) == exists;
456+
public boolean exists(FieldName field) {
457+
return fields.contains(field.string()) == exists;
457458
}
458459
};
459460
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,19 @@ count:long | message:keyword
13251325
3 | Connected to 10.1.0.3
13261326
;
13271327

1328+
multiIndexStatsOfMultiTypedField
1329+
required_capability: union_types
1330+
required_capability: casting_operator
1331+
required_capability: union_types_numeric_widening
1332+
1333+
FROM apps, apps_short
1334+
| STATS s = sum(id::integer)
1335+
;
1336+
1337+
s:long
1338+
210
1339+
;
1340+
13281341
multiIndexMultiColumnTypesRename
13291342
required_capability: union_types
13301343
required_capability: index_metadata_field

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnsupportedAttribute.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,13 @@ public UnsupportedEsField field() {
122122
}
123123

124124
@Override
125-
public String fieldName() {
126-
// The super fieldName uses parents to compute the path; this class ignores parents, so we need to rely on the name instead.
127-
// Using field().getName() would be wrong: for subfields like parent.subfield that would return only the last part, subfield.
128-
return name();
125+
public FieldName fieldName() {
126+
if (lazyFieldName == null) {
127+
// The super fieldName uses parents to compute the path; this class ignores parents, so we need to rely on the name instead.
128+
// Using field().getName() would be wrong: for subfields like parent.subfield that would return only the last part, subfield.
129+
lazyFieldName = new FieldName(name());
130+
}
131+
return lazyFieldName;
129132
}
130133

131134
@Override

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/InferNonNullAggConstraint.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
/**
2727
* The vast majority of aggs ignore null entries - this rule adds a pushable filter, as it is cheap
28-
* to execute, to filter this entries out to begin with.
28+
* to execute, to filter these entries out to begin with.
2929
* STATS x = min(a), y = sum(b)
3030
* becomes
3131
* | WHERE a IS NOT NULL OR b IS NOT NULL
@@ -55,7 +55,7 @@ protected LogicalPlan rule(Aggregate aggregate, LocalLogicalOptimizerContext con
5555
Expression field = af.field();
5656
// ignore literals (e.g. COUNT(1))
5757
// make sure the field exists at the source and is indexed (not runtime)
58-
if (field.foldable() == false && field instanceof FieldAttribute fa && stats.isIndexed(fa.name())) {
58+
if (field.foldable() == false && field instanceof FieldAttribute fa && stats.isIndexed(fa.fieldName())) {
5959
nonNullAggFields.add(field);
6060
} else {
6161
// otherwise bail out since unless disjunction needs to cover _all_ fields, things get filtered out

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,27 +119,31 @@ public boolean isIndexed(FieldAttribute attr) {
119119
};
120120

121121
/**
122-
* If we have access to SearchStats over a collection of shards, we can make more fine-grained decisions about what can be pushed down.
123-
* This should open up more opportunities for lucene pushdown.
122+
* If we have access to {@link SearchStats} over a collection of shards, we can make more fine-grained decisions about what can be
123+
* pushed down. This should open up more opportunities for lucene pushdown.
124124
*/
125125
static LucenePushdownPredicates from(SearchStats stats) {
126+
// TODO: use FieldAttribute#fieldName, otherwise this doesn't apply to field attributes used for union types.
127+
// C.f. https://github.com/elastic/elasticsearch/issues/128905
126128
return new LucenePushdownPredicates() {
127129
@Override
128130
public boolean hasExactSubfield(FieldAttribute attr) {
129-
return stats.hasExactSubfield(attr.name());
131+
return stats.hasExactSubfield(new FieldAttribute.FieldName(attr.name()));
130132
}
131133

132134
@Override
133135
public boolean isIndexedAndHasDocValues(FieldAttribute attr) {
134136
// We still consider the value of isAggregatable here, because some fields like ScriptFieldTypes are always aggregatable
135137
// But this could hide issues with fields that are not indexed but are aggregatable
136138
// This is the original behaviour for ES|QL, but is it correct?
137-
return attr.field().isAggregatable() || stats.isIndexed(attr.name()) && stats.hasDocValues(attr.name());
139+
return attr.field().isAggregatable()
140+
|| stats.isIndexed(new FieldAttribute.FieldName(attr.name()))
141+
&& stats.hasDocValues(new FieldAttribute.FieldName(attr.name()));
138142
}
139143

140144
@Override
141145
public boolean isIndexed(FieldAttribute attr) {
142-
return stats.isIndexed(attr.name());
146+
return stats.isIndexed(new FieldAttribute.FieldName(attr.name()));
143147
}
144148
};
145149
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ private Tuple<List<Attribute>, List<EsStatsQueryExec.Stat>> pushableStats(
9696
if (target instanceof FieldAttribute fa) {
9797
var fName = fa.fieldName();
9898
if (context.searchStats().isSingleValue(fName)) {
99-
fieldName = fName;
99+
fieldName = fName.string();
100100
query = QueryBuilders.existsQuery(fieldName);
101101
}
102102
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi
129129

130130
private static String getFieldName(Attribute attr) {
131131
// Do not use the field attribute name, this can deviate from the field name for union types.
132-
return attr instanceof FieldAttribute fa ? fa.fieldName() : attr.name();
132+
return attr instanceof FieldAttribute fa ? fa.fieldName().string() : attr.name();
133133
}
134134

135135
private BlockLoader getBlockLoaderFor(int shardId, Attribute attr, MappedFieldType.FieldExtractPreference fieldExtractPreference) {

0 commit comments

Comments
 (0)