@@ -22,7 +22,6 @@ use std::num::NonZeroU32;
22
22
23
23
use anyhow:: { bail, Context } ;
24
24
use fnv:: FnvHashSet ;
25
- use quickwit_common:: PathHasher ;
26
25
use quickwit_proto:: types:: DocMappingUid ;
27
26
use quickwit_query:: create_default_quickwit_tokenizer_manager;
28
27
use quickwit_query:: query_ast:: QueryAst ;
@@ -31,13 +30,12 @@ use serde::{Deserialize, Serialize};
31
30
use serde_json:: { self , Value as JsonValue } ;
32
31
use serde_json_borrow:: Map as BorrowedJsonMap ;
33
32
use tantivy:: query:: Query ;
34
- use tantivy:: schema:: document:: { ReferenceValue , ReferenceValueLeaf } ;
35
- use tantivy:: schema:: {
36
- Field , FieldType , OwnedValue as TantivyValue , Schema , Value , INDEXED , STORED ,
37
- } ;
33
+ use tantivy:: schema:: { Field , FieldType , OwnedValue as TantivyValue , Schema , INDEXED , STORED } ;
38
34
use tantivy:: TantivyDocument as Document ;
39
35
40
36
use super :: field_mapping_entry:: RAW_TOKENIZER_NAME ;
37
+ use super :: field_presence:: populate_field_presence;
38
+ use super :: tantivy_val_to_json:: tantivy_value_to_json;
41
39
use super :: DocMapperBuilder ;
42
40
use crate :: doc_mapper:: mapping_tree:: {
43
41
build_field_path_from_str, build_mapping_tree, map_primitive_json_to_tantivy,
@@ -430,85 +428,6 @@ fn extract_single_obj(
430
428
}
431
429
}
432
430
433
- // TODO: Formatting according to mapper if applicable
434
- fn tantivy_value_to_json ( val : TantivyValue ) -> JsonValue {
435
- match val {
436
- TantivyValue :: Null => JsonValue :: Null ,
437
- TantivyValue :: Str ( val) => JsonValue :: String ( val) ,
438
- TantivyValue :: PreTokStr ( val) => JsonValue :: String ( val. text ) ,
439
- TantivyValue :: U64 ( val) => JsonValue :: Number ( val. into ( ) ) ,
440
- TantivyValue :: I64 ( val) => JsonValue :: Number ( val. into ( ) ) ,
441
- TantivyValue :: F64 ( val) => serde_json:: json!( val) ,
442
- TantivyValue :: Bool ( val) => JsonValue :: Bool ( val) ,
443
- TantivyValue :: Date ( val) => JsonValue :: String ( format ! ( "{:?}" , val) ) ,
444
- TantivyValue :: Facet ( val) => JsonValue :: String ( val. to_string ( ) ) ,
445
- TantivyValue :: Bytes ( val) => JsonValue :: String ( format ! ( "{:?}" , val) ) ,
446
- TantivyValue :: Array ( val) => val. into_iter ( ) . map ( tantivy_value_to_json) . collect ( ) ,
447
- TantivyValue :: Object ( val) => val
448
- . into_iter ( )
449
- . map ( |( key, val) | ( key, tantivy_value_to_json ( val) ) )
450
- . collect ( ) ,
451
- TantivyValue :: IpAddr ( val) => JsonValue :: String ( format ! ( "{:?}" , val) ) ,
452
- }
453
- }
454
-
455
- #[ inline]
456
- fn populate_field_presence_for_json_value < ' a > (
457
- json_value : impl Value < ' a > ,
458
- path_hasher : & PathHasher ,
459
- is_expand_dots_enabled : bool ,
460
- output : & mut FnvHashSet < u64 > ,
461
- ) {
462
- match json_value. as_value ( ) {
463
- ReferenceValue :: Leaf ( ReferenceValueLeaf :: Null ) => { }
464
- ReferenceValue :: Leaf ( _) => {
465
- output. insert ( path_hasher. finish ( ) ) ;
466
- }
467
- ReferenceValue :: Array ( items) => {
468
- for item in items {
469
- populate_field_presence_for_json_value (
470
- item,
471
- path_hasher,
472
- is_expand_dots_enabled,
473
- output,
474
- ) ;
475
- }
476
- }
477
- ReferenceValue :: Object ( json_obj) => {
478
- populate_field_presence_for_json_obj (
479
- json_obj,
480
- path_hasher. clone ( ) ,
481
- is_expand_dots_enabled,
482
- output,
483
- ) ;
484
- }
485
- }
486
- }
487
-
488
- fn populate_field_presence_for_json_obj < ' a , Iter : Iterator < Item = ( & ' a str , impl Value < ' a > ) > > (
489
- json_obj : Iter ,
490
- path_hasher : PathHasher ,
491
- is_expand_dots_enabled : bool ,
492
- output : & mut FnvHashSet < u64 > ,
493
- ) {
494
- for ( field_key, field_value) in json_obj {
495
- let mut child_path_hasher = path_hasher. clone ( ) ;
496
- if is_expand_dots_enabled {
497
- for segment in field_key. split ( '.' ) {
498
- child_path_hasher. append ( segment. as_bytes ( ) ) ;
499
- }
500
- } else {
501
- child_path_hasher. append ( field_key. as_bytes ( ) ) ;
502
- } ;
503
- populate_field_presence_for_json_value (
504
- field_value,
505
- & child_path_hasher,
506
- is_expand_dots_enabled,
507
- output,
508
- ) ;
509
- }
510
- }
511
-
512
431
impl DocMapper {
513
432
/// Returns the unique identifier of the doc mapping.
514
433
pub fn doc_mapping_uid ( & self ) -> DocMappingUid {
@@ -636,36 +555,9 @@ impl DocMapper {
636
555
document. add_u64 ( document_size_field, document_len) ;
637
556
}
638
557
639
- // The capacity is inexact here.
640
-
641
558
if self . index_field_presence {
642
- let mut field_presence_hashes: FnvHashSet < u64 > =
643
- FnvHashSet :: with_capacity_and_hasher ( document. len ( ) , Default :: default ( ) ) ;
644
- for ( field, value) in document. field_values ( ) {
645
- let field_entry = self . schema . get_field_entry ( field) ;
646
- if !field_entry. is_indexed ( ) || field_entry. is_fast ( ) {
647
- // We are using an tantivy's ExistsQuery for fast fields.
648
- continue ;
649
- }
650
- let mut path_hasher: PathHasher = PathHasher :: default ( ) ;
651
- path_hasher. append ( & field. field_id ( ) . to_le_bytes ( ) [ ..] ) ;
652
- if let Some ( json_obj) = value. as_object ( ) {
653
- let is_expand_dots_enabled: bool =
654
- if let FieldType :: JsonObject ( json_options) = field_entry. field_type ( ) {
655
- json_options. is_expand_dots_enabled ( )
656
- } else {
657
- false
658
- } ;
659
- populate_field_presence_for_json_obj (
660
- json_obj,
661
- path_hasher,
662
- is_expand_dots_enabled,
663
- & mut field_presence_hashes,
664
- ) ;
665
- } else {
666
- field_presence_hashes. insert ( path_hasher. finish ( ) ) ;
667
- }
668
- }
559
+ let field_presence_hashes: FnvHashSet < u64 > =
560
+ populate_field_presence ( & document, & self . schema ) ;
669
561
for field_presence_hash in field_presence_hashes {
670
562
document. add_field_value ( FIELD_PRESENCE_FIELD , & field_presence_hash) ;
671
563
}
0 commit comments