@@ -942,11 +942,6 @@ fn is_simple_all_query(search_request: &SearchRequest) -> bool {
942
942
return false ;
943
943
}
944
944
945
- // TODO: Update the logic to handle start_timestamp end_timestamp ranges
946
- if search_request. start_timestamp . is_some ( ) || search_request. end_timestamp . is_some ( ) {
947
- return false ;
948
- }
949
-
950
945
let Ok ( query_ast) = serde_json:: from_str ( & search_request. query_ast ) else {
951
946
return false ;
952
947
} ;
@@ -1000,6 +995,29 @@ impl CanSplitDoBetter {
1000
995
}
1001
996
}
1002
997
998
+ fn is_split_contained_in_search_time_range (
999
+ split : & SplitIdAndFooterOffsets ,
1000
+ search_request : & SearchRequest ,
1001
+ ) -> bool {
1002
+ if let Some ( start) = search_request. start_timestamp {
1003
+ let Some ( split_start) = split. timestamp_start else {
1004
+ return false ;
1005
+ } ;
1006
+ if split_start < start {
1007
+ return false ;
1008
+ }
1009
+ }
1010
+ if let Some ( end) = search_request. end_timestamp {
1011
+ let Some ( split_end) = split. timestamp_end else {
1012
+ return false ;
1013
+ } ;
1014
+ if split_end >= end {
1015
+ return false ;
1016
+ }
1017
+ }
1018
+ true
1019
+ }
1020
+
1003
1021
fn to_splits_with_request (
1004
1022
splits : Vec < SplitIdAndFooterOffsets > ,
1005
1023
request : Arc < SearchRequest > ,
@@ -1011,23 +1029,33 @@ impl CanSplitDoBetter {
1011
1029
}
1012
1030
1013
1031
/// Calculate the number of splits which are guaranteed to deliver enough documents.
1032
+ ///
1033
+ /// If there's a time range and not enough splits contain at least the number of requested
1034
+ /// documents, return None.
1014
1035
fn get_min_required_splits (
1015
1036
splits : & [ SplitIdAndFooterOffsets ] ,
1016
1037
request : & SearchRequest ,
1017
- ) -> usize {
1038
+ ) -> Option < usize > {
1018
1039
let num_requested_docs = request. start_offset + request. max_hits ;
1019
1040
1020
- splits
1021
- . into_iter ( )
1022
- . map ( |split| split. num_docs )
1023
- // computing the partial sum
1024
- . scan ( 0u64 , |partial_sum : & mut u64 , num_docs_in_split : u64 | {
1025
- * partial_sum += num_docs_in_split;
1026
- Some ( * partial_sum)
1027
- } )
1028
- . take_while ( |partial_sum| * partial_sum < num_requested_docs)
1029
- . count ( )
1030
- + 1
1041
+ let mut min_required_splits = 0 ;
1042
+ let mut partial_sum = 0u64 ;
1043
+
1044
+ for split in splits. iter ( ) {
1045
+ if !Self :: is_split_contained_in_search_time_range ( split, & request) {
1046
+ continue ;
1047
+ }
1048
+
1049
+ partial_sum += split. num_docs ;
1050
+
1051
+ if partial_sum >= num_requested_docs {
1052
+ return Some ( min_required_splits + 1 ) ;
1053
+ }
1054
+
1055
+ min_required_splits += 1 ;
1056
+ }
1057
+
1058
+ None
1031
1059
}
1032
1060
1033
1061
fn optimize_split_id_higher (
@@ -1042,7 +1070,11 @@ impl CanSplitDoBetter {
1042
1070
return Ok ( Self :: to_splits_with_request ( splits, request) ) ;
1043
1071
}
1044
1072
1045
- let min_required_splits = Self :: get_min_required_splits ( & splits, & request) ;
1073
+ let Some ( min_required_splits) = Self :: get_min_required_splits ( & splits, & request) else {
1074
+ // not enough splits contained in time range.
1075
+ return Ok ( Self :: to_splits_with_request ( splits, request) ) ;
1076
+ } ;
1077
+
1046
1078
let mut split_with_req = Self :: to_splits_with_request ( splits, request) ;
1047
1079
1048
1080
// In this case there is no sort order, we order by split id.
@@ -1060,14 +1092,21 @@ impl CanSplitDoBetter {
1060
1092
request : Arc < SearchRequest > ,
1061
1093
mut splits : Vec < SplitIdAndFooterOffsets > ,
1062
1094
) -> Result < Vec < ( SplitIdAndFooterOffsets , SearchRequest ) > , SearchError > {
1063
- splits. sort_unstable_by_key ( |split| std:: cmp:: Reverse ( split. timestamp_end ( ) ) ) ;
1095
+ splits. sort_unstable_by_key ( |split| {
1096
+ let contained = Self :: is_split_contained_in_search_time_range ( split, & request) ;
1097
+ ( !contained, std:: cmp:: Reverse ( split. timestamp_end ( ) ) )
1098
+ } ) ;
1064
1099
1065
1100
if !is_simple_all_query ( & request) {
1066
1101
// no optimization opportunity here.
1067
1102
return Ok ( Self :: to_splits_with_request ( splits, request) ) ;
1068
1103
}
1069
1104
1070
- let min_required_splits = Self :: get_min_required_splits ( & splits, & request) ;
1105
+ let Some ( min_required_splits) = Self :: get_min_required_splits ( & splits, & request) else {
1106
+ // not enough splits contained in time range.
1107
+ return Ok ( Self :: to_splits_with_request ( splits, request) ) ;
1108
+ } ;
1109
+
1071
1110
let mut split_with_req = Self :: to_splits_with_request ( splits, request) ;
1072
1111
1073
1112
// We order by timestamp desc. split_with_req is sorted by timestamp_end desc.
@@ -1097,14 +1136,21 @@ impl CanSplitDoBetter {
1097
1136
request : Arc < SearchRequest > ,
1098
1137
mut splits : Vec < SplitIdAndFooterOffsets > ,
1099
1138
) -> Result < Vec < ( SplitIdAndFooterOffsets , SearchRequest ) > , SearchError > {
1100
- splits. sort_unstable_by_key ( |split| split. timestamp_start ( ) ) ;
1139
+ splits. sort_unstable_by_key ( |split| {
1140
+ let contained = Self :: is_split_contained_in_search_time_range ( split, & request) ;
1141
+ ( !contained, split. timestamp_start ( ) )
1142
+ } ) ;
1101
1143
1102
1144
if !is_simple_all_query ( & request) {
1103
1145
// no optimization opportunity here.
1104
1146
return Ok ( Self :: to_splits_with_request ( splits, request) ) ;
1105
1147
}
1106
1148
1107
- let min_required_splits = Self :: get_min_required_splits ( & splits, & request) ;
1149
+ let Some ( min_required_splits) = Self :: get_min_required_splits ( & splits, & request) else {
1150
+ // not enough splits contained in time range.
1151
+ return Ok ( Self :: to_splits_with_request ( splits, request) ) ;
1152
+ } ;
1153
+
1108
1154
let mut split_with_req = Self :: to_splits_with_request ( splits, request) ;
1109
1155
1110
1156
// We order by timestamp asc. split_with_req is sorted by timestamp_start.
@@ -1141,7 +1187,10 @@ impl CanSplitDoBetter {
1141
1187
request : Arc < SearchRequest > ,
1142
1188
mut splits : Vec < SplitIdAndFooterOffsets > ,
1143
1189
) -> Result < Vec < ( SplitIdAndFooterOffsets , SearchRequest ) > , SearchError > {
1144
- splits. sort_unstable_by_key ( |split| std:: cmp:: Reverse ( split. timestamp_end ( ) ) ) ;
1190
+ splits. sort_unstable_by_key ( |split| {
1191
+ let contained = Self :: is_split_contained_in_search_time_range ( split, & request) ;
1192
+ ( !contained, std:: cmp:: Reverse ( split. timestamp_end ( ) ) )
1193
+ } ) ;
1145
1194
1146
1195
if !is_simple_all_query ( & request) {
1147
1196
// no optimization opportunity here.
@@ -1154,6 +1203,9 @@ impl CanSplitDoBetter {
1154
1203
/// This function tries to detect upfront which splits contain the top n hits and convert other
1155
1204
/// split searches to count only searches. It also optimizes split order.
1156
1205
///
1206
+ /// To skip splits in time ranged queries, we sort the splits first by whether they are
1207
+ /// contained in the search request time range.
1208
+ ///
1157
1209
/// Returns the search_requests with their split.
1158
1210
fn optimize (
1159
1211
& self ,
0 commit comments