Skip to content

Commit ed6cd7b

Browse files
committed
Optimize time ranged leaf search queries
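Previously, when the search request contained a time range, we skipped the optimization that converts unneeded split searches into count-only queries. The optimization is now also applied to time-ranged requests, based on the splits that are fully contained in the requested time range.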
1 parent 3de8d18 commit ed6cd7b

File tree

1 file changed

+72
-23
lines changed
  • quickwit/quickwit-search/src

1 file changed

+72
-23
lines changed

quickwit/quickwit-search/src/leaf.rs

+72-23
Original file line numberDiff line numberDiff line change
@@ -942,11 +942,6 @@ fn is_simple_all_query(search_request: &SearchRequest) -> bool {
942942
return false;
943943
}
944944

945-
// TODO: Update the logic to handle start_timestamp end_timestamp ranges
946-
if search_request.start_timestamp.is_some() || search_request.end_timestamp.is_some() {
947-
return false;
948-
}
949-
950945
let Ok(query_ast) = serde_json::from_str(&search_request.query_ast) else {
951946
return false;
952947
};
@@ -1000,6 +995,29 @@ impl CanSplitDoBetter {
1000995
}
1001996
}
1002997

998+
/// Returns whether the timestamp range of `split` lies entirely inside the time range of
/// `search_request`.
///
/// A bound that is absent from the request imposes no constraint. When the request sets a
/// bound but the split has no corresponding timestamp, the split is reported as not
/// contained. A split whose `timestamp_start` equals the request's `start_timestamp` is
/// accepted, whereas a split whose `timestamp_end` equals the request's `end_timestamp` is
/// rejected.
fn is_split_contained_in_search_time_range(
    split: &SplitIdAndFooterOffsets,
    search_request: &SearchRequest,
) -> bool {
    let starts_in_range = match (search_request.start_timestamp, split.timestamp_start) {
        // No lower bound requested: nothing to check.
        (None, _) => true,
        // Lower bound requested but the split start is unknown: cannot prove containment.
        (Some(_), None) => false,
        (Some(range_start), Some(split_start)) => split_start >= range_start,
    };
    let ends_in_range = match (search_request.end_timestamp, split.timestamp_end) {
        // No upper bound requested: nothing to check.
        (None, _) => true,
        // Upper bound requested but the split end is unknown: cannot prove containment.
        (Some(_), None) => false,
        (Some(range_end), Some(split_end)) => split_end < range_end,
    };
    starts_in_range && ends_in_range
}
1020+
10031021
fn to_splits_and_request(
10041022
splits: Vec<SplitIdAndFooterOffsets>,
10051023
request: Arc<SearchRequest>,
@@ -1011,23 +1029,33 @@ impl CanSplitDoBetter {
10111029
}
10121030

10131031
/// Calculate the number of splits which are guaranteed to deliver enough documents.
1032+
///
1033+
/// If there's a time range and not enough splits contain at least the number of requested
1034+
/// documents, return None.
10141035
fn get_min_required_splits(
10151036
splits: &[SplitIdAndFooterOffsets],
10161037
request: &SearchRequest,
1017-
) -> usize {
1038+
) -> Option<usize> {
10181039
let num_requested_docs = request.start_offset + request.max_hits;
10191040

1020-
splits
1021-
.into_iter()
1022-
.map(|split| split.num_docs)
1023-
// computing the partial sum
1024-
.scan(0u64, |partial_sum: &mut u64, num_docs_in_split: u64| {
1025-
*partial_sum += num_docs_in_split;
1026-
Some(*partial_sum)
1027-
})
1028-
.take_while(|partial_sum| *partial_sum < num_requested_docs)
1029-
.count()
1030-
+ 1
1041+
let mut min_required_splits = 0;
1042+
let mut partial_sum = 0u64;
1043+
1044+
for split in splits.iter() {
1045+
if !Self::is_split_contained_in_search_time_range(split, &request) {
1046+
continue;
1047+
}
1048+
1049+
partial_sum += split.num_docs;
1050+
1051+
if partial_sum >= num_requested_docs {
1052+
return Some(min_required_splits + 1);
1053+
}
1054+
1055+
min_required_splits += 1;
1056+
}
1057+
1058+
None
10311059
}
10321060

10331061
fn optimize_split_id_higher(
@@ -1042,7 +1070,10 @@ impl CanSplitDoBetter {
10421070
return Ok(Self::to_splits_and_request(splits, request));
10431071
}
10441072

1045-
let min_required_splits = Self::get_min_required_splits(&splits, &request);
1073+
let Some(min_required_splits) = Self::get_min_required_splits(&splits, &request) else {
1074+
// not enough splits contained in time range.
1075+
return Ok(Self::to_splits_and_request(splits, request));
1076+
};
10461077
let mut split_with_req = Self::to_splits_and_request(splits, request);
10471078

10481079
// In this case there is no sort order, we order by split id.
@@ -1060,14 +1091,20 @@ impl CanSplitDoBetter {
10601091
request: Arc<SearchRequest>,
10611092
mut splits: Vec<SplitIdAndFooterOffsets>,
10621093
) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
1063-
splits.sort_unstable_by_key(|split| std::cmp::Reverse(split.timestamp_end()));
1094+
splits.sort_unstable_by_key(|split| {
1095+
let contained = Self::is_split_contained_in_search_time_range(split, &request);
1096+
(!contained, std::cmp::Reverse(split.timestamp_end()))
1097+
});
10641098

10651099
if !is_simple_all_query(&request) {
10661100
// no optimization opportunity here.
10671101
return Ok(Self::to_splits_and_request(splits, request));
10681102
}
10691103

1070-
let min_required_splits = Self::get_min_required_splits(&splits, &request);
1104+
let Some(min_required_splits) = Self::get_min_required_splits(&splits, &request) else {
1105+
// not enough splits contained in time range.
1106+
return Ok(Self::to_splits_and_request(splits, request));
1107+
};
10711108
let mut split_with_req = Self::to_splits_and_request(splits, request);
10721109

10731110
// We order by timestamp desc. split_with_req is sorted by timestamp_end desc.
@@ -1097,14 +1134,20 @@ impl CanSplitDoBetter {
10971134
request: Arc<SearchRequest>,
10981135
mut splits: Vec<SplitIdAndFooterOffsets>,
10991136
) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
1100-
splits.sort_unstable_by_key(|split| split.timestamp_start());
1137+
splits.sort_unstable_by_key(|split| {
1138+
let contained = Self::is_split_contained_in_search_time_range(split, &request);
1139+
(!contained, split.timestamp_start())
1140+
});
11011141

11021142
if !is_simple_all_query(&request) {
11031143
// no optimization opportunity here.
11041144
return Ok(Self::to_splits_and_request(splits, request));
11051145
}
11061146

1107-
let min_required_splits = Self::get_min_required_splits(&splits, &request);
1147+
let Some(min_required_splits) = Self::get_min_required_splits(&splits, &request) else {
1148+
// not enough splits contained in time range.
1149+
return Ok(Self::to_splits_and_request(splits, request));
1150+
};
11081151
let mut split_with_req = Self::to_splits_and_request(splits, request);
11091152

11101153
// We order by timestamp asc. split_with_req is sorted by timestamp_start.
@@ -1141,7 +1184,10 @@ impl CanSplitDoBetter {
11411184
request: Arc<SearchRequest>,
11421185
mut splits: Vec<SplitIdAndFooterOffsets>,
11431186
) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
1144-
splits.sort_unstable_by_key(|split| std::cmp::Reverse(split.timestamp_end()));
1187+
splits.sort_unstable_by_key(|split| {
1188+
let contained = Self::is_split_contained_in_search_time_range(split, &request);
1189+
(!contained, std::cmp::Reverse(split.timestamp_end()))
1190+
});
11451191

11461192
if !is_simple_all_query(&request) {
11471193
// no optimization opportunity here.
@@ -1154,6 +1200,9 @@ impl CanSplitDoBetter {
11541200
/// This function tries to detect upfront which splits contain the top n hits and convert other
11551201
/// split searches to count only searches. It also optimizes split order.
11561202
///
1203+
/// To skip splits in time ranged queries, we sort the splits first by whether they are
1204+
/// contained in the search request time range.
1205+
///
11571206
/// Returns the search_requests with their split.
11581207
fn optimize(
11591208
&self,

0 commit comments

Comments
 (0)