Skip to content

Commit 804be29

Browse files
committed
Optimize time ranged search queries
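Previously, when a search request contained a time range, we skipped the optimization that converts unneeded split searches into count queries. With this change, the optimization also applies to time-ranged requests: splits are ordered by whether they are fully contained in the requested time range, and only contained splits are used to determine how many splits are required.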
1 parent de696d4 commit 804be29

File tree

1 file changed

+33
-11
lines changed
  • quickwit/quickwit-search/src

1 file changed

+33
-11
lines changed

quickwit/quickwit-search/src/leaf.rs

+33-11
Original file line numberDiff line numberDiff line change
@@ -942,11 +942,6 @@ fn is_simple_all_query(search_request: &SearchRequest) -> bool {
942942
return false;
943943
}
944944

945-
// TODO: Update the logic to handle start_timestamp end_timestamp ranges
946-
if search_request.start_timestamp.is_some() || search_request.end_timestamp.is_some() {
947-
return false;
948-
}
949-
950945
let Ok(query_ast) = serde_json::from_str(&search_request.query_ast) else {
951946
return false;
952947
};
@@ -1000,6 +995,20 @@ impl CanSplitDoBetter {
1000995
}
1001996
}
1002997

998+
/// Returns `true` when the split's timestamp range lies within the time range of
/// `search_request`, and `false` otherwise.
///
/// A bound that is `None` on the request places no constraint on the split.
/// NOTE(review): the comparisons below treat `start_timestamp` as an inclusive bound and
/// `end_timestamp` as an exclusive bound — confirm against the `SearchRequest` definition.
fn is_contained(split: &SplitIdAndFooterOffsets, search_request: &SearchRequest) -> bool {
999+
if let Some(start) = search_request.start_timestamp {
1000+
if split.timestamp_start() < start { // split begins before the requested range
1001+
return false;
1002+
}
1003+
}
1004+
if let Some(end) = search_request.end_timestamp {
1005+
if split.timestamp_end() >= end { // split ends at or after the requested end bound
1006+
return false;
1007+
}
1008+
}
1009+
true
1010+
}
1011+
10031012
/// Optimize the order in which splits will get processed based on how it can skip the most
10041013
/// splits.
10051014
///
@@ -1009,18 +1018,29 @@ impl CanSplitDoBetter {
10091018
/// are the most likely to fill our Top K.
10101019
/// In the future, as splits get more metadata per column, we may be able to do this more than
10111020
/// just for timestamp and "unsorted" request.
1012-
fn optimize_split_order(&self, splits: &mut [SplitIdAndFooterOffsets]) {
1021+
///
1022+
/// To skip splits in time ranged queries, we sort the splits first by whether they are
1023+
/// contained in the search request time range.
1024+
fn optimize_split_order(
1025+
&self,
1026+
splits: &mut [SplitIdAndFooterOffsets],
1027+
search_request: &SearchRequest,
1028+
) {
10131029
match self {
10141030
CanSplitDoBetter::SplitIdHigher(_) => {
10151031
splits.sort_unstable_by(|a, b| b.split_id.cmp(&a.split_id))
10161032
}
10171033
CanSplitDoBetter::SplitTimestampHigher(_)
10181034
| CanSplitDoBetter::FindTraceIdsAggregation(_) => {
1019-
splits.sort_unstable_by_key(|split| std::cmp::Reverse(split.timestamp_end()))
1020-
}
1021-
CanSplitDoBetter::SplitTimestampLower(_) => {
1022-
splits.sort_unstable_by_key(|split| split.timestamp_start())
1035+
splits.sort_unstable_by_key(|split| {
1036+
let contained = Self::is_contained(split, search_request);
1037+
(!contained, std::cmp::Reverse(split.timestamp_end()))
1038+
})
10231039
}
1040+
CanSplitDoBetter::SplitTimestampLower(_) => splits.sort_unstable_by_key(|split| {
1041+
let contained = Self::is_contained(split, search_request);
1042+
(!contained, split.timestamp_start())
1043+
}),
10241044
CanSplitDoBetter::Uninformative => (),
10251045
}
10261046
}
@@ -1034,7 +1054,7 @@ impl CanSplitDoBetter {
10341054
request: Arc<SearchRequest>,
10351055
mut splits: Vec<SplitIdAndFooterOffsets>,
10361056
) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
1037-
self.optimize_split_order(&mut splits);
1057+
self.optimize_split_order(&mut splits, &request);
10381058

10391059
if !is_simple_all_query(&request) {
10401060
// no optimization opportunity here.
@@ -1049,6 +1069,8 @@ impl CanSplitDoBetter {
10491069
// Calculate the number of splits which are guaranteed to deliver enough documents.
10501070
let min_required_splits = splits
10511071
.iter()
1072+
// splits are sorted by whether they are contained in the request time range
1073+
.filter(|split| Self::is_contained(split, &request))
10521074
.map(|split| split.num_docs)
10531075
// computing the partial sum
10541076
.scan(0u64, |partial_sum: &mut u64, num_docs_in_split: u64| {

0 commit comments

Comments
 (0)