Skip to content

Commit 05496e4

Browse files
authored
Fix panic when merging long str columns (#5704)
* Update to Tantivy version where the merge panic is fixed It includes both a fix of the source problem and a better management of panics. * Skip merge after failure
1 parent 24e9cf2 commit 05496e4

File tree

3 files changed

+41
-22
lines changed

3 files changed

+41
-22
lines changed

quickwit/Cargo.lock

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ quickwit-serve = { path = "quickwit-serve" }
335335
quickwit-storage = { path = "quickwit-storage" }
336336
quickwit-telemetry = { path = "quickwit-telemetry" }
337337

338-
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "e843c71", default-features = false, features = [
338+
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "80f5f1e", default-features = false, features = [
339339
"lz4-compression",
340340
"mmap",
341341
"quickwit",

quickwit/quickwit-indexing/src/actors/merge_executor.rs

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ use tantivy::index::SegmentId;
4242
use tantivy::tokenizer::TokenizerManager;
4343
use tantivy::{DateTime, Directory, Index, IndexMeta, IndexWriter, SegmentReader};
4444
use tokio::runtime::Handle;
45-
use tracing::{debug, info, instrument, warn};
45+
use tracing::{debug, error, info, instrument, warn};
4646

4747
use crate::actors::Packager;
4848
use crate::controlled_directory::ControlledDirectory;
@@ -90,16 +90,35 @@ impl Handler<MergeScratch> for MergeExecutor {
9090
let start = Instant::now();
9191
let merge_task = merge_scratch.merge_task;
9292
let indexed_split_opt: Option<IndexedSplit> = match merge_task.operation_type {
93-
MergeOperationType::Merge => Some(
94-
self.process_merge(
95-
merge_task.merge_split_id.clone(),
96-
merge_task.splits.clone(),
97-
merge_scratch.tantivy_dirs,
98-
merge_scratch.merge_scratch_directory,
99-
ctx,
100-
)
101-
.await?,
102-
),
93+
MergeOperationType::Merge => {
94+
let merge_res = self
95+
.process_merge(
96+
merge_task.merge_split_id.clone(),
97+
merge_task.splits.clone(),
98+
merge_scratch.tantivy_dirs,
99+
merge_scratch.merge_scratch_directory,
100+
ctx,
101+
)
102+
.await;
103+
match merge_res {
104+
Ok(indexed_split) => Some(indexed_split),
105+
Err(err) => {
106+
// A failure in a merge is a bit special.
107+
//
108+
// Instead of failing the pipeline, we just log it.
109+
// The idea is to limit the risk associated with a potential split of death.
110+
//
111+
// Such a split is now not tracked by the merge planner and won't undergo a
112+
// merge until the merge pipeline is restarted.
113+
//
114+
// With a merge policy that marks splits as mature after a day or so, this
115+
// limits the noise associated to those failed
116+
// merges.
117+
error!(task=?merge_task, err=?err, "failed to merge splits");
118+
return Ok(());
119+
}
120+
}
121+
}
103122
MergeOperationType::DeleteAndMerge => {
104123
assert_eq!(
105124
merge_task.splits.len(),
@@ -540,7 +559,7 @@ impl MergeExecutor {
540559

541560
debug!(segment_ids=?segment_ids,"merging-segments");
542561
// TODO it would be nice if tantivy could let us run the merge in the current thread.
543-
index_writer.merge(&segment_ids).wait()?;
562+
index_writer.merge(&segment_ids).await?;
544563

545564
Ok(output_directory)
546565
}

0 commit comments

Comments
 (0)