Open
Description
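I am encountering an issue while training the SMS-WSJ ASR baseline model using Kaldi. The run is started at stage 17; stage 17 itself (creating the network configs) completes, and the error occurs in stage 18, when steps/nnet3/chain/train.py executes steps/nnet3/chain/get_egs.sh. Below are the details of the setup and the error log.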
Environment Details:
OS: WSL2 on Windows 10/11 with Ubuntu 18.04
Kaldi Version: master branch (the Kaldi binaries report version 5.5.433~1453-7637d in the logs below)
Python Version: 3.8 (via Conda virtual environment)
Hardware: Intel Core i5-13600KF, 16GB RAM
Command Used:
python -m sms_wsj.train_baseline_asr \
with egs_path=$KALDI_ROOT/egs/ \
json_path=~/data/write/sms_wsj.json \
stage=17 end_stage=20 \
num_jobs=1
Error Log from Python Script:
WARNING - Kaldi ASR baseline training - No observers have been added to this run
INFO - Kaldi ASR baseline training - Running command 'run'
INFO - Kaldi ASR baseline training - Started
Start training nnet3 model on sms_wsj
$ cuda-compiled
$ gmm_dir=exp/$gmm_data_type/${gmm}
$ ali_dir=exp/$ali_data_type/${gmm}_ali_${train_set}_sp
$ lat_dir=exp/$dataset/chain${nnet3_affix}/${gmm}_${train_set}_sp_lats
$ dir=exp/$dataset/chain${nnet3_affix}/tdnn${affix}_sp
$ train_data_dir=data/$dataset/${train_set}_sp_hires
$ train_ivector_dir=exp/$dataset/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires
$ lores_train_data_dir=data/$ali_data_type/${train_set}_sp
$ tree_dir=exp/$dataset/chain${nnet3_affix}/tree_a_sp
$ lang=data/lang_chain
$ [ $stage -le 17 ]
$ mkdir -p $dir
$ echo "$0: creating neural net configs using the xconfig parser"
/home/xuxu18/kaldi/egs/sms_single_speaker/s5/local_sms/get_nnet3_model.bash: creating neural net configs using the xconfig parser
$ num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
tree-info exp/sms_single_speaker/chain/tree_a_sp/tree
$ learning_rate_factor=$(python -c "print(0.5/$xent_regularize)")
$ tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true"
$ tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
$ linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
$ prefinal_opts="l2-regularize=0.01"
$ output_opts="l2-regularize=0.005"
$ mkdir -p $dir/configs
$ cat > $dir/configs/network.xconfig <<EOF
input dim=100 name=ivector
input dim=40 name=input
# please note that it is important to have input layer with the name=input
# as the layer immediately preceding the fixed-affine-layer to enable
# the use of short notation for the descriptor
fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
# the first splicing is moved before the lda layer, so no splicing here
relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=512
tdnnf-layer name=tdnnf2 $tdnnf_opts dim=512 bottleneck-dim=128 time-stride=1
tdnnf-layer name=tdnnf3 $tdnnf_opts dim=512 bottleneck-dim=128 time-stride=1
tdnnf-layer name=tdnnf4 $tdnnf_opts dim=512 bottleneck-dim=128 time-stride=1
tdnnf-layer name=tdnnf5 $tdnnf_opts dim=512 bottleneck-dim=128 time-stride=0
tdnnf-layer name=tdnnf6 $tdnnf_opts dim=512 bottleneck-dim=128 time-stride=3
tdnnf-layer name=tdnnf7 $tdnnf_opts dim=512 bottleneck-dim=128 time-stride=3
tdnnf-layer name=tdnnf8 $tdnnf_opts dim=512 bottleneck-dim=128 time-stride=3
linear-component name=prefinal-l dim=192 $linear_opts
prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192
output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
EOF
$ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
steps/nnet3/xconfig_to_configs.py --xconfig-file exp/sms_single_speaker/chain/tdnn1a_sp/configs/network.xconfig --config-dir exp/sms_single_speaker/chain/tdnn1a_sp/configs/
nnet3-init exp/sms_single_speaker/chain/tdnn1a_sp/configs//init.config exp/sms_single_speaker/chain/tdnn1a_sp/configs//init.raw
LOG (nnet3-init[5.5.433~1453-7637d]:main():nnet3-init.cc:80) Initialized raw neural net and wrote it to exp/sms_single_speaker/chain/tdnn1a_sp/configs//init.raw
nnet3-info exp/sms_single_speaker/chain/tdnn1a_sp/configs//init.raw
nnet3-init exp/sms_single_speaker/chain/tdnn1a_sp/configs//ref.config exp/sms_single_speaker/chain/tdnn1a_sp/configs//ref.raw
LOG (nnet3-init[5.5.433~1453-7637d]:main():nnet3-init.cc:80) Initialized raw neural net and wrote it to exp/sms_single_speaker/chain/tdnn1a_sp/configs//ref.raw
nnet3-info exp/sms_single_speaker/chain/tdnn1a_sp/configs//ref.raw
nnet3-init exp/sms_single_speaker/chain/tdnn1a_sp/configs//ref.config exp/sms_single_speaker/chain/tdnn1a_sp/configs//ref.raw
LOG (nnet3-init[5.5.433~1453-7637d]:main():nnet3-init.cc:80) Initialized raw neural net and wrote it to exp/sms_single_speaker/chain/tdnn1a_sp/configs//ref.raw
nnet3-info exp/sms_single_speaker/chain/tdnn1a_sp/configs//ref.raw
$ [ $stage -le 18 ]
$ steps/nnet3/chain/train.py --stage=$train_stage --cmd="$decode_cmd" --feat.online-ivector-dir=$train_ivector_dir --feat.cmvn-opts="--norm-means=false --norm-vars=false" --chain.xent-regularize $xent_regularize --chain.leaky-hmm-coefficient=0.1 --chain.l2-regularize=0.0 --chain.apply-deriv-weights=false --chain.lm-opts="--num-extra-lm-states=2000" --trainer.dropout-schedule $dropout_schedule --trainer.add-option="--optimization.memory-compression-level=2" --trainer.srand=$srand --trainer.max-param-change=2.0 --trainer.num-epochs=10 --trainer.frames-per-iter=100000 --trainer.optimization.num-jobs-initial=1 --trainer.optimization.num-jobs-final=1 --trainer.optimization.initial-effective-lrate=0.0005 --trainer.optimization.final-effective-lrate=0.00005 --trainer.num-chunk-per-minibatch=64,32 --trainer.optimization.momentum=0.0 --egs.chunk-width=$chunk_width --egs.chunk-left-context=0 --egs.chunk-right-context=0 --egs.dir="$common_egs_dir" --egs.opts="--frames-overlap-per-eg 0" --cleanup.remove-egs=$remove_egs --use-gpu=true --reporting.email="$reporting_email" --feat-dir=$train_data_dir --tree-dir=$tree_dir --lat-dir=$lat_dir --dir=$dir
2024-12-18 15:47:02,987 [steps/nnet3/chain/train.py:35 - <module> - INFO ] Starting chain model trainer (train.py)
steps/nnet3/chain/train.py --stage=-10 --cmd=run.pl --mem 4G --feat.online-ivector-dir=exp/sms_single_speaker/nnet3/ivectors_train_si284_sp_hires --feat.cmvn-opts=--norm-means=false --norm-vars=false --chain.xent-regularize 0.1 --chain.leaky-hmm-coefficient=0.1 --chain.l2-regularize=0.0 --chain.apply-deriv-weights=false --chain.lm-opts=--num-extra-lm-states=2000 --trainer.dropout-schedule 0,0@0.20,0.5@0.50,0 --trainer.add-option=--optimization.memory-compression-level=2 --trainer.srand=0 --trainer.max-param-change=2.0 --trainer.num-epochs=10 --trainer.frames-per-iter=100000 --trainer.optimization.num-jobs-initial=1 --trainer.optimization.num-jobs-final=1 --trainer.optimization.initial-effective-lrate=0.0005 --trainer.optimization.final-effective-lrate=0.00005 --trainer.num-chunk-per-minibatch=64,32 --trainer.optimization.momentum=0.0 --egs.chunk-width=60 --egs.chunk-left-context=0 --egs.chunk-right-context=0 --egs.dir= --egs.opts=--frames-overlap-per-eg 0 --cleanup.remove-egs=true --use-gpu=true --reporting.email= --feat-dir=data/sms_single_speaker/train_si284_sp_hires --tree-dir=exp/sms_single_speaker/chain/tree_a_sp --lat-dir=exp/sms_single_speaker/chain/tri4b_train_si284_sp_lats --dir=exp/sms_single_speaker/chain/tdnn1a_sp
['steps/nnet3/chain/train.py', '--stage=-10', '--cmd=run.pl --mem 4G', '--feat.online-ivector-dir=exp/sms_single_speaker/nnet3/ivectors_train_si284_sp_hires', '--feat.cmvn-opts=--norm-means=false --norm-vars=false', '--chain.xent-regularize', '0.1', '--chain.leaky-hmm-coefficient=0.1', '--chain.l2-regularize=0.0', '--chain.apply-deriv-weights=false', '--chain.lm-opts=--num-extra-lm-states=2000', '--trainer.dropout-schedule', '0,0@0.20,0.5@0.50,0', '--trainer.add-option=--optimization.memory-compression-level=2', '--trainer.srand=0', '--trainer.max-param-change=2.0', '--trainer.num-epochs=10', '--trainer.frames-per-iter=100000', '--trainer.optimization.num-jobs-initial=1', '--trainer.optimization.num-jobs-final=1', '--trainer.optimization.initial-effective-lrate=0.0005', '--trainer.optimization.final-effective-lrate=0.00005', '--trainer.num-chunk-per-minibatch=64,32', '--trainer.optimization.momentum=0.0', '--egs.chunk-width=60', '--egs.chunk-left-context=0', '--egs.chunk-right-context=0', '--egs.dir=', '--egs.opts=--frames-overlap-per-eg 0', '--cleanup.remove-egs=true', '--use-gpu=true', '--reporting.email=', '--feat-dir=data/sms_single_speaker/train_si284_sp_hires', '--tree-dir=exp/sms_single_speaker/chain/tree_a_sp', '--lat-dir=exp/sms_single_speaker/chain/tri4b_train_si284_sp_lats', '--dir=exp/sms_single_speaker/chain/tdnn1a_sp']
2024-12-18 15:47:02,990 [steps/nnet3/chain/train.py:274 - train - INFO ] Arguments for the experiment
{'alignment_subsampling_factor': 3,
'apply_deriv_weights': False,
'backstitch_training_interval': 1,
'backstitch_training_scale': 0.0,
'chunk_left_context': 0,
'chunk_left_context_initial': -1,
'chunk_right_context': 0,
'chunk_right_context_final': -1,
'chunk_width': '60',
'cleanup': True,
'cmvn_opts': '--norm-means=false --norm-vars=false',
'combine_sum_to_one_penalty': 0.0,
'command': 'run.pl --mem 4G',
'compute_per_dim_accuracy': False,
'deriv_truncate_margin': None,
'dir': 'exp/sms_single_speaker/chain/tdnn1a_sp',
'do_final_combination': True,
'dropout_schedule': '0,0@0.20,0.5@0.50,0',
'egs_command': None,
'egs_dir': None,
'egs_opts': '--frames-overlap-per-eg 0',
'egs_stage': 0,
'email': None,
'exit_stage': None,
'feat_dir': 'data/sms_single_speaker/train_si284_sp_hires',
'final_effective_lrate': 5e-05,
'frame_subsampling_factor': 3,
'frames_per_iter': 100000,
'initial_effective_lrate': 0.0005,
'input_model': None,
'l2_regularize': 0.0,
'lat_dir': 'exp/sms_single_speaker/chain/tri4b_train_si284_sp_lats',
'leaky_hmm_coefficient': 0.1,
'left_deriv_truncate': None,
'left_tolerance': 5,
'lm_opts': '--num-extra-lm-states=2000',
'max_lda_jobs': 10,
'max_models_combine': 20,
'max_objective_evaluations': 30,
'max_param_change': 2.0,
'momentum': 0.0,
'num_chunk_per_minibatch': '64,32',
'num_epochs': 10.0,
'num_jobs_final': 1,
'num_jobs_initial': 1,
'num_jobs_step': 1,
'online_ivector_dir': 'exp/sms_single_speaker/nnet3/ivectors_train_si284_sp_hires',
'preserve_model_interval': 100,
'presoftmax_prior_scale_power': -0.25,
'proportional_shrink': 0.0,
'rand_prune': 4.0,
'remove_egs': True,
'reporting_interval': 0.1,
'right_tolerance': 5,
'samples_per_iter': 400000,
'shrink_saturation_threshold': 0.4,
'shrink_value': 1.0,
'shuffle_buffer_size': 5000,
'srand': 0,
'stage': -10,
'train_opts': ['--optimization.memory-compression-level=2'],
'tree_dir': 'exp/sms_single_speaker/chain/tree_a_sp',
'use_gpu': 'yes',
'xent_regularize': 0.1}
2024-12-18 15:47:03,174 [steps/nnet3/chain/train.py:328 - train - INFO ] Creating phone language-model
2024-12-18 15:47:22,252 [steps/nnet3/chain/train.py:333 - train - INFO ] Creating denominator FST
copy-transition-model exp/sms_single_speaker/chain/tree_a_sp/final.mdl exp/sms_single_speaker/chain/tdnn1a_sp/0.trans_mdl
LOG (copy-transition-model[5.5.433~1453-7637d]:main():copy-transition-model.cc:62) Copied transition model.
2024-12-18 15:47:22,667 [steps/nnet3/chain/train.py:340 - train - INFO ] Initializing a basic network for estimating preconditioning matrix
2024-12-18 15:47:22,689 [steps/nnet3/chain/train.py:369 - train - INFO ] Generating egs
steps/nnet3/chain/get_egs.sh --frames-overlap-per-eg 0 --cmd run.pl --mem 4G --cmvn-opts --norm-means=false --norm-vars=false --online-ivector-dir exp/sms_single_speaker/nnet3/ivectors_train_si284_sp_hires --left-context 14 --right-context 14 --left-context-initial -1 --right-context-final -1 --left-tolerance 5 --right-tolerance 5 --frame-subsampling-factor 3 --alignment-subsampling-factor 3 --stage 0 --frames-per-iter 100000 --frames-per-eg 60 --srand 0 data/sms_single_speaker/train_si284_sp_hires exp/sms_single_speaker/chain/tdnn1a_sp exp/sms_single_speaker/chain/tri4b_train_si284_sp_lats exp/sms_single_speaker/chain/tdnn1a_sp/egs
steps/nnet3/chain/get_egs.sh: File data/sms_single_speaker/train_si284_sp_hires/utt2uniq exists, so ensuring the hold-out set includes all perturbed versions of the same source utterance.
steps/nnet3/chain/get_egs.sh: Holding out 300 utterances in validation set and 300 in training diagnostic set, out of total 604098.
steps/nnet3/chain/get_egs.sh: creating egs. To ensure they are not deleted later you can do: touch exp/sms_single_speaker/chain/tdnn1a_sp/egs/.nodelete
steps/nnet3/chain/get_egs.sh: feature type is raw
tree-info exp/sms_single_speaker/chain/tdnn1a_sp/tree
feat-to-dim scp:exp/sms_single_speaker/nnet3/ivectors_train_si284_sp_hires/ivector_online.scp -
steps/nnet3/chain/get_egs.sh: working out number of frames of training data
steps/nnet3/chain/get_egs.sh: working out feature dim
steps/nnet3/chain/get_egs.sh: creating 5700 archives, each with 1666 egs, with
steps/nnet3/chain/get_egs.sh: 60 labels per example, and (left,right) context = (14,14)
steps/nnet3/chain/get_egs.sh: Getting validation and training subset examples in background.
steps/nnet3/chain/get_egs.sh: Generating training examples on disk
steps/nnet3/chain/get_egs.sh: Getting subsets of validation examples for diagnostics and combination.
steps/nnet3/chain/get_egs.sh: recombining and shuffling order of archives on disk
run.pl: 383 / 475 failed, log is in exp/sms_single_speaker/chain/tdnn1a_sp/egs/log/shuffle.*.log
Traceback (most recent call last):
File "steps/nnet3/chain/train.py", line 634, in main
train(args, run_opts)
File "steps/nnet3/chain/train.py", line 377, in train
chain_lib.generate_chain_egs(
File "/home/xuxu18/kaldi/egs/sms_single_speaker/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py", line 80, in generate_chain_egs
common_lib.execute_command(
File "/home/xuxu18/kaldi/egs/sms_single_speaker/s5/steps/libs/common.py", line 157, in execute_command
raise Exception("Command exited with status {0}: {1}".format(
Exception: Command exited with status 1: steps/nnet3/chain/get_egs.sh --frames-overlap-per-eg 0 --cmd "run.pl --mem 4G" --cmvn-opts "--norm-means=false --norm-vars=false" --online-ivector-dir "exp/sms_single_speaker/nnet3/ivectors_train_si284_sp_hires" --left-context 14 --right-context 14 --left-context-initial -1 --right-context-final -1 --left-tolerance '5' --right-tolerance '5' --frame-subsampling-factor 3 --alignment-subsampling-factor 3 --stage 0 --frames-per-iter 100000 --frames-per-eg 60 --srand 0 data/sms_single_speaker/train_si284_sp_hires exp/sms_single_speaker/chain/tdnn1a_sp exp/sms_single_speaker/chain/tri4b_train_si284_sp_lats exp/sms_single_speaker/chain/tdnn1a_sp/egs
$ exit 1
ERROR - Kaldi ASR baseline training - Failed after 0:23:23!
Traceback (most recent calls WITHOUT Sacred internals):
File "/home/xuxu18/sms_wsj/sms_wsj/train_baseline_asr.py", line 140, in run
run_process([
File "/home/xuxu18/sms_wsj/sms_wsj/kaldi/utils.py", line 413, in run_process
subprocess.run(
File "/home/xuxu18/anaconda3/envs/uu18/lib/python3.8/subprocess.py", line 516, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['/home/xuxu18/kaldi/egs/sms_single_speaker/s5/local_sms/get_nnet3_model.bash', '--dest_dir', '/home/xuxu18/kaldi/egs/sms_single_speaker/s5', '--cv_sets', '"cv_dev93"', '--stage', '17', '--gmm_data_type', 'wsj_8k', '--gmm', 'tri4b', '--ali_data_type', 'sms_early', '--dataset', 'sms_single_speaker', '--nj', '1']' returned non-zero exit status 1.
Log from Kaldi Script (get_egs.sh):
When I checked exp/sms_single_speaker/chain/tdnn1a_sp/egs/log/shuffle.*.log, I found the following error:
# nnet3-chain-normalize-egs --normalization-fst-scale=1.0 exp/sms_single_speaker/chain/tdnn1a_sp/normalization.fst "ark:cat exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.1.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.2.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.3.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.4.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.5.90.ark|" ark:- | nnet3-chain-shuffle-egs --srand=$[90+0] ark:- ark:- | nnet3-chain-copy-egs ark:- ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.1.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.2.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.3.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.4.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.5.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.6.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.7.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.8.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.9.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.10.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.11.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.12.ark
# Started at Wed Dec 18 15:55:18 CST 2024
#
nnet3-chain-copy-egs ark:- ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.1.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.2.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.3.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.4.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.5.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.6.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.7.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.8.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.9.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.10.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.11.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.12.ark
nnet3-chain-shuffle-egs --srand=90 ark:- ark:-
nnet3-chain-normalize-egs --normalization-fst-scale=1.0 exp/sms_single_speaker/chain/tdnn1a_sp/normalization.fst 'ark:cat exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.1.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.2.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.3.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.4.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.5.90.ark|' ark:-
LOG (nnet3-chain-normalize-egs[5.5.433~1453-7637d]:main():nnet3-chain-normalize-egs.cc:94) Added normalization to 19766 egs; had errors on 0
ERROR (nnet3-chain-copy-egs[5.5.433~1453-7637d]:ExpectToken():io-funcs.cc:200) Failed to read token [started at file position -1], expected </Supervision>
[ Stack-Trace: ]
/home/xuxu18/kaldi/src/lib/libkaldi-base.so(kaldi::MessageLogger::LogMessage() const+0xb42) [0x7f0b63141692]
nnet3-chain-copy-egs(kaldi::MessageLogger::LogAndThrow::operator=(kaldi::MessageLogger const&)+0x21) [0x5635d241605f]
/home/xuxu18/kaldi/src/lib/libkaldi-base.so(kaldi::ExpectToken(std::istream&, bool, char const*)+0x17f) [0x7f0b631432f5]
/home/xuxu18/kaldi/src/lib/libkaldi-chain.so(kaldi::chain::Supervision::Read(std::istream&, bool)+0x146) [0x7f0b61a9b30a]
/home/xuxu18/kaldi/src/lib/libkaldi-nnet3.so(kaldi::nnet3::NnetChainSupervision::Read(std::istream&, bool)+0x5d) [0x7f0b63a45379]
/home/xuxu18/kaldi/src/lib/libkaldi-nnet3.so(kaldi::nnet3::NnetChainExample::Read(std::istream&, bool)+0x49a) [0x7f0b63a46bf8]
nnet3-chain-copy-egs(kaldi::KaldiObjectHolder<kaldi::nnet3::NnetChainExample>::Read(std::istream&)+0x2fe) [0x5635d242389c]
nnet3-chain-copy-egs(kaldi::SequentialTableReaderArchiveImpl<kaldi::KaldiObjectHolder<kaldi::nnet3::NnetChainExample> >::Next()+0x1ca) [0x5635d2423c24]
nnet3-chain-copy-egs(main+0xb94) [0x5635d2414e4a]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7) [0x7f0b61e41c87]
nnet3-chain-copy-egs(_start+0x2a) [0x5635d241305a]
WARNING (nnet3-chain-copy-egs[5.5.433~1453-7637d]:Read():util/kaldi-holder-inl.h:84) Exception caught reading Table object. kaldi::KaldiFatalError
WARNING (nnet3-chain-copy-egs[5.5.433~1453-7637d]:Next():util/kaldi-table-inl.h:574) Object read failed, reading archive standard input
LOG (nnet3-chain-copy-egs[5.5.433~1453-7637d]:main():nnet3-chain-copy-egs.cc:395) Read 1045 neural-network training examples, wrote 1045
ERROR (nnet3-chain-copy-egs[5.5.433~1453-7637d]:~SequentialTableReaderArchiveImpl():util/kaldi-table-inl.h:678) TableReader: error detected closing archive standard input
[ Stack-Trace: ]
/home/xuxu18/kaldi/src/lib/libkaldi-base.so(kaldi::MessageLogger::LogMessage() const+0xb42) [0x7f0b63141692]
nnet3-chain-copy-egs(kaldi::MessageLogger::LogAndThrow::operator=(kaldi::MessageLogger const&)+0x21) [0x5635d241605f]
nnet3-chain-copy-egs(kaldi::SequentialTableReaderArchiveImpl<kaldi::KaldiObjectHolder<kaldi::nnet3::NnetChainExample> >::~SequentialTableReaderArchiveImpl()+0x1a4) [0x5635d2422620]
nnet3-chain-copy-egs(kaldi::SequentialTableReaderArchiveImpl<kaldi::KaldiObjectHolder<kaldi::nnet3::NnetChainExample> >::~SequentialTableReaderArchiveImpl()+0x9) [0x5635d242288d]
nnet3-chain-copy-egs(kaldi::SequentialTableReader<kaldi::KaldiObjectHolder<kaldi::nnet3::NnetChainExample> >::~SequentialTableReader()+0x12) [0x5635d24191d4]
nnet3-chain-copy-egs(main+0xd14) [0x5635d2414fca]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7) [0x7f0b61e41c87]
nnet3-chain-copy-egs(_start+0x2a) [0x5635d241305a]
terminate called after throwing an instance of 'kaldi::KaldiFatalError'
what(): kaldi::KaldiFatalError
bash: line 1: 9460 Done nnet3-chain-normalize-egs --normalization-fst-scale=1.0 exp/sms_single_speaker/chain/tdnn1a_sp/normalization.fst "ark:cat exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.1.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.2.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.3.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.4.90.ark exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs_orig.5.90.ark|" ark:-
9461 Killed | nnet3-chain-shuffle-egs --srand=$[90+0] ark:- ark:-
9462 Aborted (core dumped) | nnet3-chain-copy-egs ark:- ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.1.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.2.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.3.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.4.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.5.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.6.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.7.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.8.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.9.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.10.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.11.ark ark:exp/sms_single_speaker/chain/tdnn1a_sp/egs/cegs.90.12.ark
# Accounting: time=88 threads=1
# Ended (code 134) at Wed Dec 18 15:56:46 CST 2024, elapsed time 88 seconds
What could cause get_egs.sh to fail at this stage? Is it possible that running Kaldi on WSL2 is causing this issue?