Skip to content

Commit 6cf52fb

Browse files
author
Benjamin Moody
committed
Merge pull request #313 into master.
The smooth_frames argument to rdrecord is meant to determine whether the input signals are stored as {d|p}_signal (a homogeneous two-dimensional array, requiring all signals to be resampled to the same frequency) or e_{d|p}_signal (a list of one-dimensional arrays, keeping each signal at its actual sampling frequency). Previously, this only worked if you were trying to read signals with multiple samples per frame. If all signals in the record had only one sample per frame, then the smooth_frames argument was ignored. Instead, the application should not need to know in advance whether the record contains multiple samples per frame. If the application asks for a 2D array it should get a 2D array, and if the application asks for a list of 1D arrays it should get a list of 1D arrays. Furthermore, simplify internal logic by removing the "frame smoothing" functionality of _rd_segment, in favor of "smoothing" the result (if desired) afterwards. The smooth_frames function performs the same computation as _rd_dat_signals did previously, but somewhat more efficiently.
2 parents 39bbaac + 4cd1d7b commit 6cf52fb

File tree

3 files changed

+138
-174
lines changed

3 files changed

+138
-174
lines changed

tests/test_record.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,22 @@ def test_1a(self):
2626
Target file created with:
2727
rdsamp -r sample-data/test01_00s | cut -f 2- > record-1a
2828
"""
29-
record = wfdb.rdrecord('sample-data/test01_00s', physical=False)
29+
record = wfdb.rdrecord('sample-data/test01_00s',
30+
physical=False, return_res=16)
3031
sig = record.d_signal
3132
sig_target = np.genfromtxt('tests/target-output/record-1a')
3233

3334
# Compare data streaming from Physionet
34-
record_pn = wfdb.rdrecord('test01_00s', physical=False,
35-
pn_dir='macecgdb')
35+
record_pn = wfdb.rdrecord('test01_00s', pn_dir='macecgdb',
36+
physical=False, return_res=16)
3637

3738
# Test file writing
38-
record_2 = wfdb.rdrecord('sample-data/test01_00s', physical=False)
39+
record_2 = wfdb.rdrecord('sample-data/test01_00s',
40+
physical=False, return_res=16)
3941
record_2.sig_name = ['ECG_1', 'ECG_2', 'ECG_3', 'ECG_4']
4042
record_2.wrsamp()
41-
record_write = wfdb.rdrecord('test01_00s', physical=False)
43+
record_write = wfdb.rdrecord('test01_00s',
44+
physical=False, return_res=16)
4245

4346
assert np.array_equal(sig, sig_target)
4447
assert record.__eq__(record_pn)
@@ -75,24 +78,30 @@ def test_1b(self):
7578
def test_1c(self):
7679
"""
7780
Format 16, byte offset, selected duration, selected channels,
78-
digital.
81+
digital, expanded format.
7982
8083
Target file created with:
8184
rdsamp -r sample-data/a103l -f 80 -s 0 1 | cut -f 2- > record-1c
8285
"""
8386
record = wfdb.rdrecord('sample-data/a103l',
84-
sampfrom=20000, channels=[0, 1], physical=False)
85-
sig = record.d_signal
87+
sampfrom=20000, channels=[0, 1], physical=False,
88+
smooth_frames=False)
89+
# convert expanded to uniform array
90+
sig = np.zeros((record.sig_len, record.n_sig))
91+
for i in range(record.n_sig):
92+
sig[:,i] = record.e_d_signal[i]
93+
8694
sig_target = np.genfromtxt('tests/target-output/record-1c')
8795

8896
# Compare data streaming from Physionet
8997
record_pn = wfdb.rdrecord('a103l', pn_dir='challenge-2015/training',
9098
sampfrom=20000, channels=[0, 1],
91-
physical=False)
99+
physical=False, smooth_frames=False)
92100

93101
# Test file writing
94-
record.wrsamp()
95-
record_write = wfdb.rdrecord('a103l', physical=False)
102+
record.wrsamp(expanded=True)
103+
record_write = wfdb.rdrecord('a103l', physical=False,
104+
smooth_frames=False)
96105

97106
assert np.array_equal(sig, sig_target)
98107
assert record.__eq__(record_pn)

wfdb/io/_signal.py

Lines changed: 96 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -745,7 +745,7 @@ def calc_checksum(self, expanded=False):
745745
746746
"""
747747
if expanded:
748-
cs = [int(np.sum(self.e_d_signal[ch]) % 65536) for ch in range(self.n_sig)]
748+
cs = [int(np.sum(s) % 65536) for s in self.e_d_signal]
749749
else:
750750
cs = np.sum(self.d_signal, 0) % 65536
751751
cs = [int(c) for c in cs]
@@ -822,37 +822,61 @@ def smooth_frames(self, sigtype='physical'):
822822
if spf[ch] is None:
823823
spf[ch] = 1
824824

825-
# Total samples per frame
826-
tspf = sum(spf)
825+
# The output data type should be the smallest type that can
826+
# represent any input sample value. The intermediate data type
827+
# must be able to represent the sum of spf[ch] sample values.
827828

828829
if sigtype == 'physical':
829-
n_sig = len(self.e_p_signal)
830-
sig_len = int(len(self.e_p_signal[0])/spf[0])
831-
signal = np.zeros((sig_len, n_sig), dtype='float64')
832-
833-
for ch in range(n_sig):
834-
if spf[ch] == 1:
835-
signal[:, ch] = self.e_p_signal[ch]
836-
else:
837-
for frame in range(spf[ch]):
838-
signal[:, ch] += self.e_p_signal[ch][frame::spf[ch]]
839-
signal[:, ch] = signal[:, ch] / spf[ch]
840-
830+
expanded_signal = self.e_p_signal
831+
intermediate_dtype = np.dtype('float64')
832+
allowed_dtypes = [
833+
np.dtype('float32'),
834+
np.dtype('float64'),
835+
]
841836
elif sigtype == 'digital':
842-
n_sig = len(self.e_d_signal)
843-
sig_len = int(len(self.e_d_signal[0])/spf[0])
844-
signal = np.zeros((sig_len, n_sig), dtype='int64')
845-
846-
for ch in range(n_sig):
847-
if spf[ch] == 1:
848-
signal[:, ch] = self.e_d_signal[ch]
849-
else:
850-
for frame in range(spf[ch]):
851-
signal[:, ch] += self.e_d_signal[ch][frame::spf[ch]]
852-
signal[:, ch] = signal[:, ch] / spf[ch]
837+
expanded_signal = self.e_d_signal
838+
intermediate_dtype = np.dtype('int64')
839+
allowed_dtypes = [
840+
np.dtype('int8'),
841+
np.dtype('int16'),
842+
np.dtype('int32'),
843+
np.dtype('int64'),
844+
]
853845
else:
854846
raise ValueError("sigtype must be 'physical' or 'digital'")
855847

848+
n_sig = len(expanded_signal)
849+
sig_len = len(expanded_signal[0]) // spf[0]
850+
input_dtypes = set()
851+
for ch in range(n_sig):
852+
if len(expanded_signal[ch]) != sig_len * spf[ch]:
853+
raise ValueError("length mismatch: signal %d has %d samples,"
854+
" expected %dx%d"
855+
% (ch, len(expanded_signal),
856+
sig_len, spf[ch]))
857+
input_dtypes.add(expanded_signal[ch].dtype)
858+
859+
for output_dtype in allowed_dtypes:
860+
if all(dt <= output_dtype for dt in input_dtypes):
861+
break
862+
863+
signal = np.empty((sig_len, n_sig), dtype=output_dtype)
864+
865+
# Large input arrays will be processed in chunks to avoid the need
866+
# to allocate a single huge temporary array.
867+
CHUNK_SIZE = 65536
868+
869+
for ch in range(n_sig):
870+
if spf[ch] == 1:
871+
signal[:, ch] = expanded_signal[ch]
872+
else:
873+
frames = expanded_signal[ch].reshape(-1, spf[ch])
874+
for chunk_start in range(0, sig_len, CHUNK_SIZE):
875+
chunk_end = chunk_start + CHUNK_SIZE
876+
signal_sum = np.sum(frames[chunk_start:chunk_end, :],
877+
axis=1, dtype=intermediate_dtype)
878+
signal[chunk_start:chunk_end, ch] = signal_sum / spf[ch]
879+
856880
return signal
857881

858882

@@ -861,7 +885,7 @@ def smooth_frames(self, sigtype='physical'):
861885

862886
def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
863887
samps_per_frame, skew, init_value, sampfrom, sampto, channels,
864-
smooth_frames, ignore_skew, no_file=False, sig_data=None, return_res=64):
888+
ignore_skew, no_file=False, sig_data=None, return_res=64):
865889
"""
866890
Read the digital samples from a single segment record's associated
867891
dat file(s).
@@ -894,8 +918,6 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
894918
The starting sample number to be read from the signals.
895919
sampto : int
896920
The final sample number to be read from the signals.
897-
smooth_frames : bool
898-
Whether to smooth channels with multiple samples/frame.
899921
ignore_skew : bool
900922
Used when reading records with at least one skewed signal.
901923
Specifies whether to apply the skew to align the signals in the
@@ -915,17 +937,14 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
915937
916938
Returns
917939
-------
918-
signals : ndarray, list
919-
The signals read from the dat file(s). A 2d numpy array is
920-
returned if the signals have uniform samples/frame or if
921-
`smooth_frames` is True. Otherwise a list of 1d numpy arrays
922-
is returned.
940+
signals : list
941+
The signals read from the dat file(s). Each signal is returned as a
942+
one-dimensional numpy array.
923943
924944
Notes
925945
-----
926-
'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew'
927-
are user desired input fields. All other parameters are
928-
specifications of the segment.
946+
'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired
947+
input fields. All other parameters are specifications of the segment.
929948
930949
"""
931950
# Check for valid inputs
@@ -989,69 +1008,38 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
9891008
r_w_channel[fn] = [c - min(datchannel[fn]) for c in w_channel[fn]]
9901009
out_dat_channel[fn] = [channels.index(c) for c in w_channel[fn]]
9911010

992-
# Signals with multiple samples/frame are smoothed, or all signals have 1 sample/frame.
993-
# Return uniform numpy array
994-
if smooth_frames or sum(samps_per_frame) == n_sig:
995-
# Figure out the largest required dtype for the segment to minimize memory usage
996-
max_dtype = _np_dtype(_fmt_res(fmt, max_res=True), discrete=True)
997-
# Allocate signal array. Minimize dtype
998-
signals = np.zeros([sampto-sampfrom, len(channels)], dtype=max_dtype)
999-
1000-
# Read each wanted dat file and store signals
1001-
for fn in w_file_name:
1002-
datsignals = _rd_dat_signals(
1003-
file_name=fn,
1004-
dir_name=dir_name,
1005-
pn_dir=pn_dir,
1006-
fmt=w_fmt[fn],
1007-
n_sig=len(datchannel[fn]),
1008-
sig_len=sig_len,
1009-
byte_offset=w_byte_offset[fn],
1010-
samps_per_frame=w_samps_per_frame[fn],
1011-
skew=w_skew[fn],
1012-
init_value=w_init_value[fn],
1013-
sampfrom=sampfrom,
1014-
sampto=sampto,
1015-
smooth_frames=smooth_frames,
1016-
no_file=no_file,
1017-
sig_data=sig_data)
1018-
signals[:, out_dat_channel[fn]] = datsignals[:, r_w_channel[fn]]
1019-
10201011
# Return each sample in signals with multiple samples/frame, without smoothing.
10211012
# Return a list of numpy arrays for each signal.
1022-
else:
1023-
signals = [None] * len(channels)
1024-
1025-
for fn in w_file_name:
1026-
# Get the list of all signals contained in the dat file
1027-
datsignals = _rd_dat_signals(
1028-
file_name=fn,
1029-
dir_name=dir_name,
1030-
pn_dir=pn_dir,
1031-
fmt=w_fmt[fn],
1032-
n_sig=len(datchannel[fn]),
1033-
sig_len=sig_len,
1034-
byte_offset=w_byte_offset[fn],
1035-
samps_per_frame=w_samps_per_frame[fn],
1036-
skew=w_skew[fn],
1037-
init_value=w_init_value[fn],
1038-
sampfrom=sampfrom,
1039-
sampto=sampto,
1040-
smooth_frames=smooth_frames,
1041-
no_file=no_file,
1042-
sig_data=sig_data)
1043-
1044-
# Copy over the wanted signals
1045-
for cn in range(len(out_dat_channel[fn])):
1046-
signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]]
1013+
signals = [None] * len(channels)
1014+
1015+
for fn in w_file_name:
1016+
# Get the list of all signals contained in the dat file
1017+
datsignals = _rd_dat_signals(
1018+
file_name=fn,
1019+
dir_name=dir_name,
1020+
pn_dir=pn_dir,
1021+
fmt=w_fmt[fn],
1022+
n_sig=len(datchannel[fn]),
1023+
sig_len=sig_len,
1024+
byte_offset=w_byte_offset[fn],
1025+
samps_per_frame=w_samps_per_frame[fn],
1026+
skew=w_skew[fn],
1027+
init_value=w_init_value[fn],
1028+
sampfrom=sampfrom,
1029+
sampto=sampto,
1030+
no_file=no_file,
1031+
sig_data=sig_data)
1032+
1033+
# Copy over the wanted signals
1034+
for cn in range(len(out_dat_channel[fn])):
1035+
signals[out_dat_channel[fn][cn]] = datsignals[r_w_channel[fn][cn]]
10471036

10481037
return signals
10491038

10501039

10511040
def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
10521041
byte_offset, samps_per_frame, skew, init_value,
1053-
sampfrom, sampto, smooth_frames,
1054-
no_file=False, sig_data=None):
1042+
sampfrom, sampto, no_file=False, sig_data=None):
10551043
"""
10561044
Read all signals from a WFDB dat file.
10571045
@@ -1083,8 +1071,6 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
10831071
The starting sample number to be read from the signals.
10841072
sampto : int
10851073
The final sample number to be read from the signals.
1086-
smooth_frames : bool
1087-
Whether to smooth channels with multiple samples/frame.
10881074
no_file : bool, optional
10891075
Used when using this function with just an array of signal data
10901076
and no associated file to read the data from.
@@ -1095,16 +1081,13 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
10951081
Returns
10961082
-------
10971083
signal : ndarray, list
1098-
The signals read from the dat file(s). A 2d numpy array is
1099-
returned if the signals have uniform samples/frame or if
1100-
`smooth_frames` is True. Otherwise a list of 1d numpy arrays
1101-
is returned.
1084+
The signals read from the dat file(s). Each signal is returned as a
1085+
one-dimensional numpy array.
11021086
11031087
Notes
11041088
-----
1105-
'channels', 'sampfrom', 'sampto', 'smooth_frames', and 'ignore_skew'
1106-
are user desired input fields. All other parameters are
1107-
specifications of the segment.
1089+
'channels', 'sampfrom', 'sampto', and 'ignore_skew' are user desired
1090+
input fields. All other parameters are specifications of the segment.
11081091
11091092
"""
11101093
# Check for valid inputs
@@ -1206,46 +1189,18 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
12061189
# At this point, dtype of sig_data is the minimum integer format
12071190
# required for storing the final digital samples.
12081191

1209-
# No extra samples/frame. Obtain original uniform numpy array
1210-
if tsamps_per_frame == n_sig:
1211-
# Reshape into multiple channels
1212-
signal = sig_data.reshape(-1, n_sig)
1213-
# Skew the signal
1214-
signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace)
1215-
# Extra frames present to be smoothed. Obtain averaged uniform numpy array
1216-
elif smooth_frames:
1217-
# Allocate memory for smoothed signal.
1218-
signal = np.zeros((int(len(sig_data) / tsamps_per_frame) , n_sig),
1219-
dtype=sig_data.dtype)
1220-
1221-
# Transfer and average samples
1222-
for ch in range(n_sig):
1223-
if samps_per_frame[ch] == 1:
1224-
signal[:, ch] = sig_data[sum(([0] + samps_per_frame)[:ch + 1])::tsamps_per_frame]
1225-
else:
1226-
if ch == 0:
1227-
startind = 0
1228-
else:
1229-
startind = np.sum(samps_per_frame[:ch])
1230-
signal[:,ch] = [np.average(sig_data[ind:ind+samps_per_frame[ch]]) for ind in range(startind,len(sig_data),tsamps_per_frame)]
1231-
# Skew the signal
1232-
signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace)
1233-
1234-
# Extra frames present without wanting smoothing. Return all
1235-
# expanded samples.
1236-
else:
1237-
# List of 1d numpy arrays
1238-
signal = []
1239-
# Transfer over samples
1240-
sig_frames = sig_data.reshape(-1, tsamps_per_frame)
1241-
ch_start = 0
1242-
for ch in range(n_sig):
1243-
ch_end = ch_start + samps_per_frame[ch]
1244-
ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1)
1245-
signal.append(ch_signal)
1246-
ch_start = ch_end
1247-
# Skew the signal
1248-
signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame)
1192+
# List of 1d numpy arrays
1193+
signal = []
1194+
# Transfer over samples
1195+
sig_frames = sig_data.reshape(-1, tsamps_per_frame)
1196+
ch_start = 0
1197+
for ch in range(n_sig):
1198+
ch_end = ch_start + samps_per_frame[ch]
1199+
ch_signal = sig_frames[:, ch_start:ch_end].reshape(-1)
1200+
signal.append(ch_signal)
1201+
ch_start = ch_end
1202+
# Skew the signal
1203+
signal = _skew_sig(signal, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame)
12491204

12501205
# Integrity check of signal shape after reading
12511206
_check_sig_dims(signal, read_len, n_sig, samps_per_frame)

0 commit comments

Comments
 (0)