Skip to content

Commit 8fca86f

Browse files
authored
LanguageID sample updates: IPEX, BF16, script clean up (#1411)
* Add new oneAPI Sample IPEX Inference Optimization * Replacing random.randint() with random.sample() * Add support for IPEX BF16 and INT8 model option * Revert "Add support for IPEX BF16 and INT8 model option" This reverts commit 2b987db. * Add options for BF16 and INT8 model * updates from latest testing, README updates * fix typo * update train_ecapa patch file * apply torchscript to multiple model modules, update initialize scripts to PyTorch 1.13.1, IPEX 1.13.10
1 parent 2c7e596 commit 8fca86f

File tree

9 files changed

+175
-49
lines changed

9 files changed

+175
-49
lines changed

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/Inference/clean.sh

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/bin/bash
2+
13
rm -R RIRS_NOISES
24
rm -R tmp
35
rm -R speechbrain

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/Inference/inference_commonVoice.py

+69-12
Original file line numberDiff line numberDiff line change
@@ -46,33 +46,86 @@ def trim_wav(self, newWavPath , start, end ):
4646
wavfile.write( newWavPath, self.sampleRate, self.waveData[startSample:endSample])
4747

4848
class speechbrain_inference:
49-
def __init__(self, ipex_op=False):
49+
def __init__(self, ipex_op=False, bf16=False, int8_model=False):
5050
source_model_path = "./lang_id_commonvoice_model"
5151
self.language_id = EncoderClassifier.from_hparams(source=source_model_path, savedir="tmp")
5252
print("Model: " + source_model_path)
5353

54-
# Optimize for inference with IPEX
55-
if ipex_op:
54+
if int8_model:
55+
# INT8 model
56+
source_model_int8_path = "./lang_id_commonvoice_model_INT8"
57+
print("Inference with INT8 model: " + source_model_int8_path)
58+
from neural_compressor.utils.pytorch import load
59+
self.model_int8 = load(source_model_int8_path, self.language_id)
60+
self.model_int8.eval()
61+
elif ipex_op:
62+
# Optimize for inference with IPEX
5663
print("Optimizing inference with IPEX")
5764
self.language_id.eval()
5865
sampleInput = (torch.load("./sample_input_features.pt"), torch.load("./sample_input_wav_lens.pt"))
59-
self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"], sample_input=sampleInput)
66+
if bf16:
67+
print("BF16 enabled")
68+
self.language_id.mods["compute_features"] = ipex.optimize(self.language_id.mods["compute_features"], dtype=torch.bfloat16)
69+
self.language_id.mods["mean_var_norm"] = ipex.optimize(self.language_id.mods["mean_var_norm"], dtype=torch.bfloat16)
70+
self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"], dtype=torch.bfloat16)
71+
self.language_id.mods["classifier"] = ipex.optimize(self.language_id.mods["classifier"], dtype=torch.bfloat16)
72+
else:
73+
self.language_id.mods["compute_features"] = ipex.optimize(self.language_id.mods["compute_features"])
74+
self.language_id.mods["mean_var_norm"] = ipex.optimize(self.language_id.mods["mean_var_norm"])
75+
self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"])
76+
self.language_id.mods["classifier"] = ipex.optimize(self.language_id.mods["classifier"])
77+
6078
# Torchscript to resolve performance issues with reorder operations
61-
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=sampleInput)
79+
with torch.no_grad():
80+
I2 = self.language_id.mods["embedding_model"](*sampleInput)
81+
if bf16:
82+
with torch.cpu.amp.autocast():
83+
self.language_id.mods["compute_features"] = torch.jit.trace( self.language_id.mods["compute_features"] , example_inputs=(torch.rand(1,32000)))
84+
self.language_id.mods["mean_var_norm"] = torch.jit.trace(self.language_id.mods["mean_var_norm"], example_inputs=sampleInput)
85+
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=sampleInput)
86+
self.language_id.mods["classifier"] = torch.jit.trace(self.language_id.mods["classifier"], example_inputs=I2)
87+
88+
self.language_id.mods["compute_features"] = torch.jit.freeze(self.language_id.mods["compute_features"])
89+
self.language_id.mods["mean_var_norm"] = torch.jit.freeze(self.language_id.mods["mean_var_norm"])
90+
self.language_id.mods["embedding_model"] = torch.jit.freeze(self.language_id.mods["embedding_model"])
91+
self.language_id.mods["classifier"] = torch.jit.freeze( self.language_id.mods["classifier"])
92+
else:
93+
self.language_id.mods["compute_features"] = torch.jit.trace( self.language_id.mods["compute_features"] , example_inputs=(torch.rand(1,32000)))
94+
self.language_id.mods["mean_var_norm"] = torch.jit.trace(self.language_id.mods["mean_var_norm"], example_inputs=sampleInput)
95+
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=sampleInput)
96+
self.language_id.mods["classifier"] = torch.jit.trace(self.language_id.mods["classifier"], example_inputs=I2)
97+
98+
self.language_id.mods["compute_features"] = torch.jit.freeze(self.language_id.mods["compute_features"])
99+
self.language_id.mods["mean_var_norm"] = torch.jit.freeze(self.language_id.mods["mean_var_norm"])
100+
self.language_id.mods["embedding_model"] = torch.jit.freeze(self.language_id.mods["embedding_model"])
101+
self.language_id.mods["classifier"] = torch.jit.freeze( self.language_id.mods["classifier"])
102+
62103
return
63104

64-
def predict(self, data_path="", verbose=False):
105+
def predict(self, data_path="", ipex_op=False, bf16=False, int8_model=False, verbose=False):
65106
signal = self.language_id.load_audio(data_path)
66107
inference_start_time = time()
67-
prediction = self.language_id.classify_batch(signal)
108+
109+
if int8_model: # INT8 model from INC
110+
prediction = self.model_int8(signal)
111+
elif ipex_op: # IPEX
112+
with torch.no_grad():
113+
if bf16:
114+
with torch.cpu.amp.autocast():
115+
prediction = self.language_id.classify_batch(signal)
116+
else:
117+
prediction = self.language_id.classify_batch(signal)
118+
else: # default
119+
prediction = self.language_id.classify_batch(signal)
120+
68121
inference_end_time = time()
69122
inference_latency = inference_end_time - inference_start_time
70123
if verbose:
71124
print(" Inference latency: %.5f seconds" %(inference_latency))
72-
125+
73126
# prediction is a tuple of format (out_prob, score, index) due to modification of speechbrain.pretrained.interfaces.py
74127
label = self.language_id.hparams.label_encoder.decode_torch(prediction[2])[0]
75-
128+
76129
return label, inference_latency
77130

78131
def main(argv):
@@ -82,13 +135,17 @@ def main(argv):
82135
parser.add_argument('-d', type=int, default=3, help="Duration of each wave sample in seconds")
83136
parser.add_argument('-s', type=int, default=5, help="Sample size of waves to be taken from the audio file")
84137
parser.add_argument('--ipex', action="store_true", default=False, help="Enable Intel Extension for PyTorch (IPEX) optimizations")
138+
parser.add_argument('--bf16', action="store_true", default=False, help="Use bfloat16 precision (supported on 4th Gen Xeon Scalable Processors or newer)")
139+
parser.add_argument('--int8_model', action="store_true", default=False, help="Run inference with INT8 model generated from Intel Neural Compressor (INC)")
85140
parser.add_argument('--verbose', action="store_true", default=False, help="Print additional debug info")
86141
args = parser.parse_args()
87142

88143
path = args.p
89144
sample_dur = args.d
90145
sample_size = args.s
91146
use_ipex = args.ipex
147+
use_bf16 = args.bf16
148+
use_int8_model = args.int8_model
92149
verbose = args.verbose
93150
print("\nTaking %d samples of %d seconds each" %(sample_size, sample_dur))
94151

@@ -103,7 +160,7 @@ def main(argv):
103160
writer = csv.writer(f)
104161
writer.writerow(["Language", "Total Samples", "Correct Predictions", "Accuracy"])
105162

106-
speechbrain_inf = speechbrain_inference(ipex_op=use_ipex)
163+
speechbrain_inf = speechbrain_inference(ipex_op=use_ipex, bf16=use_bf16, int8_model=use_int8_model)
107164
for language in languageList:
108165
print("\nTesting on %s data" %language)
109166
testDataDirectory = path + "/" + language
@@ -132,7 +189,7 @@ def main(argv):
132189
newWavPath = 'trim_tmp.wav'
133190
data.trim_wav(newWavPath, start, start + sample_dur)
134191
try:
135-
label, inference_latency = speechbrain_inf.predict(data_path=newWavPath, verbose=verbose)
192+
label, inference_latency = speechbrain_inf.predict(data_path=newWavPath, ipex_op=use_ipex, bf16=use_bf16, int8_model=use_int8_model, verbose=verbose)
136193
if verbose:
137194
print(" start-end : " + str(start) + " " + str(start + sample_dur) + " prediction : " + label)
138195
predict_list.append(label)
@@ -174,4 +231,4 @@ def main(argv):
174231

175232
if __name__ == "__main__":
176233
import sys
177-
sys.exit(main(sys.argv))
234+
sys.exit(main(sys.argv))

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/Inference/inference_custom.py

+66-9
Original file line numberDiff line numberDiff line change
@@ -48,25 +48,78 @@ def trim_wav(self, newWavPath , start, end ):
4848

4949

5050
class speechbrain_inference:
51-
def __init__(self, ipex_op=False):
51+
def __init__(self, ipex_op=False, bf16=False, int8_model=False):
5252
source_model_path = "./lang_id_commonvoice_model"
5353
self.language_id = EncoderClassifier.from_hparams(source=source_model_path, savedir="tmp")
5454
print("Model: " + source_model_path)
5555

56-
# Optimize for inference with IPEX
57-
if ipex_op:
56+
if int8_model:
57+
# INT8 model
58+
source_model_int8_path = "./lang_id_commonvoice_model_INT8"
59+
print("Inference with INT8 model: " + source_model_int8_path)
60+
from neural_compressor.utils.pytorch import load
61+
self.model_int8 = load(source_model_int8_path, self.language_id)
62+
self.model_int8.eval()
63+
elif ipex_op:
64+
# Optimize for inference with IPEX
5865
print("Optimizing inference with IPEX")
5966
self.language_id.eval()
6067
sampleInput = (torch.load("./sample_input_features.pt"), torch.load("./sample_input_wav_lens.pt"))
61-
self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"], sample_input=sampleInput)
68+
if bf16:
69+
print("BF16 enabled")
70+
self.language_id.mods["compute_features"] = ipex.optimize(self.language_id.mods["compute_features"], dtype=torch.bfloat16)
71+
self.language_id.mods["mean_var_norm"] = ipex.optimize(self.language_id.mods["mean_var_norm"], dtype=torch.bfloat16)
72+
self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"], dtype=torch.bfloat16)
73+
self.language_id.mods["classifier"] = ipex.optimize(self.language_id.mods["classifier"], dtype=torch.bfloat16)
74+
else:
75+
self.language_id.mods["compute_features"] = ipex.optimize(self.language_id.mods["compute_features"])
76+
self.language_id.mods["mean_var_norm"] = ipex.optimize(self.language_id.mods["mean_var_norm"])
77+
self.language_id.mods["embedding_model"] = ipex.optimize(self.language_id.mods["embedding_model"])
78+
self.language_id.mods["classifier"] = ipex.optimize(self.language_id.mods["classifier"])
79+
6280
# Torchscript to resolve performance issues with reorder operations
63-
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=sampleInput)
81+
with torch.no_grad():
82+
I2 = self.language_id.mods["embedding_model"](*sampleInput)
83+
if bf16:
84+
with torch.cpu.amp.autocast():
85+
self.language_id.mods["compute_features"] = torch.jit.trace( self.language_id.mods["compute_features"] , example_inputs=(torch.rand(1,32000)))
86+
self.language_id.mods["mean_var_norm"] = torch.jit.trace(self.language_id.mods["mean_var_norm"], example_inputs=sampleInput)
87+
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=sampleInput)
88+
self.language_id.mods["classifier"] = torch.jit.trace(self.language_id.mods["classifier"], example_inputs=I2)
89+
90+
self.language_id.mods["compute_features"] = torch.jit.freeze(self.language_id.mods["compute_features"])
91+
self.language_id.mods["mean_var_norm"] = torch.jit.freeze(self.language_id.mods["mean_var_norm"])
92+
self.language_id.mods["embedding_model"] = torch.jit.freeze(self.language_id.mods["embedding_model"])
93+
self.language_id.mods["classifier"] = torch.jit.freeze( self.language_id.mods["classifier"])
94+
else:
95+
self.language_id.mods["compute_features"] = torch.jit.trace( self.language_id.mods["compute_features"] , example_inputs=(torch.rand(1,32000)))
96+
self.language_id.mods["mean_var_norm"] = torch.jit.trace(self.language_id.mods["mean_var_norm"], example_inputs=sampleInput)
97+
self.language_id.mods["embedding_model"] = torch.jit.trace(self.language_id.mods["embedding_model"], example_inputs=sampleInput)
98+
self.language_id.mods["classifier"] = torch.jit.trace(self.language_id.mods["classifier"], example_inputs=I2)
99+
100+
self.language_id.mods["compute_features"] = torch.jit.freeze(self.language_id.mods["compute_features"])
101+
self.language_id.mods["mean_var_norm"] = torch.jit.freeze(self.language_id.mods["mean_var_norm"])
102+
self.language_id.mods["embedding_model"] = torch.jit.freeze(self.language_id.mods["embedding_model"])
103+
self.language_id.mods["classifier"] = torch.jit.freeze( self.language_id.mods["classifier"])
104+
64105
return
65106

66-
def predict(self, data_path="", verbose=False):
107+
def predict(self, data_path="", ipex_op=False, bf16=False, int8_model=False, verbose=False):
67108
signal = self.language_id.load_audio(data_path)
68109
inference_start_time = time()
69-
prediction = self.language_id.classify_batch(signal)
110+
111+
if int8_model: # INT8 model from INC
112+
prediction = self.model_int8(signal)
113+
elif ipex_op: # IPEX
114+
with torch.no_grad():
115+
if bf16:
116+
with torch.cpu.amp.autocast():
117+
prediction = self.language_id.classify_batch(signal)
118+
else:
119+
prediction = self.language_id.classify_batch(signal)
120+
else: # default
121+
prediction = self.language_id.classify_batch(signal)
122+
70123
inference_end_time = time()
71124
inference_latency = inference_end_time - inference_start_time
72125
if verbose:
@@ -86,6 +139,8 @@ def main(argv):
86139
parser.add_argument('-s', type=int, default=100, help="Sample size of waves to be taken from the audio file")
87140
parser.add_argument('--vad', action="store_true", default=False, help="Use Voice Activity Detection (VAD) to extract only the speech segments of the audio file")
88141
parser.add_argument('--ipex', action="store_true", default=False, help="Enable Intel Extension for PyTorch (IPEX) optimizations")
142+
parser.add_argument('--bf16', action="store_true", default=False, help="Use bfloat16 precision (supported on 4th Gen Xeon Scalable Processors or newer)")
143+
parser.add_argument('--int8_model', action="store_true", default=False, help="Run inference with INT8 model generated from Intel Neural Compressor (INC)")
89144
parser.add_argument('--ground_truth_compare', action="store_true", default=False, help="Enable comparison of prediction labels to ground truth values")
90145
parser.add_argument('--verbose', action="store_true", default=False, help="Print additional debug info")
91146
args = parser.parse_args()
@@ -95,6 +150,8 @@ def main(argv):
95150
sample_size = args.s
96151
use_vad = args.vad
97152
use_ipex = args.ipex
153+
use_bf16 = args.bf16
154+
use_int8_model = args.int8_model
98155
ground_truth_compare = args.ground_truth_compare
99156
verbose = args.verbose
100157
print("\nTaking %d samples of %d seconds each" %(sample_size, sample_dur))
@@ -119,7 +176,7 @@ def main(argv):
119176
else:
120177
raise Exception("Ground truth labels file does not exist.")
121178

122-
speechbrain_inf = speechbrain_inference(ipex_op=use_ipex)
179+
speechbrain_inf = speechbrain_inference(ipex_op=use_ipex, bf16=use_bf16, int8_model=use_int8_model)
123180
if use_vad:
124181
from speechbrain.pretrained import VAD
125182
print("Using Voice Activity Detection")
@@ -232,7 +289,7 @@ def main(argv):
232289
newWavPath = 'trim_tmp.wav'
233290
data.trim_wav(newWavPath, start, start + sample_dur)
234291
try:
235-
label, inference_latency = speechbrain_inf.predict(data_path=newWavPath, verbose=verbose)
292+
label, inference_latency = speechbrain_inf.predict(data_path=newWavPath, ipex_op=use_ipex, bf16=use_bf16, int8_model=use_int8_model, verbose=verbose)
236293
if verbose:
237294
print(" start-end : " + str(start) + " " + str(start + sample_dur) + " prediction : " + label)
238295
predict_list.append(label)

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/Inference/initialize.sh

+7-3
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,14 @@ pip install -r requirements.txt
1010
pip install --editable .
1111
cd ..
1212

13+
# Add speechbrain to environment variable PYTHONPATH
14+
export PYTHONPATH=$PYTHONPATH:/Inference/speechbrain
15+
1316
# Install PyTorch and Intel Extension for PyTorch (IPEX)
14-
pip install torch==1.12.0 torchaudio==0.12.0 torchvision==0.13.0
15-
pip install intel_extension_for_pytorch==1.12.0
16-
pip install neural-compressor==1.14.2
17+
pip install torch==1.13.1 torchaudio
18+
pip install --no-deps torchvision==0.14.0
19+
pip install intel_extension_for_pytorch==1.13.100
20+
pip install neural-compressor==2.0
1721

1822
# Update packages
1923
apt-get update && apt-get install libgl1

AI-and-Analytics/End-to-end-Workloads/LanguageIdentification/Inference/lang_id_inference.ipynb

+3-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
"-s : size of sample waves, default is 100 \n",
2020
"--vad : enable VAD model to detect active speech (inference_custom.py only) \n",
2121
"--ipex : run inference with optimizations from Intel Extension for PyTorch \n",
22+
"--bf16 : run inference with auto-mixed precision featuring Bfloat16 \n",
23+
"--int8_model : Run inference with the INT8 model generated from Intel® Neural Compressor \n",
2224
"--ground_truth_compare : enable comparison of prediction labels to ground truth values (inference_custom.py only) \n",
2325
"--verbose : prints additional debug information, such as latency \n",
2426
"\n",
@@ -128,7 +130,7 @@
128130
"metadata": {},
129131
"outputs": [],
130132
"source": [
131-
"!python quantize_model.py -p ./lang_id_commonvoice_model"
133+
"!python quantize_model.py -p ./lang_id_commonvoice_model -datapath $COMMON_VOICE_PATH/dev"
132134
]
133135
},
134136
{

0 commit comments

Comments
 (0)