|
118 | 118 | "## Training the Model\n",
|
119 | 119 | "The function trainModel() trains the Resnet50 model based on whether Intel® AMX should be enabled and whether to use the FP32 or BF16 data type. The environment variable `ONEDNN_MAX_CPU_ISA` is used to enable or disable Intel® AMX. **Note that this environment variable is only initialized once.** This means that running with both Intel® AMX and VNNI requires separate processes. The best practice is to set this environment variable before running your script. For more information, refer to the [oneDNN documentation on CPU Dispatcher Control](https://www.intel.com/content/www/us/en/develop/documentation/onednn-developer-guide-and-reference/top/performance-profiling-and-inspection/cpu-dispatcher-control.html). \n",
|
120 | 120 | "\n",
|
121 |
| - "To use BF16 in operations, use the `torch.cpu.amp.autocast()` function to perform forward and backward propagation." |
| 121 | + "To use BF16 in operations, use the `torch.amp.autocast('cpu')` function to perform forward and backward propagation." |
122 | 122 | ]
|
123 | 123 | },
|
124 | 124 | {
|
|
128 | 128 | "metadata": {},
|
129 | 129 | "outputs": [],
|
130 | 130 | "source": [
|
131 |
| - "os.environ[\"ONEDNN_MAX_CPU_ISA\"] = \"AVX512_CORE_BF16\"" |
| 131 | + "os.environ[\"ONEDNN_MAX_CPU_ISA\"] = \"AVX512_CORE_AMX\"" |
132 | 132 | ]
|
133 | 133 | },
|
134 | 134 | {
|
|
171 | 171 | " for batch_idx, (data, target) in enumerate(train_loader):\n",
|
172 | 172 | " optimizer.zero_grad()\n",
|
173 | 173 | " if \"bf16\" == dataType:\n",
|
174 |
| - " with torch.cpu.amp.autocast(): # Auto Mixed Precision\n", |
| 174 | + " with torch.amp.autocast('cpu'): # Auto Mixed Precision\n", |
175 | 175 | " # Setting memory_format to torch.channels_last could improve performance with 4D input data. This is optional.\n",
|
176 | 176 | " data = data.to(memory_format=torch.channels_last)\n",
|
177 | 177 | " output = model(data)\n",
|
|
240 | 240 | "## Training with FP32 and BF16, including Intel® AMX\n",
|
241 | 241 | "Train the Resnet50 model in three different cases:\n",
|
242 | 242 | "1. FP32 (baseline) \n",
|
243 |
| - "2. BF16 without Intel® AMX \n", |
244 |
| - "3. BF16 with Intel® AMX \n", |
| 243 | + "2. BF16 with Intel® AMX\n", |
| 244 | + "3. BF16 without Intel® AMX\n", |
245 | 245 | "\n",
|
246 | 246 | "The training time is recorded."
|
247 | 247 | ]
|
|
254 | 254 | "outputs": [],
|
255 | 255 | "source": [
|
256 | 256 | "print(\"Training model with FP32\")\n",
|
257 |
| - "os.environ[\"ONEDNN_MAX_CPU_ISA\"] = \"AVX512_CORE_AMX\"\n", |
258 | 257 | "fp32_training_time = trainModel(train_loader, modelName=\"fp32\", dataType=\"fp32\")"
|
259 | 258 | ]
|
260 | 259 | },
|
261 | 260 | {
|
262 | 261 | "cell_type": "code",
|
263 | 262 | "execution_count": null,
|
264 |
| - "id": "a9bd6dec", |
| 263 | + "id": "3faaf5de", |
265 | 264 | "metadata": {},
|
266 | 265 | "outputs": [],
|
267 | 266 | "source": [
|
268 |
| - "print(\"Training model with BF16 with AVX512\")\n", |
269 |
| - "os.environ[\"ONEDNN_MAX_CPU_ISA\"] = \"AVX512_CORE_BF16\"\n", |
270 |
| - "bf16_avx512_training_time = trainModel(train_loader, modelName=\"bf16_noAmx\", dataType=\"bf16\")" |
| 267 | + "print(\"Training model with BF16 with Intel® AMX\")\n", |
| 268 | + "bf16_amx_training_time = trainModel(train_loader, modelName=\"bf16_withAmx\", dataType=\"bf16\")" |
271 | 269 | ]
|
272 | 270 | },
|
273 | 271 | {
|
274 | 272 | "cell_type": "code",
|
275 | 273 | "execution_count": null,
|
276 |
| - "id": "2fdc8a70-509a-4714-8524-084f34e287c3", |
| 274 | + "id": "a9bd6dec", |
277 | 275 | "metadata": {},
|
278 | 276 | "outputs": [],
|
279 | 277 | "source": [
|
280 |
| - "print(\"Training model with BF16 with Intel® AMX\")\n", |
281 |
| - "os.environ[\"ONEDNN_MAX_CPU_ISA\"] = \"AVX512_CORE_AMX\"\n", |
282 |
| - "bf16_amx_training_time = trainModel(train_loader, modelName=\"bf16_withAmx\", dataType=\"bf16\")" |
| 278 | + "print(\"Training model with BF16 with AVX512\")\n", |
| 279 | + "!python pytorch_training_avx512_bf16.py\n", |
| 280 | + "\n", |
| 281 | + "# Read the variable\n", |
| 282 | + "with open('bf16_avx512_training_time.txt', 'r') as f:\n", |
| 283 | + " bf16_avx512_training_time = float(f.read().strip())" |
283 | 284 | ]
|
284 | 285 | },
|
285 | 286 | {
|
|
383 | 384 | ],
|
384 | 385 | "metadata": {
|
385 | 386 | "kernelspec": {
|
386 |
| - "display_name": "pytorch", |
| 387 | + "display_name": "pytorch_test", |
387 | 388 | "language": "python",
|
388 |
| - "name": "pytorch" |
| 389 | + "name": "pytorch_test" |
389 | 390 | },
|
390 | 391 | "language_info": {
|
391 | 392 | "codemirror_mode": {
|
|
398 | 399 | "nbconvert_exporter": "python",
|
399 | 400 | "pygments_lexer": "ipython3",
|
400 | 401 | "version": "3.11.0"
|
401 |
| - }, |
402 |
| - "vscode": { |
403 |
| - "interpreter": { |
404 |
| - "hash": "ed6ae0d06e7bec0fef5f1fb38f177ceea45508ce95c68ed2f49461dd6a888a39" |
405 |
| - } |
406 | 402 | }
|
407 | 403 | },
|
408 | 404 | "nbformat": 4,
|
|
0 commit comments