[backends] Add functionality to TRT backend #1753
Closed
The first file in this diff is empty.
@@ -0,0 +1,223 @@
import argparse
import traceback
import torch

import numpy as np

import json
import os
import time
from datetime import datetime
from typing import List

from torchbenchmark import (
    load_canary_model_by_name,
    load_model_by_name,
    list_models,
    ModelNotFoundError,
)


def cli(args: List[str]):
    """Parse input arguments, extracting model specification and batch size"""
    arg_parser = argparse.ArgumentParser(args)
    arg_parser.add_argument(
        "--model",
        help="Full or partial name of a model to run. If partial, picks the first match.",
        default="",
        type=str,
    )
    arg_parser.add_argument(
        "--bs",
        help="Input batch size to test.",
        default=1,
        type=int,
    )
    arg_parser.add_argument(
        "--num_warmup",
        help="Number of inference warmup iterations.",
        default=10,
        type=int,
    )
    arg_parser.add_argument(
        "--num_iter",
        help="Number of inference iterations for benchmarking.",
        default=100,
        type=int,
    )
    parsed_args, unknown = arg_parser.parse_known_args()

    return vars(parsed_args), unknown


def save_metrics(metrics):
    """Save metrics to a JSON file with formatted filename"""
    metrics_json = {
        "name": "torch_trt",
        "environ": {
            "metrics_version": "v0.1",
            "pytorch_git_version": torch.version.git_version,
        },
        "metrics": metrics,
    }

    # Obtain target save directory for JSON metrics from current save directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    target_dir = os.path.normpath(
        os.path.join(current_dir, "../../.userbenchmark/torch_trt/")
    )

    os.makedirs(target_dir, exist_ok=True)

    # Format filename and path to save metrics
    metrics_file = "metrics-{}.json".format(
        datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S")
    )
    metrics_save_path = os.path.join(target_dir, metrics_file)

    with open(metrics_save_path, "w") as f:
        json.dump(metrics_json, f, indent=4)


def run_single_model(
    Model,
    batch_size: int,
    extra_args: List[str],
    selected_ir: str,
    num_warmup: int,
    num_iter: int,
):
    """Run inference benchmarking on a single model"""
    # Build TorchBench model instance, with backend having the userbenchmark name
    # This invokes the torch_trt backend functionality directly
    model = Model(
        device="cuda",
        test="eval",
        jit=False,
        batch_size=batch_size,
        extra_args=[
            "--backend",
        ]
        + extra_args,
    )

    metrics = run_one_step(model.invoke, model, num_warmup, num_iter, selected_ir)

    # Record dynamo compilation metrics, if there are any.
    try:
        if model.pt2_compilation_time:
            metrics[
                f"{model.name}.bs_{model.batch_size}.precision_{model.dargs.precision}."
                + f"ir_{selected_ir}.pt2_compilation_time"
            ] = model.pt2_compilation_time
    except AttributeError:
        # The model may not expose pt2_compilation_time; skip the metric if so.
        pass

    return metrics


def run_one_step(func, model, num_warmup, num_iter, selected_ir):
    # Warmup model inference
    for _ in range(num_warmup):
        func()

    result_summary = []

    # Run inference for the specified number of iterations
    for _ in range(num_iter):
        torch.cuda.synchronize()
        start_event = torch.cuda.Event(enable_timing=True)
        end_event = torch.cuda.Event(enable_timing=True)

        # Use time_ns() instead of time(), which is not guaranteed to provide
        # precision better than 1 second; see
        # https://docs.python.org/3/library/time.html#time.time.
        t0 = time.time_ns()
        start_event.record()
        func()
        end_event.record()
        torch.cuda.synchronize()
        t1 = time.time_ns()
        result_summary.append(
            (start_event.elapsed_time(end_event), (t1 - t0) / 1_000_000)
        )

    # Get median times for GPU and CPU walltime
    gpu_time = np.median(list(map(lambda x: x[0], result_summary)))
    cpu_walltime = np.median(list(map(lambda x: x[1], result_summary)))

    if hasattr(model, "NUM_BATCHES"):
        median_gpu_time_per_batch = gpu_time / model.NUM_BATCHES
        median_cpu_walltime_per_batch = cpu_walltime / model.NUM_BATCHES
    else:
        median_gpu_time_per_batch = gpu_time
        median_cpu_walltime_per_batch = cpu_walltime

    metrics = {
        f"{model.name}.bs_{model.batch_size}.precision_{model.dargs.precision}."
        + f"ir_{selected_ir}.median_gpu_time_per_batch": median_gpu_time_per_batch,
        f"{model.name}.bs_{model.batch_size}.precision_{model.dargs.precision}."
        + f"ir_{selected_ir}.median_cpu_walltime_per_batch": median_cpu_walltime_per_batch,
    }

    return metrics


def run(args: List[str]):
    """Run inference and extract requested metrics"""
    parsed_args, unknown_args = cli(args)

    # Attempt to extract specified IR for logging purposes
    try:
        ir_idx = unknown_args.index("--ir")
        selected_ir = unknown_args[ir_idx + 1]
    except (ValueError, IndexError):
        selected_ir = "default"

    # Parse model string if specified, otherwise run all models
    # Adapted from benchmark/run.py
    if parsed_args["model"]:
        # Initialize to None so the canary-model fallback below still runs if
        # the model is not found in the core set.
        Model = None
        try:
            Model = load_model_by_name(parsed_args["model"])
        except ModuleNotFoundError:
            traceback.print_exc()
            exit(-1)
        except ModelNotFoundError:
            print(
                f"Warning: The model {parsed_args['model']} cannot be found in the core model set."
            )
        if not Model:
            try:
                Model = load_canary_model_by_name(parsed_args["model"])
            except ModuleNotFoundError:
                traceback.print_exc()
                exit(-1)
            except ModelNotFoundError:
                print(
                    f"Error: The model {parsed_args['model']} cannot be found in either the core or canary model set."
                )
                exit(-1)

        all_metrics = run_single_model(
            Model,
            parsed_args["bs"],
            unknown_args,
            selected_ir,
            parsed_args["num_warmup"],
            parsed_args["num_iter"],
        )

    else:
        all_metrics = {}

        for Model in list_models():
            metrics = run_single_model(
                Model,
                parsed_args["bs"],
                unknown_args,
                selected_ir,
                parsed_args["num_warmup"],
                parsed_args["num_iter"],
            )
            all_metrics = {**all_metrics, **metrics}

    save_metrics(all_metrics)
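For reference, the sketch below (a Python dict literal) shows the rough shape of the JSON payload that save_metrics() writes under .userbenchmark/torch_trt/. Only the key format mirrors run_one_step() and run_single_model() above; the model name (resnet50), precision (fp32), IR (dynamo), and all numeric values are made-up placeholders.

# Hypothetical shape of the metrics file written by save_metrics(); every
# concrete value below is a placeholder, only the key format follows the code.
example_payload = {
    "name": "torch_trt",
    "environ": {
        "metrics_version": "v0.1",
        "pytorch_git_version": "<torch.version.git_version of the build>",
    },
    "metrics": {
        "resnet50.bs_1.precision_fp32.ir_dynamo.median_gpu_time_per_batch": 2.31,
        "resnet50.bs_1.precision_fp32.ir_dynamo.median_cpu_walltime_per_batch": 2.47,
        "resnet50.bs_1.precision_fp32.ir_dynamo.pt2_compilation_time": 41.8,
    },
}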
This code will work for running a single model. However, it won't work for running a batch of models, because there is no isolation between the model runs. For example, model 1 might set some global torch configuration (for example, torch.cudnn.benchmark) that causes model 2 to be very slow or even crash. Some models also have a benign "memory leak" that won't cause problems in model training, but will cause problems when benchmarking multiple models in the same process.

We suggest using the ModelTask() approach used by the torch-nightly userbenchmark: https://github.com/pytorch/benchmark/blob/main/userbenchmark/torch-nightly/run.py#L163. It runs each model in an isolated process and doesn't have the limitations mentioned above.
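As a rough illustration of the isolation the reviewer describes (not of the ModelTask() API itself), the sketch below runs each model's benchmark in a fresh child process using only the standard library, so any global torch state a model sets, or memory it leaks, dies with that process. It assumes it lives in the same module as run_single_model() above; the helper names (_benchmark_worker, run_models_isolated) and the one-hour timeout are assumptions made for the sketch, and an actual fix should follow the ModelTask() pattern in the linked torch-nightly run.py.

import multiprocessing as mp
from queue import Empty
from typing import List


def _benchmark_worker(model_name, parsed_args, extra_args, selected_ir, result_queue):
    # Runs inside a fresh child process: any global torch configuration the
    # model sets, or memory it leaks, is discarded when the process exits.
    from torchbenchmark import load_model_by_name

    Model = load_model_by_name(model_name)
    metrics = run_single_model(
        Model,
        parsed_args["bs"],
        extra_args,
        selected_ir,
        parsed_args["num_warmup"],
        parsed_args["num_iter"],
    )
    result_queue.put(metrics)


def run_models_isolated(model_names: List[str], parsed_args, extra_args, selected_ir):
    # "spawn" gives every worker a fresh interpreter with no inherited CUDA state.
    ctx = mp.get_context("spawn")
    all_metrics = {}
    for name in model_names:
        result_queue = ctx.Queue()
        proc = ctx.Process(
            target=_benchmark_worker,
            args=(name, parsed_args, extra_args, selected_ir, result_queue),
        )
        proc.start()
        try:
            # Wait (up to an hour here) for the worker's metrics, then reap it.
            all_metrics.update(result_queue.get(timeout=3600))
        except Empty:
            print(f"Skipping {name}: worker produced no metrics")
        proc.join()
    return all_metrics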