10
10
11
11
12
12
@dc .dataclass
13
- class LowerSetting :
13
+ class LowerSettingBasic :
14
+ """
15
+ Basic class for lowering.
16
+ max_batch_size: The maximum batch size for lowering job.
17
+ If run with TensorRT lowering, this is the maximum
18
+ batch size which can be used at execution time,
19
+ and also the batch size for which the ICudaEngine
20
+ will be optimized.
21
+ If run with AITemplate lowering, this is the max batch_size
22
+ for the model.
23
+ lower_precision: lower precision dtype during lowering.
24
+ min_acc_module_size(int): minimal number of nodes for an accelerate submodule.
25
+ ast_rewriter_allow_list (Optional[Set[nn.Module]]): Optional allow list of
26
+ modules that need AST rewriting. This aims to eliminate input variables involved in
27
+ exception checking control flow.
28
+ leaf_module_list (Optional[Set[nn.Module]]): Optional leaf module list where
29
+ modules will not be traced into.
30
+ verbose_profile (bool): verbosity of profiler, default to False.
14
31
"""
15
- Basic configuration for lowering stack.
16
32
17
- Args:
18
- max_batch_size: The maximum batch size which can be used at execution time,
19
- and also the batch size for which the ICudaEngine will be optimized.
33
+ max_batch_size : int = 2048
34
+ lower_precision : LowerPrecision = LowerPrecision .FP32
35
+ min_acc_module_size : int = 10
36
+ ast_rewriter_allow_list : Optional [Set [Type [nn .Module ]]] = None
37
+ leaf_module_list : Optional [Set [Type [nn .Module ]]] = None
38
+ verbose_profile : bool = False
20
39
40
+
41
+ @dc .dataclass
42
+ class LowerSetting (LowerSettingBasic ):
43
+ """
44
+ Basic configuration for lowering stack.
45
+ Args:
21
46
input_specs: Specs for inputs to engine, can either be a single size or a
22
47
range defined by Min, Optimal, Max sizes.
23
-
24
48
explicit_batch_dimension: Use explicit batch dimension during lowering.
25
-
26
49
explicit_precision: Use explicit precision during lowering.
27
-
28
- lower_precision: lower precision dtype during lowering.
29
-
30
50
max_workspace_size: The maximum workspace size. The maximum GPU temporary
31
51
memory which the TensorRT engine can use at execution time.
32
-
33
52
strict_type_constraints: Require TensorRT engine to strictly follow data type
34
53
setting at execution time.
35
-
36
54
customized_fuse_pass: List of customized passes to apply during lowering process.
37
-
38
55
lower_basic_fuse_pass: Enable basic pass fuse during lowering, i.e. fuse multiple operations
39
56
as (a->b->c->d)=>(e). Current basic fuse patterns are:
40
57
permute->linear
41
58
permute->matmul
42
-
43
59
verbose_log: Enable TensorRT engine verbose log mode.
44
-
45
60
algo_selector: Enable TensorRT algorithm selector at execution time.
46
-
47
61
timing_cache_prefix: TensorRT timing cache file path. TensorRT engine will use timing
48
62
cache file at execution time if valid timing cache file is provided.
49
-
50
63
save_timing_cache: Save updated timing cache data into timing cache file if the timing
51
64
cache file is provided.
52
-
53
- ast_rewriter_allow_list (Optional[Set[nn.Module]]): Optional allow list of
54
- modules that need AST rewriting. This is aiming to eliminate input variable involve in
55
- exception checking control flow.
56
-
57
- leaf_module_list (Optional[Set[nn.Module]]): Optional leaf module list where
58
- modules will not be traced into.
59
-
60
65
cuda_graph_batch_size (int): Cuda graph batch size, default to be -1.
61
-
62
- verbose_profile (bool): verbosity of profiler, default to False.
63
-
64
- min_acc_module_size(int): minimal number of nodes for an accelerate submodule.
65
66
"""
66
67
67
- max_batch_size : int = 2048
68
68
input_specs : List [InputTensorSpec ] = dc .field (default_factory = list )
69
69
explicit_batch_dimension : bool = True
70
70
explicit_precision : bool = False
71
- lower_precision : LowerPrecision = LowerPrecision .FP32
72
71
max_workspace_size : int = 1 << 30
73
72
strict_type_constraints : bool = False
74
73
customized_fuse_pass : PassManager = PassManager .build_from_passlist ([])
@@ -79,8 +78,4 @@ class LowerSetting:
79
78
algo_selector = None
80
79
timing_cache_prefix : str = ""
81
80
save_timing_cache : bool = False
82
- ast_rewriter_allow_list : Optional [Set [Type [nn .Module ]]] = None
83
- leaf_module_list : Optional [Set [Type [nn .Module ]]] = None
84
81
cuda_graph_batch_size : int = - 1
85
- verbose_profile : bool = False
86
- min_acc_module_size : int = 10
0 commit comments