from utils.hparams import hparams


+ class Conv1d(torch.nn.Conv1d):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         nn.init.kaiming_normal_(self.weight)
+
+
class SwiGLU(nn.Module):
    # Swish-Applies the gated linear unit function.
    def __init__(self, dim=-1):
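The new `Conv1d` wrapper only changes weight initialization: it defers construction to `torch.nn.Conv1d` and then re-initializes the weight with Kaiming (He) normal init, leaving the bias at PyTorch's default. A minimal sketch of the effect; the shapes and the side-by-side comparison are illustrative, not part of the patch:

```python
import torch
import torch.nn as nn

class Conv1d(torch.nn.Conv1d):
    # Same idea as the patched class: a stock Conv1d with Kaiming-normal weights.
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        nn.init.kaiming_normal_(self.weight)

# Kaiming-normal weights have a larger spread than PyTorch's default
# kaiming_uniform_(a=sqrt(5)) init for the same layer shape.
default = nn.Conv1d(256, 512, 1)
patched = Conv1d(256, 512, 1)
print(default.weight.std().item(), patched.weight.std().item())
```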
@@ -39,7 +45,7 @@ def calc_same_padding(kernel_size):
        pad = kernel_size // 2
        return pad, pad - (kernel_size + 1) % 2

-    def __init__(self, dim, expansion_factor, kernel_size=31, activation='PReLU', dropout=0.):
+    def __init__(self, dim, expansion_factor, kernel_size=31, activation='PReLU', dropout=0.1):
        super().__init__()
        inner_dim = dim * expansion_factor
        activation_classes = {
@@ -57,7 +63,7 @@ def __init__(self, dim, expansion_factor, kernel_size=31, activation='PReLU', dr
        else:
            _dropout = nn.Identity()
        self.net = nn.Sequential(
-            nn.LayerNorm(dim),
+            nn.LayerNorm(dim, eps=1e-6),
            Transpose((1, 2)),
            nn.Conv1d(dim, inner_dim * 2, 1),
            SwiGLU(dim=1),
@@ -73,16 +79,17 @@ def forward(self, x):


class LYNXNetResidualLayer(nn.Module):
-    def __init__(self, dim_cond, dim, expansion_factor, kernel_size=31, activation='PReLU', dropout=0.):
+    def __init__(self, dim_cond, dim, expansion_factor, kernel_size=31, activation='PReLU', dropout=0.1):
        super().__init__()
        self.diffusion_projection = nn.Conv1d(dim, dim, 1)
        self.conditioner_projection = nn.Conv1d(dim_cond, dim, 1)
        self.convmodule = LYNXConvModule(dim=dim, expansion_factor=expansion_factor, kernel_size=kernel_size,
                                         activation=activation, dropout=dropout)

    def forward(self, x, conditioner, diffusion_step):
+        x = x + self.conditioner_projection(conditioner)
        res_x = x.transpose(1, 2)
-        x = x + self.diffusion_projection(diffusion_step) + self.conditioner_projection(conditioner)
+        x = x + self.diffusion_projection(diffusion_step)
        x = x.transpose(1, 2)
        x = self.convmodule(x)  # (#batch, dim, length)
        x = x + res_x
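The forward change moves the conditioner projection above the point where `res_x` is captured, so the conditioner contribution now also flows through the outer residual connection; previously it was added together with the diffusion-step embedding after the residual branch point. A rough data-flow sketch of the two orderings with toy tensors (names and shapes are illustrative, transposes and the conv module are omitted):

```python
import torch

B, C, T = 2, 8, 16
x = torch.randn(B, C, T)
cond_proj = torch.randn(B, C, T)   # stand-in for self.conditioner_projection(conditioner)
step_proj = torch.randn(B, C, 1)   # stand-in for self.diffusion_projection(diffusion_step)

# Before the patch: residual taken from the raw input,
# conditioner added afterwards alongside the diffusion step.
res_before = x
h_before = x + step_proj + cond_proj

# After the patch: conditioner folded in first, so it is part of the
# residual that gets added back after the conv module.
x_after = x + cond_proj
res_after = x_after
h_after = x_after + step_proj
```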
@@ -93,7 +100,7 @@ def forward(self, x, conditioner, diffusion_step):

class LYNXNet(nn.Module):
    def __init__(self, in_dims, n_feats, *, num_layers=6, num_channels=512, expansion_factor=2, kernel_size=31,
-                 activation='PReLU', dropout=0.):
+                 activation='PReLU', dropout=0.1):
        """
        LYNXNet(Linear Gated Depthwise Separable Convolution Network)
        TIPS:You can control the style of the generated results by modifying the 'activation',
@@ -104,7 +111,7 @@ def __init__(self, in_dims, n_feats, *, num_layers=6, num_channels=512, expansio
        super().__init__()
        self.in_dims = in_dims
        self.n_feats = n_feats
-        self.input_projection = nn.Conv1d(in_dims * n_feats, num_channels, 1)
+        self.input_projection = Conv1d(in_dims * n_feats, num_channels, 1)
        self.diffusion_embedding = nn.Sequential(
            SinusoidalPosEmb(num_channels),
            nn.Linear(num_channels, num_channels * 4),
@@ -124,8 +131,8 @@ def __init__(self, in_dims, n_feats, *, num_layers=6, num_channels=512, expansio
                for i in range(num_layers)
            ]
        )
-        self.norm = nn.LayerNorm(num_channels)
-        self.output_projection = nn.Conv1d(num_channels, in_dims * n_feats, kernel_size=1)
+        self.norm = nn.LayerNorm(num_channels, eps=1e-6)
+        self.output_projection = Conv1d(num_channels, in_dims * n_feats, kernel_size=1)
        nn.init.zeros_(self.output_projection.weight)

    def forward(self, spec, diffusion_step, cond):
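Note that `output_projection` is now the Kaiming-initialized `Conv1d`, but the next (unchanged) line zeroes its weight again, so at initialization the output depends only on the default-initialized bias; the Kaiming init effectively only affects `input_projection`. A small check, assuming the wrapper class added above and illustrative channel counts:

```python
import torch
import torch.nn as nn

class Conv1d(torch.nn.Conv1d):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        nn.init.kaiming_normal_(self.weight)

proj = Conv1d(512, 128, kernel_size=1)
nn.init.zeros_(proj.weight)          # mirrors the constructor in the patch

x = torch.randn(2, 512, 100)
y = proj(x)
# With zero weights, every output frame equals the bias vector.
assert torch.allclose(y, proj.bias[None, :, None].expand_as(y))
print(y.abs().mean().item())
```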
@@ -142,7 +149,7 @@ def forward(self, spec, diffusion_step, cond):
        x = spec.flatten(start_dim=1, end_dim=2)  # [B, F x M, T]

        x = self.input_projection(x)  # x [B, residual_channel, T]
-        x = F.gelu(x)
+        # x = F.gelu(x)

        diffusion_step = self.diffusion_embedding(diffusion_step).unsqueeze(-1)
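With `F.gelu` commented out, the projected input enters the residual layers without a nonlinearity, i.e. the stem is now purely linear, with its scale set by the Kaiming-initialized `Conv1d` above. A minimal before/after sketch of just this step (the layer and shapes are stand-ins, not the real module):

```python
import torch
import torch.nn.functional as F

stem = torch.nn.Conv1d(128, 512, 1)   # stand-in for self.input_projection
spec = torch.randn(2, 128, 100)       # [B, F x M, T]

h_old = F.gelu(stem(spec))            # previous behaviour: GELU after the projection
h_new = stem(spec)                    # patched behaviour: projection only
```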