This repository was archived by the owner on Mar 19, 2024. It is now read-only.

Commit 7337369

QuentinDuval authored and facebook-github-bot committed
Add new RegNet for SwAV (#214)
Summary: New RegNet for SwAV, plus a unit test checking that the associated pre-training works on 1 node of 8 GPUs.
Pull Request resolved: fairinternal/ssl_scaling#214
Reviewed By: prigoyal
Differential Revision: D33801136
Pulled By: QuentinDuval
fbshipit-source-id: 3b8bf89039d91ab7cb9686bf8e60d640ace95907
1 parent 7b18cd7 commit 7337369
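Note that the new model configuration is not a standalone recipe: it is meant to be composed on top of the existing 8-node SwAV config through a Hydra override (+config/pretrain/seer/models=regnet10B), which is exactly how the unit test below assembles its training configuration.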

File tree

2 files changed: +146 −0 lines changed

2 files changed

+146
-0
lines changed
Lines changed: 94 additions & 0 deletions

@@ -0,0 +1,94 @@
# @package _global_
config:
  TRAINER:
    TASK_NAME: self_supervision_fsdp_task
  DATA:
    TRAIN:
      BATCHSIZE_PER_REPLICA: 16
      TRANSFORMS:
        - name: ImgPilToMultiCrop
          total_num_crops: 6
          size_crops: [160, 96]
          num_crops: [2, 4]
          crop_scales: [[0.14, 1], [0.05, 0.14]]
        - name: RandomHorizontalFlip
          p: 0.5
        - name: ImgPilColorDistortion
          strength: 1.0
        - name: ImgPilGaussianBlur
          p: 0.5
          radius_min: 0.1
          radius_max: 2.0
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      COLLATE_FUNCTION_PARAMS:
        create_multidimensional_tensor: True
  MODEL:
    TRUNK:
      NAME: regnet_fsdp
      REGNET:
        block_type: res_bottleneck_block
        depth: 27
        group_width: 1010
        w_0: 1744
        w_a: 620.83
        w_m: 2.52
        stage_checkpoints: [[2], [7], [9, 17], []]
    HEAD:
      PARAMS: [
        ["swav_head_fsdp", {
          "dims": [28280, 8192, 8192, 256],
          "use_bn": False,
          "num_clusters": [16000]
        }],
      ]
    FSDP_CONFIG:
      AUTO_WRAP_THRESHOLD: 100000000
      flatten_parameters: False
      mixed_precision: True
      fp32_reduce_scatter: False
      compute_dtype: float16
    CUDA_CACHE:
      CLEAR_CUDA_CACHE: True
      CLEAR_FREQ: 5000
    SYNC_BN_CONFIG:
      CONVERT_BN_TO_SYNC_BN: True
      SYNC_BN_TYPE: "pytorch"
    AMP_PARAMS:
      USE_AMP: True
      AMP_TYPE: "pytorch"
    ACTIVATION_CHECKPOINTING:
      USE_ACTIVATION_CHECKPOINTING: True
  LOSS:
    swav_loss:
      num_iters: 10
      epsilon: 0.03
      temp_hard_assignment_iters: 0
      num_crops: 6
      num_prototypes: [16000]
  OPTIMIZER:
    name: "sgd_fsdp"
    use_larc: True
    construct_single_param_group_only: True
    weight_decay: 0.00001
    num_epochs: 1
    param_schedulers:
      lr:
        # we make it convenient to scale Learning rate automatically as per the scaling
        # rule specified in https://arxiv.org/abs/1706.02677 (ImageNet in 1Hour).
        auto_lr_scaling:
          auto_scale: True
          base_value: 0.3
        lengths: [0.043648,0.956352]
  CHECKPOINT:
    CHECKPOINT_ITER_FREQUENCY: 100
    LATEST_CHECKPOINT_RESUME_FILE_NUM: 1
    USE_SYMLINK_CHECKPOINT_FOR_RESUME: True
  DISTRIBUTED:
    NCCL_DEBUG: False
    NUM_NODES: 62
    NUM_PROC_PER_NODE: 8
    NCCL_SOCKET_NTHREADS: ''
  LOG_FREQUENCY: 1
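The trunk above is defined purely by the RegNet design-space parameters (depth, w_0, w_a, w_m, group_width). As a rough illustration of how these numbers expand into concrete stages, the following is a minimal sketch of the width computation from Radosavovic et al., "Designing Network Design Spaces"; the helper below is illustrative, not a VISSL API:

    import numpy as np

    def regnet_stages(w_0, w_a, w_m, depth, group_width, q=8):
        # Illustrative sketch of the RegNet parameterization, not VISSL code.
        # Continuous per-block widths: u_j = w_0 + w_a * j
        u = w_0 + w_a * np.arange(depth)
        # Quantize every block width to w_0 * w_m**k with integer k
        k = np.round(np.log(u / w_0) / np.log(w_m))
        widths = np.round(w_0 * np.power(w_m, k) / q) * q
        # Round block widths to a multiple of the group width
        widths = np.round(widths / group_width) * group_width
        # Blocks sharing the same quantized width form one stage
        stage_widths = sorted(set(int(w) for w in widths))
        stage_depths = [int((widths == w).sum()) for w in stage_widths]
        return stage_widths, stage_depths

    print(regnet_stages(w_0=1744, w_a=620.83, w_m=2.52, depth=27, group_width=1010))
    # ([2020, 4040, 11110, 28280], [2, 7, 17, 1])

With these parameters the last stage comes out at width 28280, which matches the 28280-dimensional input of the swav_head_fsdp MLP, and the per-stage depths of [2, 7, 17, 1] are consistent with the four entries of stage_checkpoints.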

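For reference, the auto_lr_scaling block applies the linear scaling rule of the cited paper (Goyal et al., "Accurate, Large Minibatch SGD"): the base learning rate is multiplied by the ratio of the global batch size to a reference batch size. Assuming VISSL's usual reference of 256 images (a default not shown in this file), the full 62-node run processes 62 × 8 × 16 = 7936 images per step, so the effective peak learning rate would be roughly 0.3 × 7936 / 256 ≈ 9.3.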
tests/test_regnet_fsdp_10b.py

Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
# Copyright (c) Facebook, Inc. and its affiliates.

# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import unittest

from vissl.utils.hydra_config import compose_hydra_configuration, convert_to_attrdict
from vissl.utils.test_utils import (
    gpu_test,
    in_temporary_directory,
    run_integration_test,
)


class TestRegnet10B(unittest.TestCase):
    @staticmethod
    def _create_10B_pretrain_config(num_gpus: int, num_steps: int, batch_size: int):
        data_limit = num_steps * batch_size * num_gpus
        cfg = compose_hydra_configuration(
            [
                "config=pretrain/swav/swav_8node_resnet",
                "+config/pretrain/seer/models=regnet10B",
                "config.OPTIMIZER.num_epochs=1",
                "config.LOG_FREQUENCY=1",
                # Testing on fake images
                "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
                "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
                # Disable overlap communication and computation for test
                "config.MODEL.FSDP_CONFIG.FORCE_SYNC_CUDA=True",
                # Testing on 8 V100 32GB GPU only
                f"config.DATA.TRAIN.BATCHSIZE_PER_REPLICA={batch_size}",
                f"config.DATA.TRAIN.DATA_LIMIT={data_limit}",
                "config.DISTRIBUTED.NUM_NODES=1",
                f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpus}",
                "config.DISTRIBUTED.RUN_ID=auto",
            ]
        )
        args, config = convert_to_attrdict(cfg)
        return config

    @gpu_test(gpu_count=8)
    def test_regnet_10b_swav_pretraining(self):
        with in_temporary_directory():
            config = self._create_10B_pretrain_config(
                num_gpus=8, num_steps=2, batch_size=4
            )
            results = run_integration_test(config)
            losses = results.get_losses()
            print(losses)
            self.assertEqual(len(losses), 2)
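The final assertion follows directly from how the config is composed: with num_steps=2, batch_size=4 and num_gpus=8, DATA_LIMIT is 2 × 4 × 8 = 64 synthetic images, i.e. exactly two optimization steps at a global batch size of 32, hence two loss values. The test is presumably launched on a single 8-GPU machine, e.g. with python -m pytest tests/test_regnet_fsdp_10b.py (the exact invocation is an assumption, not part of the commit), and the gpu_test(gpu_count=8) decorator should skip it when fewer GPUs are available.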
