'''Main file for the neural-mmo/projekt demo

/projekt contains all necessary RLlib wrappers to train and
evaluate capable policies on Neural MMO, as well as rendering,
logging, and visualization tools.

Associated docs and tutorials are hosted on jsuarez5341.github.io.'''
from pdb import set_trace as TT

import copy
import json

import numpy as np
import torch
from fire import Fire

import ray
from ray import rllib

from forge.ethyr.torch import utils
from forge.blade.systems import ai
from forge.trinity.visualize import BokehServer
from forge.trinity.evaluator import Evaluator

import projekt
from projekt import rllib_wrapper as wrapper
from forge.blade.core import terrain
from evolution.utils import get_exp_shorthand


def createPolicies(config, mapPolicy):
    '''Generate RLlib policies'''
    obs = wrapper.observationSpace(config)
    atns = wrapper.actionSpace(config)

    policies = {}
    for i in range(config.NPOLICIES):
        params = {
            "agent_id": i,
            "obs_space_dict": obs,
            "act_space_dict": atns}
        key = mapPolicy(i)
        policies[key] = (None, obs, atns, params)

    return policies
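

# A sketch of the structure createPolicies returns, assuming NPOLICIES=2.
# RLlib's multiagent API expects {policy_id: (policy_cls, obs_space,
# act_space, kwargs)}, where policy_cls=None selects the trainer's default
# policy class:
#
#   {'policy_0': (None, obs, atns, {'agent_id': 0, ...}),
#    'policy_1': (None, obs, atns, {'agent_id': 1, ...})}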


def loadTrainer(config):
    '''Create monolithic RLlib trainer object'''
    if config.load_arguments != -1:
        with open('configs/settings_{}.json'.format(config.load_arguments), 'r') as f:
            load_args = json.load(f)
        for k, v in load_args.items():
            config.set(k, v)
    if config.PAIRED and not config.EVALUATE:
        config.set('NPOP', 2)
        config.set('NPOLICIES', 2)

    torch.set_num_threads(1)
    ray.shutdown()
    ray.init(local_mode=config.LOCAL_MODE)

    # Register custom env
    ray.tune.registry.register_env("Neural_MMO",
                                   lambda config: wrapper.RLlibEnv(config))

    # Create policies
    rllib.models.ModelCatalog.register_custom_model('godsword', wrapper.RLlibPolicy)
    mapPolicy = lambda agentID: 'policy_{}'.format(agentID % config.NPOLICIES)
    policies = createPolicies(config, mapPolicy)

    # Instantiate monolithic RLlib Trainer object
    return wrapper.SanePPOTrainer(config={
        'num_workers': config.NUM_WORKERS,
        'num_gpus_per_worker': config.NUM_GPUS_PER_WORKER,
        'num_gpus': config.NUM_GPUS,
        'num_envs_per_worker': 1,
        'train_batch_size': config.TRAIN_BATCH_SIZE,
        'rollout_fragment_length': config.ROLLOUT_FRAGMENT_LENGTH,
        'sgd_minibatch_size': config.SGD_MINIBATCH_SIZE,
        'num_sgd_iter': config.NUM_SGD_ITER,
        'framework': 'torch',
        'horizon': np.inf,
        'soft_horizon': False,
        # '_use_trajectory_view_api': False,
        'no_done_at_end': False,
        'callbacks': wrapper.RLlibLogCallbacks,
        'env_config': {
            'config': config,
        },
        'multiagent': {
            'policies': policies,
            'policy_mapping_fn': mapPolicy,
            'count_steps_by': 'agent_steps'
        },
        'model': {
            'custom_model': 'godsword',
            'custom_model_config': {'config': config}
        },
    })
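

# Illustrative only -- how the trainer above might be driven directly from
# Python rather than through the CLI (SmallMaps is one of the bundled
# configs in projekt/config.py):
#
#   config = projekt.config.SmallMaps()
#   trainer = loadTrainer(config)
#   trainer.train()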


def loadEvaluator(config):
    '''Create test/render evaluator'''
    if config.PAIRED:
        pass  # FIXME: PAIRED evaluation is not handled here yet
    elif config.NPOLICIES > 1 or config.COMPETITIVE_EVAL:
        models = config.MODELS
        # Shuffle the models to randomize spawn order for a fair evaluation
        # over multiple trials
        np.random.shuffle(models)
        config.set('MULTI_MODEL_EXPERIMENTS', models)
        model_names = [get_exp_shorthand(m) for m in models]
        config.set('MULTI_MODEL_NAMES', model_names)
        return wrapper.RLlibMultiEvaluator(config, loadModels(config))
    else:
        print(f"Setting MULTI_MODEL_NAMES (currently {config.MULTI_MODEL_NAMES}) from MODEL {config.MODEL}:")
        if not config.MULTI_MODEL_NAMES and config.MODEL:
            config.set('MULTI_MODEL_NAMES', [get_exp_shorthand(config.MODEL)])

    if config.MODEL not in ('scripted-forage', 'scripted-combat'):
        return wrapper.RLlibEvaluator(config, loadModel(config, model_idx=0))

    # Scripted policy backend
    if config.MODEL == 'scripted-forage':
        policy = ai.policy.forage
    else:
        policy = ai.policy.combat

    # Search backend
    err = 'SCRIPTED_BACKEND may be either dijkstra or dynamic_programming'
    assert config.SCRIPTED_BACKEND in ('dijkstra', 'dynamic_programming'), err
    if config.SCRIPTED_BACKEND == 'dijkstra':
        backend = ai.behavior.forageDijkstra
    elif config.SCRIPTED_BACKEND == 'dynamic_programming':
        backend = ai.behavior.forageDP

    return Evaluator(config, policy, config.SCRIPTED_EXPLORE, backend)
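

# The scripted dispatch above corresponds to CLI calls along these lines
# (flag values are taken from the checks in loadEvaluator):
#
#   python Forge.py evaluate --MODEL=scripted-forage --SCRIPTED_BACKEND=dijkstra
#   python Forge.py evaluate --MODEL=scripted-combat --SCRIPTED_BACKEND=dynamic_programming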


def loadModels(config):
    '''Initialize one trainer per model and restore each model's weights'''
    models = config.MULTI_MODEL_EXPERIMENTS
    trainers = []
    for m in models:
        # Each model gets its own single-policy trainer
        m_config = copy.deepcopy(config)
        m_config.NPOLICIES = 1
        m_config.NPOP = 1
        m_config.MODEL = m
        trainer = loadTrainer(m_config)
        utils.modelSize(trainer.defaultModel())
        trainer.restore(m)
        trainers.append(trainer)
    return trainers


def loadModel(config, model_idx=0):
    '''Load NN weights and optimizer state'''
    trainer = loadTrainer(config)
    utils.modelSize(trainer.defaultModel())
    trainer.restore(config.MODEL)
    return trainer


class Anvil():
    '''Neural MMO CLI powered by Google Fire

    Main file for the RLlib demo included with Neural MMO.

    Usage:
        python Forge.py <COMMAND> --config=<CONFIG> --ARG1=<ARG1> ...

    The User API documents core env flags. Additional config options specific
    to this demo are available in projekt/config.py.

    The --config flag may be used to load an entire group of options at once.
    The Debug, SmallMaps, and LargeMaps options are included in this demo,
    with the latter being the default -- or write your own in projekt/config.py.
    '''
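
    # Example invocation (illustrative; 'my_model' is a placeholder for a
    # trained experiment name):
    #   python Forge.py evaluate --config=SmallMaps --MODELS=[my_model]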

    def __init__(self, **kwargs):
        if 'help' in kwargs:
            kwargs.pop('help')
        if 'config' in kwargs:
            config = kwargs.pop('config')
            config = getattr(projekt.config, config)()
        else:
            config = projekt.config.LargeMaps()
        config.override(**kwargs)
        self.config = config

        # MODELS arrives from the command line as a bracketed string;
        # parse it back into a list of experiment names
        models = self.config.MODELS.strip('[').strip(']').split(',')
        self.config.set('MODELS', models)
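        # A sketch of the parsing above (hypothetical values):
        #   '[model_a,model_b]' -> ['model_a', 'model_b']
        #   'model_a'           -> ['model_a']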

    def train(self, **kwargs):
        '''Train a model starting with the current value of --MODEL'''
        loadModel(self.config).train()

    def evaluate(self, **kwargs):
        '''Evaluate a model on --EVAL_MAPS maps'''
        self.config.EVALUATE = True
        for model in self.config.MODELS:
            new_config = copy.deepcopy(self.config)
            new_config.set('MODEL', model)
            try:
                evaluator = loadEvaluator(new_config).evaluate(new_config.GENERALIZE)
                del evaluator
            except FileNotFoundError as err:
                print(f"Failed to load evaluator with new model, error message: {err}")
                print("Assuming this is because no such model exists. Skipping model.")
                TT()  # debugging hook: drop into pdb before moving on

    def render(self, **kwargs):
        '''Start a WebSocket server that autoconnects to the 3D Unity client'''
        self.config.RENDER = True
        for model in self.config.MODELS:
            new_config = copy.deepcopy(self.config)
            new_config.set('MODEL', model)
            evaluator = loadEvaluator(new_config).render()
            del evaluator

    def generate(self, **kwargs):
        '''Generate game maps for the current --config setting'''
        terrain.MapGenerator(self.config).generate()

    def visualize(self, **kwargs):
        '''Web dashboard for training/evaluation results'''
        BokehServer(self.config)


if __name__ == '__main__':
    # Redirect Fire's help and usage output straight to stdout
    # instead of routing it through a pager
    def Display(lines, out):
        text = "\n".join(lines) + "\n"
        out.write(text)

    from fire import core
    core.Display = Display

    Fire(Anvil)