From c934a966c686748ee1d5afc38f585206eb13c91d Mon Sep 17 00:00:00 2001 From: gessyoo <35972345+gessyoo@users.noreply.github.com> Date: Mon, 19 Feb 2024 10:02:28 -0500 Subject: [PATCH] Update training_cli.py Slight change to fix training not starting with 2 A6000 GPUs, even if only one is enabled in the CLI --- training_cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/training_cli.py b/training_cli.py index a2b30a5..39e042f 100644 --- a/training_cli.py +++ b/training_cli.py @@ -6,9 +6,12 @@ from lib.train import utils import datetime - +# Fix for training won't start with 2 GPUs, even if only 1 enabled hps = utils.get_hparams() os.environ["CUDA_VISIBLE_DEVICES"] = hps.gpus.replace("-", ",") +n_gpus = len(hps.gpus.split("-")) +from random import shuffle, randint +import traceback, json, argparse, itertools, math, torch, pdb os.environ["NCCL_P2P_DISABLE"] = 1 from random import shuffle, randint