Prepare the environment of the experiment control
Tutorial from https://shenxiaohai.me/2019/01/17/sacred-tool/
Installation
To install Sacred on the client (e.g. in a conda environment)
pip install sacred
pip install numpy pymongo
Server: database
# 1. Import the public key used by the package management system.
wget -qO - https://www.mongodb.org/static/pgp/server-4.2.asc | sudo apt-key add -
# 2. Create a list file for MongoDB.
echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu bionic/mongodb-org/4.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-4.2.list
# 3. Reload local package database.
sudo apt-get update
# 4. Install the MongoDB packages.
sudo apt-get install -y mongodb-org
# 4.1 Prevent apt-get from upgrading the MongoDB packages (pin the installed version)
echo "mongodb-org hold" | sudo dpkg --set-selections
echo "mongodb-org-server hold" | sudo dpkg --set-selections
echo "mongodb-org-shell hold" | sudo dpkg --set-selections
echo "mongodb-org-mongos hold" | sudo dpkg --set-selections
echo "mongodb-org-tools hold" | sudo dpkg --set-selections
# control the service
sudo service mongod start
sudo service mongod stop
sudo service mongod restart
# enable the mongod service so it starts automatically at boot, and start it now
sudo systemctl enable mongod && sudo systemctl start mongod
# create a new database to store our experiment
# enter the mongo shell
mongo
use sacred
Enable all outside access
sudo vim /etc/mongod.conf
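Then edit the bindIp line in the net section (the default, 127.0.0.1, only accepts local connections):
# network interfaces
net:
  port: 27017
  bindIp: 0.0.0.0  # <- listen on all interfaces; protect the port with a firewall or authentication, then restart mongod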
Start Omniboard
omniboard -m localhost:27017:sacred
Training script
from sacred import Experiment
from sacred.utils import apply_backspaces_and_linefeeds
from sacred.observers import MongoObserver
# Name under which the experiment's runs are recorded.
Name_Exp = 'YourExperimentName'
exp = Experiment(Name_Exp)

# Filter applied to the captured stdout before it is stored
# (helper imported from sacred.utils).
exp.captured_out_filter = apply_backspaces_and_linefeeds

# Save the training script itself to the database along with each run.
exp.add_source_file("train.py")

# Report every run to the MongoDB instance at this URL / database name.
exp.observers.append(
    MongoObserver(url='localhost:27017', db_name='yourDBname')
)
# Sacred config scope: the local variables assigned in the body are collected
# by Sacred as configuration entries, which is how `opt` reaches `main`.
# The body must not contain a `return` statement; Sacred rejects config
# scopes that have one.
# NOTE(review): `parser` is assumed to be an argparse.ArgumentParser defined
# elsewhere in the script -- confirm, and add its arguments before parsing.
@exp.config
def cfg():
    # parser.add_argument('--lr', type=float, default=1e-4, help='Learning Rate. Default=0.0001')
    opt = parser.parse_args()
def log_metrics(_run, logs, iter, end_str=" "):
    """Record every metric in ``logs`` on the run and echo them on one line.

    Args:
        _run: Object exposing ``log_scalar(name, value, step)``, e.g. the
            Sacred run injected into the main function.
        logs: Mapping of metric name to a value accepted by ``float()``.
        iter: Step counter forwarded to ``log_scalar``. The name shadows the
            builtin ``iter`` but is kept so keyword callers keep working.
        end_str: Passed to ``print`` as ``end``; the default keeps the cursor
            on the same line.

    Raises:
        TypeError, ValueError: If a value cannot be converted by ``float()``.
    """
    parts = []
    for key, value in logs.items():
        # Convert once so the logged number and the printed number agree,
        # and so values that only support float() can still be formatted.
        scalar = float(value)
        _run.log_scalar(key, scalar, iter)
        parts.append("%s: %.4f || " % (key, scalar))
    print("".join(parts), end=end_str)
@exp.automain
def main(opt, _run):
    """Template entry point showing how to log metrics and artifacts.

    Args:
        opt: Value of the ``opt`` config entry produced by the config scope.
        _run: Run object used for ``log_scalar`` calls.

    NOTE(review): ``loss``, ``loss_l1``, ``over_Iter``, ``epoch`` and
    ``accuracy`` are placeholders that are not defined in this template; the
    surrounding training loop is expected to provide them -- confirm.
    """
    # Per-iteration metrics, logged and printed through log_metrics.
    logs = {
        "loss": loss.data,
        "loss_l1": loss_l1.data,
    }
    log_metrics(_run, logs, over_Iter)

    # Per-epoch metrics; converted to plain floats, as log_metrics does,
    # so the stored values are ordinary numbers.
    _run.log_scalar("epoch_loss", float(loss), epoch)
    _run.log_scalar("Accuracy", float(accuracy), epoch)

    exp.add_artifact("Path of the trained model, e.g., /home/user/epoch111.pth")  # save trained model to database
Move the database to another disk (ubuntu)
First, mount the disk at a directory under the root path (here, /data)
sudo mount /dev/sda2 /data
Modify the config file of the MongoDB
sudo service mongod stop
sudo vim /etc/mongod.conf
#
# storage:
# dbPath: /var/lib/mongodb <-- modify this line, then save it (press "Esc", then type ":wq" to save and exit)
#
sudo service mongod start