Prepare the environment of the experiment control
Tutorial from https://shenxiaohai.me/2019/01/17/sacred-tool/
Installation
To install Sacred on the client (e.g. in a conda environment):
# Install Sacred and the packages needed for the MongoDB observer.
pip install sacred
pip install numpy pymongo
Server: database
# 1. Import the public key used by the package management system.
wget -qO - https://www.mongodb.org/static/pgp/server-4.2.asc | sudo apt-key add -

# 2. Create a list file for MongoDB.
echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu bionic/mongodb-org/4.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-4.2.list

# 3. Reload the local package database.
sudo apt-get update

# 4. Install the MongoDB packages.
sudo apt-get install -y mongodb-org

# 4.1 Prevent MongoDB upgrades when using apt-get.
echo "mongodb-org hold" | sudo dpkg --set-selections
echo "mongodb-org-server hold" | sudo dpkg --set-selections
echo "mongodb-org-shell hold" | sudo dpkg --set-selections
echo "mongodb-org-mongos hold" | sudo dpkg --set-selections
echo "mongodb-org-tools hold" | sudo dpkg --set-selections

# Control the service (run whichever one is needed).
sudo service mongod start
sudo service mongod stop
sudo service mongod restart

# Enable the mongod service at boot and start it now.
sudo systemctl enable mongod && sudo systemctl start mongod

# Create a new database to store our experiments.
# Enter the MongoDB shell:
mongo
# Then, at the mongo prompt, switch to (and implicitly create) the database:
use sacred
Enable all outside access
sudo vim /etc/mongod.conf
Then comment out the line:
# network interfaces
net:
  port: 27017
  # bindIp: 0.0.0.0   <- comment out this line, and change the IP
Start Omniboard
omniboard -m localhost:27017:sacred
Training script
# Template for a Sacred-instrumented training script.
#
# NOTE: this is a skeleton, not a runnable program. `parser`, `loss`,
# `loss_l1`, `over_Iter`, `accuracy` and `epoch` are placeholders that must be
# provided by your own argument parsing and training loop.
from sacred import Experiment
from sacred.utils import apply_backspaces_and_linefeeds
from sacred.observers import MongoObserver

Name_Exp = 'YourExperimentName'
exp = Experiment(Name_Exp)
# Record every run of this experiment in the MongoDB instance.
exp.observers.append(MongoObserver(url='localhost:27017', db_name='yourDBname'))
exp.add_source_file("train.py")  # save source file to database
# Clean up captured stdout so progress bars (backspaces / carriage returns)
# are stored as they would appear on a terminal.
exp.captured_out_filter = apply_backspaces_and_linefeeds


@exp.config
def cfg():
    """Sacred configuration: local variables defined here become config entries."""
    # `parser` is not defined in this template; presumably an
    # argparse.ArgumentParser built above, with options such as:
    # parser.add_argument('--lr', type=float, default=1e-4, help='Learning Rate. Default=0.0001')
    opt = parser.parse_args()


def log_metrics(_run, logs, iter, end_str=" "):
    """Log every metric in ``logs`` to the Sacred run and print them on one line.

    Args:
        _run: The Sacred run object; ``log_scalar`` is called on it.
        logs: Mapping of metric name to a value convertible with ``float()``.
        iter: Step (iteration counter) the metrics belong to.
            NOTE(review): the name shadows the builtin ``iter``; it is kept
            so existing keyword callers keep working.
        end_str: Passed to ``print`` as ``end``.
    """
    parts = []
    for key, value in logs.items():
        _run.log_scalar(key, float(value), iter)
        parts.append("%s: %.4f || " % (key, value))
    print("".join(parts), end=end_str)


@exp.automain
def main(opt, _run):
    """Entry point of the experiment; ``opt`` and ``_run`` are supplied by Sacred."""
    # Placeholders: `loss`, `loss_l1`, `over_Iter`, `accuracy` and `epoch`
    # come from your training loop (`.data` suggests tensor-like objects --
    # TODO confirm for your framework).
    logs = {
        "loss": loss.data,
        "loss_l1": loss_l1.data,
    }
    log_metrics(_run, logs, over_Iter)

    _run.log_scalar("epoch_loss", loss, epoch)
    _run.log_scalar("Accuracy", accuracy, epoch)

    exp.add_artifact("Path of the trained model, e.g., /home/user/epoch111.pth")  # save trained model to database
Move the database to another disk (ubuntu)
First, mount the disk under the root path:
sudo mount /dev/sda2 /data
Modify the config file of the MongoDB
# Stop MongoDB before changing where it stores its data.
sudo service mongod stop

# Edit the MongoDB configuration file.
sudo vim /etc/mongod.conf
#
# storage:
#   dbPath: /var/lib/mongodb   <-- modify this line, and save it ("esc" + ":wq" to save and exit)
#

# Start MongoDB again so it picks up the new dbPath.
sudo service mongod start