Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save 7error/811bdbcce90c1f71733e3078d34e8b23 to your computer and use it in GitHub Desktop.
Save 7error/811bdbcce90c1f71733e3078d34e8b23 to your computer and use it in GitHub Desktop.
nvidia-ami-linux に nvidia-docker
# When launching the instance, search the AWS Marketplace for "nvidia" and
# choose "Amazon Linux AMI with NVIDIA GRID GPU Driver".
# Install docker
sudo yum install -y docker
sudo /etc/init.d/docker start
# Install nvidia-docker
wget -P /tmp https://github.com/NVIDIA/nvidia-docker/releases/download/v1.0.0-rc.3/nvidia-docker-1.0.0.rc.3-1.x86_64.rpm
# The package depends on systemd, so install it ignoring dependencies
sudo rpm -ihv /tmp/nvidia-docker-1.0.0.rc.3-1.x86_64.rpm --nodeps
# The package only ships a systemd unit, so install an init script by hand
# (save the nvidia-docker script as /etc/init.d/nvidia-docker)
sudo wget https://gist.githubusercontent.com/nazoking/031076441fd60835e13fc7e424e75440/raw/nvidia-docker -O /etc/init.d/nvidia-docker
sudo chmod +x /etc/init.d/nvidia-docker
sudo chkconfig --add nvidia-docker
sudo /etc/init.d/nvidia-docker start
# Allow the current user to run docker without sudo.
# gpasswd adds a group member with -a (it has no -u option), and changing
# group membership requires root. $(whoami) is expanded before sudo runs, so
# it names the invoking user rather than root.
sudo gpasswd -a "$(whoami)" docker
# Verification:
# nvidia-docker run --rm nvidia/cuda:7.5 nvidia-smi
# should finish without an error.
# Error: Cannot connect to the Docker daemon. Is the docker daemon running on this host?
# means either the new group membership has not taken effect yet (log in again
# or reboot), or /etc/init.d/docker did not start successfully.
#!/bin/sh
#
# chkconfig: 2345 96 04
# description: NVIDIA Docker plugin
### BEGIN INIT INFO
# Provides: nvidia-docker-plugin
# Required-Start: docker
# Required-Stop:
# Should-Start:
# Should-Stop:
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: start and stop nvidia-docker-plugin
# Description: NVIDIA Docker plugin daemon
### END INIT INFO
# Load the distribution's init-script helper library. The success, failure
# and killproc commands used further down are not defined in this file, so
# they presumably come from here.
. /etc/rc.d/init.d/functions

# Daemon name; every per-daemon path below is derived from it.
prog="nvidia-docker-plugin"

# Binaries: the launcher wrapper and the daemon itself.
unshare="/usr/bin/unshare"
exec="/usr/bin/${prog}"

# Runtime bookkeeping files.
pidfile="/var/run/${prog}.pid"
lockfile="/var/lock/subsys/${prog}"
logfile="/var/log/${prog}"

# Directory passed to the daemon with -s, and the spec file that advertises
# the socket inside that directory.
SOCK_DIR="/var/lib/nvidia-docker"
SPEC_FILE="/etc/docker/plugins/nvidia-docker.spec"
NDP_OPTIONS="-s ${SOCK_DIR}"
# Start the daemon in the background unless it is already running.
#
# Globals read: pidfile, prog, exec, unshare, NDP_OPTIONS, logfile,
#               SPEC_FILE, SOCK_DIR, lockfile
# Files written: $pidfile (PID of the launched job), $SPEC_FILE (socket URL),
#                $lockfile (touched), $logfile (daemon stdout/stderr, appended)
# Returns: 0 if the daemon was launched or is already running,
#          5 if $exec is not an executable file.
start() {
  local running_pid launched_pid

  # Only a non-empty pidfile naming a live process counts as "running".
  # (With an empty pidfile, `ps $(cat ...)` degenerates to a bare `ps`, which
  # succeeds and would wrongly report the service as running.)
  # NOTE(review): kill -0 also fails for a process owned by another user;
  # this assumes the script is run as root, as init scripts normally are.
  if [ -s "$pidfile" ]; then
    running_pid=$(cat "$pidfile")
    if kill -0 "$running_pid" 2>/dev/null; then
      echo allready runnning as "$running_pid"
      return 0
    fi
  fi

  echo -n $"Starting $prog: "

  # A backgrounded command always reports status 0 to the shell, so the only
  # failure detectable synchronously is a missing or non-executable binary.
  if [ ! -x "$exec" ]; then
    failure
    echo
    return 5
  fi

  # NDP_OPTIONS is intentionally unquoted: it holds several words
  # (option plus argument) that must be split.
  # ">> file 2>&1" is the portable spelling of bash's "&>>".
  "$unshare" -m -- nohup "$exec" ${NDP_OPTIONS} >> "$logfile" 2>&1 &
  launched_pid=$!

  echo "$launched_pid" > "$pidfile"
  mkdir -p "$(dirname "$SPEC_FILE")"
  echo "unix://$SOCK_DIR/nvidia-docker.sock" > "$SPEC_FILE"
  # Red Hat rc scripts only call "stop" at shutdown for services that have a
  # subsys lock file, so record that the service is up.
  touch "$lockfile"
  success
  echo
  return 0
}
# Stop the daemon via killproc and clean up the files left by start.
#
# Globals read: prog, pidfile, lockfile, SPEC_FILE
# Returns: the exit status of killproc.
stop() {
  local rc

  echo -n $"Stopping $prog: "
  # -d 300 is passed to killproc as its delay value.
  killproc -p "$pidfile" -d 300 "$prog"
  rc=$?
  echo

  # Clean up only after a successful stop, so a still-running daemon keeps
  # its bookkeeping. Removing the spec file stops Docker from being pointed
  # at a socket nobody is serving; removing the subsys lock marks the
  # service as down.
  if [ "$rc" -eq 0 ]; then
    rm -f "$pidfile" "$lockfile" "$SPEC_FILE"
  fi
  return "$rc"
}
# Stop the daemon, then start it again. start runs regardless of whether
# stop succeeded; the return status is that of start.
restart() {
  stop
  start
}

# reload is a plain alias for restart.
reload() {
  restart
}
# Dispatch on the first command-line argument. The script exits with the
# status of the selected action, or 2 for an unknown or missing action.
case "$1" in
  start | stop | restart | reload)
    # Each of these actions is a function of the same name defined above.
    "$1"
    ;;
  status)
    # The usage text advertises "status" but it had no handler and fell
    # through to the usage error. Report state through the status helper.
    # NOTE(review): status is expected to come from the sourced
    # /etc/rc.d/init.d/functions; it is not defined in this file.
    status -p "$pidfile" "$prog"
    ;;
  *)
    echo $"Usage: $0 {start|stop|status|restart|reload}"
    exit 2
    ;;
esac
exit $?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment