This is a Prometheus exporter that collects NVIDIA GPU metrics. It uses the NVIDIA NVML Go bindings directly instead of calling the `nvidia-smi` binary.
make docker
```
# ls -l /lib/x86_64-linux-gnu | grep libdl
-rw-r--r-- 1 root root 14640 Jan 14 2018 libdl-2.24.so
lrwxrwxrwx 1 root root 13 Jan 14 2018 libdl.so.2 -> libdl-2.24.so

./gpu-exporter run -v 5 --logtostderr
```
```
# HELP nvidia_gpu_memory_total_mb Memory Total of the GPU device in MB
# TYPE nvidia_gpu_memory_total_mb gauge
nvidia_gpu_memory_total_mb{name="Tesla P4",path="/dev/nvidia0",uuid="GPU-9bb57e8e-f94d-4b95-7c33-ee279c8fb75c"} 7606
nvidia_gpu_memory_total_mb{name="Tesla P4",path="/dev/nvidia1",uuid="GPU-2d9067e4-8fab-5820-6228-69e2e74b3d58"} 7606
# HELP nvidia_gpu_memory_used_mb Memory used by the GPU device in MB
# TYPE nvidia_gpu_memory_used_mb gauge
nvidia_gpu_memory_used_mb{name="Tesla P4",path="/dev/nvidia0",uuid="GPU-9bb57e8e-f94d-4b95-7c33-ee279c8fb75c"} 0
nvidia_gpu_memory_used_mb{name="Tesla P4",path="/dev/nvidia1",uuid="GPU-2d9067e4-8fab-5820-6228-69e2e74b3d58"} 0
# HELP nvidia_gpu_num_devices Number of Nvidia GPU devices
# TYPE nvidia_gpu_num_devices gauge
nvidia_gpu_num_devices 2
# HELP nvidia_gpu_power_usage Power usage of the GPU device in watts
# TYPE nvidia_gpu_power_usage gauge
nvidia_gpu_power_usage{name="Tesla P4",path="/dev/nvidia0",uuid="GPU-9bb57e8e-f94d-4b95-7c33-ee279c8fb75c"} 7
nvidia_gpu_power_usage{name="Tesla P4",path="/dev/nvidia1",uuid="GPU-2d9067e4-8fab-5820-6228-69e2e74b3d58"} 6
# HELP nvidia_gpu_temperature Temperature of the GPU device in celsius
# TYPE nvidia_gpu_temperature gauge
nvidia_gpu_temperature{name="Tesla P4",path="/dev/nvidia0",uuid="GPU-9bb57e8e-f94d-4b95-7c33-ee279c8fb75c"} 45
nvidia_gpu_temperature{name="Tesla P4",path="/dev/nvidia1",uuid="GPU-2d9067e4-8fab-5820-6228-69e2e74b3d58"} 39
```