line-segment-matching / collect_env.py
Johannes
update
25a8011
raw
history blame
17.3 kB
from __future__ import print_function
# Unlike the rest of PyTorch, this file must remain Python 2 compatible.
# This script outputs relevant system environment info
# Run it with `python collect_env.py`.
import datetime
import locale
import re
import subprocess
import sys
import os
from collections import namedtuple
# torch is optional: record whether it could be imported so the collectors
# below can fall back to "N/A" instead of failing.
try:
    import torch
except (ImportError, NameError, AttributeError, OSError):
    TORCH_AVAILABLE = False
else:
    TORCH_AVAILABLE = True
# Immutable record with one field per piece of collected environment
# information; get_env_info() fills it and pretty_str() renders it.
SystemEnv = namedtuple(
    "SystemEnv",
    (
        "torch_version",
        "is_debug_build",
        "cuda_compiled_version",
        "gcc_version",
        "clang_version",
        "cmake_version",
        "os",
        "libc_version",
        "python_version",
        "python_platform",
        "is_cuda_available",
        "cuda_runtime_version",
        "nvidia_driver_version",
        "nvidia_gpu_models",
        "cudnn_version",
        "pip_version",  # 'pip' or 'pip3'
        "pip_packages",
        "conda_packages",
        "hip_compiled_version",
        "hip_runtime_version",
        "miopen_runtime_version",
        "caching_allocator_config",
        "is_xnnpack_available",
    ),
)
def run(command):
    """Execute ``command`` through the shell and capture its output.

    Returns a ``(return_code, stdout, stderr)`` tuple; both streams are
    decoded to text and stripped of leading/trailing whitespace.
    """
    proc = subprocess.Popen(
        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    stdout_bytes, stderr_bytes = proc.communicate()
    return_code = proc.returncode
    # On Windows decode with the "oem" codec; everywhere else use the
    # locale's preferred encoding.
    encoding = "oem" if get_platform() == "win32" else locale.getpreferredencoding()
    stdout_text = stdout_bytes.decode(encoding)
    stderr_text = stderr_bytes.decode(encoding)
    return return_code, stdout_text.strip(), stderr_text.strip()
def run_and_read_all(run_lambda, command):
    """Run ``command`` via ``run_lambda`` and return its complete output.

    ``run_lambda`` must return ``(return_code, stdout, stderr)``. ``None`` is
    returned when the return code is non-zero.
    """
    return_code, stdout, _ = run_lambda(command)
    return stdout if return_code == 0 else None
def run_and_parse_first_match(run_lambda, command, regex):
    """Run ``command`` via ``run_lambda`` and extract group 1 of ``regex``.

    The pattern is searched in the command's output. ``None`` is returned
    when the return code is non-zero or the pattern does not match.
    """
    return_code, stdout, _ = run_lambda(command)
    if return_code == 0:
        found = re.search(regex, stdout)
        if found is not None:
            return found.group(1)
    return None
def run_and_return_first_line(run_lambda, command):
    """Run ``command`` via ``run_lambda`` and return the first output line.

    ``None`` is returned when the return code is non-zero; empty output
    yields an empty string.
    """
    return_code, stdout, _ = run_lambda(command)
    if return_code == 0:
        # Everything before the first "\n" (the whole text if there is none).
        return stdout.partition("\n")[0]
    return None
def get_conda_packages(run_lambda):
    """Return the ``conda list`` lines that mention a relevant package.

    The conda executable is taken from the ``CONDA_EXE`` environment variable
    (default ``conda``). Comment lines starting with ``#`` are skipped.
    Returns ``None`` when the command fails, otherwise the kept lines joined
    with newlines (possibly an empty string).
    """
    keywords = ("torch", "numpy", "cudatoolkit", "soumith", "mkl", "magma")
    conda_exe = os.environ.get("CONDA_EXE", "conda")
    listing = run_and_read_all(run_lambda, "{} list".format(conda_exe))
    if listing is None:
        return None
    relevant = []
    for row in listing.splitlines():
        if row.startswith("#"):
            continue
        if any(keyword in row for keyword in keywords):
            relevant.append(row)
    return "\n".join(relevant)
def get_gcc_version(run_lambda):
    """Return the text after ``gcc `` in ``gcc --version`` output, or ``None``."""
    command = "gcc --version"
    pattern = r"gcc (.*)"
    return run_and_parse_first_match(run_lambda, command, pattern)
def get_clang_version(run_lambda):
    """Return the text after ``clang version `` in ``clang --version`` output, or ``None``."""
    command = "clang --version"
    pattern = r"clang version (.*)"
    return run_and_parse_first_match(run_lambda, command, pattern)
def get_cmake_version(run_lambda):
    """Return the text after ``cmake `` in ``cmake --version`` output, or ``None``."""
    command = "cmake --version"
    pattern = r"cmake (.*)"
    return run_and_parse_first_match(run_lambda, command, pattern)
def get_nvidia_driver_version(run_lambda):
    """Return the NVIDIA driver version string, or ``None`` if it cannot be read.

    On macOS the version is parsed from ``kextstat``; on every other platform
    it is parsed from the output of nvidia-smi.
    """
    if get_platform() != "darwin":
        return run_and_parse_first_match(
            run_lambda, get_nvidia_smi(), r"Driver Version: (.*?) "
        )
    return run_and_parse_first_match(
        run_lambda,
        "kextstat | grep -i cuda",
        r"com[.]nvidia[.]CUDA [(](.*?)[)]",
    )
def get_gpu_info(run_lambda):
    """Return a description of the installed GPU(s), or ``None``.

    On macOS, or when torch reports a HIP version, the device name is asked
    from torch (``None`` if torch/CUDA is unavailable). Otherwise the output
    of ``nvidia-smi -L`` is returned with the UUIDs removed.
    """
    ask_torch = get_platform() == "darwin" or (
        TORCH_AVAILABLE
        and hasattr(torch.version, "hip")
        and torch.version.hip is not None
    )
    if ask_torch:
        if not (TORCH_AVAILABLE and torch.cuda.is_available()):
            return None
        return torch.cuda.get_device_name(None)

    return_code, listing, _ = run_lambda(get_nvidia_smi() + " -L")
    if return_code != 0:
        return None
    # Anonymize GPUs by removing their UUID
    return re.sub(r" \(UUID: .+?\)", "", listing)
def get_running_cuda_version(run_lambda):
    """Return the version following ``V`` on the ``release`` line of ``nvcc --version``, or ``None``."""
    command = "nvcc --version"
    pattern = r"release .+ V(.*)"
    return run_and_parse_first_match(run_lambda, command, pattern)
def get_cudnn_version(run_lambda):
    """Return the cuDNN library file(s) found on this machine.

    It is hard to tell which libcudnn is actually being used, so every
    candidate that exists on disk is reported.

    Returns ``None`` when nothing is found, the single path when exactly one
    file is found, or a multi-line string starting with
    "Probably one of the following:" when several are found. If the lookup
    command yields nothing usable, the ``CUDNN_LIBRARY`` environment variable
    is consulted instead.
    """
    platform_name = get_platform()
    if platform_name == "win32":
        system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
        cuda_path = os.environ.get("CUDA_PATH", "%CUDA_PATH%")
        where_cmd = os.path.join(system_root, "System32", "where")
        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
    elif platform_name == "darwin":
        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
        # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
        # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
        cudnn_cmd = "ls /usr/local/cuda/lib/libcudnn*"
    else:
        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
    rc, out, _ = run_lambda(cudnn_cmd)
    # find will return 1 if there are permission errors or if not found
    if not out or rc not in (0, 1):
        cudnn_library = os.environ.get("CUDNN_LIBRARY")
        if cudnn_library is not None and os.path.isfile(cudnn_library):
            return os.path.realpath(cudnn_library)
        return None
    files_set = set()
    # splitlines() treats "\r\n" as one line break, so paths printed by
    # Windows tools do not keep a trailing "\r" that would fail isfile().
    for fn in out.splitlines():
        fn = os.path.realpath(fn)  # eliminate symbolic links
        if os.path.isfile(fn):
            files_set.add(fn)
    if not files_set:
        return None
    # Alphabetize the result because the order is non-deterministic otherwise
    files = sorted(files_set)
    if len(files) == 1:
        return files[0]
    result = "\n".join(files)
    return "Probably one of the following:\n{}".format(result)
def get_nvidia_smi():
    """Return the command used to invoke nvidia-smi.

    On Windows the first existing candidate path (System32, then the legacy
    NVSMI folder under Program Files) is returned wrapped in double quotes;
    otherwise the bare name ``nvidia-smi`` is returned.
    """
    # Note: nvidia-smi is currently available only on Windows and Linux
    smi = "nvidia-smi"
    if get_platform() != "win32":
        return smi
    system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
    program_files_root = os.environ.get("PROGRAMFILES", "C:\\Program Files")
    candidates = (
        os.path.join(system_root, "System32", smi),
        os.path.join(program_files_root, "NVIDIA Corporation", "NVSMI", smi),
    )
    # Lazily stop at the first candidate that exists.
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is None:
        return smi
    return '"{}"'.format(found)
def get_platform():
    """Return a normalized platform name.

    ``sys.platform`` values starting with ``linux``, ``win32``, ``cygwin`` or
    ``darwin`` are collapsed to exactly that prefix; any other value is
    returned unchanged.
    """
    for known in ("linux", "win32", "cygwin", "darwin"):
        if sys.platform.startswith(known):
            return known
    return sys.platform
def get_mac_version(run_lambda):
    """Return the first line printed by ``sw_vers -productVersion``, or ``None`` on failure."""
    command = "sw_vers -productVersion"
    pattern = r"(.*)"
    return run_and_parse_first_match(run_lambda, command, pattern)
def get_windows_version(run_lambda):
    """Return the OS caption reported by ``wmic``, or ``None`` on failure.

    ``findstr /v Caption`` filters out the line containing the column header.
    """
    system32 = os.path.join(os.environ.get("SYSTEMROOT", "C:\\Windows"), "System32")
    wmic_cmd = os.path.join(system32, "Wbem", "wmic")
    findstr_cmd = os.path.join(system32, "findstr")
    command = "{} os get Caption | {} /v Caption".format(wmic_cmd, findstr_cmd)
    return run_and_read_all(run_lambda, command)
def get_lsb_version(run_lambda):
    """Return the ``Description:`` value from ``lsb_release -a``, or ``None``."""
    command = "lsb_release -a"
    pattern = r"Description:\t(.*)"
    return run_and_parse_first_match(run_lambda, command, pattern)
def check_release_file(run_lambda):
    """Return the ``PRETTY_NAME`` value found in ``/etc/*-release``, or ``None``."""
    command = "cat /etc/*-release"
    pattern = r'PRETTY_NAME="(.*)"'
    return run_and_parse_first_match(run_lambda, command, pattern)
def get_os(run_lambda):
    """Return a human-readable description of the operating system.

    Windows and Cygwin use the wmic caption; macOS and Linux append the
    machine type in parentheses. On macOS ``None`` is returned when the
    version cannot be read. Unknown platforms return the platform name as is.
    """
    from platform import machine

    platform = get_platform()
    if platform in ("win32", "cygwin"):
        return get_windows_version(run_lambda)

    if platform == "darwin":
        version = get_mac_version(run_lambda)
        if version is None:
            return None
        return "macOS {} ({})".format(version, machine())

    if platform == "linux":
        # Try lsb_release first (Ubuntu/Debian based), then /etc/*-release;
        # fall back to the bare platform name when both fail.
        for probe in (get_lsb_version, check_release_file):
            desc = probe(run_lambda)
            if desc is not None:
                return "{} ({})".format(desc, machine())
        return "{} ({})".format(platform, machine())

    # Unknown platform
    return platform
def get_python_platform():
    """Return the string produced by ``platform.platform()``."""
    from platform import platform as describe_platform

    return describe_platform()
def get_libc_version():
    """Return the libc name and version joined by ``-`` on Linux, else ``"N/A"``."""
    import platform

    if get_platform() == "linux":
        return "-".join(platform.libc_ver())
    return "N/A"
def get_pip_packages(run_lambda):
    """Return ``(pip_version, packages)`` for the relevant pip packages.

    ``pip_version`` is ``"pip3"`` when running under Python 3 and ``"pip"``
    otherwise. ``packages`` holds the ``pip list --format=freeze`` lines that
    mention torch, numpy or mypy, joined with newlines; it is ``None`` when
    pip could not be run, so the report shows "Could not collect" instead of
    the script crashing.

    Note: will also find conda-installed pytorch and numpy packages.
    """
    relevant_names = ("torch", "numpy", "mypy")

    # People generally have `pip` as `pip` or `pip3`
    # But here it is invoked as `python -mpip`
    def run_with_pip(pip):
        out = run_and_read_all(run_lambda, "{} list --format=freeze".format(pip))
        if out is None:
            # pip is missing or exited with an error: nothing to filter.
            return None
        return "\n".join(
            line
            for line in out.splitlines()
            if any(name in line for name in relevant_names)
        )

    pip_version = "pip3" if sys.version[0] == "3" else "pip"
    out = run_with_pip(sys.executable + " -mpip")
    return pip_version, out
def get_cachingallocator_config():
    """Return the ``PYTORCH_CUDA_ALLOC_CONF`` environment variable, or ``""`` if unset."""
    return os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
def is_xnnpack_available():
    """Return ``str(torch.backends.xnnpack.enabled)``, or ``"N/A"`` without torch."""
    if not TORCH_AVAILABLE:
        return "N/A"
    import torch.backends.xnnpack

    return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
def get_env_info():
    """Collect all environment details and return them as a ``SystemEnv``.

    Torch-derived fields are set to ``"N/A"`` when torch is not importable.
    For HIP builds the CUDA build version is reported as ``"N/A"``; for
    non-HIP builds the HIP/MIOpen fields are ``"N/A"``.
    """
    run_lambda = run
    pip_version, pip_list_output = get_pip_packages(run_lambda)

    not_applicable = "N/A"
    hip_compiled_version = not_applicable
    hip_runtime_version = not_applicable
    miopen_runtime_version = not_applicable
    if not TORCH_AVAILABLE:
        version_str = not_applicable
        debug_mode_str = not_applicable
        cuda_available_str = not_applicable
        cuda_version_str = not_applicable
    else:
        version_str = torch.__version__
        debug_mode_str = str(torch.version.debug)
        cuda_available_str = str(torch.cuda.is_available())
        cuda_version_str = torch.version.cuda
        if hasattr(torch.version, "hip") and torch.version.hip is not None:
            # HIP version: take the last whitespace-separated token of the
            # matching lines in torch's build configuration dump.
            cfg = torch._C._show_config().split("\n")
            hip_runtime_version = [
                s.rsplit(None, 1)[-1] for s in cfg if "HIP Runtime" in s
            ][0]
            miopen_runtime_version = [
                s.rsplit(None, 1)[-1] for s in cfg if "MIOpen" in s
            ][0]
            cuda_version_str = not_applicable
            hip_compiled_version = torch.version.hip

    sys_version = sys.version.replace("\n", " ")
    python_version = "{} ({}-bit runtime)".format(
        sys_version, sys.maxsize.bit_length() + 1
    )

    return SystemEnv(
        torch_version=version_str,
        is_debug_build=debug_mode_str,
        python_version=python_version,
        python_platform=get_python_platform(),
        is_cuda_available=cuda_available_str,
        cuda_compiled_version=cuda_version_str,
        cuda_runtime_version=get_running_cuda_version(run_lambda),
        nvidia_gpu_models=get_gpu_info(run_lambda),
        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
        cudnn_version=get_cudnn_version(run_lambda),
        hip_compiled_version=hip_compiled_version,
        hip_runtime_version=hip_runtime_version,
        miopen_runtime_version=miopen_runtime_version,
        pip_version=pip_version,
        pip_packages=pip_list_output,
        conda_packages=get_conda_packages(run_lambda),
        os=get_os(run_lambda),
        libc_version=get_libc_version(),
        gcc_version=get_gcc_version(run_lambda),
        clang_version=get_clang_version(run_lambda),
        cmake_version=get_cmake_version(run_lambda),
        caching_allocator_config=get_cachingallocator_config(),
        is_xnnpack_available=is_xnnpack_available(),
    )
# Template of the printed report. Each {placeholder} names a SystemEnv field
# and is filled in by pretty_str() via str.format(); fields that have no
# placeholder here are simply not printed.
env_info_fmt = """
PyTorch version: {torch_version}
Is debug build: {is_debug_build}
CUDA used to build PyTorch: {cuda_compiled_version}
ROCM used to build PyTorch: {hip_compiled_version}
OS: {os}
GCC version: {gcc_version}
Clang version: {clang_version}
CMake version: {cmake_version}
Libc version: {libc_version}
Python version: {python_version}
Python platform: {python_platform}
Is CUDA available: {is_cuda_available}
CUDA runtime version: {cuda_runtime_version}
GPU models and configuration: {nvidia_gpu_models}
Nvidia driver version: {nvidia_driver_version}
cuDNN version: {cudnn_version}
HIP runtime version: {hip_runtime_version}
MIOpen runtime version: {miopen_runtime_version}
Is XNNPACK available: {is_xnnpack_available}
Versions of relevant libraries:
{pip_packages}
{conda_packages}
""".strip()
def pretty_str(envinfo):
    """Render a ``SystemEnv`` as the text report defined by ``env_info_fmt``.

    Missing values (``None``) become "Could not collect", booleans become
    "Yes"/"No", empty package lists become "No relevant packages", and every
    package line is prefixed with a ``[pip]``/``[pip3]`` or ``[conda]`` tag.
    """

    def replace_nones(dct, replacement="Could not collect"):
        # Substitute a placeholder for every value that is None.
        for key in dct.keys():
            if dct[key] is None:
                dct[key] = replacement
        return dct

    def replace_bools(dct, true="Yes", false="No"):
        # Only the True/False singletons are rewritten, not truthy values.
        for key in dct.keys():
            value = dct[key]
            if value is True:
                dct[key] = true
            elif value is False:
                dct[key] = false
        return dct

    def prepend(text, tag="[prepend]"):
        # Put `tag` in front of every line of `text`.
        return "\n".join(tag + line for line in text.split("\n"))

    def replace_if_empty(text, replacement="No relevant packages"):
        if text is None or len(text) != 0:
            return text
        return replacement

    def maybe_start_on_next_line(string):
        # If `string` is multiline, prepend a \n to it.
        if string is None or "\n" not in string:
            return string
        return "\n{}\n".format(string)

    mutable_dict = envinfo._asdict()

    # If nvidia_gpu_models is multiline, start on the next line
    mutable_dict["nvidia_gpu_models"] = maybe_start_on_next_line(
        envinfo.nvidia_gpu_models
    )

    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
    dynamic_cuda_fields = [
        "cuda_runtime_version",
        "nvidia_gpu_models",
        "nvidia_driver_version",
    ]
    all_cuda_fields = dynamic_cuda_fields + ["cudnn_version"]
    all_dynamic_cuda_fields_missing = all(
        mutable_dict[field] is None for field in dynamic_cuda_fields
    )
    no_cuda = (
        TORCH_AVAILABLE
        and not torch.cuda.is_available()
        and all_dynamic_cuda_fields_missing
    )
    if no_cuda:
        mutable_dict.update(dict.fromkeys(all_cuda_fields, "No CUDA"))
        if envinfo.cuda_compiled_version is None:
            mutable_dict["cuda_compiled_version"] = "None"

    # Replace True with Yes, False with No
    mutable_dict = replace_bools(mutable_dict)

    # Replace all None objects with 'Could not collect'
    mutable_dict = replace_nones(mutable_dict)

    # If either of these are '', replace with 'No relevant packages'
    for field in ("pip_packages", "conda_packages"):
        mutable_dict[field] = replace_if_empty(mutable_dict[field])

    # Tag conda and pip packages with a prefix
    # If they were previously None, they'll show up as ie '[conda] Could not collect'
    tags = (
        ("pip_packages", "[{}] ".format(envinfo.pip_version)),
        ("conda_packages", "[conda] "),
    )
    for field, tag in tags:
        if mutable_dict[field]:
            mutable_dict[field] = prepend(mutable_dict[field], tag)

    return env_info_fmt.format(**mutable_dict)
def get_pretty_env_info():
    """Collect the environment information and return it as a formatted report."""
    env_info = get_env_info()
    return pretty_str(env_info)
def main():
    """Print the environment report, then point out the newest minidump.

    The minidump hint is written to stderr and only appears on Linux when
    torch exposes ``torch.utils._crash_handler`` and its default minidump
    directory exists and contains at least one entry.
    """
    print("Collecting environment information...")
    output = get_pretty_env_info()
    print(output)

    has_crash_handler = (
        TORCH_AVAILABLE
        and hasattr(torch, "utils")
        and hasattr(torch.utils, "_crash_handler")
    )
    if not has_crash_handler:
        return

    minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
    # get_platform() also recognizes "linux2" as reported by Python 2.
    if get_platform() != "linux" or not os.path.exists(minidump_dir):
        return

    dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
    if not dumps:
        # The directory exists but is empty; max() on an empty list would
        # raise ValueError after the report has already been printed.
        return

    latest = max(dumps, key=os.path.getctime)
    ctime = os.path.getctime(latest)
    creation_time = datetime.datetime.fromtimestamp(ctime).strftime(
        "%Y-%m-%d %H:%M:%S"
    )
    msg = (
        "\n*** Detected a minidump at {} created on {}, ".format(
            latest, creation_time
        )
        + "if this is related to your bug please include it when you file a report ***"
    )
    print(msg, file=sys.stderr)


if __name__ == "__main__":
    main()