Mirror of https://github.com/OpenDroneMap/ODM

Detect CUDA compute capability at runtime

parent 0e062b56ca
commit 29fa4eb36f
@@ -1,6 +1,7 @@
import os
import sys
import shutil
import ctypes
from opendm import log
from repoze.lru import lru_cache

@@ -9,9 +10,20 @@ def gpu_disabled_by_user():

@lru_cache(maxsize=None)
def has_popsift_and_can_handle_texsize(width, height):
    # We first check that we have the required compute capabilities
    # As we do not support compute capabilities less than 3.5
    try:
        compute_major, compute_minor = get_cuda_compute_version(0)
        if compute_major < 3 or (compute_major == 3 and compute_minor < 5):
            # Not supported
            log.ODM_WARNING("CUDA compute platform is not supported (detected: %s.%s but we need at least 3.5)" % (compute_major, compute_minor))
            return False
    except Exception as e:
        log.ODM_WARNING("Cannot use GPU for feature extraction: %s" % str(e))

    try:
        from opensfm import pypopsift
        fits = pypopsift.fits_texture(int(width * 1.025), int(height * 1.025))
        fits = pypopsift.fits_texture(int(width * 1.02), int(height * 1.02))
        if not fits:
            log.ODM_WARNING("Image size (%sx%spx) would not fit in GPU memory, falling back to CPU" % (width, height))
        return fits

@@ -21,6 +33,40 @@ def has_popsift_and_can_handle_texsize(width, height):
        log.ODM_WARNING(str(e))
        return False

@lru_cache(maxsize=None)
def get_cuda_compute_version(device_id = 0):
    cuda_lib = "libcuda.so"
    if sys.platform == 'win32':
        cuda_lib = "nvcuda.dll"

    nvcuda = ctypes.cdll.LoadLibrary(cuda_lib)

    nvcuda.cuInit.argtypes = (ctypes.c_uint32, )
    nvcuda.cuInit.restypes = (ctypes.c_int32)

    if nvcuda.cuInit(0) != 0:
        raise Exception("Cannot initialize CUDA")

    nvcuda.cuDeviceGetCount.argtypes = (ctypes.POINTER(ctypes.c_int32), )
    nvcuda.cuDeviceGetCount.restypes = (ctypes.c_int32)

    device_count = ctypes.c_int32()
    if nvcuda.cuDeviceGetCount(ctypes.byref(device_count)) != 0:
        raise Exception("Cannot get device count")

    if device_count.value == 0:
        raise Exception("No devices")

    nvcuda.cuDeviceComputeCapability.argtypes = (ctypes.POINTER(ctypes.c_int32), ctypes.POINTER(ctypes.c_int32), ctypes.c_int32)
    nvcuda.cuDeviceComputeCapability.restypes = (ctypes.c_int32)
    compute_major = ctypes.c_int32()
    compute_minor = ctypes.c_int32()

    if nvcuda.cuDeviceComputeCapability(ctypes.byref(compute_major), ctypes.byref(compute_minor), device_id) != 0:
        raise Exception("Cannot get CUDA compute version")

    return (compute_major.value, compute_minor.value)

@lru_cache(maxsize=None)
def has_gpu():
    if gpu_disabled_by_user():
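For reference, the runtime-detection technique this commit introduces (the first file above is presumably opendm/gpu.py) can be exercised on its own. The sketch below is not part of the commit; it is a minimal, standalone illustration of the same idea: load the CUDA driver library with ctypes, initialize it, enumerate devices, read each device's compute capability, and compare it against the 3.5 minimum that popsift requires. The helper name list_compute_capabilities is invented for illustration, and cuDeviceGet is used here to map a device ordinal to a driver handle, a step the commit's get_cuda_compute_version omits.

# Minimal sketch (not part of the commit): query compute capability of every
# visible CUDA device through the driver API via ctypes. Assumes libcuda.so
# (Linux) or nvcuda.dll (Windows) is installed; cuDeviceComputeCapability is
# a legacy but still-exported driver entry point.
import sys
import ctypes

def list_compute_capabilities():
    lib = "nvcuda.dll" if sys.platform == "win32" else "libcuda.so"
    cuda = ctypes.cdll.LoadLibrary(lib)

    if cuda.cuInit(0) != 0:
        raise RuntimeError("cuInit failed")

    count = ctypes.c_int32()
    if cuda.cuDeviceGetCount(ctypes.byref(count)) != 0:
        raise RuntimeError("cuDeviceGetCount failed")

    results = []
    for ordinal in range(count.value):
        device = ctypes.c_int32()
        # cuDeviceGet maps the ordinal to a CUdevice handle
        if cuda.cuDeviceGet(ctypes.byref(device), ordinal) != 0:
            continue
        major, minor = ctypes.c_int32(), ctypes.c_int32()
        if cuda.cuDeviceComputeCapability(ctypes.byref(major), ctypes.byref(minor), device) != 0:
            continue
        results.append((ordinal, major.value, minor.value))
    return results

if __name__ == "__main__":
    for ordinal, major, minor in list_compute_capabilities():
        supported = (major, minor) >= (3, 5)  # popsift threshold used above
        status = "usable" if supported else "below the 3.5 minimum"
        print("GPU %d: compute %d.%d (%s)" % (ordinal, major, minor, status))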
@@ -7,7 +7,6 @@ import subprocess
import string
import signal
import io
import ctypes
from collections import deque

from opendm import context

@@ -74,11 +73,8 @@ def run(cmd, env_paths=[context.superbuild_bin_path], env_vars={}, packages_path
    env = os.environ.copy()

    sep = ":"
    flags = 0
    if sys.platform == 'win32':
        sep = ";"
        ctypes.windll.kernel32.SetErrorMode(0x0002) #SEM_NOGPFAULTERRORBOX
        flags = 0x8000000 #CREATE_NO_WINDOW

    if len(env_paths) > 0:
        env["PATH"] = env["PATH"] + sep + sep.join(env_paths)

@@ -89,7 +85,7 @@ def run(cmd, env_paths=[context.superbuild_bin_path], env_vars={}, packages_path
    for k in env_vars:
        env[k] = str(env_vars[k])

    p = subprocess.Popen(cmd, shell=True, env=env, start_new_session=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, creationflags=flags)
    p = subprocess.Popen(cmd, shell=True, env=env, start_new_session=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    running_subprocesses.append(p)
    lines = deque()
    for line in io.TextIOWrapper(p.stdout):
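For context (the second file appears to be opendm/system.py), the run() hunks above copy the current environment, pick the platform-appropriate PATH separator, apply extra variables, and launch the command with subprocess.Popen while streaming its merged stdout/stderr. The sketch below is a minimal, standalone illustration of that pattern, not ODM's implementation; run_streaming and its parameters are invented names for illustration.

# Minimal sketch (not part of the commit) of the pattern used by run() above:
# copy the environment, extend PATH with the platform's separator (";" on
# Windows, ":" elsewhere), then stream the command's merged output.
import io
import os
import sys
import subprocess

def run_streaming(cmd, extra_paths=None, extra_vars=None):
    env = os.environ.copy()
    sep = ";" if sys.platform == "win32" else ":"

    if extra_paths:
        env["PATH"] = env["PATH"] + sep + sep.join(extra_paths)
    for key, value in (extra_vars or {}).items():
        env[key] = str(value)

    p = subprocess.Popen(cmd, shell=True, env=env,
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    for line in io.TextIOWrapper(p.stdout):
        print(line, end="")
    return p.wait()

# Example: run_streaming("echo hello", extra_paths=["/opt/tools/bin"])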