From 5e71da45305874fcbeeb793ec762ef1734dcf2ef Mon Sep 17 00:00:00 2001 From: Jon Beniston Date: Sun, 13 Aug 2023 23:52:57 +0100 Subject: [PATCH 1/4] Add VkFFT support and profiler --- CMakeLists.txt | 6 + external/CMakeLists.txt | 14 ++ sdrbase/CMakeLists.txt | 134 ++++++++++- sdrbase/dsp/cudavkfftengine.cpp | 154 +++++++++++++ sdrbase/dsp/cudavkfftengine.h | 52 +++++ sdrbase/dsp/fftengine.cpp | 108 ++++++++- sdrbase/dsp/fftengine.h | 30 ++- sdrbase/dsp/fftfactory.cpp | 18 +- sdrbase/dsp/fftfactory.h | 2 +- sdrbase/dsp/fftwengine.cpp | 14 +- sdrbase/dsp/fftwengine.h | 2 + sdrbase/dsp/kissengine.cpp | 13 +- sdrbase/dsp/kissengine.h | 2 + sdrbase/dsp/vkfftengine.cpp | 170 ++++++++++++++ sdrbase/dsp/vkfftengine.h | 71 ++++++ sdrbase/dsp/vkfftutils.cpp | 327 +++++++++++++++++++++++++++ sdrbase/dsp/vkfftutils.h | 47 ++++ sdrbase/dsp/vulkanvkfftengine.cpp | 344 +++++++++++++++++++++++++++++ sdrbase/dsp/vulkanvkfftengine.h | 61 +++++ sdrbase/settings/mainsettings.h | 7 + sdrbase/settings/preferences.cpp | 4 + sdrbase/settings/preferences.h | 8 +- sdrbase/util/profiler.cpp | 39 ++++ sdrbase/util/profiler.h | 123 +++++++++++ sdrgui/CMakeLists.txt | 8 + sdrgui/gui/fftdialog.cpp | 48 ++++ sdrgui/gui/fftdialog.h | 44 ++++ sdrgui/gui/fftdialog.ui | 104 +++++++++ sdrgui/gui/glspectrumview.cpp | 26 +++ sdrgui/gui/glspectrumview.h | 4 + sdrgui/gui/nanosecondsdelegate.cpp | 48 ++++ sdrgui/gui/nanosecondsdelegate.h | 34 +++ sdrgui/gui/profiledialog.cpp | 135 +++++++++++ sdrgui/gui/profiledialog.h | 59 +++++ sdrgui/gui/profiledialog.ui | 142 ++++++++++++ sdrgui/mainwindow.cpp | 48 +++- sdrgui/mainwindow.h | 4 + 37 files changed, 2418 insertions(+), 36 deletions(-) create mode 100644 sdrbase/dsp/cudavkfftengine.cpp create mode 100644 sdrbase/dsp/cudavkfftengine.h create mode 100644 sdrbase/dsp/vkfftengine.cpp create mode 100644 sdrbase/dsp/vkfftengine.h create mode 100644 sdrbase/dsp/vkfftutils.cpp create mode 100644 sdrbase/dsp/vkfftutils.h create mode 100644 sdrbase/dsp/vulkanvkfftengine.cpp create mode 100644 sdrbase/dsp/vulkanvkfftengine.h create mode 100644 sdrbase/util/profiler.cpp create mode 100644 sdrbase/util/profiler.h create mode 100644 sdrgui/gui/fftdialog.cpp create mode 100644 sdrgui/gui/fftdialog.h create mode 100644 sdrgui/gui/fftdialog.ui create mode 100644 sdrgui/gui/nanosecondsdelegate.cpp create mode 100644 sdrgui/gui/nanosecondsdelegate.h create mode 100644 sdrgui/gui/profiledialog.cpp create mode 100644 sdrgui/gui/profiledialog.h create mode 100644 sdrgui/gui/profiledialog.ui diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ed7dc6c3..b290ab5a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,8 @@ option(BUNDLE "Enable distribution bundle" OFF) set(ARCH_OPT "native" CACHE STRING "Specify instruction set to use. Will be passed directly as `-march` or `/arch:` argument on supported compilers. \ 'native' option will figure out host machine compatibilities and set flags accordingly (even with MSVC).") option(ENABLE_QT6 "Build with Qt6 rather than Qt5" OFF) +option(ENABLE_PROFILER "Enable runtime profiler" OFF) +set(VKFFT_BACKEND 1 CACHE STRING "vkFFT Backend: 0 - Vulkan, 1 - CUDA") # Sampling devices enablers option(ENABLE_AIRSPY "Enable AirSpy support" ON) @@ -574,6 +576,10 @@ else() message(STATUS "Compiling for 16 bit Rx DSP chain") endif() +if (ENABLE_PROFILER) + add_compile_definitions(ENABLE_PROFILER) +endif() + # Set compiler options based on target architecture and selected extensions include(CompilerOptions) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 556676a00..0079964a6 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -832,6 +832,20 @@ if(ENABLE_FEATURE_SATELLITETRACKER OR ENABLE_CHANNELRX_DEMODAPT) endif () endif () +# VkFFT (header only library) +ExternalProject_Add(vkfft + GIT_REPOSITORY https://github.com/DTolm/VkFFT.git + GIT_TAG v1.3.1 + PREFIX "${EXTERNAL_BUILD_LIBRARIES}/vkfft" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + BUILD_BYPRODUCTS "" + INSTALL_COMMAND "" + TEST_COMMAND "" + ) +ExternalProject_Get_Property(vkfft source_dir) +set(VKFFT_INCLUDE_DIR "${source_dir}" CACHE INTERNAL "") + # requirements needed by many packages on windows if (WIN32) ExternalProject_Add(pthreads4w diff --git a/sdrbase/CMakeLists.txt b/sdrbase/CMakeLists.txt index 0bb193ab4..42a0b4ede 100644 --- a/sdrbase/CMakeLists.txt +++ b/sdrbase/CMakeLists.txt @@ -19,18 +19,54 @@ if(FFTW3F_FOUND) add_definitions(-DUSE_FFTW) include_directories(${FFTW3F_INCLUDE_DIRS}) set(sdrbase_FFTW3F_LIB ${FFTW3F_LIBRARIES}) -else(FFTW3F_FOUND) +endif(FFTW3F_FOUND) + +# Kiss FFT is always available +set(sdrbase_SOURCES + ${sdrbase_SOURCES} + dsp/kissengine.cpp + dsp/kissfft.h +) +set(sdrbase_HEADERS + ${sdrbase_HEADERS} + dsp/kissengine.h +) +add_definitions(-DUSE_KISSFFT) + +# Vulkan SDK: https://vulkan.lunarg.com/ +# Windows Vulkan SDK is missing glslang_c_interface.h +# See bug: https://vulkan.lunarg.com/issue/view/63d158a85df11200d569b2ab +# Copy it from Linux SDK +find_package(Vulkan) +if(Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0)) set(sdrbase_SOURCES ${sdrbase_SOURCES} - dsp/kissengine.cpp - dsp/kissfft.h + dsp/vulkanvkfftengine.cpp + dsp/vulkanvkfftengine.h ) - set(sdrbase_HEADERS - ${sdrbase_HEADERS} - dsp/kissengine.h +endif() + +# CUDA Toolkit: https://developer.nvidia.com/cuda-downloads +find_package(CUDA 9.0) +if(CUDA_FOUND AND (${VKFFT_BACKEND} EQUAL 1)) + enable_language(CUDA) + set(sdrbase_SOURCES + ${sdrbase_SOURCES} + dsp/cudavkfftengine.cpp + dsp/cudavkfftengine.h ) - add_definitions(-DUSE_KISSFFT) -endif(FFTW3F_FOUND) +endif() + +if(Vulkan_FOUND OR CUDA_FOUND) + set(sdrbase_SOURCES + ${sdrbase_SOURCES} + dsp/vkfftengine.cpp + dsp/vkfftengine.h + dsp/vkfftutils.cpp + dsp/vkfftutils.h + ) + include_directories(${VKFFT_INCLUDE_DIR}) +endif() if (LIBSIGMF_FOUND) set(sdrbase_SOURCES @@ -207,6 +243,7 @@ set(sdrbase_SOURCES util/planespotters.cpp util/png.cpp util/prettyprint.cpp + util/profiler.cpp util/radiosonde.cpp util/rtpsink.cpp util/syncmessenger.cpp @@ -442,6 +479,7 @@ set(sdrbase_HEADERS util/planespotters.h util/png.h util/prettyprint.h + util/profiler.h util/radiosonde.h util/rtpsink.h util/syncmessenger.h @@ -507,6 +545,86 @@ if(DEFINED LIBSIGMF_DEPENDS) add_dependencies(sdrbase "${LIBSIGMF_DEPENDS}") endif() +if(Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0)) + target_compile_definitions(sdrbase PUBLIC -DVK_API_VERSION=11) + target_include_directories(sdrbase PUBLIC ${Vulkan_INCLUDE_DIR} ${Vulkan_INCLUDE_DIR}/glslang/Include) + add_compile_definitions(sdrbase VKFFT_BACKEND=0) + + find_library(VULKAN_SPIRV_LIB SPIRV HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPVREMAPPER_LIB SPVRemapper HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLS_LIB SPIRV-Tools HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLS_OPT_LIB SPIRV-Tools-Opt HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLS_SHARED_LIB SPIRV-Tools-Shared HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_HLSL_LIB HLSL HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_OGLCOMPILER_LIB OGLCompiler HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_OSDEPENDENT_LIB OSDependent HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_GLSLANG_LIB glslang HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_GLSLANG_RES_LIB glslang-default-resource-limits HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_MACHINEINDEPENDENT_LIB MachineIndependent HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_GENERICCODEGEN_LIB GenericCodeGen HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + + find_library(VULKAN_SPIRVD_LIB SPIRVd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPVREMAPPERD_LIB SPVRemapperd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLSD_LIB SPIRV-Toolsd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLS_OPTD_LIB SPIRV-Tools-Optd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLS_SHAREDD_LIB SPIRV-Tools-Sharedd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_HLSLD_LIB HLSLd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_OGLCOMPILERD_LIB OGLCompilerd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_OSDEPENDENTD_LIB OSDependentd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_GLSLANGD_LIB glslangd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_GLSLANG_RESD_LIB glslang-default-resource-limitsd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_MACHINEINDEPENDENTD_LIB MachineIndependentd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_GENERICCODEGEND_LIB GenericCodeGend HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + + target_link_libraries(sdrbase + optimized ${VULKAN_SPIRV_LIB} + optimized ${VULKAN_SPVREMAPPER_LIB} + optimized ${VULKAN_SPIRV_TOOLS_LIB} + optimized ${VULKAN_SPIRV_TOOLS_OPT_LIB} + optimized ${VULKAN_SPIRV_TOOLS_SHARED_LIB} + optimized ${VULKAN_HLSL_LIB} + optimized ${VULKAN_OGLCOMPILER_LIB} + optimized ${VULKAN_OSDEPENDENT_LIB} + optimized ${VULKAN_GLSLANG_LIB} + optimized ${VULKAN_GLSLANG_RES_LIB} + optimized ${VULKAN_MACHINEINDEPENDENT_LIB} + optimized ${VULKAN_GENERICCODEGEN_LIB} + optimized Vulkan::Vulkan + debug ${VULKAN_SPIRVD_LIB} + debug ${VULKAN_SPVREMAPPERD_LIB} + debug ${VULKAN_SPIRV_TOOLSD_LIB} + debug ${VULKAN_SPIRV_TOOLS_OPTD_LIB} + debug ${VULKAN_SPIRV_TOOLS_SHAREDD_LIB} + debug ${VULKAN_HLSLD_LIB} + debug ${VULKAN_OGLCOMPILERD_LIB} + debug ${VULKAN_OSDEPENDENTD_LIB} + debug ${VULKAN_GLSLANGD_LIB} + debug ${VULKAN_GLSLANG_RESD_LIB} + debug ${VULKAN_MACHINEINDEPENDENTD_LIB} + debug ${VULKAN_GENERICCODEGEND_LIB} + Vulkan::Vulkan + ) +endif() + +if(CUDA_FOUND AND (${VKFFT_BACKEND} EQUAL 1)) + set_property(TARGET sdrbase PROPERTY CUDA_ARCHITECTURES 60 70 75 80 86) + add_compile_definitions(sdrbase VKFFT_BACKEND=1) + target_compile_options(sdrbase PUBLIC + "$<$:SHELL: + -DVKFFT_BACKEND=1 + -gencode arch=compute_60,code=compute_60 + -gencode arch=compute_70,code=compute_70 + -gencode arch=compute_75,code=compute_75 + -gencode arch=compute_80,code=compute_80 + -gencode arch=compute_86,code=compute_86>") + set_target_properties(sdrbase PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + set_target_properties(sdrbase PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) + find_library(CUDA_NVRTC_LIB libnvrtc nvrtc HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBNVRTC_LIBRARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64 REQUIRED) + find_library(CUDA_LIB cuda HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBNVRTC_LIBRARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64 REQUIRED) + target_link_libraries(sdrbase ${CUDA_LIBRARIES} ${CUDA_LIB} ${CUDA_NVRTC_LIB}) + target_include_directories(sdrbase PUBLIC ${CUDA_INCLUDE_DIRS}) +endif() + target_link_libraries(sdrbase ${OPUS_LIBRARIES} ${sdrbase_FFTW3F_LIB} diff --git a/sdrbase/dsp/cudavkfftengine.cpp b/sdrbase/dsp/cudavkfftengine.cpp new file mode 100644 index 000000000..6a38b340a --- /dev/null +++ b/sdrbase/dsp/cudavkfftengine.cpp @@ -0,0 +1,154 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#include + +#include "dsp/cudavkfftengine.h" + +CUDAvkFFTEngine::CUDAvkFFTEngine() +{ + VkFFTResult resFFT; + resFFT = gpuInit(); + if (resFFT != VKFFT_SUCCESS) + { + qDebug() << "CUDAvkFFTEngine::CUDAvkFFTEngine: Failed to initialise GPU" << getVkFFTErrorString(resFFT); + delete vkGPU; + vkGPU = nullptr; + } +} + +CUDAvkFFTEngine::~CUDAvkFFTEngine() +{ + if (vkGPU) + { + freeAll(); + cuCtxDestroy(vkGPU->context); + } +} + +const QString CUDAvkFFTEngine::m_name = "vkFFT (CUDA)"; + +QString CUDAvkFFTEngine::getName() const +{ + return m_name; +} + +VkFFTResult CUDAvkFFTEngine::gpuInit() +{ + CUresult res = CUDA_SUCCESS; + cudaError_t res2 = cudaSuccess; + res = cuInit(0); + if (res != CUDA_SUCCESS) { + return VKFFT_ERROR_FAILED_TO_INITIALIZE; + } + res2 = cudaSetDevice((int)vkGPU->device_id); + if (res2 != cudaSuccess) { + return VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID; + } + res = cuDeviceGet(&vkGPU->device, (int)vkGPU->device_id); + if (res != CUDA_SUCCESS) { + return VKFFT_ERROR_FAILED_TO_GET_DEVICE; + } + res = cuDevicePrimaryCtxRetain(&vkGPU->context, (int)vkGPU->device); + if (res != CUDA_SUCCESS) { + return VKFFT_ERROR_FAILED_TO_CREATE_CONTEXT; + } + return VKFFT_SUCCESS; +} + +VkFFTResult CUDAvkFFTEngine::gpuAllocateBuffers() +{ + cudaError_t res; + CUDAPlan *plan = reinterpret_cast(m_currentPlan); + + // Allocate DMA accessible pinned memory, which may be faster than malloc'ed memory + res = cudaHostAlloc(&plan->m_in, sizeof(Complex) * plan->n, cudaHostAllocMapped); + if (res != cudaSuccess) { + return VKFFT_ERROR_FAILED_TO_ALLOCATE; + } + res = cudaHostAlloc(&plan->m_out, sizeof(Complex) * plan->n, cudaHostAllocMapped); + if (res != cudaSuccess) { + return VKFFT_ERROR_FAILED_TO_ALLOCATE; + } + + // Allocate GPU memory + res = cudaMalloc((void**)&plan->m_buffer, sizeof(cuFloatComplex) * plan->n * 2); + if (res != cudaSuccess) { + return VKFFT_ERROR_FAILED_TO_ALLOCATE; + } + + plan->m_configuration->buffer = (void**)&plan->m_buffer; + + return VKFFT_SUCCESS; +} + +VkFFTResult CUDAvkFFTEngine::gpuConfigure() +{ + return VKFFT_SUCCESS; +} + +void CUDAvkFFTEngine::transform() +{ + if (m_currentPlan) + { + CUDAPlan *plan = reinterpret_cast(m_currentPlan); + cudaError_t res = cudaSuccess; + void* buffer = ((void**)&plan->m_buffer)[0]; + + // Transfer input from CPU to GPU memory + PROFILER_START() + res = cudaMemcpy(buffer, plan->m_in, plan->m_bufferSize, cudaMemcpyHostToDevice); + PROFILER_STOP(QString("%1 TX %2").arg(getName()).arg(m_currentPlan->n)) + if (res != cudaSuccess) { + qDebug() << "CUDAvkFFTEngine::transform: cudaMemcpy host to device failed"; + } + + // Perform FFT + PROFILER_RESTART() + VkFFTLaunchParams launchParams = {}; + VkFFTResult resFFT = VkFFTAppend(plan->m_app, plan->m_inverse ? 1 : -1, &launchParams); + PROFILER_STOP(QString("%1 FFT %2").arg(getName()).arg(m_currentPlan->n)) + if (resFFT != VKFFT_SUCCESS) { + qDebug() << "CUDAvkFFTEngine::transform: VkFFTAppend failed:" << getVkFFTErrorString(resFFT); + } + + // Transfer result from GPU to CPU memory + PROFILER_RESTART() + res = cudaMemcpy(plan->m_out, buffer, plan->m_bufferSize, cudaMemcpyDeviceToHost); + PROFILER_STOP(QString("%1 RX %2").arg(getName()).arg(m_currentPlan->n)) + if (res != cudaSuccess) { + qDebug() << "CUDAvkFFTEngine::transform: cudaMemcpy device to host failed"; + } + } + +} + +vkFFTEngine::Plan *CUDAvkFFTEngine::gpuAllocatePlan() +{ + return new CUDAPlan(); +} + +void CUDAvkFFTEngine::gpuDeallocatePlan(Plan *p) +{ + CUDAPlan *plan = reinterpret_cast(p); + + cudaFree(plan->m_in); + plan->m_in = nullptr; + cudaFree(plan->m_out); + plan->m_out = nullptr; + cudaFree(plan->m_buffer); +} diff --git a/sdrbase/dsp/cudavkfftengine.h b/sdrbase/dsp/cudavkfftengine.h new file mode 100644 index 000000000..d89158fee --- /dev/null +++ b/sdrbase/dsp/cudavkfftengine.h @@ -0,0 +1,52 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef INCLUDE_CUDAVKFFTENGINE_H +#define INCLUDE_CUDAVKFFTENGINE_H + +#include "vkfftengine.h" + +#include +#include +#include +#include +#include + +class SDRBASE_API CUDAvkFFTEngine : public vkFFTEngine { +public: + CUDAvkFFTEngine(); + virtual ~CUDAvkFFTEngine(); + + void transform() override; + QString getName() const override; + static const QString m_name; + +protected: + + struct CUDAPlan : Plan { + cuFloatComplex* m_buffer; // GPU memory + }; + + VkFFTResult gpuInit() override; + VkFFTResult gpuAllocateBuffers() override; + VkFFTResult gpuConfigure() override; + Plan *gpuAllocatePlan() override; + void gpuDeallocatePlan(Plan *) override; + +}; + +#endif // INCLUDE_CUDAVKFFTENGINE_H diff --git a/sdrbase/dsp/fftengine.cpp b/sdrbase/dsp/fftengine.cpp index 9c7821818..d548d8136 100644 --- a/sdrbase/dsp/fftengine.cpp +++ b/sdrbase/dsp/fftengine.cpp @@ -1,26 +1,110 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2015-2020 Edouard Griffiths, F4EXB // +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#include + #include "dsp/fftengine.h" #ifdef USE_KISSFFT #include "dsp/kissengine.h" #endif #ifdef USE_FFTW #include "dsp/fftwengine.h" -#endif // USE_FFTW +#endif +#if VKFFT_BACKEND==0 +#include "dsp/vulkanvkfftengine.h" +#elif VKFFT_BACKEND==1 +#include "dsp/cudavkfftengine.h" +#endif + +QStringList FFTEngine::m_allAvailableEngines; FFTEngine::~FFTEngine() { } -FFTEngine* FFTEngine::create(const QString& fftWisdomFileName) +FFTEngine* FFTEngine::create(const QString& fftWisdomFileName, const QString& preferredEngine) { -#ifdef USE_FFTW - qDebug("FFTEngine::create: using FFTW engine"); - return new FFTWEngine(fftWisdomFileName); -#elif USE_KISSFFT - qDebug("FFTEngine::create: using KissFFT engine"); - (void) fftWisdomFileName; - return new KissEngine; -#else // USE_KISSFFT - qCritical("FFTEngine::create: no engine built"); - return 0; + QStringList allNames = getAllNames(); + QString engine; + + if (allNames.size() == 0) + { + // No engines available + qCritical("FFTEngine::create: no engine built"); + return nullptr; + } + else if (!preferredEngine.isEmpty() && allNames.contains(preferredEngine)) + { + // Use the preferred engine + engine = preferredEngine; + } + else + { + // Use first available + engine = allNames[0]; + } + + qDebug("FFTEngine::create: using %s engine", qPrintable(engine)); + +#if VKFFT_BACKEND==0 + if (engine == VulkanvkFFTEngine::m_name) { + return new VulkanvkFFTEngine(); + } #endif +#if VKFFT_BACKEND==1 + if (engine == CUDAvkFFTEngine::m_name) { + return new CUDAvkFFTEngine(); + } +#endif +#ifdef USE_FFTW + if (engine == FFTWEngine::m_name) { + return new FFTWEngine(fftWisdomFileName); + } +#endif +#ifdef USE_KISSFFT + if (engine == KissEngine::m_name) { + return new KissEngine; + } +#endif + return nullptr; +} + +QStringList FFTEngine::getAllNames() +{ + if (m_allAvailableEngines.size() == 0) + { +#ifdef USE_FFTW + m_allAvailableEngines.append(FFTWEngine::m_name); +#endif +#ifdef USE_KISSFFT + m_allAvailableEngines.append(KissEngine::m_name); +#endif +#if VKFFT_BACKEND==0 + VulkanvkFFTEngine vulkanvkFFT; + if (vulkanvkFFT.isAvailable()) { + m_allAvailableEngines.append(vulkanvkFFT.getName()); + } +#elif VKFFT_BACKEND==1 + CUDAvkFFTEngine cudavkFFT; + if (cudavkFFT.isAvailable()) { + m_allAvailableEngines.append(cudavkFFT.getName()); + } +#endif + } + return m_allAvailableEngines; } diff --git a/sdrbase/dsp/fftengine.h b/sdrbase/dsp/fftengine.h index 320eb3c84..8d1a0a53c 100644 --- a/sdrbase/dsp/fftengine.h +++ b/sdrbase/dsp/fftengine.h @@ -1,9 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2015-2020 Edouard Griffiths, F4EXB // +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + #ifndef INCLUDE_FFTENGINE_H #define INCLUDE_FFTENGINE_H #include #include "dsp/dsptypes.h" +#include "util/profiler.h" #include "export.h" class SDRBASE_API FFTEngine { @@ -18,7 +37,16 @@ public: virtual void setReuse(bool reuse) = 0; - static FFTEngine* create(const QString& fftWisdomFileName); + static FFTEngine* create(const QString& fftWisdomFileName, const QString& preferredEngine=""); + + virtual bool isAvailable() { return true; } // Is this FFT engine available to be used? + virtual QString getName() const = 0; // Get the name of this FFT engine + + static QStringList getAllNames(); // Get names of all available FFT engines + +private: + static QStringList m_allAvailableEngines; + }; #endif // INCLUDE_FFTENGINE_H diff --git a/sdrbase/dsp/fftfactory.cpp b/sdrbase/dsp/fftfactory.cpp index f073abbb5..7b87f1cba 100644 --- a/sdrbase/dsp/fftfactory.cpp +++ b/sdrbase/dsp/fftfactory.cpp @@ -17,6 +17,7 @@ #include #include "fftfactory.h" +#include "maincore.h" FFTFactory::FFTFactory(const QString& fftwWisdomFileName) : m_fftwWisdomFileName(fftwWisdomFileName) @@ -69,12 +70,20 @@ void FFTFactory::preallocate( } } -unsigned int FFTFactory::getEngine(unsigned int fftSize, bool inverse, FFTEngine **engine) +unsigned int FFTFactory::getEngine(unsigned int fftSize, bool inverse, FFTEngine **engine, const QString& preferredEngine) { QMutexLocker mutexLocker(&m_mutex); std::map>& enginesBySize = inverse ? m_invFFTEngineBySize : m_fftEngineBySize; + // If no preferred engine requested, use user preference + QString requestedEngine = preferredEngine; + if (requestedEngine.isEmpty()) + { + const MainSettings& mainSettings = MainCore::instance()->getSettings(); + requestedEngine = mainSettings.getFFTEngine(); + } + if (enginesBySize.find(fftSize) == enginesBySize.end()) { qDebug("FFTFactory::getEngine: new FFT %s size: %u", (inverse ? "inv" : "fwd"), fftSize); @@ -82,7 +91,7 @@ unsigned int FFTFactory::getEngine(unsigned int fftSize, bool inverse, FFTEngine std::vector& engines = enginesBySize[fftSize]; engines.push_back(AllocatedEngine()); engines.back().m_inUse = true; - engines.back().m_engine = FFTEngine::create(m_fftwWisdomFileName); + engines.back().m_engine = FFTEngine::create(m_fftwWisdomFileName, requestedEngine); engines.back().m_engine->setReuse(false); engines.back().m_engine->configure(fftSize, inverse); *engine = engines.back().m_engine; @@ -92,9 +101,10 @@ unsigned int FFTFactory::getEngine(unsigned int fftSize, bool inverse, FFTEngine { unsigned int i = 0; + // Look for existing engine of requested size and type not currently in use for (; i < enginesBySize[fftSize].size(); i++) { - if (!enginesBySize[fftSize][i].m_inUse) { + if (!enginesBySize[fftSize][i].m_inUse && (enginesBySize[fftSize][i].m_engine->getName() == requestedEngine)) { break; } } @@ -112,7 +122,7 @@ unsigned int FFTFactory::getEngine(unsigned int fftSize, bool inverse, FFTEngine qDebug("FFTFactory::getEngine: create engine: %lu FFT %s size: %u", engines.size(), (inverse ? "inv" : "fwd"), fftSize); engines.push_back(AllocatedEngine()); engines.back().m_inUse = true; - engines.back().m_engine = FFTEngine::create(m_fftwWisdomFileName); + engines.back().m_engine = FFTEngine::create(m_fftwWisdomFileName, requestedEngine); engines.back().m_engine->setReuse(false); engines.back().m_engine->configure(fftSize, inverse); *engine = engines.back().m_engine; diff --git a/sdrbase/dsp/fftfactory.h b/sdrbase/dsp/fftfactory.h index 85aa5244e..0cb06c0a0 100644 --- a/sdrbase/dsp/fftfactory.h +++ b/sdrbase/dsp/fftfactory.h @@ -34,7 +34,7 @@ public: ~FFTFactory(); void preallocate(unsigned int minLog2Size, unsigned int maxLog2Size, unsigned int numberFFT, unsigned int numberInvFFT); - unsigned int getEngine(unsigned int fftSize, bool inverse, FFTEngine **engine); //!< returns an engine sequence + unsigned int getEngine(unsigned int fftSize, bool inverse, FFTEngine **engine, const QString& preferredEngine=""); //!< returns an engine sequence void releaseEngine(unsigned int fftSize, bool inverse, unsigned int engineSequence); private: diff --git a/sdrbase/dsp/fftwengine.cpp b/sdrbase/dsp/fftwengine.cpp index ba9bb8d5c..8b97e0c3a 100644 --- a/sdrbase/dsp/fftwengine.cpp +++ b/sdrbase/dsp/fftwengine.cpp @@ -15,7 +15,8 @@ // along with this program. If not, see . // /////////////////////////////////////////////////////////////////////////////////// -#include +#include + #include "dsp/fftwengine.h" FFTWEngine::FFTWEngine(const QString& fftWisdomFileName) : @@ -31,6 +32,13 @@ FFTWEngine::~FFTWEngine() freeAll(); } +const QString FFTWEngine::m_name = "FFTW"; + +QString FFTWEngine::getName() const +{ + return m_name; +} + void FFTWEngine::configure(int n, bool inverse) { if (m_reuse) @@ -78,8 +86,12 @@ void FFTWEngine::configure(int n, bool inverse) void FFTWEngine::transform() { + PROFILER_START() + if(m_currentPlan != NULL) fftwf_execute(m_currentPlan->plan); + + PROFILER_STOP(QString("%1 %2").arg(getName()).arg(m_currentPlan->n)) } Complex* FFTWEngine::in() diff --git a/sdrbase/dsp/fftwengine.h b/sdrbase/dsp/fftwengine.h index a8a16fea3..221cde3ae 100644 --- a/sdrbase/dsp/fftwengine.h +++ b/sdrbase/dsp/fftwengine.h @@ -38,6 +38,8 @@ public: virtual Complex* out(); virtual void setReuse(bool reuse) { m_reuse = reuse; } + QString getName() const override; + static const QString m_name; protected: static QMutex m_globalPlanMutex; diff --git a/sdrbase/dsp/kissengine.cpp b/sdrbase/dsp/kissengine.cpp index 6491a6fea..9bb896fa0 100644 --- a/sdrbase/dsp/kissengine.cpp +++ b/sdrbase/dsp/kissengine.cpp @@ -1,5 +1,12 @@ #include "dsp/kissengine.h" +const QString KissEngine::m_name = "Kiss"; + +QString KissEngine::getName() const +{ + return m_name; +} + void KissEngine::configure(int n, bool inverse) { m_fft.configure(n, inverse); @@ -11,7 +18,11 @@ void KissEngine::configure(int n, bool inverse) void KissEngine::transform() { + PROFILER_START() + m_fft.transform(&m_in[0], &m_out[0]); + + PROFILER_STOP(QString("%1 %2").arg(getName()).arg(m_out.size())) } Complex* KissEngine::in() @@ -27,4 +38,4 @@ Complex* KissEngine::out() void KissEngine::setReuse(bool reuse) { (void) reuse; -} \ No newline at end of file +} diff --git a/sdrbase/dsp/kissengine.h b/sdrbase/dsp/kissengine.h index 418abc569..96f4f8f7c 100644 --- a/sdrbase/dsp/kissengine.h +++ b/sdrbase/dsp/kissengine.h @@ -14,6 +14,8 @@ public: virtual Complex* out(); virtual void setReuse(bool reuse); + QString getName() const override; + static const QString m_name; protected: typedef kissfft KissFFT; diff --git a/sdrbase/dsp/vkfftengine.cpp b/sdrbase/dsp/vkfftengine.cpp new file mode 100644 index 000000000..267215b73 --- /dev/null +++ b/sdrbase/dsp/vkfftengine.cpp @@ -0,0 +1,170 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#include + +#include "dsp/vkfftengine.h" + +QMutex vkFFTEngine::m_globalPlanMutex; + +vkFFTEngine::vkFFTEngine() : + m_currentPlan(nullptr), + m_reuse(true) +{ + vkGPU = new VkGPU(); + memset(vkGPU, sizeof(VkGPU), 0); + vkGPU->device_id = 0; // Could be set in GUI to support multiple GPUs +} + +vkFFTEngine::~vkFFTEngine() +{ +} + +bool vkFFTEngine::isAvailable() +{ + return vkGPU != nullptr; +} + +void vkFFTEngine::configure(int n, bool inverse) +{ + if (m_reuse) + { + for (const auto plan : m_plans) + { + if ((plan->n == n) && (plan->m_inverse == inverse)) + { + m_currentPlan = plan; + return; + } + } + } + + m_currentPlan = gpuAllocatePlan(); + m_currentPlan->n = n; + + QElapsedTimer t; + t.start(); + m_globalPlanMutex.lock(); + + VkFFTResult resFFT; + + // Allocate and intialise plan + m_currentPlan->m_configuration = new VkFFTConfiguration(); + memset(m_currentPlan->m_configuration, sizeof(VkFFTConfiguration), 0); + m_currentPlan->m_app = new VkFFTApplication(); + memset(m_currentPlan->m_app, sizeof(VkFFTApplication), 0); + m_currentPlan->m_configuration->FFTdim = 1; + m_currentPlan->m_configuration->size[0] = n; + m_currentPlan->m_configuration->size[1] = 1; + m_currentPlan->m_configuration->size[2] = 1; + m_currentPlan->m_configuration->numberBatches = 1; + m_currentPlan->m_configuration->performR2C = false; + m_currentPlan->m_configuration->performDCT = false; + m_currentPlan->m_configuration->doublePrecision = false; + m_currentPlan->m_configuration->halfPrecision = false; + m_currentPlan->m_bufferSize = sizeof(float) * 2 * n; + m_currentPlan->m_inverse = inverse; + + m_currentPlan->m_configuration->device = &vkGPU->device; +#if(VKFFT_BACKEND==0) + m_currentPlan->m_configuration->queue = &vkGPU->queue; + m_currentPlan->m_configuration->fence = &vkGPU->fence; + m_currentPlan->m_configuration->commandPool = &vkGPU->commandPool; + m_currentPlan->m_configuration->physicalDevice = &vkGPU->physicalDevice; + m_currentPlan->m_configuration->isCompilerInitialized = true; +#endif + m_currentPlan->m_configuration->bufferSize = &m_currentPlan->m_bufferSize; + + resFFT = gpuAllocateBuffers(); + if (resFFT != VKFFT_SUCCESS) + { + qDebug() << "vkFFTEngine::configure: gpuAllocateBuffers failed:" << getVkFFTErrorString(resFFT); + m_globalPlanMutex.unlock(); + delete m_currentPlan; + m_currentPlan = nullptr; + return; + } + + m_currentPlan->m_configuration->bufferSize = &m_currentPlan->m_bufferSize; + + resFFT = initializeVkFFT(m_currentPlan->m_app, *m_currentPlan->m_configuration); + if (resFFT != VKFFT_SUCCESS) + { + qDebug() << "vkFFTEngine::configure: initializeVkFFT failed:" << getVkFFTErrorString(resFFT); + m_globalPlanMutex.unlock(); + delete m_currentPlan; + m_currentPlan = nullptr; + return; + } + + resFFT = gpuConfigure(); + if (resFFT != VKFFT_SUCCESS) + { + qDebug() << "vkFFTEngine::configure: gpuConfigure failed:" << getVkFFTErrorString(resFFT); + m_globalPlanMutex.unlock(); + delete m_currentPlan; + m_currentPlan = nullptr; + return; + } + + m_globalPlanMutex.unlock(); + + qDebug("FFT: creating vkFFT plan (n=%d,%s) took %lld ms", n, inverse ? "inverse" : "forward", t.elapsed()); + m_plans.push_back(m_currentPlan); +} + +Complex* vkFFTEngine::in() +{ + if (m_currentPlan != nullptr) { + return m_currentPlan->m_in; + } else { + return nullptr; + } +} + +Complex* vkFFTEngine::out() +{ + if (m_currentPlan != nullptr) { + return m_currentPlan->m_out; + } else { + return nullptr; + } +} + +void vkFFTEngine::freeAll() +{ + for (auto plan : m_plans) + { + gpuDeallocatePlan(plan); + delete plan; + } + m_plans.clear(); +} + +vkFFTEngine::Plan::Plan() : + m_configuration(nullptr), + m_app(nullptr) +{ +} + +vkFFTEngine::Plan::~Plan() +{ + if (m_app) { + deleteVkFFT(m_app); + } + delete m_configuration; +} diff --git a/sdrbase/dsp/vkfftengine.h b/sdrbase/dsp/vkfftengine.h new file mode 100644 index 000000000..ee11caa66 --- /dev/null +++ b/sdrbase/dsp/vkfftengine.h @@ -0,0 +1,71 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef INCLUDE_VKFFTENGINE_H +#define INCLUDE_VKFFTENGINE_H + +#include + +#include + +#include "dsp/fftengine.h" +#include "dsp/vkfftutils.h" +#include "export.h" + +class SDRBASE_API vkFFTEngine : public FFTEngine { +public: + vkFFTEngine(); + virtual ~vkFFTEngine(); + + virtual void configure(int n, bool inverse); + + virtual Complex* in(); + virtual Complex* out(); + + virtual void setReuse(bool reuse) { m_reuse = reuse; } + bool isAvailable() override; + +protected: + static QMutex m_globalPlanMutex; + + struct Plan { + Plan(); + virtual ~Plan(); + int n; + uint64_t m_bufferSize; + bool m_inverse; + VkFFTConfiguration* m_configuration; + VkFFTApplication* m_app; + Complex* m_in; // CPU memory + Complex* m_out; + }; + QList m_plans; + Plan* m_currentPlan; + bool m_reuse; + + VkGPU *vkGPU; + + virtual VkFFTResult gpuInit() = 0; + virtual VkFFTResult gpuAllocateBuffers() = 0; + virtual VkFFTResult gpuConfigure() = 0; + virtual Plan *gpuAllocatePlan() = 0; + virtual void gpuDeallocatePlan(Plan *plan) = 0; + + void freeAll(); +}; + +#endif // INCLUDE_VKFFTENGINE_H diff --git a/sdrbase/dsp/vkfftutils.cpp b/sdrbase/dsp/vkfftutils.cpp new file mode 100644 index 000000000..440a327c1 --- /dev/null +++ b/sdrbase/dsp/vkfftutils.cpp @@ -0,0 +1,327 @@ +// Selected code from https://github.com/DTolm/VkFFT/blob/master/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp +// Formatting kept the same as source, to allow easier future merges + +#include "vkfftutils.h" + +#if(VKFFT_BACKEND==0) +#include "vulkan/vulkan.h" +#include "glslang_c_interface.h" +#endif + + +#if(VKFFT_BACKEND==0) + +VkResult CreateDebugUtilsMessengerEXT(VkGPU* vkGPU, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger) { + //pointer to the function, as it is not part of the core. Function creates debugging messenger + PFN_vkCreateDebugUtilsMessengerEXT func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(vkGPU->instance, "vkCreateDebugUtilsMessengerEXT"); + if (func != NULL) { + return func(vkGPU->instance, pCreateInfo, pAllocator, pDebugMessenger); + } + else { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } +} +void DestroyDebugUtilsMessengerEXT(VkGPU* vkGPU, const VkAllocationCallbacks* pAllocator) { + //pointer to the function, as it is not part of the core. Function destroys debugging messenger + PFN_vkDestroyDebugUtilsMessengerEXT func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(vkGPU->instance, "vkDestroyDebugUtilsMessengerEXT"); + if (func != NULL) { + func(vkGPU->instance, vkGPU->debugMessenger, pAllocator); + } +} + +static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData) { + printf("validation layer: %s\n", pCallbackData->pMessage); + return VK_FALSE; +} + +VkResult setupDebugMessenger(VkGPU* vkGPU) { + //function that sets up the debugging messenger + if (vkGPU->enableValidationLayers == 0) return VK_SUCCESS; + + VkDebugUtilsMessengerCreateInfoEXT createInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT }; + createInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + createInfo.pfnUserCallback = debugCallback; + + if (CreateDebugUtilsMessengerEXT(vkGPU, &createInfo, NULL, &vkGPU->debugMessenger) != VK_SUCCESS) { + return VK_ERROR_INITIALIZATION_FAILED; + } + return VK_SUCCESS; +} + +VkResult checkValidationLayerSupport() { + //check if validation layers are supported when an instance is created + uint32_t layerCount; + vkEnumerateInstanceLayerProperties(&layerCount, NULL); + + VkLayerProperties* availableLayers = (VkLayerProperties*)malloc(sizeof(VkLayerProperties) * layerCount); + if (!availableLayers) return VK_INCOMPLETE; + vkEnumerateInstanceLayerProperties(&layerCount, availableLayers); + if (availableLayers) { + for (uint64_t i = 0; i < layerCount; i++) { + if (strcmp("VK_LAYER_KHRONOS_validation", availableLayers[i].layerName) == 0) { + free(availableLayers); + return VK_SUCCESS; + } + } + free(availableLayers); + } + else { + return VK_INCOMPLETE; + } + return VK_ERROR_LAYER_NOT_PRESENT; +} + +std::vector getRequiredExtensions(VkGPU* vkGPU, uint64_t sample_id) { + std::vector extensions; + + if (vkGPU->enableValidationLayers) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + switch (sample_id) { +#if (VK_API_VERSION>10) + case 2: case 102: + extensions.push_back("VK_KHR_get_physical_device_properties2"); + break; +#endif + default: + break; + } + + + return extensions; +} + +VkResult createInstance(VkGPU* vkGPU, uint64_t sample_id) { + //create instance - a connection between the application and the Vulkan library + VkResult res = VK_SUCCESS; + //check if validation layers are supported + if (vkGPU->enableValidationLayers == 1) { + res = checkValidationLayerSupport(); + if (res != VK_SUCCESS) return res; + } + + VkApplicationInfo applicationInfo = { VK_STRUCTURE_TYPE_APPLICATION_INFO }; + applicationInfo.pApplicationName = "VkFFT"; + applicationInfo.applicationVersion = (uint32_t)VkFFTGetVersion(); + applicationInfo.pEngineName = "VkFFT"; + applicationInfo.engineVersion = 1; +#if (VK_API_VERSION>=12) + applicationInfo.apiVersion = VK_API_VERSION_1_2; +#elif (VK_API_VERSION==11) + applicationInfo.apiVersion = VK_API_VERSION_1_1; +#else + applicationInfo.apiVersion = VK_API_VERSION_1_0; +#endif + + VkInstanceCreateInfo createInfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO }; + createInfo.flags = 0; + createInfo.pApplicationInfo = &applicationInfo; + + auto extensions = getRequiredExtensions(vkGPU, sample_id); + createInfo.enabledExtensionCount = (uint32_t)(extensions.size()); + createInfo.ppEnabledExtensionNames = extensions.data(); + + VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo = { VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT }; + + if (vkGPU->enableValidationLayers) { + //query for the validation layer support in the instance + createInfo.enabledLayerCount = 1; + const char* validationLayers = "VK_LAYER_KHRONOS_validation"; + createInfo.ppEnabledLayerNames = &validationLayers; + debugCreateInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + debugCreateInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + debugCreateInfo.pfnUserCallback = debugCallback; + createInfo.pNext = (VkDebugUtilsMessengerCreateInfoEXT*)&debugCreateInfo; + } + else { + createInfo.enabledLayerCount = 0; + + createInfo.pNext = nullptr; + } + + res = vkCreateInstance(&createInfo, NULL, &vkGPU->instance); + if (res != VK_SUCCESS) { + return res; + + } + return res; +} + +VkResult findPhysicalDevice(VkGPU* vkGPU) { + //check if there are GPUs that support Vulkan and select one + VkResult res = VK_SUCCESS; + uint32_t deviceCount; + res = vkEnumeratePhysicalDevices(vkGPU->instance, &deviceCount, NULL); + if (res != VK_SUCCESS) return res; + if (deviceCount == 0) { + return VK_ERROR_DEVICE_LOST; + } + + VkPhysicalDevice* devices = (VkPhysicalDevice*)malloc(sizeof(VkPhysicalDevice) * deviceCount); + if (!devices) return VK_INCOMPLETE; + res = vkEnumeratePhysicalDevices(vkGPU->instance, &deviceCount, devices); + if (res != VK_SUCCESS) return res; + if (devices) { + vkGPU->physicalDevice = devices[vkGPU->device_id]; + free(devices); + return VK_SUCCESS; + } + else + return VK_INCOMPLETE; +} +VkResult getComputeQueueFamilyIndex(VkGPU* vkGPU) { + //find a queue family for a selected GPU, select the first available for use + uint32_t queueFamilyCount; + vkGetPhysicalDeviceQueueFamilyProperties(vkGPU->physicalDevice, &queueFamilyCount, NULL); + + VkQueueFamilyProperties* queueFamilies = (VkQueueFamilyProperties*)malloc(sizeof(VkQueueFamilyProperties) * queueFamilyCount); + if (!queueFamilies) return VK_INCOMPLETE; + if (queueFamilies) { + vkGetPhysicalDeviceQueueFamilyProperties(vkGPU->physicalDevice, &queueFamilyCount, queueFamilies); + uint64_t i = 0; + for (; i < queueFamilyCount; i++) { + VkQueueFamilyProperties props = queueFamilies[i]; + + if (props.queueCount > 0 && (props.queueFlags & VK_QUEUE_COMPUTE_BIT)) { + break; + } + } + free(queueFamilies); + if (i == queueFamilyCount) { + return VK_ERROR_INITIALIZATION_FAILED; + } + vkGPU->queueFamilyIndex = i; + return VK_SUCCESS; + } + else + return VK_INCOMPLETE; +} + +VkResult createDevice(VkGPU* vkGPU, uint64_t sample_id) { + //create logical device representation + VkResult res = VK_SUCCESS; + VkDeviceQueueCreateInfo queueCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO }; + res = getComputeQueueFamilyIndex(vkGPU); + if (res != VK_SUCCESS) return res; + queueCreateInfo.queueFamilyIndex = (uint32_t)vkGPU->queueFamilyIndex; + queueCreateInfo.queueCount = 1; + float queuePriorities = 1.0; + queueCreateInfo.pQueuePriorities = &queuePriorities; + VkDeviceCreateInfo deviceCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO }; + VkPhysicalDeviceFeatures deviceFeatures = {}; + switch (sample_id) { + case 1: case 12: case 17: case 18: case 101: case 201: case 1001: { + deviceFeatures.shaderFloat64 = true; + deviceCreateInfo.enabledExtensionCount = (uint32_t)vkGPU->enabledDeviceExtensions.size(); + deviceCreateInfo.ppEnabledExtensionNames = vkGPU->enabledDeviceExtensions.data(); + deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; + deviceCreateInfo.queueCreateInfoCount = 1; + deviceCreateInfo.pEnabledFeatures = &deviceFeatures; + res = vkCreateDevice(vkGPU->physicalDevice, &deviceCreateInfo, NULL, &vkGPU->device); + if (res != VK_SUCCESS) return res; + vkGetDeviceQueue(vkGPU->device, (uint32_t)vkGPU->queueFamilyIndex, 0, &vkGPU->queue); + break; + } +#if (VK_API_VERSION>10) + case 2: case 102: { + VkPhysicalDeviceFeatures2 deviceFeatures2 = {}; + VkPhysicalDevice16BitStorageFeatures shaderFloat16 = {}; + shaderFloat16.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; + shaderFloat16.storageBuffer16BitAccess = true; + /*VkPhysicalDeviceShaderFloat16Int8Features shaderFloat16 = {}; + shaderFloat16.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; + shaderFloat16.shaderFloat16 = true; + shaderFloat16.shaderInt8 = true;*/ + deviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + deviceFeatures2.pNext = &shaderFloat16; + deviceFeatures2.features = deviceFeatures; + vkGetPhysicalDeviceFeatures2(vkGPU->physicalDevice, &deviceFeatures2); + deviceCreateInfo.pNext = &deviceFeatures2; + vkGPU->enabledDeviceExtensions.push_back("VK_KHR_16bit_storage"); + deviceCreateInfo.enabledExtensionCount = (uint32_t)vkGPU->enabledDeviceExtensions.size(); + deviceCreateInfo.ppEnabledExtensionNames = vkGPU->enabledDeviceExtensions.data(); + deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; + deviceCreateInfo.queueCreateInfoCount = 1; + deviceCreateInfo.pEnabledFeatures = NULL; + res = vkCreateDevice(vkGPU->physicalDevice, &deviceCreateInfo, NULL, &vkGPU->device); + if (res != VK_SUCCESS) return res; + vkGetDeviceQueue(vkGPU->device, (uint32_t)vkGPU->queueFamilyIndex, 0, &vkGPU->queue); + break; + } +#endif + default: { + deviceCreateInfo.enabledExtensionCount = (uint32_t)vkGPU->enabledDeviceExtensions.size(); + deviceCreateInfo.ppEnabledExtensionNames = vkGPU->enabledDeviceExtensions.data(); + deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; + deviceCreateInfo.queueCreateInfoCount = 1; + deviceCreateInfo.pEnabledFeatures = NULL; + deviceCreateInfo.pEnabledFeatures = &deviceFeatures; + res = vkCreateDevice(vkGPU->physicalDevice, &deviceCreateInfo, NULL, &vkGPU->device); + if (res != VK_SUCCESS) return res; + vkGetDeviceQueue(vkGPU->device, (uint32_t)vkGPU->queueFamilyIndex, 0, &vkGPU->queue); + break; + } + } + return res; +} +VkResult createFence(VkGPU* vkGPU) { + //create fence for synchronization + VkResult res = VK_SUCCESS; + VkFenceCreateInfo fenceCreateInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO }; + fenceCreateInfo.flags = 0; + res = vkCreateFence(vkGPU->device, &fenceCreateInfo, NULL, &vkGPU->fence); + return res; +} +VkResult createCommandPool(VkGPU* vkGPU) { + //create a place, command buffer memory is allocated from + VkResult res = VK_SUCCESS; + VkCommandPoolCreateInfo commandPoolCreateInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; + commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + commandPoolCreateInfo.queueFamilyIndex = (uint32_t)vkGPU->queueFamilyIndex; + res = vkCreateCommandPool(vkGPU->device, &commandPoolCreateInfo, NULL, &vkGPU->commandPool); + return res; +} + +VkFFTResult findMemoryType(VkGPU* vkGPU, uint64_t memoryTypeBits, uint64_t memorySize, VkMemoryPropertyFlags properties, uint32_t* memoryTypeIndex) { + VkPhysicalDeviceMemoryProperties memoryProperties = { 0 }; + + vkGetPhysicalDeviceMemoryProperties(vkGPU->physicalDevice, &memoryProperties); + + for (uint64_t i = 0; i < memoryProperties.memoryTypeCount; ++i) { + if ((memoryTypeBits & ((uint64_t)1 << i)) && ((memoryProperties.memoryTypes[i].propertyFlags & properties) == properties) && (memoryProperties.memoryHeaps[memoryProperties.memoryTypes[i].heapIndex].size >= memorySize)) + { + memoryTypeIndex[0] = (uint32_t)i; + return VKFFT_SUCCESS; + } + } + return VKFFT_ERROR_FAILED_TO_FIND_MEMORY; +} + +VkFFTResult allocateBuffer(VkGPU* vkGPU, VkBuffer* buffer, VkDeviceMemory* deviceMemory, VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags propertyFlags, uint64_t size) { + //allocate the buffer used by the GPU with specified properties + VkFFTResult resFFT = VKFFT_SUCCESS; + VkResult res = VK_SUCCESS; + uint32_t queueFamilyIndices; + VkBufferCreateInfo bufferCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + bufferCreateInfo.queueFamilyIndexCount = 1; + bufferCreateInfo.pQueueFamilyIndices = &queueFamilyIndices; + bufferCreateInfo.size = size; + bufferCreateInfo.usage = usageFlags; + res = vkCreateBuffer(vkGPU->device, &bufferCreateInfo, NULL, buffer); + if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_BUFFER; + VkMemoryRequirements memoryRequirements = { 0 }; + vkGetBufferMemoryRequirements(vkGPU->device, buffer[0], &memoryRequirements); + VkMemoryAllocateInfo memoryAllocateInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + memoryAllocateInfo.allocationSize = memoryRequirements.size; + resFFT = findMemoryType(vkGPU, memoryRequirements.memoryTypeBits, memoryRequirements.size, propertyFlags, &memoryAllocateInfo.memoryTypeIndex); + if (resFFT != VKFFT_SUCCESS) return resFFT; + res = vkAllocateMemory(vkGPU->device, &memoryAllocateInfo, NULL, deviceMemory); + if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE_MEMORY; + res = vkBindBufferMemory(vkGPU->device, buffer[0], deviceMemory[0], 0); + if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_BIND_BUFFER_MEMORY; + return resFFT; +} +#endif + diff --git a/sdrbase/dsp/vkfftutils.h b/sdrbase/dsp/vkfftutils.h new file mode 100644 index 000000000..111ec32c5 --- /dev/null +++ b/sdrbase/dsp/vkfftutils.h @@ -0,0 +1,47 @@ +// Selected code from https://github.com/DTolm/VkFFT/blob/master/benchmark_scripts/vkFFT_scripts/include/utils_VkFFT.h + +#ifndef VKFFT_UTILS_H +#define VKFFT_UTILS_H + +#include + +#include + +typedef struct { +#if(VKFFT_BACKEND==0) + VkInstance instance; + VkPhysicalDevice physicalDevice; + VkPhysicalDeviceProperties physicalDeviceProperties; + VkPhysicalDeviceMemoryProperties physicalDeviceMemoryProperties; + VkDevice device; + VkDebugUtilsMessengerEXT debugMessenger; + uint64_t queueFamilyIndex; + VkQueue queue; + VkCommandPool commandPool; + VkFence fence; + std::vector enabledDeviceExtensions; + uint64_t enableValidationLayers; +#elif(VKFFT_BACKEND==1) + CUdevice device; + CUcontext context; +#endif + uint64_t device_id; +} VkGPU; + +#if(VKFFT_BACKEND==0) +VkResult CreateDebugUtilsMessengerEXT(VkGPU* vkGPU, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger); +void DestroyDebugUtilsMessengerEXT(VkGPU* vkGPU, const VkAllocationCallbacks* pAllocator); +VkResult setupDebugMessenger(VkGPU* vkGPU); +VkResult checkValidationLayerSupport(); +std::vector getRequiredExtensions(VkGPU* vkGPU, uint64_t sample_id); +VkResult createInstance(VkGPU* vkGPU, uint64_t sample_id); +VkResult findPhysicalDevice(VkGPU* vkGPU); +VkResult getComputeQueueFamilyIndex(VkGPU* vkGPU); +VkResult createDevice(VkGPU* vkGPU, uint64_t sample_id); +VkResult createFence(VkGPU* vkGPU); +VkResult createCommandPool(VkGPU* vkGPU); +VkFFTResult findMemoryType(VkGPU* vkGPU, uint64_t memoryTypeBits, uint64_t memorySize, VkMemoryPropertyFlags properties, uint32_t* memoryTypeIndex); +VkFFTResult allocateBuffer(VkGPU* vkGPU, VkBuffer* buffer, VkDeviceMemory* deviceMemory, VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags propertyFlags, uint64_t size); +#endif + +#endif // VKFFT_UTILS_H diff --git a/sdrbase/dsp/vulkanvkfftengine.cpp b/sdrbase/dsp/vulkanvkfftengine.cpp new file mode 100644 index 000000000..09aff1bc1 --- /dev/null +++ b/sdrbase/dsp/vulkanvkfftengine.cpp @@ -0,0 +1,344 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#include + +#include "glslang_c_interface.h" + +#include "dsp/vulkanvkfftengine.h" + +class GLSInitialiser { +public: + GLSInitialiser() { + glslang_initialize_process(); + }; + + ~GLSInitialiser() { + glslang_finalize_process(); + } +}; + +static GLSInitialiser glsInitialiser; + +VulkanvkFFTEngine::VulkanvkFFTEngine() +{ + VkFFTResult resFFT; + resFFT = gpuInit(); + if (resFFT != VKFFT_SUCCESS) + { + qDebug() << "VulkanvkFFTEngine::VulkanvkFFTEngine: Failed to initialise GPU:" << getVkFFTErrorString(resFFT); + delete vkGPU; + vkGPU = nullptr; + } +} + +VulkanvkFFTEngine::~VulkanvkFFTEngine() +{ + if (vkGPU) + { + freeAll(); + vkDestroyFence(vkGPU->device, vkGPU->fence, nullptr); + vkDestroyCommandPool(vkGPU->device, vkGPU->commandPool, nullptr); + vkDestroyDevice(vkGPU->device, nullptr); + DestroyDebugUtilsMessengerEXT(vkGPU, nullptr); + vkDestroyInstance(vkGPU->instance, nullptr); + } +} + +const QString VulkanvkFFTEngine::m_name = "vkFFT (Vulkan)"; + +QString VulkanvkFFTEngine::getName() const +{ + return m_name; +} + +VkFFTResult VulkanvkFFTEngine::gpuInit() +{ + VkResult res = VK_SUCCESS; + + // To enable validation on Windows: + // set VK_LAYER_PATH=%VULKAN_SDK%\Bin + // set VK_INSTANCE_LAYERS=VK_LAYER_LUNARG_api_dump;VK_LAYER_KHRONOS_validation + // https://vulkan.lunarg.com/doc/view/1.3.204.1/windows/layer_configuration.html + // Create vk_layer_settings.txt in working dir + // Or run vkconfig to do so + + // Create instance - a connection between the application and the Vulkan library + res = createInstance(vkGPU, 0); + if (res != 0) { + return VKFFT_ERROR_FAILED_TO_CREATE_INSTANCE; + } + // Set up the debugging messenger + res = setupDebugMessenger(vkGPU); + if (res != 0) { + return VKFFT_ERROR_FAILED_TO_SETUP_DEBUG_MESSENGER; + } + // Check if there are GPUs that support Vulkan and select one + res = findPhysicalDevice(vkGPU); + if (res != 0) { + return VKFFT_ERROR_FAILED_TO_FIND_PHYSICAL_DEVICE; + } + // Create logical device representation + res = createDevice(vkGPU, 0); + if (res != 0) { + return VKFFT_ERROR_FAILED_TO_CREATE_DEVICE; + } + // Create fence for synchronization + res = createFence(vkGPU); + if (res != 0) { + return VKFFT_ERROR_FAILED_TO_CREATE_FENCE; + } + // Create a place, command buffer memory is allocated from + res = createCommandPool(vkGPU); + if (res != 0) { + return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_POOL; + } + vkGetPhysicalDeviceProperties(vkGPU->physicalDevice, &vkGPU->physicalDeviceProperties); + vkGetPhysicalDeviceMemoryProperties(vkGPU->physicalDevice, &vkGPU->physicalDeviceMemoryProperties); + + return VKFFT_SUCCESS; +} + +VkFFTResult VulkanvkFFTEngine::gpuAllocateBuffers() +{ + VkFFTResult resFFT; + VulkanPlan *plan = reinterpret_cast(m_currentPlan); + + // Allocate GPU memory + resFFT = allocateBuffer(vkGPU, + &plan->m_buffer, + &plan->m_bufferDeviceMemory, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + plan->m_bufferSize); + if (resFFT != VKFFT_SUCCESS) { + return resFFT; + } + + // Allocate CPU/GPU memory (Requires m_currentPlan->m_buffer to have been created) + resFFT = vulkanAllocateIn(plan); + if (resFFT != VKFFT_SUCCESS) { + return resFFT; + } + resFFT = vulkanAllocateOut(plan); + if (resFFT != VKFFT_SUCCESS) { + return resFFT; + } + + plan->m_configuration->buffer = &plan->m_buffer; + + return VKFFT_SUCCESS; +} + +VkFFTResult VulkanvkFFTEngine::gpuConfigure() +{ + VkFFTResult resFFT; + VulkanPlan *plan = reinterpret_cast(m_currentPlan); + + // Allocate command buffer with command to perform FFT + resFFT = vulkanAllocateFFTCommand(plan); + if (resFFT != VKFFT_SUCCESS) { + return resFFT; + } + + return VKFFT_SUCCESS; +} + +// Allocate CPU to GPU memory buffer +VkFFTResult VulkanvkFFTEngine::vulkanAllocateIn(VulkanPlan *plan) +{ + VkFFTResult resFFT; + VkResult res = VK_SUCCESS; + VkBuffer* buffer = (VkBuffer*)&plan->m_buffer; + + resFFT = allocateBuffer(vkGPU, &plan->m_inBuffer, &plan->m_inMemory, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, m_currentPlan->m_bufferSize); + if (resFFT != VKFFT_SUCCESS) { + return resFFT; + } + + void* data; + res = vkMapMemory(vkGPU->device, plan->m_inMemory, 0, plan->m_bufferSize, 0, &data); + if (res != VK_SUCCESS) { + return VKFFT_ERROR_FAILED_TO_MAP_MEMORY; + } + plan->m_in = (Complex*) data; + + return VKFFT_SUCCESS; +} + +// Allocate GPU to CPU memory buffer +VkFFTResult VulkanvkFFTEngine::vulkanAllocateOut(VulkanPlan *plan) +{ + VkFFTResult resFFT; + VkResult res; + VkBuffer* buffer = (VkBuffer*)&plan->m_buffer; + + resFFT = allocateBuffer(vkGPU, &plan->m_outBuffer, &plan->m_outMemory, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, m_currentPlan->m_bufferSize); + if (resFFT != VKFFT_SUCCESS) { + return resFFT; + } + + void* data; + res = vkMapMemory(vkGPU->device, plan->m_outMemory, 0, plan->m_bufferSize, 0, &data); + if (res != VK_SUCCESS) { + return VKFFT_ERROR_FAILED_TO_MAP_MEMORY; + } + plan->m_out = (Complex*) data; + + return VKFFT_SUCCESS; +} + +void VulkanvkFFTEngine::vulkanDeallocateIn(VulkanPlan *plan) +{ + vkUnmapMemory(vkGPU->device, plan->m_inMemory); + vkDestroyBuffer(vkGPU->device, plan->m_inBuffer, nullptr); + vkFreeMemory(vkGPU->device, plan->m_inMemory, nullptr); + plan->m_in = nullptr; +} + +void VulkanvkFFTEngine::vulkanDeallocateOut(VulkanPlan *plan) +{ + vkUnmapMemory(vkGPU->device, plan->m_outMemory); + vkDestroyBuffer(vkGPU->device, plan->m_outBuffer, nullptr); + vkFreeMemory(vkGPU->device, plan->m_outMemory, nullptr); + plan->m_out = nullptr; +} + +VkFFTResult VulkanvkFFTEngine::vulkanAllocateFFTCommand(VulkanPlan *plan) +{ + VkFFTResult resFFT; + VkResult res = VK_SUCCESS; + VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; + commandBufferAllocateInfo.commandPool = vkGPU->commandPool; + commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + commandBufferAllocateInfo.commandBufferCount = 1; + res = vkAllocateCommandBuffers(vkGPU->device, &commandBufferAllocateInfo, &plan->m_commandBuffer); + if (res != 0) { + return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS; + } + VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; + commandBufferBeginInfo.flags = 0; + res = vkBeginCommandBuffer(plan->m_commandBuffer, &commandBufferBeginInfo); + if (res != 0) { + return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER; + } + + VkBuffer* buffer = (VkBuffer*)&plan->m_buffer; + + // Copy from CPU to GPU + VkBufferCopy copyRegionIn = { 0 }; + copyRegionIn.srcOffset = 0; + copyRegionIn.dstOffset = 0; + copyRegionIn.size = plan->m_bufferSize; + vkCmdCopyBuffer(plan->m_commandBuffer, plan->m_inBuffer, buffer[0], 1, ©RegionIn); + + // Wait for copy to complete + VkMemoryBarrier memoryBarrierIn = { + VK_STRUCTURE_TYPE_MEMORY_BARRIER, + 0, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + }; + vkCmdPipelineBarrier( + plan->m_commandBuffer, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + 1, + &memoryBarrierIn, + 0, 0, 0, 0); + + // Perform FFT + VkFFTLaunchParams launchParams = {}; + launchParams.commandBuffer = &plan->m_commandBuffer; + resFFT = VkFFTAppend(plan->m_app, plan->m_inverse, &launchParams); + if (resFFT != VKFFT_SUCCESS) { + return resFFT; + } + + // Wait for FFT to complete + VkMemoryBarrier memoryBarrierOut = { + VK_STRUCTURE_TYPE_MEMORY_BARRIER, + 0, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_HOST_READ_BIT, + }; + vkCmdPipelineBarrier( + plan->m_commandBuffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_HOST_BIT, + 0, + 1, + &memoryBarrierIn, + 0, 0, 0, 0); + + // Copy from GPU to CPU + VkBufferCopy copyRegionOut = { 0 }; + copyRegionOut.srcOffset = 0; + copyRegionOut.dstOffset = 0; + copyRegionOut.size = plan->m_bufferSize; + vkCmdCopyBuffer(plan->m_commandBuffer, buffer[0], plan->m_outBuffer, 1, ©RegionOut); + + res = vkEndCommandBuffer(plan->m_commandBuffer); + if (res != 0) { + return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER; + } + return VKFFT_SUCCESS; +} + +void VulkanvkFFTEngine::transform() +{ + PROFILER_START() + + VkResult res = VK_SUCCESS; + VulkanPlan *plan = reinterpret_cast(m_currentPlan); + + VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &plan->m_commandBuffer; + res = vkQueueSubmit(vkGPU->queue, 1, &submitInfo, vkGPU->fence); + if (res != 0) { + qDebug() << "VulkanvkFFTEngine::transform: Failed to submit to queue"; + } + res = vkWaitForFences(vkGPU->device, 1, &vkGPU->fence, VK_TRUE, 100000000000); + if (res != 0) { + qDebug() << "VulkanvkFFTEngine::transform: Failed to wait for fences"; + } + res = vkResetFences(vkGPU->device, 1, &vkGPU->fence); + if (res != 0) { + qDebug() << "VulkanvkFFTEngine::transform: Failed to reset fences"; + } + + PROFILER_STOP(QString("%1 FFT %2").arg(getName()).arg(m_currentPlan->n)) +} + +vkFFTEngine::Plan *VulkanvkFFTEngine::gpuAllocatePlan() +{ + return new VulkanPlan(); +} + +void VulkanvkFFTEngine::gpuDeallocatePlan(Plan *p) +{ + VulkanPlan *plan = reinterpret_cast(p); + + vulkanDeallocateOut(plan); + vulkanDeallocateIn(plan); + + vkFreeCommandBuffers(vkGPU->device, vkGPU->commandPool, 1, &plan->m_commandBuffer); + vkDestroyBuffer(vkGPU->device, plan->m_buffer, nullptr); + vkFreeMemory(vkGPU->device, plan->m_bufferDeviceMemory, nullptr); +} diff --git a/sdrbase/dsp/vulkanvkfftengine.h b/sdrbase/dsp/vulkanvkfftengine.h new file mode 100644 index 000000000..1a620d538 --- /dev/null +++ b/sdrbase/dsp/vulkanvkfftengine.h @@ -0,0 +1,61 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef INCLUDE_VULKANVKFFTENGINE_H +#define INCLUDE_VULKANVKFFTENGINE_H + +#include "vkfftengine.h" + +#include "vulkan/vulkan.h" + +class SDRBASE_API VulkanvkFFTEngine : public vkFFTEngine { +public: + VulkanvkFFTEngine(); + virtual ~VulkanvkFFTEngine(); + + void transform() override; + QString getName() const override; + static const QString m_name; + +protected: + + struct VulkanPlan : Plan { + VkBuffer m_inBuffer; // CPU input memory + VkDeviceMemory m_inMemory; + VkBuffer m_outBuffer; // CPU output memory + VkDeviceMemory m_outMemory; + VkBuffer m_buffer; // GPU memory + VkDeviceMemory m_bufferDeviceMemory; + VkCommandBuffer m_commandBuffer; + }; + + VkFFTResult gpuInit() override; + VkFFTResult gpuAllocateBuffers() override; + VkFFTResult gpuConfigure() override; + + Plan *gpuAllocatePlan() override; + void gpuDeallocatePlan(Plan *) override; + + VkFFTResult vulkanAllocateOut(VulkanPlan *plan); + VkFFTResult vulkanAllocateIn(VulkanPlan *plan); + void vulkanDeallocateOut(VulkanPlan *plan); + void vulkanDeallocateIn(VulkanPlan *plan); + VkFFTResult vulkanAllocateFFTCommand(VulkanPlan *plan); + +}; + +#endif // INCLUDE_VULKANVKFFTENGINE_H diff --git a/sdrbase/settings/mainsettings.h b/sdrbase/settings/mainsettings.h index 4807555d9..a53f6d53d 100644 --- a/sdrbase/settings/mainsettings.h +++ b/sdrbase/settings/mainsettings.h @@ -210,6 +210,13 @@ public: emit preferenceChanged(Preferences::MapSmoothing); } + const QString& getFFTEngine() const { return m_preferences.getFFTEngine(); } + void setFFTEngine(const QString& fftEngine) + { + m_preferences.setFFTEngine(fftEngine); + emit preferenceChanged(Preferences::FFTEngine); + } + signals: void preferenceChanged(int); diff --git a/sdrbase/settings/preferences.cpp b/sdrbase/settings/preferences.cpp index e04f1d9f6..e5e8f6e76 100644 --- a/sdrbase/settings/preferences.cpp +++ b/sdrbase/settings/preferences.cpp @@ -25,6 +25,7 @@ void Preferences::resetToDefaults() m_multisampling = 0; m_mapMultisampling = 0; m_mapSmoothing = true; + m_fftEngine = "FFTW"; } QByteArray Preferences::serialize() const @@ -47,6 +48,7 @@ QByteArray Preferences::serialize() const s.writeBool((int) AutoUpdatePosition, m_autoUpdatePosition); s.writeS32((int) MapMultisampling, m_mapMultisampling); s.writeBool((int) MapSmoothing, m_mapSmoothing); + s.writeString((int) FFTEngine, m_fftEngine); return s.final(); } @@ -105,6 +107,8 @@ bool Preferences::deserialize(const QByteArray& data) d.readS32((int) MapMultisampling, &m_mapMultisampling, 0); d.readBool((int) MapSmoothing, &m_mapSmoothing, true); + d.readString((int) FFTEngine, &m_fftEngine, "FFTW"); + return true; } else diff --git a/sdrbase/settings/preferences.h b/sdrbase/settings/preferences.h index 2c521be8d..8d19f8914 100644 --- a/sdrbase/settings/preferences.h +++ b/sdrbase/settings/preferences.h @@ -25,7 +25,8 @@ public: Multisampling, AutoUpdatePosition, MapMultisampling, - MapSmoothing + MapSmoothing, + FFTEngine }; Preferences(); @@ -87,6 +88,9 @@ public: bool getMapSmoothing() const { return m_mapSmoothing; } void setMapSmoothing(bool smoothing) { m_mapSmoothing = smoothing; } + const QString& getFFTEngine() const { return m_fftEngine; } + void setFFTEngine(const QString& fftEngine) { m_fftEngine = fftEngine; } + protected: QString m_sourceDevice; //!< Identification of the source used in R0 tab (GUI flavor) at startup int m_sourceIndex; //!< Index of the source used in R0 tab (GUI flavor) at startup @@ -109,6 +113,8 @@ protected: int m_multisampling; //!< Number of samples to use for multisampling anti-aliasing for spectrums (typically 0 or 4) int m_mapMultisampling; //!< Number of samples to use for multisampling anti-aliasing for 2D maps (16 gives best text, if not using mapSmoothing) bool m_mapSmoothing; //!< Whether to use smoothing for text boxes on 2D maps + + QString m_fftEngine; //!< FFT Engine (FFTW, Kiss, vkFFT) }; #endif // INCLUDE_PREFERENCES_H diff --git a/sdrbase/util/profiler.cpp b/sdrbase/util/profiler.cpp new file mode 100644 index 000000000..50406b252 --- /dev/null +++ b/sdrbase/util/profiler.cpp @@ -0,0 +1,39 @@ +////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +////////////////////////////////////////////////////////////////////////////////// + +#include "profiler.h" + +QHash GlobalProfileData::m_profileData; +QMutex GlobalProfileData::m_mutex; + +QHash& GlobalProfileData::getProfileData() +{ + m_mutex.lock(); + return m_profileData; +} + +void GlobalProfileData::releaseProfileData() +{ + m_mutex.unlock(); +} + +void GlobalProfileData::resetProfileData() +{ + m_mutex.lock(); + m_profileData.clear(); + m_mutex.unlock(); +} diff --git a/sdrbase/util/profiler.h b/sdrbase/util/profiler.h new file mode 100644 index 000000000..4d99b6b1c --- /dev/null +++ b/sdrbase/util/profiler.h @@ -0,0 +1,123 @@ +////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +////////////////////////////////////////////////////////////////////////////////// + +#ifndef INCLUDE_UTIL_PROFILEDATA_H_ +#define INCLUDE_UTIL_PROFILEDATA_H_ + +#include +#include +#include + +#include + +#include "export.h" + +// Profiler enables runtime collection of profile data (time taken to execute code) +// that can be displayed in the GUI +// +// PROFILER_START() and PROFILER_STOP() macros should be used in the same function: +// void func() { +// PROFILER_START() +// do_something(); +// PROFILER_STOP("slow_code") +// } +// +// The parameters to PROFILER_STOP are: +// name: Name for the code being profiled. Profiles using the same name are averaged. + +#ifdef ENABLE_PROFILER +#define PROFILER_START() \ + QElapsedTimer profileTimer; \ + profileTimer.start(); +#define PROFILER_RESTART() \ + profileTimer.start(); +#define PROFILER_STOP(name) \ + { \ + qint64 timeNanoSec = profileTimer.nsecsElapsed(); \ + QHash& globalData = GlobalProfileData::getProfileData(); \ + if (!globalData.contains(name)) { \ + globalData.insert(name, ProfileData()); \ + } \ + ProfileData& profileData = globalData[name]; \ + profileData.add(timeNanoSec); \ + GlobalProfileData::releaseProfileData(); \ + } +#else +#define PROFILER_START() +#define PROFILER_RESTART() +#define PROFILER_STOP(name) +#endif + +class ProfileData +{ +public: + ProfileData() : + m_numSamples(0), + m_last(0), + m_total(0) + { } + + void reset() + { + m_numSamples = 0; + m_total = 0; + } + + void add(qint64 sample) + { + m_last = sample; + m_total += sample; + m_numSamples++; + } + + double getAverage() const + { + if (m_numSamples > 0) { + return m_total / (double)m_numSamples; + } else { + return nan(""); + } + } + + qint64 getTotal() const { return m_total; } + qint64 getLast() const { return m_last; } + quint64 getNumSamples() const { return m_numSamples; } + +private: + quint64 m_numSamples; + qint64 m_last; + qint64 m_total; +}; + +// Global thread-safe profile data that can be displayed in the GUI +class SDRBASE_API GlobalProfileData +{ +public: + + // Calls to getProfileData must be paired with releaseProfileData + static QHash& getProfileData(); + static void releaseProfileData(); + static void resetProfileData(); + +private: + + static QHash m_profileData; + static QMutex m_mutex; + +}; + +#endif /* INCLUDE_UTIL_PROFILEDATA_H_ */ diff --git a/sdrgui/CMakeLists.txt b/sdrgui/CMakeLists.txt index 4d4155196..8ec2ba10c 100644 --- a/sdrgui/CMakeLists.txt +++ b/sdrgui/CMakeLists.txt @@ -41,6 +41,7 @@ set(sdrgui_SOURCES gui/featureadddialog.cpp gui/featurelayout.cpp gui/featurepresetsdialog.cpp + gui/fftdialog.cpp gui/fftwisdomdialog.cpp gui/flowlayout.cpp gui/framelesswindowresizer.cpp @@ -65,8 +66,10 @@ set(sdrgui_SOURCES gui/logslider.cpp gui/loglabelslider.cpp gui/mypositiondialog.cpp + gui/nanosecondsdelegate.cpp gui/pluginsdialog.cpp gui/presetitem.cpp + gui/profiledialog.cpp gui/rollupcontents.cpp gui/rollupwidget.cpp gui/samplingdevicedialog.cpp @@ -157,6 +160,7 @@ set(sdrgui_HEADERS gui/featureadddialog.h gui/featurelayout.h gui/featurepresetsdialog.h + gui/fftdialog.h gui/fftwisdomdialog.h gui/flowlayout.h gui/framelesswindowresizer.h @@ -181,9 +185,11 @@ set(sdrgui_HEADERS gui/logslider.h gui/loglabelslider.h gui/mypositiondialog.h + gui/nanosecondsdelegate.h gui/physicalunit.h gui/pluginsdialog.h gui/presetitem.h + gui/profiledialog.h gui/qtcompatibility.h gui/rollupcontents.h gui/rollupwidget.h @@ -254,11 +260,13 @@ set(sdrgui_FORMS gui/fmpreemphasisdialog.ui gui/featureadddialog.ui gui/featurepresetsdialog.ui + gui/fftdialog.ui gui/fftwisdomdialog.ui gui/glscopegui.ui gui/glspectrumgui.ui gui/graphicsdialog.ui gui/pluginsdialog.ui + gui/profiledialog.ui gui/audiodialog.ui gui/audioselectdialog.ui gui/samplingdevicecontrol.ui diff --git a/sdrgui/gui/fftdialog.cpp b/sdrgui/gui/fftdialog.cpp new file mode 100644 index 000000000..1b5097725 --- /dev/null +++ b/sdrgui/gui/fftdialog.cpp @@ -0,0 +1,48 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#include "dsp/fftengine.h" + +#include "fftdialog.h" +#include "ui_fftdialog.h" + +FFTDialog::FFTDialog(MainSettings& mainSettings, QWidget* parent) : + QDialog(parent), + ui(new Ui::FFTDialog), + m_mainSettings(mainSettings) +{ + ui->setupUi(this); + + for (const auto& engine: FFTEngine::getAllNames()) { + ui->fftEngine->addItem(engine); + } + int idx = ui->fftEngine->findText(m_mainSettings.getFFTEngine()); + if (idx != -1) { + ui->fftEngine->setCurrentIndex(idx); + } +} + +FFTDialog::~FFTDialog() +{ + delete ui; +} + +void FFTDialog::accept() +{ + m_mainSettings.setFFTEngine(ui->fftEngine->currentText()); + QDialog::accept(); +} diff --git a/sdrgui/gui/fftdialog.h b/sdrgui/gui/fftdialog.h new file mode 100644 index 000000000..6d754ce09 --- /dev/null +++ b/sdrgui/gui/fftdialog.h @@ -0,0 +1,44 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef SDRGUI_GUI_FFTDIALOG_H_ +#define SDRGUI_GUI_FFTDIALOG_H_ + +#include + +#include "settings/mainsettings.h" +#include "export.h" + +namespace Ui { + class FFTDialog; +} + +class SDRGUI_API FFTDialog : public QDialog { + Q_OBJECT +public: + explicit FFTDialog(MainSettings& mainSettings, QWidget* parent = nullptr); + ~FFTDialog(); + +private slots: + void accept(); + +private: + Ui::FFTDialog *ui; + MainSettings& m_mainSettings; +}; + +#endif // SDRGUI_GUI_FFTDIALOG_H_ diff --git a/sdrgui/gui/fftdialog.ui b/sdrgui/gui/fftdialog.ui new file mode 100644 index 000000000..3419dbd40 --- /dev/null +++ b/sdrgui/gui/fftdialog.ui @@ -0,0 +1,104 @@ + + + FFTDialog + + + + 0 + 0 + 298 + 118 + + + + + Liberation Sans + 9 + + + + FFTW Wisdom file generator + + + true + + + + + + + + + 0 + 0 + + + + Path to fftwf-wisdom executable + + + FFT Engine + + + + + + + Select FFT engine. + +Changes only apply to new devices / channels until SDRangel is restarted. + + + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + + + + + + + buttonBox + accepted() + FFTDialog + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + FFTDialog + reject() + + + 316 + 260 + + + 286 + 274 + + + + + diff --git a/sdrgui/gui/glspectrumview.cpp b/sdrgui/gui/glspectrumview.cpp index 04c2eafd2..c5c47c0b2 100644 --- a/sdrgui/gui/glspectrumview.cpp +++ b/sdrgui/gui/glspectrumview.cpp @@ -34,6 +34,7 @@ #include "settings/mainsettings.h" #include "util/messagequeue.h" #include "util/db.h" +#include "util/profiler.h" #include @@ -958,6 +959,8 @@ void GLSpectrumView::clearSpectrumHistogram() void GLSpectrumView::paintGL() { + PROFILER_START() + if (!m_mutex.tryLock(2)) { return; } @@ -1808,6 +1811,29 @@ void GLSpectrumView::paintGL() } m_mutex.unlock(); + +#ifdef ENABLE_PROFILER + if (m_profileName.isEmpty()) + { + // Try to use the window name for the profile name + QString windowTitle; + for (QWidget *widget = parentWidget(); widget != nullptr; widget = widget->parentWidget()) + { + windowTitle = widget->windowTitle(); + if (!windowTitle.isEmpty()) { + break; + } + } + // Add this address so we get per-spectrum profile data + if (windowTitle.isEmpty()) { + m_profileName = QString("Spectrum @%1").arg((quint64)this, 0, 16); + } else { + m_profileName = QString("%1 @%2").arg(windowTitle).arg((quint64)this, 0, 16); + } + } +#endif + + PROFILER_STOP(m_profileName) } // paintGL // Hightlight power band for SFDR diff --git a/sdrgui/gui/glspectrumview.h b/sdrgui/gui/glspectrumview.h index 77bf8565c..2d8a85c9a 100644 --- a/sdrgui/gui/glspectrumview.h +++ b/sdrgui/gui/glspectrumview.h @@ -424,6 +424,10 @@ private: static const QVector4D m_measurementLightMarkerColor; static const QVector4D m_measurementDarkMarkerColor; +#ifdef ENABLE_PROFILER + QString m_profileName; +#endif + void updateWaterfall(const Real *spectrum); void update3DSpectrogram(const Real *spectrum); void updateHistogram(const Real *spectrum); diff --git a/sdrgui/gui/nanosecondsdelegate.cpp b/sdrgui/gui/nanosecondsdelegate.cpp new file mode 100644 index 000000000..de0aaceba --- /dev/null +++ b/sdrgui/gui/nanosecondsdelegate.cpp @@ -0,0 +1,48 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#include "nanosecondsdelegate.h" + +NanoSecondsDelegate::NanoSecondsDelegate() +{ +} + +QString NanoSecondsDelegate::displayText(const QVariant &value, const QLocale &locale) const +{ + (void) locale; + + if (value.toString() == "") + { + return ""; + } + else + { + double timeInNanoSec = value.toDouble(); + QString s; + + if (timeInNanoSec < 1e3) { + s = QString("%1 ns").arg(timeInNanoSec, 0, 'f', 3); + } else if (timeInNanoSec < 1e6) { + s = QString("%1 us").arg(timeInNanoSec/1e3, 0, 'f', 3); + } else if (timeInNanoSec < 1e9) { + s = QString("%1 ms").arg(timeInNanoSec/1e6, 0, 'f', 3); + } else { + s = QString("%1 s").arg(timeInNanoSec/1e9, 0, 'f', 3); + } + return s; + } +} diff --git a/sdrgui/gui/nanosecondsdelegate.h b/sdrgui/gui/nanosecondsdelegate.h new file mode 100644 index 000000000..f4c90eb43 --- /dev/null +++ b/sdrgui/gui/nanosecondsdelegate.h @@ -0,0 +1,34 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef SDRGUI_GUI_NANOSECONDSDELGATE_H +#define SDRGUI_GUI_NANOSECONDSDELGATE_H + +#include + +#include "export.h" + +// Delegate for table to display a time that's been measured in nanoseconds in s, ms, us or ns +class SDRGUI_API NanoSecondsDelegate : public QStyledItemDelegate { + +public: + NanoSecondsDelegate(); + virtual QString displayText(const QVariant &value, const QLocale &locale) const override; + +}; + +#endif // SDRGUI_GUI_NANOSECONDSDELGATE_H diff --git a/sdrgui/gui/profiledialog.cpp b/sdrgui/gui/profiledialog.cpp new file mode 100644 index 000000000..ba6729d60 --- /dev/null +++ b/sdrgui/gui/profiledialog.cpp @@ -0,0 +1,135 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#include + +#include "profiledialog.h" +#include "ui_profiledialog.h" +#include "gui/nanosecondsdelegate.h" +#include "util/profiler.h" + +ProfileDialog::ProfileDialog(QWidget* parent) : + QDialog(parent), + ui(new Ui::ProfileDialog) +{ + ui->setupUi(this); + connect(&m_timer, &QTimer::timeout, this, &ProfileDialog::updateData); + resizeTable(); + m_timer.start(500); +} + +ProfileDialog::~ProfileDialog() +{ + delete ui; +} + +void ProfileDialog::accept() +{ + QDialog::accept(); +} + +void ProfileDialog::clicked(QAbstractButton *button) +{ + if (ui->buttonBox->buttonRole(button) == QDialogButtonBox::ButtonRole::ResetRole) + { + ui->table->setRowCount(0); + GlobalProfileData::resetProfileData(); + } +} + +void ProfileDialog::resizeTable() +{ + int row = ui->table->rowCount(); + ui->table->setRowCount(row + 1); + ui->table->setItem(row, COL_NAME, new QTableWidgetItem("Random-SDR[0] Spectrum @12345678910")); + ui->table->setItem(row, COL_TOTAL, new QTableWidgetItem("1000.000 ms")); + ui->table->setItem(row, COL_AVERAGE, new QTableWidgetItem("1000.000 ns/frame")); + ui->table->setItem(row, COL_LAST, new QTableWidgetItem("1000.000 ms")); + ui->table->setItem(row, COL_NUM_SAMPLES, new QTableWidgetItem("1000000000")); + ui->table->resizeColumnsToContents(); + ui->table->setRowCount(row); +} + +// Update table with latest profile data +void ProfileDialog::updateData() +{ + QHash& profileData = GlobalProfileData::getProfileData(); + + QHashIterator itr(profileData); + + while (itr.hasNext()) + { + itr.next(); + QString key = itr.key(); + const ProfileData& data = itr.value(); + double totalTime = data.getTotal(); + double averageTime = data.getAverage(); + double lastTime = data.getLast(); + + int i = 0; + for (; i < ui->table->rowCount(); i++) + { + QString name = ui->table->item(i, COL_NAME)->text(); + if (name == key) + { + // Update existing row + ui->table->item(i, COL_TOTAL)->setData(Qt::DisplayRole, totalTime); + ui->table->item(i, COL_AVERAGE)->setData(Qt::DisplayRole, averageTime); + ui->table->item(i, COL_LAST)->setData(Qt::DisplayRole, lastTime); + ui->table->item(i, COL_NUM_SAMPLES)->setData(Qt::DisplayRole, data.getNumSamples()); + break; + } + } + if (i >= ui->table->rowCount()) + { + // Add new row + ui->table->setSortingEnabled(false); + int row = ui->table->rowCount(); + ui->table->setRowCount(row + 1); + + QTableWidgetItem *name = new QTableWidgetItem(key); + QTableWidgetItem *total = new QTableWidgetItem(); + QTableWidgetItem *average = new QTableWidgetItem(); + QTableWidgetItem *last = new QTableWidgetItem(); + QTableWidgetItem *numSamples = new QTableWidgetItem(); + + ui->table->setItem(row, COL_NAME, name); + ui->table->setItem(row, COL_TOTAL, total); + ui->table->setItem(row, COL_AVERAGE, average); + ui->table->setItem(row, COL_LAST, last); + ui->table->setItem(row, COL_NUM_SAMPLES, numSamples); + + total->setTextAlignment(Qt::AlignRight | Qt::AlignVCenter); + average->setTextAlignment(Qt::AlignRight | Qt::AlignVCenter); + last->setTextAlignment(Qt::AlignRight | Qt::AlignVCenter); + numSamples->setTextAlignment(Qt::AlignRight | Qt::AlignVCenter); + + total->setData(Qt::DisplayRole, totalTime); + average->setData(Qt::DisplayRole, averageTime); + last->setData(Qt::DisplayRole, lastTime); + numSamples->setData(Qt::DisplayRole, data.getNumSamples()); + + ui->table->setItemDelegateForColumn(COL_TOTAL, new NanoSecondsDelegate()); + ui->table->setItemDelegateForColumn(COL_AVERAGE, new NanoSecondsDelegate()); + ui->table->setItemDelegateForColumn(COL_LAST, new NanoSecondsDelegate()); + + ui->table->setSortingEnabled(true); + } + } + + GlobalProfileData::releaseProfileData(); +} diff --git a/sdrgui/gui/profiledialog.h b/sdrgui/gui/profiledialog.h new file mode 100644 index 000000000..409dda8c6 --- /dev/null +++ b/sdrgui/gui/profiledialog.h @@ -0,0 +1,59 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Jon Beniston, M7RCE // +// // +// This program is free software; you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation as version 3 of the License, or // +// (at your option) any later version. // +// // +// This program is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License V3 for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with this program. If not, see . // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef SDRGUI_GUI_PROFILEDIALOG_H_ +#define SDRGUI_GUI_PROFILEDIALOG_H_ + +#include +#include + +#include "settings/mainsettings.h" +#include "export.h" + +namespace Ui { + class ProfileDialog; +} + +class QAbstractButton; + +class SDRGUI_API ProfileDialog : public QDialog { + Q_OBJECT +public: + explicit ProfileDialog(QWidget* parent = nullptr); + ~ProfileDialog(); + +private slots: + void accept(); + void clicked(QAbstractButton *button); + void updateData(); + +private: + Ui::ProfileDialog *ui; + QTimer m_timer; + + enum Cols { + COL_NAME, + COL_TOTAL, + COL_AVERAGE, + COL_LAST, + COL_NUM_SAMPLES + }; + + void resizeTable(); +}; + +#endif // SDRGUI_GUI_PROFILEDIALOG_H_ diff --git a/sdrgui/gui/profiledialog.ui b/sdrgui/gui/profiledialog.ui new file mode 100644 index 000000000..5456be141 --- /dev/null +++ b/sdrgui/gui/profiledialog.ui @@ -0,0 +1,142 @@ + + + ProfileDialog + + + + 0 + 0 + 700 + 296 + + + + + Liberation Sans + 9 + + + + Profile Data + + + false + + + + + + + + + Name + + + Name of profile + + + + + Total + + + Total time spent executing the code + + + + + Average + + + Average time executing the code + + + + + Last + + + Time for last execution of the code + + + + + Samples + + + Number of times code was executed + + + + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Close|QDialogButtonBox::Reset + + + + + + + + + + + buttonBox + accepted() + ProfileDialog + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + ProfileDialog + reject() + + + 316 + 260 + + + 286 + 274 + + + + + buttonBox + clicked(QAbstractButton*) + ProfileDialog + clicked(QAbstractButton*) + + + 191 + 275 + + + 191 + 147 + + + + + + clicked(QAbstractButton*) + + diff --git a/sdrgui/mainwindow.cpp b/sdrgui/mainwindow.cpp index 61c7d84bd..7935bb3f9 100644 --- a/sdrgui/mainwindow.cpp +++ b/sdrgui/mainwindow.cpp @@ -65,6 +65,7 @@ #include "gui/deviceuserargsdialog.h" #include "gui/sdrangelsplash.h" #include "gui/mypositiondialog.h" +#include "gui/fftdialog.h" #include "gui/fftwisdomdialog.h" #include "gui/workspace.h" #include "gui/featurepresetsdialog.h" @@ -73,6 +74,7 @@ #include "gui/configurationsdialog.h" #include "gui/dialogpositioner.h" #include "gui/welcomedialog.h" +#include "gui/profiledialog.h" #include "dsp/dspengine.h" #include "dsp/spectrumvis.h" #include "dsp/dspcommands.h" @@ -121,6 +123,7 @@ MainWindow::MainWindow(qtwebapp::LoggerWithFile *logger, const MainParser& parse m_dateTimeWidget(nullptr), m_showSystemWidget(nullptr), m_commandKeyReceiver(nullptr), + m_profileDialog(nullptr), m_fftWisdomProcess(nullptr) { #ifdef ANDROID @@ -190,7 +193,11 @@ MainWindow::MainWindow(qtwebapp::LoggerWithFile *logger, const MainParser& parse connect(&m_statusTimer, SIGNAL(timeout()), this, SLOT(updateStatus())); m_statusTimer.start(1000); - splash->showStatusMessage("allocate FFTs...", Qt::white); + splash->showStatusMessage("load settings...", Qt::white); + qDebug() << "MainWindow::MainWindow: load settings..."; + + loadSettings(); + splash->showStatusMessage("allocate FFTs...", Qt::white); if (parser.getFFTWFWisdomFileName().length() != 0) @@ -213,11 +220,6 @@ MainWindow::MainWindow(qtwebapp::LoggerWithFile *logger, const MainParser& parse m_dspEngine->preAllocateFFTs(); - splash->showStatusMessage("load settings...", Qt::white); - qDebug() << "MainWindow::MainWindow: load settings..."; - - loadSettings(); - splash->showStatusMessage("load plugins...", Qt::white); qDebug() << "MainWindow::MainWindow: load plugins..."; @@ -322,6 +324,7 @@ MainWindow::~MainWindow() removeAllFeatureSets(); delete m_commandKeyReceiver; + delete m_profileDialog; for (const auto& workspace : m_workspaces) { delete workspace; @@ -1657,6 +1660,11 @@ void MainWindow::createMenuBar(QToolButton *button) keepscreenonAction->setCheckable(true); QObject::connect(keepscreenonAction, &QAction::triggered, this, &MainWindow::on_action_View_KeepScreenOn_toggled); #endif +#ifdef ENABLE_PROFILER + QAction *profileAction = viewMenu->addAction("&Profile data..."); + profileAction->setToolTip("View profile data"); + QObject::connect(profileAction, &QAction::triggered, this, &MainWindow::on_action_Profile_triggered); +#endif QAction *newWorkspaceAction = workspacesMenu->addAction("&New"); newWorkspaceAction->setToolTip("Add a new workspace"); @@ -1684,8 +1692,11 @@ void MainWindow::createMenuBar(QToolButton *button) myPositionAction->setToolTip("Set station position"); QObject::connect(myPositionAction, &QAction::triggered, this, &MainWindow::on_action_My_Position_triggered); QAction *fftAction = preferencesMenu->addAction("&FFT..."); - fftAction->setToolTip("Set FFT cache"); + fftAction->setToolTip("Set FFT preferences"); QObject::connect(fftAction, &QAction::triggered, this, &MainWindow::on_action_FFT_triggered); + QAction *fftWisdomAction = preferencesMenu->addAction("&FFTW Wisdom..."); + fftWisdomAction->setToolTip("Set FFTW cache"); + QObject::connect(fftWisdomAction, &QAction::triggered, this, &MainWindow::on_action_FFTWisdom_triggered); QMenu *devicesMenu = preferencesMenu->addMenu("&Devices"); QAction *userArgumentsAction = devicesMenu->addAction("&User arguments..."); userArgumentsAction->setToolTip("Device custom user arguments"); @@ -1744,6 +1755,10 @@ void MainWindow::closeEvent(QCloseEvent *closeEvent) removeLastDeviceSet(); } + if (m_profileDialog) { + m_profileDialog->close(); + } + closeEvent->accept(); } @@ -2223,6 +2238,17 @@ void MainWindow::on_action_View_Fullscreen_toggled(bool checked) } } +void MainWindow::on_action_Profile_triggered() +{ + if (m_profileDialog == nullptr) + { + m_profileDialog = new ProfileDialog(); + new DialogPositioner(m_profileDialog, true); + } + m_profileDialog->show(); + m_profileDialog->raise(); +} + void MainWindow::commandKeysConnect(QObject *object, const char *slot) { setFocus(); @@ -2345,6 +2371,14 @@ void MainWindow::on_action_commands_triggered() void MainWindow::on_action_FFT_triggered() { qDebug("MainWindow::on_action_FFT_triggered"); + FFTDialog fftDialog(m_mainCore->m_settings, this); + new DialogPositioner(&fftDialog, true); + fftDialog.exec(); +} + +void MainWindow::on_action_FFTWisdom_triggered() +{ + qDebug("MainWindow::on_action_FFTWisdom_triggered"); if (m_fftWisdomProcess) { diff --git a/sdrgui/mainwindow.h b/sdrgui/mainwindow.h index 5af37cc08..70da0ddea 100644 --- a/sdrgui/mainwindow.h +++ b/sdrgui/mainwindow.h @@ -59,6 +59,7 @@ class Command; class FeatureSetPreset; class CommandKeyReceiver; class ConfigurationsDialog; +class ProfileDialog; class QMenuBar; class Workspace; @@ -127,6 +128,7 @@ private: QAction *m_spectrumToggleViewAction; CommandKeyReceiver *m_commandKeyReceiver; + ProfileDialog *m_profileDialog; QProcess *m_fftWisdomProcess; @@ -183,12 +185,14 @@ private slots: void on_action_View_KeepScreenOn_toggled(bool checked); #endif void on_action_View_Fullscreen_toggled(bool checked); + void on_action_Profile_triggered(); void on_action_saveAll_triggered(); void on_action_Configurations_triggered(); void on_action_Audio_triggered(); void on_action_Graphics_triggered(); void on_action_Logging_triggered(); void on_action_FFT_triggered(); + void on_action_FFTWisdom_triggered(); void on_action_My_Position_triggered(); void on_action_DeviceUserArguments_triggered(); void on_action_commands_triggered(); From 3ce2f285028a7a385b923d5d29a6420e505477e0 Mon Sep 17 00:00:00 2001 From: Jon Beniston Date: Mon, 14 Aug 2023 10:37:27 +0100 Subject: [PATCH 2/4] Fix Vulkan compilation on Linux --- external/CMakeLists.txt | 2 +- sdrbase/CMakeLists.txt | 71 ++++++++++++++++++++++----------------- sdrbase/dsp/fftengine.cpp | 8 ++++- sdrbase/dsp/vkfftengine.h | 2 +- sdrbase/dsp/vkfftutils.h | 2 +- 5 files changed, 50 insertions(+), 35 deletions(-) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 0079964a6..72092ac64 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -844,7 +844,7 @@ ExternalProject_Add(vkfft TEST_COMMAND "" ) ExternalProject_Get_Property(vkfft source_dir) -set(VKFFT_INCLUDE_DIR "${source_dir}" CACHE INTERNAL "") +set(VKFFT_INCLUDE_DIR "${source_dir}/vkFFT" CACHE INTERNAL "") # requirements needed by many packages on windows if (WIN32) diff --git a/sdrbase/CMakeLists.txt b/sdrbase/CMakeLists.txt index 42a0b4ede..f62369946 100644 --- a/sdrbase/CMakeLists.txt +++ b/sdrbase/CMakeLists.txt @@ -37,27 +37,31 @@ add_definitions(-DUSE_KISSFFT) # Windows Vulkan SDK is missing glslang_c_interface.h # See bug: https://vulkan.lunarg.com/issue/view/63d158a85df11200d569b2ab # Copy it from Linux SDK -find_package(Vulkan) -if(Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0)) - set(sdrbase_SOURCES - ${sdrbase_SOURCES} - dsp/vulkanvkfftengine.cpp - dsp/vulkanvkfftengine.h - ) +if (${VKFFT_BACKEND} EQUAL 0) + find_package(Vulkan) + if(Vulkan_FOUND) + set(sdrbase_SOURCES + ${sdrbase_SOURCES} + dsp/vulkanvkfftengine.cpp + dsp/vulkanvkfftengine.h + ) + endif() endif() # CUDA Toolkit: https://developer.nvidia.com/cuda-downloads -find_package(CUDA 9.0) -if(CUDA_FOUND AND (${VKFFT_BACKEND} EQUAL 1)) - enable_language(CUDA) - set(sdrbase_SOURCES - ${sdrbase_SOURCES} - dsp/cudavkfftengine.cpp - dsp/cudavkfftengine.h - ) +if (${VKFFT_BACKEND} EQUAL 1) + find_package(CUDA 9.0) + if(CUDA_FOUND) + enable_language(CUDA) + set(sdrbase_SOURCES + ${sdrbase_SOURCES} + dsp/cudavkfftengine.cpp + dsp/cudavkfftengine.h + ) + endif() endif() -if(Vulkan_FOUND OR CUDA_FOUND) +if((Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0)) OR (CUDA_FOUND AND (${VKFFT_BACKEND} EQUAL 1))) set(sdrbase_SOURCES ${sdrbase_SOURCES} dsp/vkfftengine.cpp @@ -553,8 +557,8 @@ if(Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0)) find_library(VULKAN_SPIRV_LIB SPIRV HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) find_library(VULKAN_SPVREMAPPER_LIB SPVRemapper HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) find_library(VULKAN_SPIRV_TOOLS_LIB SPIRV-Tools HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_SPIRV_TOOLS_OPT_LIB SPIRV-Tools-Opt HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_SPIRV_TOOLS_SHARED_LIB SPIRV-Tools-Shared HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLS_OPT_LIB SPIRV-Tools-opt HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLS_SHARED_LIB SPIRV-Tools-shared HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) find_library(VULKAN_HLSL_LIB HLSL HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) find_library(VULKAN_OGLCOMPILER_LIB OGLCompiler HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) find_library(VULKAN_OSDEPENDENT_LIB OSDependent HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) @@ -563,18 +567,23 @@ if(Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0)) find_library(VULKAN_MACHINEINDEPENDENT_LIB MachineIndependent HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) find_library(VULKAN_GENERICCODEGEN_LIB GenericCodeGen HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_SPIRVD_LIB SPIRVd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_SPVREMAPPERD_LIB SPVRemapperd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_SPIRV_TOOLSD_LIB SPIRV-Toolsd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_SPIRV_TOOLS_OPTD_LIB SPIRV-Tools-Optd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_SPIRV_TOOLS_SHAREDD_LIB SPIRV-Tools-Sharedd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_HLSLD_LIB HLSLd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_OGLCOMPILERD_LIB OGLCompilerd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_OSDEPENDENTD_LIB OSDependentd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_GLSLANGD_LIB glslangd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_GLSLANG_RESD_LIB glslang-default-resource-limitsd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_MACHINEINDEPENDENTD_LIB MachineIndependentd HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) - find_library(VULKAN_GENERICCODEGEND_LIB GenericCodeGend HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + if(MSVC) + set(VULKAN_DEBUG_POSTFIX "d") + else() + set(VULKAN_DEBUG_POSTFIX "") + endif() + find_library(VULKAN_SPIRVD_LIB SPIRV${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPVREMAPPERD_LIB SPVRemapper${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLSD_LIB SPIRV-Tools${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLS_OPTD_LIB SPIRV-Tools-opt${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_SPIRV_TOOLS_SHAREDD_LIB SPIRV-Tools-shared${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_HLSLD_LIB HLSL${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_OGLCOMPILERD_LIB OGLCompiler${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_OSDEPENDENTD_LIB OSDependent${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_GLSLANGD_LIB glslang${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_GLSLANG_RESD_LIB glslang-default-resource-limits${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_MACHINEINDEPENDENTD_LIB MachineIndependent${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) + find_library(VULKAN_GENERICCODEGEND_LIB GenericCodeGen${VULKAN_DEBUG_POSTFIX} HINTS "$ENV{VULKAN_SDK}/lib" REQUIRED) target_link_libraries(sdrbase optimized ${VULKAN_SPIRV_LIB} @@ -602,7 +611,7 @@ if(Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0)) debug ${VULKAN_GLSLANG_RESD_LIB} debug ${VULKAN_MACHINEINDEPENDENTD_LIB} debug ${VULKAN_GENERICCODEGEND_LIB} - Vulkan::Vulkan + debug Vulkan::Vulkan ) endif() diff --git a/sdrbase/dsp/fftengine.cpp b/sdrbase/dsp/fftengine.cpp index d548d8136..f7156c923 100644 --- a/sdrbase/dsp/fftengine.cpp +++ b/sdrbase/dsp/fftengine.cpp @@ -25,11 +25,13 @@ #ifdef USE_FFTW #include "dsp/fftwengine.h" #endif +#ifdef VKFFT_BACKEND #if VKFFT_BACKEND==0 #include "dsp/vulkanvkfftengine.h" -#elif VKFFT_BACKEND==1 +#elif VKFFT_BACKEND==1 #include "dsp/cudavkfftengine.h" #endif +#endif QStringList FFTEngine::m_allAvailableEngines; @@ -61,6 +63,7 @@ FFTEngine* FFTEngine::create(const QString& fftWisdomFileName, const QString& pr qDebug("FFTEngine::create: using %s engine", qPrintable(engine)); +#ifdef VKFFT_BACKEND #if VKFFT_BACKEND==0 if (engine == VulkanvkFFTEngine::m_name) { return new VulkanvkFFTEngine(); @@ -71,6 +74,7 @@ FFTEngine* FFTEngine::create(const QString& fftWisdomFileName, const QString& pr return new CUDAvkFFTEngine(); } #endif +#endif #ifdef USE_FFTW if (engine == FFTWEngine::m_name) { return new FFTWEngine(fftWisdomFileName); @@ -94,6 +98,7 @@ QStringList FFTEngine::getAllNames() #ifdef USE_KISSFFT m_allAvailableEngines.append(KissEngine::m_name); #endif +#ifdef VKFFT_BACKEND #if VKFFT_BACKEND==0 VulkanvkFFTEngine vulkanvkFFT; if (vulkanvkFFT.isAvailable()) { @@ -104,6 +109,7 @@ QStringList FFTEngine::getAllNames() if (cudavkFFT.isAvailable()) { m_allAvailableEngines.append(cudavkFFT.getName()); } +#endif #endif } return m_allAvailableEngines; diff --git a/sdrbase/dsp/vkfftengine.h b/sdrbase/dsp/vkfftengine.h index ee11caa66..d6b39c44a 100644 --- a/sdrbase/dsp/vkfftengine.h +++ b/sdrbase/dsp/vkfftengine.h @@ -20,7 +20,7 @@ #include -#include +#include #include "dsp/fftengine.h" #include "dsp/vkfftutils.h" diff --git a/sdrbase/dsp/vkfftutils.h b/sdrbase/dsp/vkfftutils.h index 111ec32c5..fe96a5ab8 100644 --- a/sdrbase/dsp/vkfftutils.h +++ b/sdrbase/dsp/vkfftutils.h @@ -5,7 +5,7 @@ #include -#include +#include typedef struct { #if(VKFFT_BACKEND==0) From 3e2addac05799698dea353b509414bbdd6dc27b4 Mon Sep 17 00:00:00 2001 From: Jon Beniston Date: Mon, 14 Aug 2023 12:55:14 +0100 Subject: [PATCH 3/4] Install CUDA runtime --- cmake/cpack/CMakeLists.txt | 19 ++++++++++++++++++- sdrbase/CMakeLists.txt | 3 +++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/cmake/cpack/CMakeLists.txt b/cmake/cpack/CMakeLists.txt index 03c1390ca..dde2356e3 100644 --- a/cmake/cpack/CMakeLists.txt +++ b/cmake/cpack/CMakeLists.txt @@ -236,6 +236,19 @@ elseif(WIN32 OR MINGW) ) endforeach(libsigmf_dll) + if(${VKFFT_BACKEND} EQUAL 1) + find_package(CUDA) + if(CUDA_FOUND) + file(GLOB CUDA_DLLS "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvrtc*${CMAKE_SHARED_LIBRARY_SUFFIX}") + foreach(cuda_dll ${CUDA_DLLS}) + get_filename_component(cuda_dll_name "${cuda_dll}" NAME) + add_custom_target(copy_cuda_${cuda_dll_name} ALL + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${cuda_dll}" "${SDRANGEL_BINARY_BIN_DIR}/" + ) + endforeach(cuda_dll) + endif() + endif() + # TODO we need a way to fixup_bundle() on the build bin/ directory without call install if(BUILD_GUI) install(CODE " @@ -295,7 +308,11 @@ elseif(WIN32 OR MINGW) # libsigmf install(DIRECTORY "${LIBSIGMF_DLL_DIR}/" DESTINATION "${INSTALL_LIB_DIR}" FILES_MATCHING PATTERN "*${CMAKE_SHARED_LIBRARY_SUFFIX}") - + # CUDA runtime + if((${VKFFT_BACKEND} EQUAL 1) AND CUDA_FOUND) + install(DIRECTORY "${CUDA_TOOLKIT_ROOT_DIR}/bin" DESTINATION "${INSTALL_LIB_DIR}" + FILES_MATCHING PATTERN "nvrtc*${CMAKE_SHARED_LIBRARY_SUFFIX}") + endif() install(CODE " # remove *.lib files diff --git a/sdrbase/CMakeLists.txt b/sdrbase/CMakeLists.txt index f62369946..81131f0ac 100644 --- a/sdrbase/CMakeLists.txt +++ b/sdrbase/CMakeLists.txt @@ -59,6 +59,9 @@ if (${VKFFT_BACKEND} EQUAL 1) dsp/cudavkfftengine.h ) endif() + if(WIN32) + set(WINDOWS_FIXUP_BUNDLE_LIB_DIRS ${WINDOWS_FIXUP_BUNDLE_LIB_DIRS} ${CUDA_TOOLKIT_ROOT_DIR}/bin PARENT_SCOPE) + endif() endif() if((Vulkan_FOUND AND (${VKFFT_BACKEND} EQUAL 0)) OR (CUDA_FOUND AND (${VKFFT_BACKEND} EQUAL 1))) From d5268ecad4ba37b8d46029116eda9a21f266f63d Mon Sep 17 00:00:00 2001 From: Jon Beniston Date: Mon, 14 Aug 2023 14:37:46 +0100 Subject: [PATCH 4/4] Add trailing / so files are installed in correct directory --- cmake/cpack/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/cpack/CMakeLists.txt b/cmake/cpack/CMakeLists.txt index dde2356e3..318e20552 100644 --- a/cmake/cpack/CMakeLists.txt +++ b/cmake/cpack/CMakeLists.txt @@ -310,7 +310,7 @@ elseif(WIN32 OR MINGW) FILES_MATCHING PATTERN "*${CMAKE_SHARED_LIBRARY_SUFFIX}") # CUDA runtime if((${VKFFT_BACKEND} EQUAL 1) AND CUDA_FOUND) - install(DIRECTORY "${CUDA_TOOLKIT_ROOT_DIR}/bin" DESTINATION "${INSTALL_LIB_DIR}" + install(DIRECTORY "${CUDA_TOOLKIT_ROOT_DIR}/bin/" DESTINATION "${INSTALL_LIB_DIR}" FILES_MATCHING PATTERN "nvrtc*${CMAKE_SHARED_LIBRARY_SUFFIX}") endif()