diff options
| author | Egor Yusov <egor.yusov@gmail.com> | 2018-06-14 15:18:35 +0000 |
|---|---|---|
| committer | Egor Yusov <egor.yusov@gmail.com> | 2018-06-14 15:18:35 +0000 |
| commit | 01a4d50c9bc2dbdf906542cf08f7a887352f4a1e (patch) | |
| tree | fa72c3a34b909d3a323d10ec272aba8865b4402b /Graphics/GraphicsEngineVulkan | |
| parent | Improved setting dynamic buffer offsets (diff) | |
| download | DiligentCore-01a4d50c9bc2dbdf906542cf08f7a887352f4a1e.tar.gz DiligentCore-01a4d50c9bc2dbdf906542cf08f7a887352f4a1e.zip | |
Reworked dynamic buffer mapping using ring buffers
Diffstat (limited to 'Graphics/GraphicsEngineVulkan')
14 files changed, 409 insertions, 336 deletions
diff --git a/Graphics/GraphicsEngineVulkan/include/BufferVkImpl.h b/Graphics/GraphicsEngineVulkan/include/BufferVkImpl.h index cc797677..75d0d944 100644 --- a/Graphics/GraphicsEngineVulkan/include/BufferVkImpl.h +++ b/Graphics/GraphicsEngineVulkan/include/BufferVkImpl.h @@ -34,6 +34,7 @@ #include "VulkanUtilities/VulkanObjectWrappers.h" #include "VulkanUtilities/VulkanMemoryManager.h" #include "VulkanDynamicHeap.h" +#include "STDAllocator.h" namespace Diligent { @@ -64,16 +65,28 @@ public: virtual void Map( IDeviceContext *pContext, MAP_TYPE MapType, Uint32 MapFlags, PVoid &pMappedData )override; virtual void Unmap( IDeviceContext *pContext, MAP_TYPE MapType, Uint32 MapFlags )override; -//#ifdef _DEBUG -// void DbgVerifyDynamicAllocation(Uint32 ContextId); -//#endif - - Uint32 GetDynamicOffset(Uint32 CtxId)const{return 0;} +#ifdef _DEBUG + void DbgVerifyDynamicAllocation(Uint32 ContextId)const; +#endif - VkBuffer GetVkBuffer()const override final + Uint32 GetDynamicOffset(Uint32 CtxId)const { - return m_VulkanBuffer; + if(m_VulkanBuffer != VK_NULL_HANDLE) + { + return 0; + } + else + { + VERIFY(m_Desc.Usage == USAGE_DYNAMIC, "Dynamic buffer is expected"); + VERIFY_EXPR(!m_DynamicAllocations.empty()); +#ifdef _DEBUG + DbgVerifyDynamicAllocation(CtxId); +#endif + auto& DynAlloc = m_DynamicAllocations[CtxId]; + return static_cast<Uint32>(DynAlloc.Offset); + } } + VkBuffer GetVkBuffer()const override final; virtual void* GetNativeHandle()override final { @@ -95,7 +108,9 @@ private: #ifdef _DEBUG std::vector< std::pair<MAP_TYPE, Uint32> > m_DbgMapType; #endif - + + std::vector<VulkanDynamicAllocation, STDAllocatorRawMem<VulkanDynamicAllocation> > m_DynamicAllocations; + VulkanUtilities::BufferWrapper m_VulkanBuffer; VulkanUtilities::VulkanMemoryAllocation m_MemoryAllocation; }; diff --git a/Graphics/GraphicsEngineVulkan/include/DeviceContextVkImpl.h b/Graphics/GraphicsEngineVulkan/include/DeviceContextVkImpl.h index 841c3086..0f2fe325 100644 --- a/Graphics/GraphicsEngineVulkan/include/DeviceContextVkImpl.h +++ b/Graphics/GraphicsEngineVulkan/include/DeviceContextVkImpl.h @@ -147,6 +147,8 @@ public: return m_DynamicDescriptorPool.Allocate(SetLayout); } + VulkanDynamicAllocation AllocateDynamicSpace(Uint32 SizeInBytes); + std::vector<uint32_t>& GetDynamicBufferOffsets(){return m_DynamicBufferOffsets;} private: diff --git a/Graphics/GraphicsEngineVulkan/include/RenderDeviceVkImpl.h b/Graphics/GraphicsEngineVulkan/include/RenderDeviceVkImpl.h index 7aa33423..10d77889 100644 --- a/Graphics/GraphicsEngineVulkan/include/RenderDeviceVkImpl.h +++ b/Graphics/GraphicsEngineVulkan/include/RenderDeviceVkImpl.h @@ -44,6 +44,7 @@ #include "FramebufferCache.h" #include "CommandPoolManager.h" #include "ResourceReleaseQueue.h" +#include "VulkanDynamicHeap.h" /// Namespace for the Direct3D11 implementation of the graphics engine namespace Diligent @@ -96,7 +97,8 @@ public: ICommandQueueVk *GetCmdQueue(){return m_pCommandQueue;} - void IdleGPU(bool ReleaseStaleObjects); + // Idles GPU and returns fence value that was signaled + Uint64 IdleGPU(bool ReleaseStaleObjects); // pImmediateCtx parameter is only used to make sure the command buffer is submitted from the immediate context // The method returns fence value associated with the submitted command buffer Uint64 ExecuteCommandBuffer(const VkSubmitInfo &SubmitInfo, class DeviceContextVkImpl* pImmediateCtx); @@ -128,23 +130,26 @@ public: return m_MemoryMgr.Allocate(MemReqs, MemoryProperties); } + VulkanDynamicAllocation AllocateDynamicSpace(Uint32 CtxId, Uint32 SizeInBytes); + const VulkanDynamicHeap& GetDynamicHeap()const{return m_DynamicHeap;} + private: virtual void TestTextureFormat( TEXTURE_FORMAT TexFormat )override final; void ProcessStaleResources(Uint64 SubmittedCmdBufferNumber, Uint64 SubmittedFenceValue, Uint64 CompletedFenceValue); // Submits command buffer for execution to the command queue - // Returns the submitted command buffer number and the fence value that has been set to signal by GPU + // Returns the submitted command buffer number and the fence value // Parameters: // * SubmittedCmdBuffNumber - submitted command buffer number // * SubmittedFenceValue - fence value associated with the submitted command buffer void SubmitCommandBuffer(const VkSubmitInfo& SubmitInfo, Uint64& SubmittedCmdBuffNumber, Uint64& SubmittedFenceValue); - std::shared_ptr<VulkanUtilities::VulkanInstance> m_VulkanInstance; - std::unique_ptr<VulkanUtilities::VulkanPhysicalDevice> m_PhysicalDevice; - std::shared_ptr<VulkanUtilities::VulkanLogicalDevice> m_LogicalVkDevice; + std::shared_ptr<VulkanUtilities::VulkanInstance> m_VulkanInstance; + std::unique_ptr<VulkanUtilities::VulkanPhysicalDevice> m_PhysicalDevice; + std::shared_ptr<VulkanUtilities::VulkanLogicalDevice> m_LogicalVkDevice; - std::mutex m_CmdQueueMutex; - RefCntAutoPtr<ICommandQueueVk> m_pCommandQueue; + std::mutex m_CmdQueueMutex; + RefCntAutoPtr<ICommandQueueVk> m_pCommandQueue; EngineVkAttribs m_EngineAttribs; @@ -189,6 +194,8 @@ private: VulkanUtilities::VulkanMemoryManager m_MemoryMgr; ResourceReleaseQueue<DynamicStaleResourceWrapper> m_ReleaseQueue; + + VulkanDynamicHeap m_DynamicHeap; }; } diff --git a/Graphics/GraphicsEngineVulkan/include/VulkanDynamicHeap.h b/Graphics/GraphicsEngineVulkan/include/VulkanDynamicHeap.h index a36a124a..2127ac3a 100644 --- a/Graphics/GraphicsEngineVulkan/include/VulkanDynamicHeap.h +++ b/Graphics/GraphicsEngineVulkan/include/VulkanDynamicHeap.h @@ -23,6 +23,7 @@ #pragma once +#include "Vulkan.h" #include "RingBuffer.h" #include "VulkanUtilities/VulkanLogicalDevice.h" #include "VulkanUtilities/VulkanObjectWrappers.h" @@ -30,103 +31,100 @@ namespace Diligent { -// Constant blocks must be multiples of 16 constants @ 16 bytes each -#define DEFAULT_ALIGN 256 - class RenderDeviceVkImpl; +class VulkanDynamicHeap; struct VulkanDynamicAllocation { - VulkanDynamicAllocation(VkBuffer _Buff, size_t _Offset, size_t _Size, void *_CPUAddress) - : vkBuffer(_Buff), Offset(_Offset), Size(_Size), CPUAddress(_CPUAddress) + VulkanDynamicAllocation(){} + + VulkanDynamicAllocation(VulkanDynamicHeap& _ParentHeap, size_t _Offset, size_t _Size) : + pParentDynamicHeap(&_ParentHeap), + Offset (_Offset), + Size (_Size) {} - VkBuffer vkBuffer = VK_NULL_HANDLE; // Vulkan buffer associated with this memory. - size_t Offset = 0; // Offset from the start of the buffer resource - size_t Size = 0; // Reserved size of this allocation - void *CPUAddress = nullptr; + VulkanDynamicAllocation (const VulkanDynamicAllocation&) = delete; + VulkanDynamicAllocation& operator = (const VulkanDynamicAllocation&) = delete; + VulkanDynamicAllocation (VulkanDynamicAllocation&& rhs)noexcept : + pParentDynamicHeap(rhs.pParentDynamicHeap), + Offset (rhs.Offset), + Size (rhs.Size) #ifdef _DEBUG - Uint64 FrameNum = static_cast<Uint64>(-1); + , dbgFrameNumber(rhs.dbgFrameNumber) #endif -}; - -class VulkanRingBuffer : public RingBuffer -{ -public: - VulkanRingBuffer(size_t MaxSize, IMemoryAllocator &Allocator, RenderDeviceVkImpl* pDeviceVk); - - VulkanRingBuffer(VulkanRingBuffer&& rhs)noexcept : - RingBuffer (std::move(rhs)), - m_pDeviceVk (rhs.m_pDeviceVk), - m_VkBuffer (std::move(rhs.m_VkBuffer)), - m_BufferMemory(std::move(rhs.m_BufferMemory)), - m_CPUAddress (rhs.m_CPUAddress) { - rhs.m_CPUAddress = nullptr; + rhs.pParentDynamicHeap = nullptr; + rhs.Offset = 0; + rhs.Size = 0; +#ifdef _DEBUG + rhs.dbgFrameNumber = 0; +#endif } - VulkanRingBuffer (const VulkanRingBuffer&) = delete; - VulkanRingBuffer& operator= (VulkanRingBuffer&) = delete; - VulkanRingBuffer& operator= (VulkanRingBuffer&& rhs) + VulkanDynamicAllocation& operator = (VulkanDynamicAllocation&& rhs)noexcept // Must be noexcept on MSVC, so can't use = default { - Destroy(); - - static_cast<RingBuffer&>(*this) = std::move(rhs); - m_pDeviceVk = rhs.m_pDeviceVk; - m_VkBuffer = std::move(rhs.m_VkBuffer); - m_BufferMemory = std::move(rhs.m_BufferMemory); - m_CPUAddress = rhs.m_CPUAddress; - rhs.m_CPUAddress = nullptr; - + pParentDynamicHeap = rhs.pParentDynamicHeap; + Offset = rhs.Offset; + Size = rhs.Size; + rhs.pParentDynamicHeap = nullptr; + rhs.Offset = 0; + rhs.Size = 0; +#ifdef _DEBUG + dbgFrameNumber = rhs.dbgFrameNumber; + rhs.dbgFrameNumber = 0; +#endif return *this; } - ~VulkanRingBuffer(); - - VulkanDynamicAllocation Allocate(size_t SizeInBytes) - { - auto Offset = RingBuffer::Allocate(SizeInBytes); - if (Offset != RingBuffer::InvalidOffset) - { - return VulkanDynamicAllocation {m_VkBuffer, Offset, SizeInBytes, m_CPUAddress + Offset}; - } - else - { - return VulkanDynamicAllocation {nullptr, 0, 0, nullptr}; - } - } - -private: - void Destroy(); - - RenderDeviceVkImpl* m_pDeviceVk; - VulkanUtilities::BufferWrapper m_VkBuffer; - VulkanUtilities::DeviceMemoryWrapper m_BufferMemory; - Uint8* m_CPUAddress; + VulkanDynamicHeap* pParentDynamicHeap = nullptr; + size_t Offset = 0; // Offset from the start of the buffer resource + size_t Size = 0; // Reserved size of this allocation +#ifdef _DEBUG + Uint64 dbgFrameNumber = 0; +#endif }; class VulkanDynamicHeap { public: - VulkanDynamicHeap(IMemoryAllocator &Allocator, class RenderDeviceVkImpl* pDeviceVk, size_t InitialSize); - + VulkanDynamicHeap(IMemoryAllocator& Allocator, + class RenderDeviceVkImpl* pDeviceVk, + Uint32 ImmediateCtxHeapSize, + Uint32 DeferredCtxHeapSize, + Uint32 DeferredCtxCount); + ~VulkanDynamicHeap(); + VulkanDynamicHeap (const VulkanDynamicHeap&) = delete; VulkanDynamicHeap (VulkanDynamicHeap&&) = delete; VulkanDynamicHeap& operator= (const VulkanDynamicHeap&) = delete; VulkanDynamicHeap& operator= (VulkanDynamicHeap&&) = delete; - VulkanDynamicAllocation Allocate( size_t SizeInBytes, size_t Alignment = DEFAULT_ALIGN ); + VulkanDynamicAllocation Allocate( Uint32 CtxId, size_t SizeInBytes, size_t Alignment = 0); void FinishFrame(Uint64 FenceValue, Uint64 LastCompletedFenceValue); + void Destroy(); + + VkBuffer GetVkBuffer() const{return m_VkBuffer;} + Uint8* GetCPUAddress()const{return m_CPUAddress;} private: - // When a chunk of dynamic memory is requested, the heap first tries to allocate the memory in the largest GPU buffer. - // If allocation fails, a new ring buffer is created that provides enough space and requests memory from that buffer. - // Only the largest buffer is used for allocation and all other buffers are released when GPU is done with corresponding frames - std::vector<VulkanRingBuffer, STDAllocatorRawMem<VulkanRingBuffer> > m_RingBuffers; - IMemoryAllocator &m_Allocator; - RenderDeviceVkImpl* m_pDeviceVk = nullptr; - //std::mutex m_Mutex; + struct VulkanRingBuffer + { + VulkanRingBuffer(Uint32 Size, IMemoryAllocator &Allocator, Uint32 _BaseOffset) : + RingBuff(Size, Allocator), + BaseOffset(_BaseOffset) + {} + RingBuffer RingBuff; + const Uint32 BaseOffset; + }; + std::vector<VulkanRingBuffer> m_RingBuffers; + RenderDeviceVkImpl* const m_pDeviceVk; + + VulkanUtilities::BufferWrapper m_VkBuffer; + VulkanUtilities::DeviceMemoryWrapper m_BufferMemory; + Uint8* m_CPUAddress; + const uint32_t m_DefaultAlignment; }; } diff --git a/Graphics/GraphicsEngineVulkan/src/BufferViewVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/BufferViewVkImpl.cpp index 1c2fdf29..ec4b22ce 100644 --- a/Graphics/GraphicsEngineVulkan/src/BufferViewVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/BufferViewVkImpl.cpp @@ -42,7 +42,7 @@ BufferViewVkImpl::BufferViewVkImpl( IReferenceCounters *pRefCounters, BufferViewVkImpl::~BufferViewVkImpl() { - auto *pDeviceVkImpl = ValidatedCast<RenderDeviceVkImpl>(GetDevice()); + auto *pDeviceVkImpl = GetDevice<RenderDeviceVkImpl>(); pDeviceVkImpl->SafeReleaseVkObject(std::move(m_BuffView)); } diff --git a/Graphics/GraphicsEngineVulkan/src/BufferVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/BufferVkImpl.cpp index 99d5267e..f9e4b840 100644 --- a/Graphics/GraphicsEngineVulkan/src/BufferVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/BufferVkImpl.cpp @@ -36,16 +36,17 @@ namespace Diligent { -BufferVkImpl :: BufferVkImpl(IReferenceCounters *pRefCounters, - FixedBlockMemoryAllocator &BuffViewObjMemAllocator, - RenderDeviceVkImpl *pRenderDeviceVk, - const BufferDesc& BuffDesc, - const BufferData &BuffData /*= BufferData()*/) : +BufferVkImpl :: BufferVkImpl(IReferenceCounters* pRefCounters, + FixedBlockMemoryAllocator& BuffViewObjMemAllocator, + RenderDeviceVkImpl* pRenderDeviceVk, + const BufferDesc& BuffDesc, + const BufferData& BuffData /*= BufferData()*/) : TBufferBase(pRefCounters, BuffViewObjMemAllocator, pRenderDeviceVk, BuffDesc, false), + m_AccessFlags(0), #ifdef _DEBUG m_DbgMapType(1 + pRenderDeviceVk->GetNumDeferredContexts()), #endif - m_AccessFlags(0) + m_DynamicAllocations(STD_ALLOCATOR_RAW_MEM(VulkanDynamicAllocation, GetRawAllocator(), "Allocator for vector<VulkanDynamicAllocation>")) { #define LOG_BUFFER_ERROR_AND_THROW(...) LOG_ERROR_AND_THROW("Buffer \"", BuffDesc.Name ? BuffDesc.Name : "", "\": ", ##__VA_ARGS__); @@ -90,105 +91,127 @@ BufferVkImpl :: BufferVkImpl(IReferenceCounters *pRefCounters, if (m_Desc.BindFlags & BIND_UNIFORM_BUFFER) VkBuffCI.usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - VkBuffCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE; // sharing mode of the buffer when it will be accessed by multiple queue families. - VkBuffCI.queueFamilyIndexCount = 0; // number of entries in the pQueueFamilyIndices array - VkBuffCI.pQueueFamilyIndices = nullptr; // list of queue families that will access this buffer - // (ignored if sharingMode is not VK_SHARING_MODE_CONCURRENT). + if(m_Desc.Usage == USAGE_DYNAMIC) + { + auto CtxCount = 1 + pRenderDeviceVk->GetNumDeferredContexts(); + m_DynamicAllocations.reserve(CtxCount); + for(Uint32 ctx=0; ctx < CtxCount; ++ctx) + m_DynamicAllocations.emplace_back(); + } - m_VulkanBuffer = LogicalDevice.CreateBuffer(VkBuffCI, m_Desc.Name); + if (m_Desc.Usage == USAGE_DYNAMIC && (VkBuffCI.usage & (VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)) == 0) + { + // Dynamic constant/vertex/index buffers are suballocated in the upload heap when Map() is called. + // Dynamic buffers with SRV or UAV flags need to be allocated in GPU-only memory + m_AccessFlags = VK_ACCESS_INDIRECT_COMMAND_READ_BIT | + VK_ACCESS_INDEX_READ_BIT | + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | + VK_ACCESS_UNIFORM_READ_BIT | + VK_ACCESS_SHADER_READ_BIT | + VK_ACCESS_TRANSFER_READ_BIT; + } + else + { + VkBuffCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE; // sharing mode of the buffer when it will be accessed by multiple queue families. + VkBuffCI.queueFamilyIndexCount = 0; // number of entries in the pQueueFamilyIndices array + VkBuffCI.pQueueFamilyIndices = nullptr; // list of queue families that will access this buffer + // (ignored if sharingMode is not VK_SHARING_MODE_CONCURRENT). - VkMemoryRequirements MemReqs = LogicalDevice.GetBufferMemoryRequirements(m_VulkanBuffer); + m_VulkanBuffer = LogicalDevice.CreateBuffer(VkBuffCI, m_Desc.Name); - VkMemoryPropertyFlags BufferMemoryFlags = 0; - if (m_Desc.Usage == USAGE_CPU_ACCESSIBLE) - BufferMemoryFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - else - BufferMemoryFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + VkMemoryRequirements MemReqs = LogicalDevice.GetBufferMemoryRequirements(m_VulkanBuffer); - m_MemoryAllocation = pRenderDeviceVk->AllocateMemory(MemReqs, BufferMemoryFlags); + VkMemoryPropertyFlags BufferMemoryFlags = 0; + if (m_Desc.Usage == USAGE_CPU_ACCESSIBLE) + BufferMemoryFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + else + BufferMemoryFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - VERIFY( (MemReqs.alignment & (MemReqs.alignment-1)) == 0, "Alignment is not power of 2!"); - auto AlignedOffset = (m_MemoryAllocation.UnalignedOffset + (MemReqs.alignment-1)) & ~(MemReqs.alignment-1); - auto Memory = m_MemoryAllocation.Page->GetVkMemory(); - auto err = LogicalDevice.BindBufferMemory(m_VulkanBuffer, Memory, AlignedOffset); - CHECK_VK_ERROR_AND_THROW(err, "Failed to bind buffer memory"); + m_MemoryAllocation = pRenderDeviceVk->AllocateMemory(MemReqs, BufferMemoryFlags); - bool bInitializeBuffer = (BuffData.pData != nullptr && BuffData.DataSize > 0); - if( bInitializeBuffer ) - { - VkBufferCreateInfo VkStaginBuffCI = VkBuffCI; - VkStaginBuffCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - - std::string StagingBufferName = "Staging buffer for '"; - StagingBufferName += m_Desc.Name; - StagingBufferName += '\''; - VulkanUtilities::BufferWrapper StagingBuffer = LogicalDevice.CreateBuffer(VkStaginBuffCI, StagingBufferName.c_str()); - - VkMemoryRequirements StagingBufferMemReqs = LogicalDevice.GetBufferMemoryRequirements(StagingBuffer); - - // VK_MEMORY_PROPERTY_HOST_COHERENT_BIT bit specifies that the host cache management commands vkFlushMappedMemoryRanges - // and vkInvalidateMappedMemoryRanges are NOT needed to flush host writes to the device or make device writes visible - // to the host (10.2) - auto StagingMemoryAllocation = pRenderDeviceVk->AllocateMemory(StagingBufferMemReqs, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - auto StagingBufferMemory = StagingMemoryAllocation.Page->GetVkMemory(); - auto AlignedStagingMemOffset = (StagingMemoryAllocation.UnalignedOffset + (StagingBufferMemReqs.alignment-1)) & ~(StagingBufferMemReqs.alignment-1); - - auto *StagingData = reinterpret_cast<uint8_t*>(StagingMemoryAllocation.Page->GetCPUMemory()); - VERIFY_EXPR(StagingData != nullptr); - memcpy(StagingData + AlignedStagingMemOffset, BuffData.pData, BuffData.DataSize); + VERIFY( (MemReqs.alignment & (MemReqs.alignment-1)) == 0, "Alignment is not power of 2!"); + auto AlignedOffset = (m_MemoryAllocation.UnalignedOffset + (MemReqs.alignment-1)) & ~(MemReqs.alignment-1); + auto Memory = m_MemoryAllocation.Page->GetVkMemory(); + auto err = LogicalDevice.BindBufferMemory(m_VulkanBuffer, Memory, AlignedOffset); + CHECK_VK_ERROR_AND_THROW(err, "Failed to bind buffer memory"); + + bool bInitializeBuffer = (BuffData.pData != nullptr && BuffData.DataSize > 0); + if( bInitializeBuffer ) + { + VkBufferCreateInfo VkStaginBuffCI = VkBuffCI; + VkStaginBuffCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + std::string StagingBufferName = "Staging buffer for '"; + StagingBufferName += m_Desc.Name; + StagingBufferName += '\''; + VulkanUtilities::BufferWrapper StagingBuffer = LogicalDevice.CreateBuffer(VkStaginBuffCI, StagingBufferName.c_str()); + + VkMemoryRequirements StagingBufferMemReqs = LogicalDevice.GetBufferMemoryRequirements(StagingBuffer); + + // VK_MEMORY_PROPERTY_HOST_COHERENT_BIT bit specifies that the host cache management commands vkFlushMappedMemoryRanges + // and vkInvalidateMappedMemoryRanges are NOT needed to flush host writes to the device or make device writes visible + // to the host (10.2) + auto StagingMemoryAllocation = pRenderDeviceVk->AllocateMemory(StagingBufferMemReqs, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + auto StagingBufferMemory = StagingMemoryAllocation.Page->GetVkMemory(); + auto AlignedStagingMemOffset = (StagingMemoryAllocation.UnalignedOffset + (StagingBufferMemReqs.alignment-1)) & ~(StagingBufferMemReqs.alignment-1); + + auto *StagingData = reinterpret_cast<uint8_t*>(StagingMemoryAllocation.Page->GetCPUMemory()); + VERIFY_EXPR(StagingData != nullptr); + memcpy(StagingData + AlignedStagingMemOffset, BuffData.pData, BuffData.DataSize); - err = LogicalDevice.BindBufferMemory(StagingBuffer, StagingBufferMemory, AlignedStagingMemOffset); - CHECK_VK_ERROR_AND_THROW(err, "Failed to bind staging bufer memory"); - - VulkanUtilities::CommandPoolWrapper CmdPool; - VkCommandBuffer vkCmdBuff; - pRenderDeviceVk->AllocateTransientCmdPool(CmdPool, vkCmdBuff, "Transient command pool to copy staging data to a device buffer"); - - VulkanUtilities::VulkanCommandBuffer::BufferMemoryBarrier(vkCmdBuff, StagingBuffer, 0, VK_ACCESS_TRANSFER_READ_BIT); - m_AccessFlags = VK_ACCESS_TRANSFER_WRITE_BIT; - VulkanUtilities::VulkanCommandBuffer::BufferMemoryBarrier(vkCmdBuff, m_VulkanBuffer, 0, m_AccessFlags); - - // Copy commands MUST be recorded outside of a render pass instance. This is OK here - // as copy will be the only command in the cmd buffer - VkBufferCopy BuffCopy = {}; - BuffCopy.srcOffset = 0; - BuffCopy.dstOffset = 0; - BuffCopy.size = VkBuffCI.size; - vkCmdCopyBuffer(vkCmdBuff, StagingBuffer, m_VulkanBuffer, 1, &BuffCopy); - - pRenderDeviceVk->ExecuteAndDisposeTransientCmdBuff(vkCmdBuff, std::move(CmdPool)); - - - // After command buffer is submitted, safe-release staging resources. This strategy - // is little overconservative as the resources will only be released after the - // first command buffer submitted through the immediate context is complete - - // Next Cmd Buff| Next Fence | This Thread | Immediate Context - // | | | - // N | F | | - // | | | - // | | ExecuteAndDisposeTransientCmdBuff(vkCmdBuff) | - // | | - SubmittedCmdBuffNumber = N | - // | | - SubmittedFenceValue = F | - // N+1 - - | - F+1 - | | - // | | Release(StagingBuffer) | - // | | - {N+1, StagingBuffer} -> Stale Objects | - // | | | - // | | | - // | | | ExecuteCommandBuffer() - // | | | - SubmittedCmdBuffNumber = N+1 - // | | | - SubmittedFenceValue = F+1 - // N+2 - - | - F+2 - | - - - - - - - - - - - - | - // | | | - DiscardStaleVkObjects(N+1, F+1) - // | | | - {F+1, StagingBuffer} -> Release Queue - // | | | - - pRenderDeviceVk->SafeReleaseVkObject(std::move(StagingBuffer)); - pRenderDeviceVk->SafeReleaseVkObject(std::move(StagingMemoryAllocation)); - } - else - { - m_AccessFlags = 0; + err = LogicalDevice.BindBufferMemory(StagingBuffer, StagingBufferMemory, AlignedStagingMemOffset); + CHECK_VK_ERROR_AND_THROW(err, "Failed to bind staging bufer memory"); + + VulkanUtilities::CommandPoolWrapper CmdPool; + VkCommandBuffer vkCmdBuff; + pRenderDeviceVk->AllocateTransientCmdPool(CmdPool, vkCmdBuff, "Transient command pool to copy staging data to a device buffer"); + + VulkanUtilities::VulkanCommandBuffer::BufferMemoryBarrier(vkCmdBuff, StagingBuffer, 0, VK_ACCESS_TRANSFER_READ_BIT); + m_AccessFlags = VK_ACCESS_TRANSFER_WRITE_BIT; + VulkanUtilities::VulkanCommandBuffer::BufferMemoryBarrier(vkCmdBuff, m_VulkanBuffer, 0, m_AccessFlags); + + // Copy commands MUST be recorded outside of a render pass instance. This is OK here + // as copy will be the only command in the cmd buffer + VkBufferCopy BuffCopy = {}; + BuffCopy.srcOffset = 0; + BuffCopy.dstOffset = 0; + BuffCopy.size = VkBuffCI.size; + vkCmdCopyBuffer(vkCmdBuff, StagingBuffer, m_VulkanBuffer, 1, &BuffCopy); + + pRenderDeviceVk->ExecuteAndDisposeTransientCmdBuff(vkCmdBuff, std::move(CmdPool)); + + + // After command buffer is submitted, safe-release staging resources. This strategy + // is little overconservative as the resources will only be released after the + // first command buffer submitted through the immediate context is complete + + // Next Cmd Buff| Next Fence | This Thread | Immediate Context + // | | | + // N | F | | + // | | | + // | | ExecuteAndDisposeTransientCmdBuff(vkCmdBuff) | + // | | - SubmittedCmdBuffNumber = N | + // | | - SubmittedFenceValue = F | + // N+1 - - | - F+1 - | | + // | | Release(StagingBuffer) | + // | | - {N+1, StagingBuffer} -> Stale Objects | + // | | | + // | | | + // | | | ExecuteCommandBuffer() + // | | | - SubmittedCmdBuffNumber = N+1 + // | | | - SubmittedFenceValue = F+1 + // N+2 - - | - F+2 - | - - - - - - - - - - - - | + // | | | - DiscardStaleVkObjects(N+1, F+1) + // | | | - {F+1, StagingBuffer} -> Release Queue + // | | | + + pRenderDeviceVk->SafeReleaseVkObject(std::move(StagingBuffer)); + pRenderDeviceVk->SafeReleaseVkObject(std::move(StagingMemoryAllocation)); + } + else + { + m_AccessFlags = 0; + } } } @@ -241,10 +264,11 @@ BufferVkImpl :: BufferVkImpl(IReferenceCounters *pRefCounters, const BufferDesc& BuffDesc, void *pVkBuffer) : TBufferBase(pRefCounters, BuffViewObjMemAllocator, pRenderDeviceVk, BufferDescFromVkResource(BuffDesc, pVkBuffer), false), + m_AccessFlags(0), #ifdef _DEBUG m_DbgMapType(1 + pRenderDeviceVk->GetNumDeferredContexts()), #endif - m_AccessFlags(0) + m_DynamicAllocations(STD_ALLOCATOR_RAW_MEM(VulkanDynamicAllocation, GetRawAllocator(), "Allocator for vector<VulkanDynamicAllocation>")) { #if 0 m_pVkResource = pVkBuffer; @@ -259,10 +283,13 @@ BufferVkImpl :: BufferVkImpl(IReferenceCounters *pRefCounters, BufferVkImpl :: ~BufferVkImpl() { - auto *pDeviceVkImpl = ValidatedCast<RenderDeviceVkImpl>(GetDevice()); - // Vk object can only be destroyed when it is no longer used by the GPU - pDeviceVkImpl->SafeReleaseVkObject(std::move(m_VulkanBuffer)); - pDeviceVkImpl->SafeReleaseVkObject(std::move(m_MemoryAllocation)); + auto *pDeviceVkImpl = GetDevice<RenderDeviceVkImpl>(); + if(m_VulkanBuffer != VK_NULL_HANDLE) + { + // Vk object can only be destroyed when it is no longer used by the GPU + pDeviceVkImpl->SafeReleaseVkObject(std::move(m_VulkanBuffer)); + pDeviceVkImpl->SafeReleaseVkObject(std::move(m_MemoryAllocation)); + } } IMPLEMENT_QUERY_INTERFACE( BufferVkImpl, IID_BufferVk, TBufferBase ) @@ -288,7 +315,8 @@ void BufferVkImpl :: Map(IDeviceContext *pContext, MAP_TYPE MapType, Uint32 MapF { TBufferBase::Map( pContext, MapType, MapFlags, pMappedData ); - auto *pDeviceContextVk = ValidatedCast<DeviceContextVkImpl>(pContext); + auto* pDeviceContextVk = ValidatedCast<DeviceContextVkImpl>(pContext); + auto* pDeviceVk = GetDevice<RenderDeviceVkImpl>(); #ifdef _DEBUG if(pDeviceContextVk != nullptr) m_DbgMapType[pDeviceContextVk->GetContextId()] = std::make_pair(MapType, MapFlags); @@ -299,7 +327,6 @@ void BufferVkImpl :: Map(IDeviceContext *pContext, MAP_TYPE MapType, Uint32 MapF #if 0 LOG_WARNING_MESSAGE_ONCE("Mapping CPU buffer for reading on Vk currently requires flushing context and idling GPU"); pDeviceContextVk->Flush(); - auto *pDeviceVk = ValidatedCast<RenderDeviceVkImpl>(GetDevice()); pDeviceVk->IdleGPU(false); VERIFY(m_Desc.Usage == USAGE_CPU_ACCESSIBLE, "Buffer must be created as USAGE_CPU_ACCESSIBLE to be mapped for reading"); @@ -326,7 +353,11 @@ void BufferVkImpl :: Map(IDeviceContext *pContext, MAP_TYPE MapType, Uint32 MapF else if (m_Desc.Usage == USAGE_DYNAMIC) { VERIFY(MapFlags & MAP_FLAG_DISCARD, "Vk buffer must be mapped for writing with MAP_FLAG_DISCARD flag"); - pMappedData = pDeviceContextVk->AllocateUploadSpace(this, m_Desc.uiSizeInBytes); + auto DynAlloc = pDeviceContextVk->AllocateDynamicSpace(m_Desc.uiSizeInBytes); + const auto& DynamicHeap = pDeviceVk->GetDynamicHeap(); + auto* CPUAddress = DynamicHeap.GetCPUAddress(); + pMappedData = CPUAddress + DynAlloc.Offset; + m_DynamicAllocations[pDeviceContextVk->GetContextId()] = std::move(DynAlloc); } else { @@ -381,7 +412,7 @@ void BufferVkImpl::Unmap( IDeviceContext *pContext, MAP_TYPE MapType, Uint32 Map else if (m_Desc.Usage == USAGE_DYNAMIC) { VERIFY(MapFlags & MAP_FLAG_DISCARD, "Vk buffer must be mapped for writing with MAP_FLAG_DISCARD flag"); - pDeviceContextVk->CopyAndFreeDynamicUploadData(this); + //pDeviceContextVk->CopyAndFreeDynamicUploadData(this); } } @@ -401,7 +432,7 @@ void BufferVkImpl::CreateViewInternal( const BufferViewDesc &OrigViewDesc, IBuff try { - auto *pDeviceVkImpl = ValidatedCast<RenderDeviceVkImpl>(GetDevice()); + auto *pDeviceVkImpl = GetDevice<RenderDeviceVkImpl>(); auto &BuffViewAllocator = pDeviceVkImpl->GetBuffViewObjAllocator(); VERIFY( &BuffViewAllocator == &m_dbgBuffViewAllocator, "Buff view allocator does not match allocator provided at buffer initialization" ); @@ -440,11 +471,32 @@ VulkanUtilities::BufferViewWrapper BufferVkImpl::CreateView(struct BufferViewDes ViewCI.offset = ViewDesc.ByteOffset; ViewCI.range = ViewDesc.ByteWidth; // size in bytes of the buffer view - auto *pDeviceVkImpl = static_cast<RenderDeviceVkImpl*>(GetDevice()); + auto *pDeviceVkImpl = GetDevice<RenderDeviceVkImpl>(); const auto& LogicalDevice = pDeviceVkImpl->GetLogicalDevice(); BuffView = LogicalDevice.CreateBufferView(ViewCI, ViewDesc.Name); } return BuffView; } +VkBuffer BufferVkImpl::GetVkBuffer()const +{ + if (m_VulkanBuffer != VK_NULL_HANDLE) + return m_VulkanBuffer; + else + { + VERIFY(m_Desc.Usage == USAGE_DYNAMIC, "Dynamic buffer expected"); + return GetDevice<RenderDeviceVkImpl>()->GetDynamicHeap().GetVkBuffer(); + } +} + +#ifdef _DEBUG +void BufferVkImpl::DbgVerifyDynamicAllocation(Uint32 ContextId)const +{ + const auto& DynAlloc = m_DynamicAllocations[ContextId]; + VERIFY(DynAlloc.pParentDynamicHeap != nullptr, "Dynamic buffer must be mapped before the first use"); + auto CurrentFrame = GetDevice<RenderDeviceVkImpl>()->GetCurrentFrameNumber(); + VERIFY(DynAlloc.dbgFrameNumber == CurrentFrame, "Dynamic allocation is out-of-date. Dynamic buffer \"", m_Desc.Name, "\" must be mapped in the same frame it is used."); +} +#endif + } diff --git a/Graphics/GraphicsEngineVulkan/src/DeviceContextVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/DeviceContextVkImpl.cpp index 84779d77..daf257b4 100644 --- a/Graphics/GraphicsEngineVulkan/src/DeviceContextVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/DeviceContextVkImpl.cpp @@ -76,39 +76,16 @@ namespace Diligent {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, Attribs.DynamicDescriptorPoolSize.NumStorageImageDescriptors}, {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, Attribs.DynamicDescriptorPoolSize.NumUniformTexelBufferDescriptors}, {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, Attribs.DynamicDescriptorPoolSize.NumStorageTexelBufferDescriptors}, - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, Attribs.DynamicDescriptorPoolSize.NumUniformBufferDescriptors }, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, Attribs.DynamicDescriptorPoolSize.NumStorageBufferDescriptors }, - //{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, Attribs.DynamicDescriptorPoolSize.NumUniformBufferDescriptors }, - //{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, Attribs.DynamicDescriptorPoolSize.NumStorageBufferDescriptors }, + //{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, Attribs.DynamicDescriptorPoolSize.NumUniformBufferDescriptors}, + //{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, Attribs.DynamicDescriptorPoolSize.NumStorageBufferDescriptors}, + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, Attribs.DynamicDescriptorPoolSize.NumUniformBufferDescriptors}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, Attribs.DynamicDescriptorPoolSize.NumStorageBufferDescriptors}, }, Attribs.DynamicDescriptorPoolSize.MaxDescriptorSets, }, m_NextCmdBuffNumber(0) { -#if 0 - auto *pVkDevice = pDeviceVkImpl->GetVkDevice(); - - Vk_COMMAND_SIGNATURE_DESC CmdSignatureDesc = {}; - Vk_INDIRECT_ARGUMENT_DESC IndirectArg = {}; - CmdSignatureDesc.NodeMask = 0; - CmdSignatureDesc.NumArgumentDescs = 1; - CmdSignatureDesc.pArgumentDescs = &IndirectArg; - - CmdSignatureDesc.ByteStride = sizeof(UINT)*4; - IndirectArg.Type = Vk_INDIRECT_ARGUMENT_TYPE_DRAW; - auto hr = pVkDevice->CreateCommandSignature(&CmdSignatureDesc, nullptr, __uuidof(m_pDrawIndirectSignature), reinterpret_cast<void**>(static_cast<IVkCommandSignature**>(&m_pDrawIndirectSignature)) ); - CHECK_D3D_RESULT_THROW(hr, "Failed to create indirect draw command signature") - - CmdSignatureDesc.ByteStride = sizeof(UINT)*5; - IndirectArg.Type = Vk_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED; - hr = pVkDevice->CreateCommandSignature(&CmdSignatureDesc, nullptr, __uuidof(m_pDrawIndexedIndirectSignature), reinterpret_cast<void**>(static_cast<IVkCommandSignature**>(&m_pDrawIndexedIndirectSignature)) ); - CHECK_D3D_RESULT_THROW(hr, "Failed to create draw indexed indirect command signature") - - CmdSignatureDesc.ByteStride = sizeof(UINT)*3; - IndirectArg.Type = Vk_INDIRECT_ARGUMENT_TYPE_DISPATCH; - hr = pVkDevice->CreateCommandSignature(&CmdSignatureDesc, nullptr, __uuidof(m_pDispatchIndirectSignature), reinterpret_cast<void**>(static_cast<IVkCommandSignature**>(&m_pDispatchIndirectSignature)) ); - CHECK_D3D_RESULT_THROW(hr, "Failed to create dispatch indirect command signature") -#endif + m_DynamicBufferOffsets.reserve(64); } DeviceContextVkImpl::~DeviceContextVkImpl() @@ -371,42 +348,35 @@ namespace Diligent VkBuffer vkVertexBuffers[MaxBufferSlots];// = {} VkDeviceSize Offsets[MaxBufferSlots]; VERIFY( m_NumVertexStreams <= MaxBufferSlots, "Too many buffers are being set" ); - //bool DynamicBufferPresent = false; + bool DynamicBufferPresent = false; for( UINT slot = 0; slot < m_NumVertexStreams; ++slot ) { auto &CurrStream = m_VertexStreams[slot]; - //auto &VBView = VBViews[Buff]; VERIFY( CurrStream.pBuffer, "Attempting to bind a null buffer for rendering" ); auto *pBufferVk = CurrStream.pBuffer.RawPtr<BufferVkImpl>(); -// if (pBufferVk->GetDesc().Usage == USAGE_DYNAMIC) -// { -// DynamicBufferPresent = true; -//#ifdef _DEBUG -// pBufferVk->DbgVerifyDynamicAllocation(m_ContextId); -//#endif -// } + if (pBufferVk->GetDesc().Usage == USAGE_DYNAMIC) + { + DynamicBufferPresent = true; +#ifdef _DEBUG + pBufferVk->DbgVerifyDynamicAllocation(m_ContextId); +#endif + } if(!pBufferVk->CheckAccessFlags(VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT)) BufferMemoryBarrier(*pBufferVk, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT); // Device context keeps strong references to all vertex buffers. - // When a buffer is unbound, a reference to Vk resource is added to the context, - // so there is no need to reference the resource here - //GraphicsCtx.AddReferencedObject(pVkResource); vkVertexBuffers[slot] = pBufferVk->GetVkBuffer(); - Offsets[slot] = CurrStream.Offset; - - ///VBView.BufferLocation = pBufferVk->GetGPUAddress(m_ContextId) + CurrStream.Offset; + Offsets[slot] = CurrStream.Offset + pBufferVk->GetDynamicOffset(m_ContextId); } //GraphCtx.FlushResourceBarriers(); if(m_NumVertexStreams > 0) m_CommandBuffer.BindVertexBuffers( 0, m_NumVertexStreams, vkVertexBuffers, Offsets ); - // GPU virtual address of a dynamic vertex buffer can change every time - // a draw command is invoked - m_State.CommittedVBsUpToDate = true;//!DynamicBufferPresent; + // GPU offset for a dynamic vertex buffer can change every time a draw command is invoked + m_State.CommittedVBsUpToDate = !DynamicBufferPresent; } @@ -439,7 +409,7 @@ namespace Diligent VERIFY(DrawAttribs.IndexType == VT_UINT16 || DrawAttribs.IndexType == VT_UINT32, "Unsupported index format. Only R16_UINT and R32_UINT are allowed."); VkIndexType vkIndexType = DrawAttribs.IndexType == VT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; - m_CommandBuffer.BindIndexBuffer(pBuffVk->GetVkBuffer(), m_IndexDataStartOffset, vkIndexType); + m_CommandBuffer.BindIndexBuffer(pBuffVk->GetVkBuffer(), m_IndexDataStartOffset + pBuffVk->GetDynamicOffset(m_ContextId), vkIndexType); } if(m_State.CommittedVBsUpToDate) @@ -486,9 +456,9 @@ namespace Diligent BufferMemoryBarrier(*pBufferVk, VK_ACCESS_INDIRECT_COMMAND_READ_BIT); if( DrawAttribs.IsIndexed ) - m_CommandBuffer.DrawIndexedIndirect(pBufferVk->GetVkBuffer(), DrawAttribs.IndirectDrawArgsOffset, 1, 0); + m_CommandBuffer.DrawIndexedIndirect(pBufferVk->GetVkBuffer(), pBufferVk->GetDynamicOffset(m_ContextId) + DrawAttribs.IndirectDrawArgsOffset, 1, 0); else - m_CommandBuffer.DrawIndirect(pBufferVk->GetVkBuffer(), DrawAttribs.IndirectDrawArgsOffset, 1, 0); + m_CommandBuffer.DrawIndirect(pBufferVk->GetVkBuffer(), pBufferVk->GetDynamicOffset(m_ContextId) + DrawAttribs.IndirectDrawArgsOffset, 1, 0); } } else @@ -989,6 +959,7 @@ namespace Diligent CopyRegion.srcOffset = Allocation.MemAllocation.UnalignedOffset; CopyRegion.dstOffset = DstOffset; CopyRegion.size = NumBytes; + VERIFY(pBuffVk->m_VulkanBuffer != VK_NULL_HANDLE, "Copy destination buffer must not be suballocated"); m_CommandBuffer.CopyBuffer(Allocation.vkBuffer, pBuffVk->GetVkBuffer(), 1, &CopyRegion); ++m_State.NumCommands; } @@ -1017,15 +988,11 @@ namespace Diligent if(!pDstBuffVk->CheckAccessFlags(VK_ACCESS_TRANSFER_WRITE_BIT)) BufferMemoryBarrier(*pDstBuffVk, VK_ACCESS_TRANSFER_WRITE_BIT); VkBufferCopy CopyRegion; - CopyRegion.srcOffset = SrcOffset; + CopyRegion.srcOffset = SrcOffset + pSrcBuffVk->GetDynamicOffset(m_ContextId); CopyRegion.dstOffset = DstOffset; CopyRegion.size = NumBytes; - //size_t DstDataStartByteOffset; - //auto *pVkDstBuff = pDstBuffVk->GetVkBuffer(DstDataStartByteOffset, m_ContextId); - //VERIFY(DstDataStartByteOffset == 0, "Dst buffer must not be suballocated"); - - //size_t SrcDataStartByteOffset; - //auto *pVkSrcBuff = pSrcBuffVk->GetVkBuffer(SrcDataStartByteOffset, m_ContextId); + VERIFY(pDstBuffVk->m_VulkanBuffer != VK_NULL_HANDLE, "Copy destination buffer must not be suballocated"); + VERIFY_EXPR(pDstBuffVk->GetDynamicOffset(m_ContextId) == 0); m_CommandBuffer.CopyBuffer(pSrcBuffVk->GetVkBuffer(), pDstBuffVk->GetVkBuffer(), 1, &CopyRegion); ++m_State.NumCommands; } @@ -1121,6 +1088,11 @@ namespace Diligent void DeviceContextVkImpl::FinishCommandList(class ICommandList **ppCommandList) { + if (m_CommandBuffer.GetState().RenderPass != VK_NULL_HANDLE) + { + m_CommandBuffer.EndRenderPass(); + } + auto vkCmdBuff = m_CommandBuffer.GetVkCmdBuffer(); auto err = vkEndCommandBuffer(vkCmdBuff); VERIFY(err == VK_SUCCESS, "Failed to end command buffer"); @@ -1252,6 +1224,8 @@ namespace Diligent VERIFY(!BufferVk.CheckAccessFlags(NewAccessFlags), "The buffer already has requested access flags"); EnsureVkCmdBuffer(); + VERIFY(BufferVk.m_VulkanBuffer != VK_NULL_HANDLE, "Cannot transition suballocated buffer"); + VERIFY_EXPR(BufferVk.GetDynamicOffset(m_ContextId) == 0); auto vkBuff = BufferVk.GetVkBuffer(); m_CommandBuffer.BufferMemoryBarrier(vkBuff, BufferVk.m_AccessFlags, NewAccessFlags); BufferVk.SetAccessFlags(NewAccessFlags); @@ -1283,4 +1257,10 @@ namespace Diligent UNEXPECTED("Unable to find dynamic allocation for this buffer"); } } + + VulkanDynamicAllocation DeviceContextVkImpl::AllocateDynamicSpace(Uint32 SizeInBytes) + { + auto *pDeviceVkImpl = m_pDevice.RawPtr<RenderDeviceVkImpl>(); + return pDeviceVkImpl->AllocateDynamicSpace(m_ContextId, SizeInBytes); + } } diff --git a/Graphics/GraphicsEngineVulkan/src/PipelineStateVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/PipelineStateVkImpl.cpp index 357c8b12..72d1b1c2 100644 --- a/Graphics/GraphicsEngineVulkan/src/PipelineStateVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/PipelineStateVkImpl.cpp @@ -380,7 +380,7 @@ PipelineStateVkImpl :: PipelineStateVkImpl(IReferenceCounters *pRefCounters, Ren PipelineStateVkImpl::~PipelineStateVkImpl() { - auto pDeviceVkImpl = ValidatedCast<RenderDeviceVkImpl>(GetDevice()); + auto pDeviceVkImpl = GetDevice<RenderDeviceVkImpl>(); pDeviceVkImpl->GetFramebufferCache().OnDestroyRenderPass(m_RenderPass); pDeviceVkImpl->SafeReleaseVkObject(std::move(m_RenderPass)); pDeviceVkImpl->SafeReleaseVkObject(std::move(m_Pipeline)); @@ -422,7 +422,7 @@ void PipelineStateVkImpl::BindShaderResources(IResourceMapping *pResourceMapping void PipelineStateVkImpl::CreateShaderResourceBinding(IShaderResourceBinding **ppShaderResourceBinding) { - auto *pRenderDeviceVk = ValidatedCast<RenderDeviceVkImpl>( GetDevice() ); + auto *pRenderDeviceVk = GetDevice<RenderDeviceVkImpl>(); auto &SRBAllocator = pRenderDeviceVk->GetSRBAllocator(); auto pResBindingVk = NEW_RC_OBJ(SRBAllocator, "ShaderResourceBindingVkImpl instance", ShaderResourceBindingVkImpl)(this, false); pResBindingVk->QueryInterface(IID_ShaderResourceBinding, reinterpret_cast<IObject**>(ppShaderResourceBinding)); diff --git a/Graphics/GraphicsEngineVulkan/src/RenderDeviceVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/RenderDeviceVkImpl.cpp index 6e98bd1b..08d248a6 100644 --- a/Graphics/GraphicsEngineVulkan/src/RenderDeviceVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/RenderDeviceVkImpl.cpp @@ -78,16 +78,24 @@ RenderDeviceVkImpl :: RenderDeviceVkImpl(IReferenceCounters* {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, CreationAttribs.MainDescriptorPoolSize.NumStorageImageDescriptors}, {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, CreationAttribs.MainDescriptorPoolSize.NumUniformTexelBufferDescriptors}, {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, CreationAttribs.MainDescriptorPoolSize.NumStorageTexelBufferDescriptors}, - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, CreationAttribs.MainDescriptorPoolSize.NumUniformBufferDescriptors }, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, CreationAttribs.MainDescriptorPoolSize.NumStorageBufferDescriptors }, - //{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, CreationAttribs.MainDescriptorPoolSize.NumUniformBufferDescriptors }, - //{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, CreationAttribs.MainDescriptorPoolSize.NumStorageBufferDescriptors }, + //{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, CreationAttribs.MainDescriptorPoolSize.NumUniformBufferDescriptors}, + //{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, CreationAttribs.MainDescriptorPoolSize.NumStorageBufferDescriptors}, + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, CreationAttribs.MainDescriptorPoolSize.NumUniformBufferDescriptors}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, CreationAttribs.MainDescriptorPoolSize.NumStorageBufferDescriptors}, }, CreationAttribs.MainDescriptorPoolSize.MaxDescriptorSets }, m_TransientCmdPoolMgr(*m_LogicalVkDevice, pCmdQueue->GetQueueFamilyIndex(), VK_COMMAND_POOL_CREATE_TRANSIENT_BIT), m_MemoryMgr("Global resource memory manager", *m_LogicalVkDevice, *m_PhysicalDevice, GetRawAllocator(), CreationAttribs.DeviceLocalMemoryPageSize, CreationAttribs.HostVisibleMemoryPageSize, CreationAttribs.DeviceLocalMemoryReserveSize, CreationAttribs.HostVisibleMemoryReserveSize), - m_ReleaseQueue(GetRawAllocator()) + m_ReleaseQueue(GetRawAllocator()), + m_DynamicHeap + { + GetRawAllocator(), + this, + CreationAttribs.ImmediateCtxDynamicHeapSize, + CreationAttribs.DeferredCtxDynamicHeapSize, + NumDeferredContexts + } { m_DeviceCaps.DevType = DeviceType::Vulkan; m_DeviceCaps.MajorVersion = 1; @@ -100,9 +108,11 @@ RenderDeviceVkImpl :: RenderDeviceVkImpl(IReferenceCounters* RenderDeviceVkImpl::~RenderDeviceVkImpl() { + // Explicitly destroy dynamic heap + m_DynamicHeap.Destroy(); // Finish current frame. This will release resources taken by previous frames, and // will move all stale resources to the release queues. The resources will not be - // release until next call to FinishFrame() + // release until the next call to FinishFrame() FinishFrame(false); // Wait for the GPU to complete all its operations IdleGPU(true); @@ -140,8 +150,8 @@ void RenderDeviceVkImpl::AllocateTransientCmdPool(VulkanUtilities::CommandPoolWr CmdBuffBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; CmdBuffBeginInfo.pNext = nullptr; CmdBuffBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; // Each recording of the command buffer will only be - // submitted once, and the command buffer will be reset - // and recorded again between each submission. + // submitted once, and the command buffer will be reset + // and recorded again between each submission. CmdBuffBeginInfo.pInheritanceInfo = nullptr; // Ignored for a primary command buffer vkBeginCommandBuffer(vkCmdBuff, &CmdBuffBeginInfo); } @@ -237,7 +247,7 @@ Uint64 RenderDeviceVkImpl::ExecuteCommandBuffer(const VkSubmitInfo &SubmitInfo, } -void RenderDeviceVkImpl::IdleGPU(bool ReleaseStaleObjects) +Uint64 RenderDeviceVkImpl::IdleGPU(bool ReleaseStaleObjects) { Uint64 SubmittedFenceValue = 0; Uint64 SubmittedCmdBuffNumber = 0; @@ -246,6 +256,7 @@ void RenderDeviceVkImpl::IdleGPU(bool ReleaseStaleObjects) // Lock the command queue to avoid other threads interfering with the GPU std::lock_guard<std::mutex> LockGuard(m_CmdQueueMutex); SubmittedFenceValue = m_pCommandQueue->GetNextFenceValue(); + // CommandQueueVkImpl::IdleGPU increments next fence value m_pCommandQueue->IdleGPU(); m_LogicalVkDevice->WaitIdle(); @@ -268,6 +279,8 @@ void RenderDeviceVkImpl::IdleGPU(bool ReleaseStaleObjects) auto CompletedFenceValue = SubmittedFenceValue; ProcessStaleResources(SubmittedCmdBuffNumber, SubmittedFenceValue, CompletedFenceValue); } + + return SubmittedFenceValue; } @@ -317,7 +330,7 @@ void RenderDeviceVkImpl::FinishFrame(bool ReleaseAllResources) // Lock the command queue to avoid other threads interfering with the GPU std::lock_guard<std::mutex> LockGuard(m_CmdQueueMutex); NextFenceValue = m_pCommandQueue->GetNextFenceValue(); - // Increment cmd list number while keeping queue locked. + // Increment cmd buffer number while keeping queue locked. // This guarantees that any Vk object released after the lock // is released, will be associated with the incremented cmd list number SubmittedCmdBuffNumber = m_NextCmdBuffNumber; @@ -327,9 +340,10 @@ void RenderDeviceVkImpl::FinishFrame(bool ReleaseAllResources) // Discard all remaining objects. This is important to do if there were // no command lists submitted during the frame. All stale resources will // be associated with the next fence value and thus will not be released - // until the next command list is finished by the GPU + // until the next command buffer is finished by the GPU ProcessStaleResources(SubmittedCmdBuffNumber, NextFenceValue, CompletedFenceValue); - + + m_DynamicHeap.FinishFrame(NextFenceValue, CompletedFenceValue); Atomics::AtomicIncrement(m_FrameNumber); } @@ -344,6 +358,19 @@ void RenderDeviceVkImpl::ProcessStaleResources(Uint64 SubmittedCmdBufferNumber, } +VulkanDynamicAllocation RenderDeviceVkImpl::AllocateDynamicSpace(Uint32 CtxId, Uint32 SizeInBytes) +{ + auto DynAlloc = m_DynamicHeap.Allocate(CtxId, SizeInBytes); + if (DynAlloc.pParentDynamicHeap == nullptr) + { + UNSUPPORTED("Failed to allocate dynamic memory"); + } +#ifdef _DEBUG + DynAlloc.dbgFrameNumber = m_FrameNumber; +#endif + return DynAlloc; +} + void RenderDeviceVkImpl::TestTextureFormat( TEXTURE_FORMAT TexFormat ) { auto &TexFormatInfo = m_TextureFormatsInfo[TexFormat]; @@ -551,18 +578,4 @@ void RenderDeviceVkImpl :: CreateSampler(const SamplerDesc& SamplerDesc, ISample ); } -#if 0 -DescriptorHeapAllocation RenderDeviceVkImpl :: AllocateDescriptor(Vk_DESCRIPTOR_HEAP_TYPE Type, UINT Count /*= 1*/) -{ - VERIFY(Type >= Vk_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && Type < Vk_DESCRIPTOR_HEAP_TYPE_NUM_TYPES, "Invalid heap type"); - return m_CPUDescriptorHeaps[Type].Allocate(Count); -} - -DescriptorHeapAllocation RenderDeviceVkImpl :: AllocateGPUDescriptors(Vk_DESCRIPTOR_HEAP_TYPE Type, UINT Count /*= 1*/) -{ - VERIFY(Type >= Vk_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && Type <= Vk_DESCRIPTOR_HEAP_TYPE_SAMPLER, "Invalid heap type"); - return m_GPUDescriptorHeaps[Type].Allocate(Count); -} -#endif - } diff --git a/Graphics/GraphicsEngineVulkan/src/SamplerVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/SamplerVkImpl.cpp index 0b6f4057..e6b481a3 100644 --- a/Graphics/GraphicsEngineVulkan/src/SamplerVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/SamplerVkImpl.cpp @@ -67,7 +67,7 @@ SamplerVkImpl::SamplerVkImpl(IReferenceCounters *pRefCounters, RenderDeviceVkImp SamplerVkImpl::~SamplerVkImpl() { - auto pDeviceVkImpl = ValidatedCast<RenderDeviceVkImpl>(GetDevice()); + auto pDeviceVkImpl = GetDevice<RenderDeviceVkImpl>(); pDeviceVkImpl->SafeReleaseVkObject(std::move(m_VkSampler)); } diff --git a/Graphics/GraphicsEngineVulkan/src/ShaderResourceBindingVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/ShaderResourceBindingVkImpl.cpp index 499b4997..c238beb2 100644 --- a/Graphics/GraphicsEngineVulkan/src/ShaderResourceBindingVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/ShaderResourceBindingVkImpl.cpp @@ -38,7 +38,7 @@ ShaderResourceBindingVkImpl::ShaderResourceBindingVkImpl( IReferenceCounters *pR auto *ppShaders = pPSO->GetShaders(); m_NumShaders = pPSO->GetNumShaders(); - auto *pRenderDeviceVkImpl = ValidatedCast<RenderDeviceVkImpl>(pPSO->GetDevice()); + auto *pRenderDeviceVkImpl = pPSO->GetDevice<RenderDeviceVkImpl>(); // This will only allocate memory and initialize descriptor sets in the resource cache // Resources will be initialized by InitializeResourceMemoryInCache() pPSO->GetPipelineLayout().InitResourceCache(pRenderDeviceVkImpl, m_ShaderResourceCache, pPSO->GetResourceCacheDataAllocator()); diff --git a/Graphics/GraphicsEngineVulkan/src/TextureViewVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/TextureViewVkImpl.cpp index 432ead97..4b2b8db1 100644 --- a/Graphics/GraphicsEngineVulkan/src/TextureViewVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/TextureViewVkImpl.cpp @@ -42,7 +42,7 @@ TextureViewVkImpl::TextureViewVkImpl( IReferenceCounters *pRefCounters, TextureViewVkImpl::~TextureViewVkImpl() { - auto *pDeviceVkImpl = ValidatedCast<RenderDeviceVkImpl>(GetDevice()); + auto *pDeviceVkImpl = GetDevice<RenderDeviceVkImpl>(); if(m_Desc.ViewType == TEXTURE_VIEW_DEPTH_STENCIL || m_Desc.ViewType == TEXTURE_VIEW_RENDER_TARGET) pDeviceVkImpl->GetFramebufferCache().OnDestroyImageView(m_ImageView); pDeviceVkImpl->SafeReleaseVkObject(std::move(m_ImageView)); diff --git a/Graphics/GraphicsEngineVulkan/src/TextureVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/TextureVkImpl.cpp index 9272fb0c..2df6b8ff 100644 --- a/Graphics/GraphicsEngineVulkan/src/TextureVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/TextureVkImpl.cpp @@ -443,7 +443,7 @@ void TextureVkImpl::CreateViewInternal( const struct TextureViewDesc &ViewDesc, try { - auto *pDeviceVkImpl = ValidatedCast<RenderDeviceVkImpl>(GetDevice()); + auto *pDeviceVkImpl = GetDevice<RenderDeviceVkImpl>(); auto &TexViewAllocator = pDeviceVkImpl->GetTexViewObjAllocator(); VERIFY( &TexViewAllocator == &m_dbgTexViewObjAllocator, "Texture view allocator does not match allocator provided during texture initialization" ); @@ -470,7 +470,7 @@ void TextureVkImpl::CreateViewInternal( const struct TextureViewDesc &ViewDesc, TextureVkImpl :: ~TextureVkImpl() { - auto *pDeviceVkImpl = ValidatedCast<RenderDeviceVkImpl>(GetDevice()); + auto *pDeviceVkImpl = GetDevice<RenderDeviceVkImpl>(); // Vk object can only be destroyed when it is no longer used by the GPU // Wrappers for external texture will not be destroyed as they are created with null device pointer pDeviceVkImpl->SafeReleaseVkObject(std::move(m_VulkanImage)); @@ -688,7 +688,7 @@ VulkanUtilities::ImageViewWrapper TextureVkImpl::CreateImageView(TextureViewDesc ImageViewCI.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; } - auto *pRenderDeviceVk = static_cast<RenderDeviceVkImpl*>(GetDevice()); + auto *pRenderDeviceVk = GetDevice<RenderDeviceVkImpl>(); const auto& LogicalDevice = pRenderDeviceVk->GetLogicalDevice(); std::string ViewName = "Image view for \'"; diff --git a/Graphics/GraphicsEngineVulkan/src/VulkanDynamicHeap.cpp b/Graphics/GraphicsEngineVulkan/src/VulkanDynamicHeap.cpp index ae219873..51c71604 100644 --- a/Graphics/GraphicsEngineVulkan/src/VulkanDynamicHeap.cpp +++ b/Graphics/GraphicsEngineVulkan/src/VulkanDynamicHeap.cpp @@ -27,46 +27,59 @@ namespace Diligent { -VulkanRingBuffer::VulkanRingBuffer(size_t MaxSize, IMemoryAllocator &Allocator, RenderDeviceVkImpl* pDeviceVk) : - RingBuffer(MaxSize, Allocator), - m_pDeviceVk(pDeviceVk) + +static uint32_t GetDefaultAlignment(const VulkanUtilities::VulkanPhysicalDevice& PhysicalDevice) { + const auto& Props = PhysicalDevice.GetProperties(); + const auto& Limits = Props.limits; + return std::max(std::max(Limits.minUniformBufferOffsetAlignment, Limits.minTexelBufferOffsetAlignment), Limits.minStorageBufferOffsetAlignment); +} + +VulkanDynamicHeap::VulkanDynamicHeap(IMemoryAllocator& Allocator, + RenderDeviceVkImpl* pDeviceVk, + Uint32 ImmediateCtxHeapSize, + Uint32 DeferredCtxHeapSize, + Uint32 DeferredCtxCount) : + m_pDeviceVk(pDeviceVk), + m_DefaultAlignment(GetDefaultAlignment(pDeviceVk->GetPhysicalDevice())) +{ + Uint32 BufferSize = ImmediateCtxHeapSize + DeferredCtxHeapSize * DeferredCtxCount; VkBufferCreateInfo VkBuffCI = {}; VkBuffCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; VkBuffCI.pNext = nullptr; VkBuffCI.flags = 0; // VK_BUFFER_CREATE_SPARSE_BINDING_BIT, VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT, VK_BUFFER_CREATE_SPARSE_ALIASED_BIT - VkBuffCI.size = MaxSize; - VkBuffCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + VkBuffCI.size = BufferSize; + VkBuffCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; VkBuffCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VkBuffCI.queueFamilyIndexCount = 0; VkBuffCI.pQueueFamilyIndices = nullptr; const auto& LogicalDevice = pDeviceVk->GetLogicalDevice(); - m_VkBuffer = LogicalDevice.CreateBuffer(VkBuffCI, "Upload buffer"); + m_VkBuffer = LogicalDevice.CreateBuffer(VkBuffCI, "Dynamic heap buffer"); VkMemoryRequirements MemReqs = LogicalDevice.GetBufferMemoryRequirements(m_VkBuffer); const auto& PhysicalDevice = pDeviceVk->GetPhysicalDevice(); - + VkMemoryAllocateInfo MemAlloc = {}; MemAlloc.pNext = nullptr; MemAlloc.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; MemAlloc.allocationSize = MemReqs.size; - + // VK_MEMORY_PROPERTY_HOST_COHERENT_BIT bit specifies that the host cache management commands vkFlushMappedMemoryRanges // and vkInvalidateMappedMemoryRanges are NOT needed to flush host writes to the device or make device writes visible // to the host (10.2) MemAlloc.memoryTypeIndex = PhysicalDevice.GetMemoryTypeIndex(MemReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); VERIFY(MemAlloc.memoryTypeIndex != VulkanUtilities::VulkanPhysicalDevice::InvalidMemoryTypeIndex, - "Vulkan spec requires that for a VkBuffer not created with the " - "VK_BUFFER_CREATE_SPARSE_BINDING_BIT bit set, the memoryTypeBits member always contains at least one bit set " - "corresponding to a VkMemoryType with a propertyFlags that has both the VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT bit " - "and the VK_MEMORY_PROPERTY_HOST_COHERENT_BIT bit set(11.6)"); + "Vulkan spec requires that for a VkBuffer not created with the " + "VK_BUFFER_CREATE_SPARSE_BINDING_BIT bit set, the memoryTypeBits member always contains at least one bit set " + "corresponding to a VkMemoryType with a propertyFlags that has both the VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT bit " + "and the VK_MEMORY_PROPERTY_HOST_COHERENT_BIT bit set(11.6)"); m_BufferMemory = LogicalDevice.AllocateDeviceMemory(MemAlloc, "Host-visible memory for upload buffer"); - + void *Data = nullptr; - auto err = LogicalDevice.MapMemory(m_BufferMemory, + auto err = LogicalDevice.MapMemory(m_BufferMemory, 0, // offset MemAlloc.allocationSize, 0, // flags, reserved for future use @@ -77,14 +90,21 @@ VulkanRingBuffer::VulkanRingBuffer(size_t MaxSize, IMemoryAllocator &Allocator, err = LogicalDevice.BindBufferMemory(m_VkBuffer, m_BufferMemory, 0 /*offset*/); CHECK_VK_ERROR_AND_THROW(err, "Failed to bind bufer memory"); - LOG_INFO_MESSAGE("GPU ring buffer created. Size: ", MaxSize); + LOG_INFO_MESSAGE("GPU dynamic heap created. Total buffer size: ", BufferSize); + + m_RingBuffers.reserve(1 + DeferredCtxCount); + Uint32 BaseOffset = 0; + for(Uint32 ctx = 0; ctx < 1 + DeferredCtxCount; ++ctx) + { + Uint32 HeapSize = ctx == 0 ? ImmediateCtxHeapSize : DeferredCtxHeapSize; + m_RingBuffers.emplace_back( HeapSize, Allocator, BaseOffset ); + } } -void VulkanRingBuffer::Destroy() +void VulkanDynamicHeap::Destroy() { - if(m_VkBuffer) + if (m_VkBuffer) { - LOG_INFO_MESSAGE("Destroying GPU ring buffer. Size: ", GetMaxSize()); m_pDeviceVk->GetLogicalDevice().UnmapMemory(m_BufferMemory); m_pDeviceVk->SafeReleaseVkObject(std::move(m_VkBuffer)); m_pDeviceVk->SafeReleaseVkObject(std::move(m_BufferMemory)); @@ -92,27 +112,27 @@ void VulkanRingBuffer::Destroy() m_CPUAddress = nullptr; } -VulkanRingBuffer::~VulkanRingBuffer() +VulkanDynamicHeap::~VulkanDynamicHeap() { - Destroy(); + VERIFY(m_BufferMemory == VK_NULL_HANDLE && m_VkBuffer == VK_NULL_HANDLE, "Vulkan resources must be explcitly released with Destroy()"); } -VulkanDynamicHeap::VulkanDynamicHeap(IMemoryAllocator &Allocator, RenderDeviceVkImpl* pDevice, size_t InitialSize) : - m_Allocator(Allocator), - m_pDeviceVk(pDevice), - m_RingBuffers(STD_ALLOCATOR_RAW_MEM(VulkanRingBuffer, GetRawAllocator(), "Allocator for vector<VulkanRingBuffer>")) +VulkanDynamicAllocation VulkanDynamicHeap::Allocate(Uint32 CtxId, size_t SizeInBytes, size_t Alignment /*= 0*/) { - m_RingBuffers.emplace_back(InitialSize, Allocator, pDevice); -} + VERIFY_EXPR(CtxId < m_RingBuffers.size()); - -VulkanDynamicAllocation VulkanDynamicHeap::Allocate(size_t SizeInBytes, size_t Alignment /*= DEFAULT_ALIGN*/) -{ if(Alignment == 0) - Alignment = DEFAULT_ALIGN; - // Every device context has its own upload heap, so there is no need to lock + Alignment = m_DefaultAlignment; + + auto& RingBuff = m_RingBuffers[CtxId].RingBuff; + if (SizeInBytes > RingBuff.GetMaxSize()) + { + LOG_ERROR("Requested dynamic allocation size ", SizeInBytes, " exceeds maximum ring buffer size ", RingBuff.GetMaxSize(), ". The app should increase dynamic heap size."); + return VulkanDynamicAllocation{}; + } + // Every device context uses its own upload heap, so there is no need to lock //std::lock_guard<std::mutex> Lock(m_Mutex); // @@ -125,18 +145,13 @@ VulkanDynamicAllocation VulkanDynamicHeap::Allocate(size_t SizeInBytes, size_t A VERIFY_EXPR((AlignmentMask & Alignment) == 0); // Align the allocation const size_t AlignedSize = (SizeInBytes + AlignmentMask) & ~AlignmentMask; - auto DynAlloc = m_RingBuffers.back().Allocate(AlignedSize); - if (DynAlloc.vkBuffer == VK_NULL_HANDLE) + auto Offset = RingBuff.Allocate(AlignedSize); + while(Offset == RingBuffer::InvalidOffset) { - auto NewMaxSize = m_RingBuffers.back().GetMaxSize() * 2; - while(NewMaxSize < AlignedSize)NewMaxSize*=2; - m_RingBuffers.emplace_back(NewMaxSize, m_Allocator, m_pDeviceVk); - DynAlloc = m_RingBuffers.back().Allocate(AlignedSize); + VulkanDynamicAllocation{}; } -#ifdef _DEBUG - DynAlloc.FrameNum = m_pDeviceVk->GetCurrentFrameNumber(); -#endif - return DynAlloc; + + return VulkanDynamicAllocation{ *this, m_RingBuffers[CtxId].BaseOffset + Offset, SizeInBytes }; } void VulkanDynamicHeap::FinishFrame(Uint64 FenceValue, Uint64 LastCompletedFenceValue) @@ -149,20 +164,11 @@ void VulkanDynamicHeap::FinishFrame(Uint64 FenceValue, Uint64 LastCompletedFence // across several frames! // - size_t NumBuffsToDelete = 0; - for(size_t Ind = 0; Ind < m_RingBuffers.size(); ++Ind) + for (auto& RingBuff : m_RingBuffers) { - auto &RingBuff = m_RingBuffers[Ind]; - RingBuff.FinishCurrentFrame(FenceValue); - RingBuff.ReleaseCompletedFrames(LastCompletedFenceValue); - if ( NumBuffsToDelete == Ind && Ind < m_RingBuffers.size()-1 && RingBuff.IsEmpty()) - { - ++NumBuffsToDelete; - } + RingBuff.RingBuff.FinishCurrentFrame(FenceValue); + RingBuff.RingBuff.ReleaseCompletedFrames(LastCompletedFenceValue); } - - if(NumBuffsToDelete) - m_RingBuffers.erase(m_RingBuffers.begin(), m_RingBuffers.begin()+NumBuffsToDelete); } } |
