From 775b164f929aa06ee4723f72e51eaee9c8a16c09 Mon Sep 17 00:00:00 2001 From: Egor Yusov Date: Wed, 6 Jun 2018 22:22:10 -0700 Subject: Reworked dynamic upload heap to be managed by every context and to release resources safely --- .../include/CommandListVkImpl.h | 20 +++- .../include/DeviceContextVkImpl.h | 18 +++- .../include/RenderDeviceVkImpl.h | 10 +- .../src/DeviceContextVkImpl.cpp | 119 +++++++++++++++------ .../src/RenderDeviceVkImpl.cpp | 91 ++++++---------- 5 files changed, 149 insertions(+), 109 deletions(-) (limited to 'Graphics/GraphicsEngineVulkan') diff --git a/Graphics/GraphicsEngineVulkan/include/CommandListVkImpl.h b/Graphics/GraphicsEngineVulkan/include/CommandListVkImpl.h index 0ed4b97e..d309b49e 100644 --- a/Graphics/GraphicsEngineVulkan/include/CommandListVkImpl.h +++ b/Graphics/GraphicsEngineVulkan/include/CommandListVkImpl.h @@ -37,10 +37,15 @@ class CommandListVkImpl : public CommandListBase { public: typedef CommandListBase TCommandListBase; - CommandListVkImpl(IReferenceCounters *pRefCounters, IRenderDevice *pDevice, IDeviceContext *pDeferredCtx, VkCommandBuffer vkCmdBuff) : - TCommandListBase(pRefCounters, pDevice), - m_pDeferredCtx(pDeferredCtx), - m_vkCmdBuff(vkCmdBuff) + CommandListVkImpl(IReferenceCounters* pRefCounters, + IRenderDevice* pDevice, + IDeviceContext* pDeferredCtx, + VkCommandBuffer vkCmdBuff, + Uint64 CommandListNumber) : + TCommandListBase (pRefCounters, pDevice), + m_pDeferredCtx (pDeferredCtx), + m_vkCmdBuff (vkCmdBuff), + m_CommandBufferNumber(CommandListNumber) { } @@ -49,16 +54,21 @@ public: VERIFY(m_vkCmdBuff == VK_NULL_HANDLE && !m_pDeferredCtx, "Destroying command list that was never executed"); } - void Close(VkCommandBuffer& CmdBuff, RefCntAutoPtr& pDeferredCtx) + void Close(VkCommandBuffer& CmdBuff, + RefCntAutoPtr& pDeferredCtx, + Uint64& CommandBufferNumber) { CmdBuff = m_vkCmdBuff; m_vkCmdBuff = VK_NULL_HANDLE; pDeferredCtx = std::move(m_pDeferredCtx); + CommandBufferNumber = m_CommandBufferNumber; + 
m_CommandBufferNumber = 0; } private: RefCntAutoPtr m_pDeferredCtx; VkCommandBuffer m_vkCmdBuff; + Uint64 m_CommandBufferNumber; // Command buffer number in the deferred context that recorded this command list }; } diff --git a/Graphics/GraphicsEngineVulkan/include/DeviceContextVkImpl.h b/Graphics/GraphicsEngineVulkan/include/DeviceContextVkImpl.h index 0cc0ebb7..ccdc62ee 100644 --- a/Graphics/GraphicsEngineVulkan/include/DeviceContextVkImpl.h +++ b/Graphics/GraphicsEngineVulkan/include/DeviceContextVkImpl.h @@ -34,6 +34,7 @@ #include "VulkanUtilities/VulkanCommandBuffer.h" #include "VulkanUtilities/VulkanUploadHeap.h" #include "VulkanDynamicHeap.h" +#include "ResourceReleaseQueue.h" #ifdef _DEBUG # define VERIFY_CONTEXT_BINDINGS @@ -136,6 +137,8 @@ public: return m_CommandBuffer; } + void FinishFrame(Uint64 CompletedFenceValue); + private: void CommitRenderPassAndFramebuffer(class PipelineStateVkImpl *pPipelineStateVk); void CommitVkVertexBuffers(); @@ -145,9 +148,10 @@ private: void CommitScissorRects(); inline void EnsureVkCmdBuffer(); - inline void DisposeVkCmdBuffer(VkCommandBuffer vkCmdBuff); - inline void DisposeCurrentCmdBuffer(); - + inline void DisposeVkCmdBuffer(VkCommandBuffer vkCmdBuff, Uint64 FenceValue); + inline void DisposeCurrentCmdBuffer(Uint64 FenceValue); + void ReleaseStaleContextResources(Uint64 SubmittedCmdBufferNumber, Uint64 SubmittedFenceValue, Uint64 CompletedFenceValue); + VulkanUtilities::VulkanCommandBuffer m_CommandBuffer; const Uint32 m_NumCommandsToFlush = 192; @@ -162,7 +166,6 @@ private: Uint32 NumCommands = 0; }m_State; - #if 0 GenerateMipsHelper m_MipsGenerator; #endif @@ -180,6 +183,13 @@ private: std::vector m_SignalSemaphores; std::unordered_map m_UploadAllocations; + ResourceReleaseQueue m_ReleaseQueue; + + VulkanUtilities::VulkanUploadHeap m_UploadHeap; + + // Number of the command buffer currently being recorded by the context and that will + // be submitted next + Atomics::AtomicInt64 m_NextCmdBuffNumber; }; } diff --git 
a/Graphics/GraphicsEngineVulkan/include/RenderDeviceVkImpl.h b/Graphics/GraphicsEngineVulkan/include/RenderDeviceVkImpl.h index 6b462a99..331e64bf 100644 --- a/Graphics/GraphicsEngineVulkan/include/RenderDeviceVkImpl.h +++ b/Graphics/GraphicsEngineVulkan/include/RenderDeviceVkImpl.h @@ -98,7 +98,8 @@ public: void IdleGPU(bool ReleaseStaleObjects); // pImmediateCtx parameter is only used to make sure the command buffer is submitted from the immediate context - void ExecuteCommandBuffer(const VkSubmitInfo &SubmitInfo, class DeviceContextVkImpl* pImmediateCtx); + // The method returns fence value associated with the submitted command buffer + Uint64 ExecuteCommandBuffer(const VkSubmitInfo &SubmitInfo, class DeviceContextVkImpl* pImmediateCtx); void AllocateTransientCmdPool(VulkanUtilities::CommandPoolWrapper& CmdPool, VkCommandBuffer& vkCmdBuff, const Char* DebugPoolName = nullptr); void ExecuteAndDisposeTransientCmdBuff(VkCommandBuffer vkCmdBuff, VulkanUtilities::CommandPoolWrapper&& CmdPool); @@ -123,11 +124,6 @@ public: return m_DescriptorPools[1 + CtxId].Allocate(SetLayout); } - VulkanUtilities::VulkanUploadAllocation AllocateUploadSpace(Uint32 CtxId, size_t Size) - { - return m_UploadHeaps[CtxId].Allocate(Size); - } - std::shared_ptr GetVulkanInstance()const{return m_VulkanInstance;} const VulkanUtilities::VulkanPhysicalDevice &GetPhysicalDevice(){return *m_PhysicalDevice;} const auto &GetLogicalDevice(){return *m_LogicalVkDevice;} @@ -195,8 +191,6 @@ private: // [2+] - Deferred context dynamic descriptor pool std::vector > m_DescriptorPools; - std::vector> m_UploadHeaps; - // These one-time command pools are used by buffer and texture constructors to // issue copy commands. Vulkan requires that every command pool is used by one thread // at a time, so every constructor must allocate command buffer from its own pool. 
diff --git a/Graphics/GraphicsEngineVulkan/src/DeviceContextVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/DeviceContextVkImpl.cpp index cc41a10c..8e54e049 100644 --- a/Graphics/GraphicsEngineVulkan/src/DeviceContextVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/DeviceContextVkImpl.cpp @@ -35,14 +35,36 @@ namespace Diligent { - DeviceContextVkImpl::DeviceContextVkImpl( IReferenceCounters *pRefCounters, RenderDeviceVkImpl *pDeviceVkImpl, bool bIsDeferred, const EngineVkAttribs &Attribs, Uint32 ContextId) : - TDeviceContextBase(pRefCounters, pDeviceVkImpl, bIsDeferred), - m_NumCommandsToFlush(bIsDeferred ? std::numeric_limits::max() : Attribs.NumCommandsToFlushCmdBuffer), + DeviceContextVkImpl::DeviceContextVkImpl( IReferenceCounters* pRefCounters, + RenderDeviceVkImpl* pDeviceVkImpl, + bool bIsDeferred, + const EngineVkAttribs& Attribs, + Uint32 ContextId) : + TDeviceContextBase{pRefCounters, pDeviceVkImpl, bIsDeferred}, + m_NumCommandsToFlush{bIsDeferred ? std::numeric_limits::max() : Attribs.NumCommandsToFlushCmdBuffer}, /*m_MipsGenerator(pDeviceVkImpl->GetVkDevice()),*/ - m_CmdListAllocator(GetRawAllocator(), sizeof(CommandListVkImpl), 64 ), - m_ContextId(ContextId), + m_CmdListAllocator{ GetRawAllocator(), sizeof(CommandListVkImpl), 64 }, + m_ContextId{ContextId}, // Command pools for deferred contexts must be thread safe because finished command buffers are executed and released from another thread - m_CmdPool(pDeviceVkImpl->GetLogicalDevice().GetSharedPtr(), pDeviceVkImpl->GetCmdQueue()->GetQueueFamilyIndex(), VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, bIsDeferred) + m_CmdPool + { + pDeviceVkImpl->GetLogicalDevice().GetSharedPtr(), + pDeviceVkImpl->GetCmdQueue()->GetQueueFamilyIndex(), + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + bIsDeferred + }, + m_ReleaseQueue{GetRawAllocator()}, + // Upload heap must always be thread-safe as Finish() may be called from another 
thread + m_UploadHeap + { + bIsDeferred ? "Upload heap for deferred context" : "Upload heap for immediate context", + pDeviceVkImpl->GetLogicalDevice(), + pDeviceVkImpl->GetPhysicalDevice(), + GetRawAllocator(), + bIsDeferred ? Attribs.DeferredCtxUploadHeapPageSize : Attribs.ImmediateCtxUploadHeapPageSize, + bIsDeferred ? Attribs.DeferredCtxUploadHeapReserveSize : Attribs.ImmediateCtxUploadHeapReserveSize + }, + m_NextCmdBuffNumber(0) { #if 0 auto *pVkDevice = pDeviceVkImpl->GetVkDevice(); @@ -72,9 +94,13 @@ namespace Diligent DeviceContextVkImpl::~DeviceContextVkImpl() { + auto *pDeviceVkImpl = m_pDevice.RawPtr(); if(m_bIsDeferred) { - DisposeCurrentCmdBuffer(); + DisposeCurrentCmdBuffer(pDeviceVkImpl->GetNextFenceValue()); + // There must be no resources in the stale resource list. All outstanding resources (if any) must be in the + // release queue and must be now released + ReleaseStaleContextResources(m_NextCmdBuffNumber, pDeviceVkImpl->GetNextFenceValue(), pDeviceVkImpl->GetCompletedFenceValue()); } else { @@ -85,7 +111,7 @@ namespace Diligent } auto VkCmdPool = m_CmdPool.Release(); - m_pDevice.RawPtr()->SafeReleaseVkObject(std::move(VkCmdPool)); + pDeviceVkImpl->SafeReleaseVkObject(std::move(VkCmdPool)); } IMPLEMENT_QUERY_INTERFACE( DeviceContextVkImpl, IID_DeviceContextVk, TDeviceContextBase ) @@ -103,20 +129,19 @@ namespace Diligent } } - void DeviceContextVkImpl::DisposeVkCmdBuffer(VkCommandBuffer vkCmdBuff) + void DeviceContextVkImpl::DisposeVkCmdBuffer(VkCommandBuffer vkCmdBuff, Uint64 FenceValue) { VERIFY_EXPR(vkCmdBuff != VK_NULL_HANDLE); - auto pDeviceVkImpl = m_pDevice.RawPtr(); - m_CmdPool.DisposeCommandBuffer(vkCmdBuff, pDeviceVkImpl->GetNextFenceValue()); + m_CmdPool.DisposeCommandBuffer(vkCmdBuff, FenceValue); } - inline void DeviceContextVkImpl::DisposeCurrentCmdBuffer() + inline void DeviceContextVkImpl::DisposeCurrentCmdBuffer(Uint64 FenceValue) { VERIFY(m_CommandBuffer.GetState().RenderPass == VK_NULL_HANDLE, "Disposing command buffer with 
unifinished render pass"); auto vkCmdBuff = m_CommandBuffer.GetVkCmdBuffer(); if(vkCmdBuff != VK_NULL_HANDLE) { - DisposeVkCmdBuffer(vkCmdBuff); + DisposeVkCmdBuffer(vkCmdBuff, FenceValue); m_CommandBuffer.Reset(); } } @@ -723,6 +748,19 @@ namespace Diligent ++m_State.NumCommands; } + void DeviceContextVkImpl::FinishFrame(Uint64 CompletedFenceValue) + { + m_ReleaseQueue.Purge(CompletedFenceValue); + m_UploadHeap.ShrinkMemory(); + } + + void DeviceContextVkImpl::ReleaseStaleContextResources(Uint64 SubmittedCmdBufferNumber, Uint64 SubmittedFenceValue, Uint64 CompletedFenceValue) + { + m_ReleaseQueue.DiscardStaleResources(SubmittedCmdBufferNumber, SubmittedFenceValue); + m_ReleaseQueue.Purge(CompletedFenceValue); + m_UploadHeap.ShrinkMemory(); + } + void DeviceContextVkImpl::Flush() { VERIFY(!m_bIsDeferred, "Flush() should only be called for immediate contexts"); @@ -760,19 +798,23 @@ namespace Diligent // Submit command buffer even if there are no commands to release stale resources. //if(SubmitInfo.commandBufferCount != 0 || SubmitInfo.waitSemaphoreCount !=0 || SubmitInfo.signalSemaphoreCount != 0) - { - pDeviceVkImpl->ExecuteCommandBuffer(SubmitInfo, this); - } - + auto SubmittedFenceValue = pDeviceVkImpl->ExecuteCommandBuffer(SubmitInfo, this); + m_WaitSemaphores.clear(); m_WaitDstStageMasks.clear(); m_SignalSemaphores.clear(); if (vkCmdBuff != VK_NULL_HANDLE) { - DisposeCurrentCmdBuffer(); + DisposeCurrentCmdBuffer(SubmittedFenceValue); } + // Release temporary resources that were used by this context while recording the last command buffer + auto SubmittedCmdBuffNumber = m_NextCmdBuffNumber; + Atomics::AtomicIncrement(m_NextCmdBuffNumber); + auto CompletedFenceValue = pDeviceVkImpl->GetCompletedFenceValue(); + ReleaseStaleContextResources(SubmittedCmdBuffNumber, SubmittedFenceValue, CompletedFenceValue); + m_State = ContextState{}; m_CommandBuffer.Reset(); m_pPipelineState.Release(); @@ -933,12 +975,14 @@ namespace Diligent { VERIFY(pBuffVk->GetDesc().Usage != 
USAGE_DYNAMIC, "Dynamic buffers must be updated via Map()"); VERIFY_EXPR( static_cast(NumBytes) == NumBytes ); - auto *pDeviceVkImpl = m_pDevice.RawPtr(); - auto TmpSpace = pDeviceVkImpl->AllocateUploadSpace(m_ContextId, NumBytes); + auto TmpSpace = m_UploadHeap.Allocate(NumBytes); auto CPUAddress = TmpSpace.MemAllocation.Page->GetCPUMemory(); memcpy(reinterpret_cast(CPUAddress) + TmpSpace.MemAllocation.UnalignedOffset, pData, static_cast(NumBytes)); UpdateBufferRegion(pBuffVk, TmpSpace, DstOffset, NumBytes); - pDeviceVkImpl->SafeReleaseVkObject(std::move(TmpSpace)); + // The allocation will stay in the queue until the command buffer from this context is submitted + // to the queue. We cannot use the device's release queue as other contexts may interfere with + // the release order + m_ReleaseQueue.SafeReleaseResource(std::move(TmpSpace), m_NextCmdBuffNumber); } void DeviceContextVkImpl::CopyBufferRegion(BufferVkImpl *pSrcBuffVk, BufferVkImpl *pDstBuffVk, Uint64 SrcOffset, Uint64 DstOffset, Uint64 NumBytes) @@ -1060,11 +1104,13 @@ namespace Diligent VERIFY(err == VK_SUCCESS, "Failed to end command buffer"); CommandListVkImpl *pCmdListVk( NEW_RC_OBJ(m_CmdListAllocator, "CommandListVkImpl instance", CommandListVkImpl) - (m_pDevice, this, vkCmdBuff) ); + (m_pDevice, this, vkCmdBuff, m_NextCmdBuffNumber) ); pCmdListVk->QueryInterface( IID_CommandList, reinterpret_cast(ppCommandList) ); m_CommandBuffer.SetVkCmdBuffer(VK_NULL_HANDLE); - //Flush(); + + // Increment command buffer number, but do not release any resources until the command list is executed + Atomics::AtomicIncrement(m_NextCmdBuffNumber); m_CommandBuffer.Reset(); m_State = ContextState{}; @@ -1108,17 +1154,26 @@ namespace Diligent CommandListVkImpl* pCmdListVk = ValidatedCast(pCommandList); VkCommandBuffer vkCmdBuff = VK_NULL_HANDLE; RefCntAutoPtr pDeferredCtx; - pCmdListVk->Close(vkCmdBuff, pDeferredCtx); + Uint64 DeferredCtxCmdBuffNumber = 0; + pCmdListVk->Close(vkCmdBuff, pDeferredCtx, 
DeferredCtxCmdBuffNumber); VERIFY(vkCmdBuff != VK_NULL_HANDLE, "Trying to execute empty command buffer"); VERIFY_EXPR(pDeferredCtx); VkSubmitInfo SubmitInfo = {}; SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; SubmitInfo.commandBufferCount = 1; SubmitInfo.pCommandBuffers = &vkCmdBuff; - m_pDevice.RawPtr()->ExecuteCommandBuffer(SubmitInfo, this); + auto pDeviceVkImpl = m_pDevice.RawPtr(); + auto SubmittedFenceValue = pDeviceVkImpl->ExecuteCommandBuffer(SubmitInfo, this); + + auto pDeferredCtxVkImpl = pDeferredCtx.RawPtr(); // It is OK to dispose command buffer from another thread. We are not going to // record any commands and only need to add the buffer to the queue - pDeferredCtx.RawPtr()->DisposeVkCmdBuffer(vkCmdBuff); + pDeferredCtxVkImpl->DisposeVkCmdBuffer(vkCmdBuff, SubmittedFenceValue); + // We can now release all temporary resources in the deferred context associated with the submitted command list + auto CompletedFenceValue = pDeviceVkImpl->GetCompletedFenceValue(); + pDeferredCtxVkImpl->ReleaseStaleContextResources(DeferredCtxCmdBuffNumber, SubmittedFenceValue, CompletedFenceValue); + + m_ReleaseQueue.Purge(CompletedFenceValue); } void DeviceContextVkImpl::TransitionImageLayout(ITexture *pTexture, VkImageLayout NewLayout) @@ -1183,7 +1238,7 @@ namespace Diligent void* DeviceContextVkImpl::AllocateUploadSpace(BufferVkImpl* pBuffer, size_t NumBytes) { VERIFY(m_UploadAllocations.find(pBuffer) == m_UploadAllocations.end(), "Upload space has already been allocated for this buffer"); - auto UploadAllocation = m_pDevice.RawPtr()->AllocateUploadSpace(m_ContextId, NumBytes); + auto UploadAllocation = m_UploadHeap.Allocate(NumBytes); auto *CPUAddress = reinterpret_cast(UploadAllocation.MemAllocation.Page->GetCPUMemory()) + UploadAllocation.MemAllocation.UnalignedOffset; m_UploadAllocations.emplace(pBuffer, std::move(UploadAllocation)); return CPUAddress; @@ -1194,13 +1249,11 @@ namespace Diligent auto it = m_UploadAllocations.find(pBuffer); if(it != 
m_UploadAllocations.end()) { - -#ifdef _DEBUG - //auto CurrentFrame = m_pDevice.RawPtr()->GetCurrentFrameNumber(); - //VERIFY(it->second.FrameNum == CurrentFrame, "Dynamic allocation is out-of-date. Dynamic buffer \"", pBuffer->GetDesc().Name, "\" must be unmapped in the same frame it is used."); -#endif UpdateBufferRegion(pBuffer, it->second, 0, pBuffer->GetDesc().uiSizeInBytes); - m_pDevice.RawPtr()->SafeReleaseVkObject(std::move(it->second)); + // The allocation will stay in the queue until the command buffer from this context is submitted + // to the queue. We cannot use the device's release queue as other contexts may interfere with + // the release order + m_ReleaseQueue.SafeReleaseResource(std::move(it->second), m_NextCmdBuffNumber); m_UploadAllocations.erase(it); } else diff --git a/Graphics/GraphicsEngineVulkan/src/RenderDeviceVkImpl.cpp b/Graphics/GraphicsEngineVulkan/src/RenderDeviceVkImpl.cpp index bde9dc2e..104089a7 100644 --- a/Graphics/GraphicsEngineVulkan/src/RenderDeviceVkImpl.cpp +++ b/Graphics/GraphicsEngineVulkan/src/RenderDeviceVkImpl.cpp @@ -37,15 +37,28 @@ namespace Diligent { -RenderDeviceVkImpl :: RenderDeviceVkImpl(IReferenceCounters *pRefCounters, - IMemoryAllocator &RawMemAllocator, - const EngineVkAttribs &CreationAttribs, - ICommandQueueVk *pCmdQueue, - std::shared_ptr Instance, - std::unique_ptr PhysicalDevice, - std::shared_ptr LogicalDevice, - Uint32 NumDeferredContexts) : - TRenderDeviceBase(pRefCounters, RawMemAllocator, NumDeferredContexts, sizeof(TextureVkImpl), sizeof(TextureViewVkImpl), sizeof(BufferVkImpl), sizeof(BufferViewVkImpl), sizeof(ShaderVkImpl), sizeof(SamplerVkImpl), sizeof(PipelineStateVkImpl), sizeof(ShaderResourceBindingVkImpl)), +RenderDeviceVkImpl :: RenderDeviceVkImpl(IReferenceCounters* pRefCounters, + IMemoryAllocator& RawMemAllocator, + const EngineVkAttribs& CreationAttribs, + ICommandQueueVk* pCmdQueue, + std::shared_ptr Instance, + std::unique_ptr PhysicalDevice, + std::shared_ptr LogicalDevice, + Uint32 
NumDeferredContexts) : + TRenderDeviceBase + { + pRefCounters, + RawMemAllocator, + NumDeferredContexts, + sizeof(TextureVkImpl), + sizeof(TextureViewVkImpl), + sizeof(BufferVkImpl), + sizeof(BufferViewVkImpl), + sizeof(ShaderVkImpl), + sizeof(SamplerVkImpl), + sizeof(PipelineStateVkImpl), + sizeof(ShaderResourceBindingVkImpl) + }, m_VulkanInstance(Instance), m_PhysicalDevice(std::move(PhysicalDevice)), m_LogicalVkDevice(std::move(LogicalDevice)), @@ -57,7 +70,6 @@ RenderDeviceVkImpl :: RenderDeviceVkImpl(IReferenceCounters *pRefCounters, m_ContextPool(STD_ALLOCATOR_RAW_MEM(ContextPoolElemType, GetRawAllocator(), "Allocator for vector>")), m_AvailableContexts(STD_ALLOCATOR_RAW_MEM(CommandContext*, GetRawAllocator(), "Allocator for vector")),*/ m_DescriptorPools(STD_ALLOCATOR_RAW_MEM(DescriptorPoolManager, GetRawAllocator(), "Allocator for vector")), - m_UploadHeaps(STD_ALLOCATOR_RAW_MEM(VulkanUtilities::VulkanUploadHeap, GetRawAllocator(), "Allocator for vector")), m_FramebufferCache(*this), m_TransientCmdPoolMgr(*m_LogicalVkDevice, pCmdQueue->GetQueueFamilyIndex(), VK_COMMAND_POOL_CREATE_TRANSIENT_BIT), m_MemoryMgr("Global resource memory manager", *m_LogicalVkDevice, *m_PhysicalDevice, GetRawAllocator(), CreationAttribs.DeviceLocalMemoryPageSize, CreationAttribs.HostVisibleMemoryPageSize, CreationAttribs.DeviceLocalMemoryReserveSize, CreationAttribs.HostVisibleMemoryReserveSize), @@ -90,7 +102,6 @@ RenderDeviceVkImpl :: RenderDeviceVkImpl(IReferenceCounters *pRefCounters, true // Thread-safe ); - m_UploadHeaps.reserve(1 + NumDeferredContexts); for(Uint32 ctx = 0; ctx < 1 + NumDeferredContexts; ++ctx) { m_DescriptorPools.emplace_back( @@ -110,18 +121,6 @@ RenderDeviceVkImpl :: RenderDeviceVkImpl(IReferenceCounters *pRefCounters, CreationAttribs.DynamicDescriptorPoolSize.MaxDescriptorSets, false // Dynamic descriptor pools need not to be thread-safe ); - - { - auto PageSize = ctx == 0 ? 
CreationAttribs.ImmediateCtxUploadHeapPageSize : CreationAttribs.DeferredCtxUploadHeapPageSize; - auto ReserveSize = ctx == 0 ? CreationAttribs.ImmediateCtxUploadHeapReserveSize : CreationAttribs.DeferredCtxUploadHeapReserveSize; - std::stringstream ss; - if(ctx == 0) - ss << "Immediate context"; - else - ss << "Deferred context " << ctx-1; - ss << " upload heap"; - m_UploadHeaps.emplace_back( ss.str(), *m_LogicalVkDevice, *m_PhysicalDevice, RawMemAllocator, PageSize, ReserveSize ); - } } } @@ -238,7 +237,7 @@ void RenderDeviceVkImpl::SubmitCommandBuffer(const VkSubmitInfo& SubmitInfo, Atomics::AtomicIncrement(m_NextCmdBuffNumber); } -void RenderDeviceVkImpl::ExecuteCommandBuffer(const VkSubmitInfo &SubmitInfo, DeviceContextVkImpl* pImmediateCtx) +Uint64 RenderDeviceVkImpl::ExecuteCommandBuffer(const VkSubmitInfo &SubmitInfo, DeviceContextVkImpl* pImmediateCtx) { // pImmediateCtx parameter is only used to make sure the command buffer is submitted from the immediate context // Stale objects MUST only be discarded when submitting cmd list from the immediate context @@ -277,6 +276,7 @@ void RenderDeviceVkImpl::ExecuteCommandBuffer(const VkSubmitInfo &SubmitInfo, De m_AvailableContexts.push_back(pCtx); } #endif + return SubmittedFenceValue; } @@ -328,12 +328,15 @@ Uint64 RenderDeviceVkImpl::GetCompletedFenceValue() void RenderDeviceVkImpl::FinishFrame(bool ReleaseAllResources) { + auto CompletedFenceValue = ReleaseAllResources ? std::numeric_limits::max() : GetCompletedFenceValue(); + { if (auto pImmediateCtx = m_wpImmediateContext.Lock()) { auto pImmediateCtxVk = pImmediateCtx.RawPtr(); if(pImmediateCtxVk->GetNumCommandsInCtx() != 0) LOG_ERROR_MESSAGE("There are outstanding commands in the immediate device context when finishing the frame. This is an error and may cause unpredicted behaviour. 
Call Flush() to submit all commands for execution before finishing the frame"); + pImmediateCtxVk->FinishFrame(CompletedFenceValue); } for (auto wpDeferredCtx : m_wpDeferredContexts) @@ -343,18 +346,17 @@ void RenderDeviceVkImpl::FinishFrame(bool ReleaseAllResources) auto pDeferredCtxVk = pDeferredCtx.RawPtr(); if(pDeferredCtxVk->GetNumCommandsInCtx() != 0) LOG_ERROR_MESSAGE("There are outstanding commands in the deferred device context when finishing the frame. This is an error and may cause unpredicted behaviour. Close all deferred contexts and execute them before finishing the frame"); + pDeferredCtxVk->FinishFrame(CompletedFenceValue); } } } - auto CompletedFenceValue = ReleaseAllResources ? std::numeric_limits::max() : GetCompletedFenceValue(); - // We must use NextFenceValue here, NOT current value, because the // fence value may or may not have been incremented when the last // command list was submitted for execution (Unity only // increments fence value once per frame) Uint64 NextFenceValue = 0; - Uint64 CmdBuffNumber = 0; + Uint64 SubmittedCmdBuffNumber = 0; { // Lock the command queue to avoid other threads interfering with the GPU std::lock_guard LockGuard(m_CmdQueueMutex); @@ -362,39 +364,10 @@ void RenderDeviceVkImpl::FinishFrame(bool ReleaseAllResources) // Increment cmd list number while keeping queue locked. 
// This guarantees that any Vk object released after the lock // is released, will be associated with the incremented cmd list number - CmdBuffNumber = m_NextCmdBuffNumber; + SubmittedCmdBuffNumber = m_NextCmdBuffNumber; Atomics::AtomicIncrement(m_NextCmdBuffNumber); } - - { - // There is no need to lock as new heaps are only created during initialization - // time for every context - //std::lock_guard LockGuard(m_UploadHeapMutex); - - // Upload heaps are used to update resource contents - // Initial resource data is uploaded using temporary one-time upload buffers, - // so can be performed in parallel across frame boundaries - for (auto &UploadHeap : m_UploadHeaps) - { - // Currently upload heaps are free-threaded, so other threads must not allocate - // resources at the same time. This means that all dynamic buffers must be unmaped - // in the same frame and all resources must be updated within boundaries of a single frame. - // - // worker thread 3 | pDevice->CrateTexture(InitData) | | pDevice->CrateBuffer(InitData) | | pDevice->CrateTexture(InitData) | - // - // worker thread 2 | pDfrdCtx2->UpdateResource() | || - // || - // worker thread 1 | pDfrdCtx1->Map(WRITE_DISCARD) | | pDfrdCtx1->UpdateResource() | || - // || - // main thread | pCtx->Map(WRITE_DISCARD )| | pCtx->UpdateResource() | || | Present() | - // - // - - UploadHeap.ShrinkMemory(); - } - } - - + { // This is OK if other thread disposes descriptor heap allocation at this time // The allocation will be registered as part of the current frame @@ -404,7 +377,7 @@ void RenderDeviceVkImpl::FinishFrame(bool ReleaseAllResources) // Discard all remaining objects. This is important to do if there were // no command lists submitted during the frame - m_ReleaseQueue.DiscardStaleResources(CmdBuffNumber, NextFenceValue); + m_ReleaseQueue.DiscardStaleResources(SubmittedCmdBuffNumber, NextFenceValue); ProcessReleaseQueue(CompletedFenceValue); m_MemoryMgr.ShrinkMemory(); -- cgit v1.2.3