DirectX11:Position Based Fluid

前言

这是我本科毕业设计项目，使用DirectX11实现一个基于PBD的流体模拟仿真，同时也算是补了Games101的大作业了。

阅读本文假设你对以下内容比较熟悉：

流体模拟：Smoothed Particle Hydrodynamics

流体模拟：NeighborHood Search

DirectX11：GPU基数排序

流体模拟：Position Based Fluid

算法过程

具体过程

邻域搜索（Neighbor Search）

我们采用空间哈希的方法对粒子所处的空间网格进行划分，通过计算其空间哈希值并将其进行排序，得到当前网格的起始与结束地址。（具体实现可参考：流体模拟：NeighborHood Search）

我们可以通过遍历当前粒子附近的27个网格得到其邻居粒子（即与当前粒子的距离小于一定阈值的粒子，本项目将该阈值设为粒子半径），最大邻居粒子数量设为96。

HLSL核心代码

#include "PBFSolverCommon.hlsli"
//Find Neighbor Particle
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
    // Threads beyond the last particle have nothing to do.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    // Position of this thread's particle and the grid cell that contains it.
    // Cell coordinates are measured from g_Bounds[0] in units of g_CellSize.
    const float3 selfPos = g_sortedNewPosition[DTid.x];
    const float3 gridOrigin = g_Bounds[0];
    const int3 selfCell = floor((selfPos - gridOrigin) / g_CellSize);

    // Number of contacts written so far into this particle's slice of g_Contacts.
    int found = 0;

    // Visit the 3x3x3 block of cells centred on selfCell (z outermost, x innermost).
    [unroll(3)]
    for (int dz = -1; dz <= 1; ++dz)
    {
        [unroll(3)]
        for (int dy = -1; dy <= 1; ++dy)
        {
            [unroll(3)]
            for (int dx = -1; dx <= 1; ++dx)
            {
                const int3 cell = selfCell + int3(dx, dy, dz);

                // Cells with a negative coordinate are skipped before hashing.
                if (any(cell < 0))
                {
                    continue;
                }

                const uint hash = GetCellHash(cell);
                const uint first = g_CellStart[hash];
                const uint last = g_CellEnd[hash];

                // A cell whose start is not below its end contributes nothing:
                // the loop below simply runs zero times.
                for (uint candidate = first; candidate < last; ++candidate)
                {
                    const float3 toSelf = selfPos - g_sortedNewPosition[candidate];

                    // Squared-distance test against g_ParticleRadiusSq; the particle
                    // itself also passes and is stored as a contact.
                    if (dot(toSelf, toSelf) < g_ParticleRadiusSq)
                    {
                        g_Contacts[DTid.x * g_MaxNeighborPerParticle + found] = candidate;
                        ++found;
                    }

                    // Slice is full: publish the count and stop searching.
                    if (found == g_MaxNeighborPerParticle)
                    {
                        g_ContactCounts[DTid.x] = found;
                        return;
                    }
                }
            }
        }
    }

    g_ContactCounts[DTid.x] = found;
}

不可压缩约束和拉格朗日乘子

HLSL核心代码：

#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
    // Skip the padding threads of the last group.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    const float3 selfPos = g_sortedNewPosition[DTid.x];
    const uint contactCount = g_ContactCounts[DTid.x];

    // Density estimate and the two gradient accumulators used for the multiplier:
    // gradOwn is the vector sum over all contacts, gradOthersSq the sum of squared
    // gradient lengths over every contact except the particle itself.
    float rho = 0;
    float3 gradOwn = float3(0.0f, 0.0f, 0.0f);
    float gradOthersSq = 0;

    for (uint slot = 0; slot < contactCount; ++slot)
    {
        const uint other = g_Contacts[DTid.x * g_MaxNeighborPerParticle + slot];

        // r = p_i - p_j
        const float3 r = selfPos - g_sortedNewPosition[other];

        rho += WPoly6(r, g_sphSmoothLength);

        // Kernel gradient scaled by the inverse rest density.
        const float3 scaledGrad = WSpikyGrad(r, g_sphSmoothLength) * g_InverseDensity_0;
        gradOwn += scaledGrad;

        if (other != DTid.x)
        {
            gradOthersSq += dot(scaledGrad, scaledGrad);
        }
    }

    // Density is also written out (the original marks this as a debug display value).
    g_Density[DTid.x] = rho;

    const float denominator = gradOthersSq + dot(gradOwn, gradOwn);

    // Density constraint, clamped so only over-density (rho above rest density) is corrected.
    const float constraint = max(rho * g_InverseDensity_0 - 1.0f, 0.0f);

    // g_LambdaEps keeps the division well defined when the gradients vanish.
    g_LambdaMultiplier[DTid.x] = -constraint / (denominator + g_LambdaEps);
}

约束投影与拉伸不稳定性

HLSL核心代码：

#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
    // Skip the padding threads of the last group.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    const float3 selfPos = g_sortedNewPosition[DTid.x];
    const uint contactCount = g_ContactCounts[DTid.x];
    const float selfLambda = g_LambdaMultiplier[DTid.x];

    // Kernel value at g_DeltaQ: the denominator of the s_corr ratio below.
    // NOTE(review): the original locals were named "poly6", but the value comes
    // from WSpiky - confirm which kernel is intended.
    const float kernelAtDeltaQ = WSpiky(g_DeltaQ, g_sphSmoothLength);

    float3 accum = float3(0.0f, 0.0f, 0.0f);

    for (uint slot = 0; slot < contactCount; ++slot)
    {
        const uint other = g_Contacts[DTid.x * g_MaxNeighborPerParticle + slot];
        const float otherLambda = g_LambdaMultiplier[other];

        // r = p_i - p_j
        const float3 r = selfPos - g_sortedNewPosition[other];

        // Artificial pressure term: s_corr = -k * (W(r) / W(deltaQ))^n
        const float kernelRatio = WSpiky(r, g_sphSmoothLength) / kernelAtDeltaQ;
        const float scorr = -g_ScorrK * pow(abs(kernelRatio), g_ScorrN);

        const float weight = selfLambda + otherLambda + scorr;
        accum += weight * WSpikyGrad(r, g_sphSmoothLength);
    }

    // Scale by the inverse rest density and store the position correction.
    g_DeltaPosition[DTid.x] = accum * g_InverseDensity_0;
}

上述描述的平均约束力确保了收敛性，但是在某些情况下，这种平均会过于激进，导致达到解所需的迭代次数增加。因此，我们需要一个全局用户参数ω来控制逐次超松弛法(SOR)的速度。

HLSL核心代码：

#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID)
{
    // Skip the padding threads of the last group.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    // The correction is divided by (g_SOR * contact count), but never by less than 1,
    // so a particle with few or no contacts is not amplified.
    const uint contactCount = g_ContactCounts[DTid.x];
    const float divisor = max(g_SOR * contactCount, 1.0f);
    const float scale = 1 / divisor;

    const float3 corrected = g_sortedNewPosition[DTid.x] + g_DeltaPosition[DTid.x] * scale;
    g_sortedNewPosition[DTid.x] = corrected;
}

处理碰撞

粒子与边界碰撞的处理也一直是一个非常关键的问题。一般来说，SPH方法会将边界也离散为粒子（即边界粒子）进行表达，但我不想采用这种方法。

另一种处理方法是采用SDF(Signed Distance Field)的方式表达空间中容器的位置，然后判断粒子是否跑出了这个空间并将其推回去，从而保证粒子在容器之中。因为时间有限以及场景都是简单的几何体，所以本项目直接使用了解析SDF函数来解决。若场景复杂，存在多面体模型的话，可以考虑先bake出当前场景的3D SDF Texture，再在Shader中对其采样进行计算。

因为本场景采用平面对粒子的容器范围进行限制，所以采用了平面的SDF距离场公式：

//sdf plane function
float sdfPlane(float3 p, float3 n, float h)
{
    // Signed distance from point p to the plane dot(x, n) + h = 0.
    // The result is a true distance only when n is normalized.
    const float alongNormal = dot(p, n);
    return alongNormal + h;
}

所以我们处理粒子的碰撞时，首先在约束求解前得到每个粒子接触的平面（最大的接触平面数为6）。在每次迭代求解后，对其进行判断是否出去了这个空间并将其推回去。

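处理碰撞过程中，我还增加了一个friction model，具体公式如下：

$$
\Delta x_{\text{friction}} =
\begin{cases}
\Delta x_{\perp}, & |\Delta x_{\perp}| < \mu_s d \\
\Delta x_{\perp} \cdot \min\left(\dfrac{\mu_k d}{|\Delta x_{\perp}|},\ 1\right), & \text{otherwise}
\end{cases}
$$

其中 $\Delta x_{\perp}$ 为粒子本次位移在接触平面切向上的分量，$d$ 为穿透深度，$\mu_s$、$\mu_k$ 分别为静摩擦系数与动摩擦系数（代码中 $\mu_k$ 取 0.01）。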

HLSL核心代码：

//CollisionPlane_CS.hlsl
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
    // Skip the padding threads of the last group.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    const float3 pos = g_sortedNewPosition[DTid.x];

    // Number of planes recorded so far in this particle's slice of g_CollisionPlanes.
    int stored = 0;

    [unroll]
    for (int planeIdx = 0; planeIdx < g_PlaneNums; ++planeIdx)
    {
        const float4 plane = g_Plane[planeIdx];

        // Distance to the plane (xyz = normal, w = offset), reduced by g_CollisionDistance.
        const float gap = sdfPlane(pos, plane.xyz, plane.w) - g_CollisionDistance;

        const bool closeEnough = gap < g_CollisionThreshold;
        const bool hasRoom = stored < g_MaxCollisionPlanes;
        if (closeEnough && hasRoom)
        {
            g_CollisionPlanes[DTid.x * g_MaxCollisionPlanes + stored] = plane;
            ++stored;
        }
    }

    g_CollisionCounts[DTid.x] = stored;
}

//SolveContact_CS.hlsl
#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
    // Skip the padding threads of the last group.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    // currPos is corrected in place plane by plane; oldPos is the position this
    // particle is compared against to obtain its displacement for friction.
    float3 currPos = g_sortedNewPosition[DTid.x];
    const float3 oldPos = g_sortedOldPosition[DTid.x];

    const int collisionCount = g_CollisionCounts[DTid.x];
    for (int i = 0; i < collisionCount; ++i)
    {
        const int index = DTid.x * g_MaxCollisionPlanes + i;
        const float4 currPlane = g_CollisionPlanes[index];   // xyz = normal, w = offset

        // Signed distance to the plane, reduced by g_CollisionDistance.
        const float distance = sdfPlane(currPos, currPlane.xyz, currPlane.w) - g_CollisionDistance;
        if (distance < 0.0f)
        {
            // Penetration depth d (> 0 inside this branch).
            const float penetration = -distance;

            // Push the particle back out along the plane normal.
            float3 sdfPos = penetration * currPlane.xyz;

            // Friction model: split the displacement and keep only its tangential part.
            const float3 deltaPos = currPos - oldPos;
            const float deltaX = dot(deltaPos, currPlane.xyz);
            const float3 tangential = (-deltaX) * currPlane.xyz + deltaPos;
            const float tangentialSq = dot(tangential, tangential);

            // Static friction applies when |tangential| <= mu_s * d. Both sides are
            // compared squared because tangentialSq is a squared length. The previous
            // test used the (negative) signed distance unsquared, so the threshold was
            // never positive and static friction could not trigger for a moving particle.
            const float staticLimit = max(g_StaticFriction, 0.0f) * penetration;
            [flatten]
            if (tangentialSq <= staticLimit * staticLimit)
            {
                // Static friction: cancel the whole tangential displacement.
                sdfPos -= tangential;
            }
            else
            {
                // Dynamic friction: cancel the fraction min(mu_k * d / |tangential|, 1),
                // with mu_k hard-coded to 0.01.
                const float dynamicFriction = min(penetration * 0.01f * rsqrt(tangentialSq), 1.0f);
                sdfPos -= dynamicFriction * tangential;
            }
            currPos += sdfPos;
        }
    }

    g_UpdatedPosition[DTid.x] = currPos;
}

更新速度

这里的公式非常简单，即用求解后的位置 $x_i^{*}$ 与旧位置 $x_i$ 之差除以时间步长：

$$
v_i = \frac{1}{\Delta t}\left(x_i^{*} - x_i\right)
$$

HLSL核心代码：

#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
    // Skip the padding threads of the last group.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    // Velocity = (new position - old position) * (1 / dt).
    const float3 previous = g_sortedOldPosition[DTid.x];
    const float3 solved = g_sortedNewPosition[DTid.x];

    g_UpdatedVelocity[DTid.x] = g_InverseDeltaTime * (solved - previous);
}

涡度约束和人工粘性

由于数值耗散，PBD方法通常会引入额外的阻尼，导致整个系统的能量损耗，使本该存在的一些涡流快速消失。PBF通过vorticity confinement向系统重新注入能量：

HLSL核心代码：

#include "PBFSolverCommon.hlsli"
//Calc curl
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
    // Skip the padding threads of the last group.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    const uint contactCount = g_ContactCounts[DTid.x];
    const float3 selfPos = g_sortedNewPosition[DTid.x];
    const float3 selfVel = g_UpdatedVelocity[DTid.x];

    // Accumulated curl: sum over contacts of (v_j - v_i) x gradW(p_i - p_j).
    float3 omega = float3(0.0f, 0.0f, 0.0f);

    for (uint slot = 0; slot < contactCount; ++slot)
    {
        const uint other = g_Contacts[DTid.x * g_MaxNeighborPerParticle + slot];

        // r = p_i - p_j
        const float3 r = selfPos - g_sortedNewPosition[other];
        // v_j - v_i
        const float3 relVel = g_UpdatedVelocity[other] - selfVel;

        omega += cross(relVel, WSpikyGrad(r, g_sphSmoothLength));
    }

    // xyz holds the curl vector, w its magnitude.
    g_Curl[DTid.x] = float4(omega, length(omega));
}

#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
    // Skip the padding threads of the last group.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    const float3 currPos = g_sortedNewPosition[DTid.x];
    const float3 currVec = g_UpdatedVelocity[DTid.x];
    const float3 oldVec = g_sortedVelocity[DTid.x];

    const uint counter = g_ContactCounts[DTid.x];

    float3 deltaTotalVec = float3(0.0f, 0.0f, 0.0f);   // XSPH accumulator
    float3 etaTotal = float3(0.0f, 0.0f, 0.0f);        // sum of gradW * |curl_j|
    // Must start at zero: it is accumulated with += below and then tested.
    // It was previously left uninitialised, so the density > 0 check read garbage.
    float density = 0.0f;

    for (uint i = 0; i < counter; ++i)
    {
        const uint neightborParticleIndex = g_Contacts[DTid.x * g_MaxNeighborPerParticle + i];
        const float3 neighborParticlePos = g_sortedNewPosition[neightborParticleIndex];

        // r = p_i - p_j
        const float3 r = currPos - neighborParticlePos;
        // v_j - v_i
        const float3 deltaVelocity = g_UpdatedVelocity[neightborParticleIndex] - currVec;

        const float3 currGrad = WSpikyGrad(r, g_sphSmoothLength);

        // Vorticity confinement: weight each kernel gradient by the neighbour's curl magnitude.
        const float neighCurlLength = g_Curl[neightborParticleIndex].w;
        etaTotal += currGrad * neighCurlLength;

        // XSPH: kernel-weighted relative velocity.
        const float3 deltaVec_j = deltaVelocity * WSpiky(r, g_sphSmoothLength);
        deltaTotalVec += deltaVec_j;

        // Density estimate, only used to gate the confinement term below.
        density += WPoly6(r, g_sphSmoothLength);
    }

    float3 impulse = float3(0.0f, 0.0f, 0.0f);

    // Vorticity confinement, applied only when there is a direction to normalise,
    // the effect is enabled and the particle has a positive density estimate.
    if (length(etaTotal) > 0.0f && g_VorticityConfinement > 0.0f && density > 0.0f)
    {
        const float epsilon = g_DeltaTime * g_DeltaTime * g_InverseDensity_0 * g_VorticityConfinement;

        const float3 currCurl = g_Curl[DTid.x].xyz;

        const float3 N = normalize(etaTotal);
        const float3 force = cross(N, currCurl);

        impulse += epsilon * force;
    }

    // XSPH viscosity, scaled by g_VorticityC.
    impulse += g_VorticityC * deltaTotalVec;

    // Restitution against the planes recorded for this particle.
    const uint planeCounts = g_CollisionCounts[DTid.x];
    uint resCounts = 0;
    float3 restitutionVec = float3(0.0f, 0.0f, 0.0f);
    for (uint j = 0; j < planeCounts; ++j)
    {
        const float4 plane = g_CollisionPlanes[DTid.x * g_MaxCollisionPlanes + j];
        // The 1.001 factor makes the contact band slightly wider than g_CollisionDistance.
        const float distance = sdfPlane(currPos, plane.xyz, plane.w) - 1.001f * g_CollisionDistance;

        // Only planes the particle was moving into (old velocity against the normal) respond.
        const float oldVecD = dot(oldVec, plane.xyz);
        if (distance < 0.0f && oldVecD < 0.0f)
        {
            const float currVecD = dot(currVec, plane.xyz);
            const float restitutionD = oldVecD * g_Restituion + currVecD;
            restitutionVec += plane.xyz * (-restitutionD);
            resCounts++;
        }
    }

    // Average over the responding planes; max(.., 1) avoids dividing by zero.
    resCounts = max(resCounts, 1);
    restitutionVec /= resCounts;

    impulse += restitutionVec;
    g_Impulses[DTid.x] = impulse;
}

最终处理

最终我们只需将求解后的最终结果(位置与速度信息)输出即可，本项目还对粒子的最大速度进行一定的限制。

HLSL核心代码：

#include "PBFSolverCommon.hlsli"
[numthreads(THREAD_NUM_X, 1, 1)]
void CS( uint3 DTid : SV_DispatchThreadID )
{
    // Skip the padding threads of the last group.
    if (DTid.x >= g_ParticleNums)
    {
        return;
    }

    // Destination slot for this sorted particle's results.
    // NOTE(review): presumably the particle's index before sorting - confirm against the host code.
    const uint target = g_Particleindex[DTid.x];

    g_SolveredPosition[target] = g_sortedNewPosition[DTid.x];

    // Velocity change over the step, including the impulse computed earlier.
    const float3 previousVel = g_oldVelocity[target];
    float3 change = g_UpdatedVelocity[DTid.x] + g_Impulses[DTid.x] - previousVel;

    // Clamp the magnitude of the change to g_MaxVeclocityDelta (compared squared).
    const float changeSq = dot(change, change);
    const float limitSq = g_MaxVeclocityDelta * g_MaxVeclocityDelta;
    if (changeSq > limitSq)
    {
        change = change * rsqrt(changeSq) * g_MaxVeclocityDelta;
    }

    g_SolveredVelocity[target] = previousVel + change;
}

C++核心部分

因为代码太多，这里只粗略展示算法核心过程代码，具体代码可去下载地址查看：

void FluidSystem::TickLogic(ID3D11DeviceContext* deviceContext, PBFSolver::PBFParams params)
{
	// Hand the parameters to the solver before any sub-step runs.
	m_pPBFSolver->SetPBFParams(params);

	auto& solverEffect = *m_pPBFSolverEffect;
	auto& searchEffect = *m_pNeighborSearchEffect;

	int step = 0;
	while (step < params.subStep)
	{
		// 1. Predict positions from the current position / velocity buffers.
		m_pPBFSolver->PredParticlePosition(
			deviceContext,
			solverEffect,
			m_pParticlePosBuffer->GetBuffer(),
			m_pParticleVecBuffer->GetBuffer());

		// 2. Neighbour search on the predicted positions: bounds, radix sort, cell ranges.
		m_GpuTimer_NeighBorSearch.Start();
		m_pNeighborSearch->BeginNeighborSearch(
			deviceContext,
			m_pPBFSolver->GetPredPosition(),
			m_pParticleIndexBuffer->GetBuffer(),
			params.cellSize);
		m_pNeighborSearch->CalcBounds(
			deviceContext,
			searchEffect,
			m_pPBFSolver->GetPredPosition(),
			m_pParticleIndexBuffer->GetBuffer(),
			params.cellSize);
		m_pNeighborSearch->RadixSort(deviceContext, searchEffect);
		m_pNeighborSearch->FindCellStartAndEnd(deviceContext, searchEffect);
		m_pNeighborSearch->EndNeighborSearch();

		// 3. Iterative constraint solve using the sorted indices and cell ranges.
		m_pPBFSolver->BeginConstraint(
			deviceContext,
			solverEffect,
			m_pNeighborSearch->GetSortedParticleIndex(),
			m_pNeighborSearch->GetSortedCellStart(),
			m_pNeighborSearch->GetSortedCellEnd(),
			m_pNeighborSearch->GetBounds());
		m_pPBFSolver->SolverConstraint(deviceContext, solverEffect);
		m_pPBFSolver->EndConstraint(deviceContext, solverEffect);

		// 4. Copy the solved positions / velocities back so the next sub-step starts from them.
		m_pParticlePosBuffer->UpdataBufferGPU(deviceContext, m_pPBFSolver->GetSolveredPosition());
		m_pParticleVecBuffer->UpdataBufferGPU(deviceContext, m_pPBFSolver->GetSolveredVelocity());

		++step;
	}

	// Runs once per tick, after all sub-steps.
	m_pPBFSolver->CalcAnisotropy(deviceContext, solverEffect);
}

void PBFSolver::SolverConstraint(ID3D11DeviceContext* deviceContext, PBFSolverEffect& effect)
{
    // Bind the UAVs the solver passes write to.
    effect.SetOutPutUAVByName("g_LambdaMultiplier", m_pLambdaMultiplierBuffer->GetUnorderedAccess());
    effect.SetOutPutUAVByName("g_Density", m_pDensityBuffer->GetUnorderedAccess());
    effect.SetOutPutUAVByName("g_DeltaPosition", m_pDeltaPositionBuffer->GetUnorderedAccess());
    effect.SetOutPutUAVByName("g_UpdatedPosition", m_pUpdatedPositionBuffer->GetUnorderedAccess());

    // Applies whichever state was selected last and dispatches m_BlockNums thread groups.
    const auto runPass = [&]()
    {
        effect.Apply(deviceContext);
        deviceContext->Dispatch(m_BlockNums, 1, 1);
    };

    for (int iteration = 0; iteration < m_PBFParams.maxSolverIterations; ++iteration)
    {
        // Pass 1: Lagrange multiplier.
        effect.SetCalcLagrangeMultiplierState();
        runPass();

        // Pass 2: displacement.
        effect.SetCalcDisplacementState();
        runPass();

        // Pass 3: add the displacement to the positions.
        effect.SetADDDeltaPositionState();
        runPass();

        // Pass 4: contact resolution.
        effect.SetSolverContactState();
        runPass();

        // Copy the updated positions into the sorted position buffer for the next iteration.
        m_pSortedNewPostionBuffer->UpdataBufferGPU(
            deviceContext, m_pUpdatedPositionBuffer->GetBuffer(), m_ParticleNums);
    }
}