对比总览

真到写标题时才觉得自己干的事好杂……借着学校座谈会需要选取论文演讲开了PBD和XPBD,顺手写了半隐式和隐式进行对比。又想起之前实习时一直想但是没做的对比C#和C++性能,就还把半隐式的方法在Unity和C++中各写了两份对比(怎么能少了并行化)。先上个总的对比表格吧,具体数值没有多大意义,但看个趋势还是可以的。(35x35布料网格,2000劲度系数,32迭代,64线程,GTX1650笔记本。并行均为无锁多线程)

Unity Unity Job + Burst C++ C++ Thread
Time (ms) 36.9 1.6 12.2 38.8

C++ Thread的表现有些出乎意料,可能是Thread过于耗了?不过C++比Unity的性能倒好了不少,感觉之后遇到想优化但又不适合Job + Burst并行化的代码都可以试试C++

Semi-Implicit Euler Implicit Euler PBD XPBD
Main Thread (ms) 36.9 N/A 70.9 52.2
Job + Burst (ms) 1.6 N/A 2.1 2.2

隐式的炸了不计入考量,主线程下XPBD优于PBD应该是我在写PBD的时候用的雅可比而非高斯塞德尔,但看并行化的就行了。比较不甘的是Unity自己的Cloth组件模拟这些只需要0.2ms,而我的XPBD在保持稳定的情况下只能到0.4ms,小作坊下猛料也没下过官方,之后有空再研究下如何优化

半隐式欧拉

本来是想写显式的,太久没写直接写了个半隐。P为网格格点,读入时为本地空间。为了能在Unity中拖动观察效果,故转化到世界空间计算完后再转回去

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
void Semi_Implict()
{
for (int i = 0; i < P.Length; i++)
{
P2[i] = transform.TransformPoint(P[i]);
V[i] += (P2[i] - P1[i]) / dt;
}

float idt = dt / iter, d = Mathf.Pow(damping, 1f / iter);
for (int i = 0; i < iter; i++)
{
// Constrant
for (int j = 0; j < C.Length; j++)
{
int x = C[j].x, y = C[j].y; float z = C[j].z;
Vector3 f = k * (1 - z / (P2[x] - P2[y]).magnitude) * (P2[x] - P2[y]);
F[x] -= f; F[y] += f;
}

// Gravity & Update & Calculate Local Position
for (int j = 0; j < P.Length; j++)
{
V[j] = d * V[j] + idt * F[j];
if (W[j] != 0) P2[j] += idt * V[j];
P1[j] = P2[j]; F[j] = g;
}
}

for (int i = 0; i < P.Length; i++) P[i] = transform.InverseTransformPoint(P2[i]);
}

隐式欧拉

没什么好说的,算梯度迭代就是,用了手Chebyshev加速但感觉差距不大。比较值得一说的是之前自己写的一版中求梯度用了一个normalize和一个magnitute,改成这版后性能提升不少。开始日常怀疑Unity函数实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
void Implict()
{
for (int i = 0; i < P.Length; i++)
{
V[i] += ((P2[i] = transform.TransformPoint(P[i])) - P1[i]) / dt;
if (W[i] != 0) P[i] = P2[i] += (dt * (V[i] *= damping));
}

float invdt2 = 1 / dt / dt, factor = 1 / (invdt2 + 4 * k), rho2 = rho * rho, w = 1;
for (int i = 0; i < iter; i++)
{
//Momentum and Gravity.
for (int j = 0; j < P.Length; j++) G[j] = invdt2 * (P2[j] - P[j]) - g;

//Spring Force.
for (int j = 0; j < C.Length; j++)
{
int x = C[j].x, y = C[j].y; float z = C[j].z;
Vector3 grad = k * (1 - z / (P2[x] - P2[y]).magnitude) * (P2[x] - P2[y]);
G[x] += grad; G[y] -= grad;
}

if (i == 0) w = 1;
else if (i == 1) w = 2 / (2 - rho2);
else w = 4 / (4 - w * rho2);

//Update P2 by gradient.
for (int j = 0; j < P.Length; j++)
{
if (W[j] == 0) continue;
Vector3 old = P2[j];
P2[j] = w * (P2[j] - factor * G[j]) + (1 - w) * P1[j];
P1[j] = old;
}
}

for (int i = 0; i < P.Length; i++)
{
V[i] += (P2[i] - P[i]) / dt;
P[i] = transform.InverseTransformPoint(P2[i]);
}
}

PBD

公式算法流程什么就不摆了,大致思路就是先自由运动,再上约束限制,详细论文解读可以看看这篇jeffzzz:position based dynamics笔记。但PBD其实也存在一些问题,最大的问题就是不那么物理,其展示出来的物理特性只与时间步长和迭代次数挂钩。但时步与表现相关,迭代次数与性能相关,为了改变物理特性而给不同物体赋予不同时步和迭代次数想想就很埋雷……

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
void PBD()
{
for (int i = 0; i < P.Length; i++) P2[i] = transform.TransformPoint(P[i]);

// Free Update
for (int i = 0; i < P.Length; i++)
{
if (W[i] == 0) continue;
V[i] = damping * V[i] + dt * g;
P2[i] += dt * V[i] + damping * (P2[i] - P1[i]);
}

for (int i = 0; i < iter; i++)
{
// Strain Limit
for (int j = 0; j < C.Length; j++)
{
int x = C[j].x, y = C[j].y; float z = C[j].z;
Vector3 p = z * (P2[x] - P2[y]).normalized;
SX[x] += 0.5f * (P2[x] + P2[y] + p); SN[x]++; SX[y] += 0.5f * (P2[x] + P2[y] - p); SN[y]++;
}
for (int j = 0; j < P.Length; j++)
{
if (W[j] == 0) continue;
P1[j] = P2[j];
P2[j] = (0.2f * P2[j] + SX[j]) / (0.2f + SN[j]);
V[j] += (P2[j] - P1[j]) / dt;
SX[j] = Vector3.zero; SN[j] = 0;
}
}

for (int i = 0; i < P.Length; i++) P[i] = transform.InverseTransformPoint(P1[i] = P2[i]);
}

XPBD

XPBD通过引入弹性势能重新定义PBD约束,在耗时仅增加不到5%(论文中好像写的2-3%,但大差不差)的情况下解决了上述PBD的问题。这里推荐的论文解读是zilch:XPBD论文解读,或者也可以看看祖师爷Müller写的WebXPBDCloth,结合实际工程理解会更快些

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
void XPBD()
{
float idt = dt / iter, a = 1f / k / idt / idt, d = Mathf.Pow(damping, 1f / iter);
for (int i = 0; i < P.Length; i++) { V[i] += ((P2[i] = transform.TransformPoint(P[i])) - P1[i]) / idt; }

for (int i = 0; i < iter; i++)
{
// Free Update
for (int j = 0; j < P.Length; j++)
{
if (W[j] == 0) continue;
V[j] = d * V[j] + idt * g;
P2[j] += idt * V[j];
}

// Solve Constraint (Gauss Seidel)
for (int j = 0; j < C.Length; j++)
{
int x = C[j].x, y = C[j].y; float z = C[j].z, w = W[x] + W[y];
Vector3 grad = -(1 - z / (P2[x] - P2[y]).magnitude) * (P2[x] - P2[y]) / (a + w);
P2[x] += grad * W[x]; P2[y] -= grad * W[y];
}

// Record Velocity
for (int j = 0; j < P.Length; j++) { V[j] = (P2[j] - P1[j]) / idt; P1[j] = P2[j]; }
}

for (int i = 0; i < P.Length; i++) P[i] = transform.InverseTransformPoint(P2[i]);
}