在现实世界中,仅有环境光(Ambient Light)的区域也不是所有像素的照明度都相同。
对于自身的环境光遮蔽(Self-Ambient Occlusion),通常可以通过离线烘焙一张AO贴图来实现,但它不会考虑场景中其他物体的遮挡。
环境光遮蔽(Ambient Occlusion)是计算机图形学中的一种着色和渲染技术,用来计算场景中每一点是如何接受环境光的。
Wiki-环境光遮蔽 给出了实时环境光遮蔽算法分类:
SSAO(屏幕空间环境光遮蔽,Screen space ambient occlusion)
SSDO(屏幕空间定向遮蔽,Screen space directional occlusion)
RTAO(光线追踪环境光遮蔽,Ray Traced Ambient Occlusion)
HDAO(高分辨率环境光遮蔽,High Definition Ambient Occlusion)
HBAO+(水平基准环境光遮蔽,Horizon Based Ambient Occlusion+)
AAO(Alchemy Ambient Occlusion)
ABAO(角度基准环境光遮蔽,Angle Based Ambient Occlusion)
PBAO(预烘焙环境光遮蔽,Pre Baked Ambient Occlusion)
VXAO(体素基准环境光遮蔽,Voxel Accelerated Ambient Occlusion)
GTAO(Ground Truth based Ambient Occlusion)
基于顶点的AO技术在模型表面顶点足够密的情况下,能够得到很好的效果。但是基于物理精确的AO计算需要进行光线与场景的求交运算 , 十分耗时 ,所以这种方法只能用于离线渲染。
屏幕空间环境光遮蔽(SSAO,Screen space ambient occlusion)是实际应用较多的一种AO算法。
$$A_p(\overrightarrow{n}) = 1 - \frac{1}{\pi} \int\limits_{\Omega} V_P(\overrightarrow{w})(\overrightarrow{n}\cdot \overrightarrow{w})\,dw \tag{1}$$
函数采样点更加接近原点。// 默认产生的[0,1]之间的浮点数 float RandomFloat(float LO = 0.0f, float HI = 1.0f) { float random = LO + static_cast <float> (rand()) / (static_cast <float> (RAND_MAX / (HI - LO))); return random; } // make it closer to the origin point float Lerp(float a, float b, float f) { return a + f * (b - a); } // SSAO Samples std::vector<Vector4f> Samples; Samples.resize(64); for (int i = 0; i < 64; ++i) { Vector3f Sample = Vector3f( RandomFloat() * 2 - 1.0f, RandomFloat() * 2 - 1.0f, RandomFloat() ); Sample = Sample.Normalize(); // Scale float Scale = RandomFloat(); Scale *= i * 1.0f / 64.0f; Scale = Lerp(0.1f, 1.0f, Scale * Scale); Sample = Sample * Scale; Samples[i] = Vector4f(Sample, 0); }
使用上面的采样点,需构造 $TBN$ 矩阵,则需要一个世界空间的随机的方向。
// SSAO random rotation vectors: 16 normalized vectors whose z component is 0,
// uploaded as a 4x4 RGBA32F texture.
std::vector<Vector4f> SSAONoises;
SSAONoises.reserve(16);
for (int i = 0; i < 16; ++i)
{
    // z is 0, so every vector lies in the XY plane.
    Vector3f Noise = Vector3f(
        RandomFloat() * 2 - 1.0f,
        RandomFloat() * 2 - 1.0f,
        0.0f);
    Noise = Noise.Normalize();
    SSAONoises.push_back(Vector4f(Noise, 0));
}
g_SSAONoise.Create(4, 4, DXGI_FORMAT_R32G32B32A32_FLOAT, &SSAONoises[0]);
// A kernel sample counts as occluded when the depth read from the depth buffer is smaller than the sample's own depth.
Occlusion += (SampleDepth < Offset.z ? 1.0 : 0.0);
// Turn the occluded-sample count into a fraction of the kernel size.
Occlusion /= kernelSize;
static const int kernelSize = 64;
// Tiles the noise texture across the screen: (1024 / 4, 768 / 4) repeats.
static const float2 NoiseScale = float2(1024.0f / 4.0f, 768.0f / 4.0f);

struct PixelOutput
{
    float4 Target0 : SV_Target0;
};

// SSAO pixel shader.
// Reconstructs the pixel's world position from depth, orients the sample kernel
// around the G-buffer normal with a per-pixel random vector, projects every kernel
// sample back to the screen and counts how many lie behind the stored depth.
// Writes the unoccluded fraction (1 - occluded / kernelSize) to RGB, alpha = 1.
PixelOutput PS_SSAO(float2 Tex : TEXCOORD, float4 ScreenPos : SV_Position)
{
    // World position of this pixel.
    float PixelDepth = SceneDepthZ.SampleLevel(LinearSampler, Tex, 0).r;
    float4 WorldPos = mul(float4(ViewportUVToScreenPos(Tex), PixelDepth, 1.0f), InvViewProjMatrix);
    WorldPos /= WorldPos.w;

    // World normal, decoded from [0, 1] to [-1, 1].
    float3 N = GBufferA.SampleLevel(LinearSampler, Tex, 0).rgb * 2 - 1.0;

    // Per-pixel random vector; the UV is scaled so the noise texture repeats.
    float3 RandomVec = NoiseMap.SampleLevel(WrapLinearSampler, Tex * NoiseScale, 0).rgb;

    // Tangent frame: T is RandomVec with its component along N removed, then normalized.
    float3 T = normalize(RandomVec - N * dot(RandomVec, N));
    float3 B = cross(N, T);
    float3x3 TBN = float3x3(T, B, N);

    float OccludedCount = 0.0f;
    for (int k = 0; k < kernelSize; ++k)
    {
        // Tangent space -> world space, offset from the pixel by Radius.
        float3 KernelDir = mul(Samples[k].xyz, TBN);
        float3 SamplePos = WorldPos.xyz + KernelDir * Radius;

        // View and projection, then perspective divide.
        float4 Projected = mul(float4(SamplePos, 1.0), ViewProjMatrix);
        Projected.xyz /= Projected.w;

        // Depth stored at the sample's screen location.
        float2 SampleUV = ScreenPosToViewportUV(Projected.xy);
        float StoredDepth = SceneDepthZ.SampleLevel(LinearSampler, SampleUV, 0).r;

        if (StoredDepth < Projected.z)
        {
            OccludedCount += 1.0;
        }
    }

    // Unoccluded fraction.
    float Visibility = 1 - OccludedCount / kernelSize;

    PixelOutput Out;
    Out.Target0 = float4(Visibility, Visibility, Visibility, 1);
    return Out;
}
cbuffer PSContant : register(b0)
{
    float2 texelSize;
    int BlurRadius;
};

struct PixelOutput
{
    float4 Target0 : SV_Target0;
};

// Box blur of the AO buffer.
// Averages the (2 * BlurRadius) x (2 * BlurRadius) taps at texel offsets
// [-BlurRadius, BlurRadius) on both axes and writes the mean to RGB, alpha = 1.
// A non-positive BlurRadius returns the unblurred sample, because the loops would
// run zero times and the normalization would divide by zero.
PixelOutput PS_SSAO_Blur(float2 Tex : TEXCOORD, float4 ScreenPos : SV_Position)
{
    PixelOutput Out;
    Out.Target0 = float4(0, 0, 0, 0);

    if (BlurRadius <= 0)
    {
        float center = AOBuffer.SampleLevel(LinearSampler, Tex, 0).r;
        Out.Target0 = float4(center, center, center, 1);
        return Out;
    }

    float result = 0;
    for (int x = -BlurRadius; x < BlurRadius; ++x)
    {
        for (int y = -BlurRadius; y < BlurRadius; ++y)
        {
            float2 offset = Tex + float2(x, y) * texelSize;
            result += AOBuffer.SampleLevel(LinearSampler, offset, 0).r;
        }
    }

    // 2R taps per axis -> 4 * R * R taps in total.
    result /= (float)(BlurRadius * BlurRadius * 4);

    Out.Target0 = float4(result, result, result, 1);
    return Out;
}
HBAO,全称为:Image-space horizon-based ambient occlusion,屏幕空间水平基准环境光遮蔽。
$$A = 1 - \frac{1}{2\pi} \int_{\theta = -\pi}^{\pi} \int_{\alpha = t(\theta)}^{h(\theta)} W(\overrightarrow{w})\cos(\alpha)\,d\alpha\,d\theta \tag{2}$$
其中,$W(w)$ 的定义如下:
$$W(\theta) = \max\left(0,\ 1-\frac{r(\theta)}{R}\right)$$
- 公式2为什么是cosα,并不能找到解释。论文的作者就是这么规定的。
$$A = 1 - \frac{1}{2\pi} \int_{\theta = -\pi}^{\pi} \left[\sin(h(\theta)) - \sin(t(\theta))\right] W(\theta)\,d\theta \tag{3}$$
接下来,对水平角(Horizon Angle)进行一个解释,即公式(3)中的 $h(\theta)$。
而公式(3)中的 $t(\theta)$ 表示的是切面角(Tangent Angle),如下图所示。
通过这两个角度**(Horizon Angle、Tangent Angle)**,就可以求解出当前方向的环境光遮蔽值。
$$\sin(h(\theta)) = \frac{\tan(h(\theta))}{\sqrt{1 + \tan^2(h(\theta))}} \tag{4}$$
- 上图的Z轴正方向指向相机,这是OpenGL坐标系下的定义。
- 对于D3D而言,需要对Z进行取反。
$$\tan(h(\theta)) = \frac{S.z}{\sqrt{(S.x)^2+(S.y)^2}} \tag{5}$$
- 只采样4个方向,每个方向采样5次,一共的采样数量为20次。远小于SSAO的64次。
HBAO希望这部分不要加入遮挡中,因此对切线角使用了一个偏差值(Angle Bias),从而将错误的遮挡去除掉。
对于相邻的两个像素P0和P1,他们的AO值相差为:$0.7-0 = 0.7$,幅值差距过大,会带来不连续的问题。
#pragma pack_matrix(row_major)

Texture2D GBufferA : register(t0);           // Normal
Texture2D GBufferC : register(t1);           // DiffuseAO
Texture2D<float> SceneDepthZ : register(t2); // Depth
Texture2D NoiseMap : register(t3);           // Noise

SamplerState LinearSampler : register(s0);
SamplerState WrapLinearSampler : register(s1);

cbuffer PSContant : register(b0)
{
    float4x4 InvProjMatrix;
    float4 Resolution; // width height 1/width 1/height
    float NumDirections;
    float NumSamples;
    float TraceRadius;
    float MaxRadiusPixels;
    float2 ClipInfo;
};

struct PixelOutput
{
    float4 Target0 : SV_Target0;
};

// Reconstructs the view-space position at the given UV from the depth buffer
// by un-projecting (screen xy, depth, 1) with InvProjMatrix.
float3 GetViewPos(float2 Tex)
{
    float Depth = SceneDepthZ.SampleLevel(LinearSampler, Tex, 0).r;
    float2 ScreenCoord = ViewportUVToScreenPos(Tex);
    float4 NDCPos = float4(ScreenCoord, Depth, 1.0f);
    float4 ViewdPos = mul(NDCPos, InvProjMatrix);
    ViewdPos /= ViewdPos.w;
    return ViewdPos.xyz;
}

// Returns the shorter of the two one-sided differences (Pr - P) and (P - Pl).
float3 MinDiff(float3 P, float3 Pr, float3 Pl)
{
    float3 V1 = Pr - P;
    float3 V2 = P - Pl;
    return (length(V1) < length(V2)) ? V1 : V2;
}

// Computes the number of steps along a ray and the per-step UV increment.
//   stepSizeUv   - out: step size in UV units
//   numSteps     - out: number of steps to march
//   rayRadiusPix - trace radius projected to pixels
//   rand         - per-pixel random value used to dither the clamped step count
void ComputeSteps(out float2 stepSizeUv, out float numSteps, float rayRadiusPix, float rand)
{
    // Never take more steps than there are pixels along the ray.
    numSteps = min(NumSamples, rayRadiusPix);

    // Divide by numSteps + 1 so the last step stays inside rayRadiusPix.
    float stepSizePix = rayRadiusPix / (numSteps + 1);

    // Clamp the footprint to MaxRadiusPixels.
    float maxNumSteps = MaxRadiusPixels / stepSizePix;
    if (maxNumSteps < numSteps)
    {
        numSteps = floor(maxNumSteps + rand);
        numSteps = max(numSteps, 1);
        // Re-derive the step size from the clamped step count; otherwise the
        // dithered / floored count times the old step size can exceed MaxRadiusPixels.
        stepSizePix = MaxRadiusPixels / numSteps;
    }

    stepSizeUv = stepSizePix * Resolution.zw;
}

// Rotates the 2D direction Dir by the angle whose (cos, sin) is CosSin.
float2 RotateDirections(float2 Dir, float2 CosSin)
{
    // https://zhuanlan.zhihu.com/p/58517426
    return float2(
        Dir.x * CosSin.x - Dir.y * CosSin.y,
        Dir.x * CosSin.y + Dir.y * CosSin.x);
}

// Squared length of V.
float Length2(float3 V)
{
    return dot(V, V);
}

// Reciprocal length of V.
float InvLength(float2 V)
{
    return 1.0f / sqrt(dot(V, V));
}

// Tangent of the elevation angle of V relative to the XY plane.
float Tangent(float3 V)
{
    // in D3D, z is negative towards eye, so inverse it
    return -V.z * InvLength(V.xy);
}

// Tangent of the elevation angle of the vector from P to S.
float Tangent(float3 P, float3 S)
{
    return Tangent(S - P);
}

// Tangent of V raised by a 30-degree bias.
float BiasedTangent(float3 V)
{
    // Angle bias against the low-tessellation problem
    const float TanBias = tan(30.0 * PI / 180.0);
    return Tangent(V) + TanBias;
}

// Converts tan(a) to sin(a): sin = tan / sqrt(1 + tan^2).
float TanToSin(float x)
{
    return x / sqrt(x * x + 1.0);
}

// Distance attenuation from the squared distance d2: 1 at the origin, 0 at TraceRadius.
float Falloff(float d2)
{
    // The farther the distance, the smaller the contribution
    return saturate(1.0f - d2 * 1.0 / (TraceRadius * TraceRadius));
}

// Snaps a UV offset to a whole number of texels.
float2 SnapUVOffset(float2 uv)
{
    // Rounds the specified value to the nearest integer.
    return round(uv * Resolution.xy) * Resolution.zw;
}

// Accumulates the occlusion along one screen-space direction.
//   Tex        - UV of the shaded pixel
//   deltaUV    - per-step UV increment along the direction
//   P          - view-space position of the shaded pixel
//   dPdu, dPdv - screen-space derivatives of the view-space position
//   randstep   - random fraction of a step applied to the start position
//   numSamples - number of steps to march
float HorizonOcclusion(
    float2 Tex,
    float2 deltaUV,
    float3 P,
    float3 dPdu,
    float3 dPdv,
    float randstep,
    float numSamples)
{
    float ao = 0;

    // Jitter the start position, then snap both start and step to texel centers.
    float2 uv = Tex + SnapUVOffset(randstep * deltaUV);
    deltaUV = SnapUVOffset(deltaUV);

    // Tangent vector along the marching direction; its biased angle is the initial horizon.
    float3 T = deltaUV.x * dPdu + deltaUV.y * dPdv;
    float tanH = BiasedTangent(T);
    float sinH = TanToSin(tanH);

    float tanS;
    float d2;
    float3 S;
    for (float s = 1; s <= numSamples; ++s)
    {
        uv += deltaUV;
        S = GetViewPos(uv);

        // p as the origin of the space
        tanS = Tangent(P, S);
        d2 = Length2(S - P);

        // only above Tangent can make contribution
        if (d2 < TraceRadius * TraceRadius && tanS > tanH)
        {
            float sinS = TanToSin(tanS);
            // Weight each horizon increment by distance falloff (discontinuity problem)
            ao += Falloff(d2) * (sinS - sinH);
            tanH = tanS;
            sinH = sinS;
        }
    }
    return ao;
}

// HBAO pixel shader: marches NumDirections screen-space directions around the
// pixel and writes the resulting ambient visibility to all channels of Target0.
PixelOutput PS_HBAO(float2 Tex : TEXCOORD, float4 ScreenPos : SV_Position)
{
    PixelOutput Out;
    Out.Target0 = float4(0, 0, 0, 0);

    const float2 NoiseScale = float2(Resolution.x / 4.0f, Resolution.y / 4.0f);

    // view position
    float3 P = GetViewPos(Tex);
    float3 Pl = GetViewPos(Tex + float2(-Resolution.z, 0));
    float3 Pr = GetViewPos(Tex + float2(Resolution.z, 0));
    float3 Pt = GetViewPos(Tex + float2(0, Resolution.w));
    float3 Pb = GetViewPos(Tex + float2(0, -Resolution.w));

    // used to calculate tangent
    float3 dPdu = MinDiff(P, Pr, Pl);
    float3 dPdv = MinDiff(P, Pt, Pb) * (Resolution.y * 1.0 / Resolution.x);

    // sample random vector need scale
    float3 RandomVec = NoiseMap.SampleLevel(WrapLinearSampler, Tex * NoiseScale, 0).rgb;

    // The 0.5 uv range corresponds to the camera fov angle
    // ClipInfo = 1 / tan(theta/2)
    float2 rayRadiusUV = 0.5 * ClipInfo * TraceRadius / P.z;

    // radius in pixels
    float rayRadiusPix = rayRadiusUV.x * Resolution.x;

    // Fully unoccluded when the projected radius is a pixel or less.
    float ao = 1.0;
    if (rayRadiusPix > 1.0)
    {
        ao = 0.0;

        float numSteps;
        float2 stepSizeUV;
        ComputeSteps(stepSizeUV, numSteps, rayRadiusPix, RandomVec.z);

        float alpha = 2.0 * PI / NumDirections;
        for (float i = 0; i < NumDirections; ++i)
        {
            float theta = alpha * i;
            // Evenly spaced direction, rotated by RandomVec.xy used as (cos, sin).
            float2 dir = RotateDirections(float2(cos(theta), sin(theta)), RandomVec.xy);
            float2 deltaUV = dir * stepSizeUV;

            // accumulate occlusion
            ao += HorizonOcclusion(
                Tex,
                deltaUV,
                P,
                dPdu,
                dPdv,
                RandomVec.z,
                numSteps);
        }

        // Average over the directions, scale by 1.9 and invert to visibility.
        ao = 1.0f - ao / NumDirections * 1.9f;
    }

    Out.Target0 = ao;
    return Out;
}
GTAO(Ground Truth-based Ambient Occlusion)是Jorge在2016年提出,其原理上借鉴了Nvidia的HBAO。
$$A(x) \approx \hat{A}(x) = \frac{1}{\pi} \int_{0}^{\pi} \int_{-\pi/2}^{\pi/2} V(\phi,\theta)\,|\sin(\theta)|\,d\theta\,d\phi \tag{6}$$
那么,我们所要求解的就是分别找到 $\theta$ 正方向和 $\theta$ 负方向上的最大水平角 $h_1$ 和 $h_2$。
Multi Bounce(多次弹射)
上述只考虑了光线的单次弹射(Single bounce of light),作者还给出了一个近似拟合的Multi Bounce的公式:
// Multi-bounce approximation: remaps a single-bounce visibility value with a
// per-channel cubic polynomial whose coefficients depend on albedo.
// The result is never lower than the input visibility.
float3 GTAOMultiBounce(float visibility, float3 albedo)
{
    float3 a = 2.0404 * albedo - 0.3324;
    float3 b = -4.7951 * albedo + 0.6417;
    float3 c = 2.7552 * albedo + 0.6903;

    float x = visibility;
    // Cubic a*x^3 + b*x^2 + c*x in Horner form.
    return max(x, ((x * a + b) * x + c) * x);
}
cbuffer PSContant : register(b0)
{
    float4x4 InvProjMatrix;
    float4x4 ViewMatrix;
    float4 Resolution; // width height 1/width 1/height
    float4 ClipInfo;
    float4 GTAOParams; // cos sin angle
};

struct PixelOutput
{
    float4 Target0 : SV_Target0;
};

static const float c_NumAngles = 8.0f;
static const uint GTAO_NUMTAPS = 10;

// Reconstructs the view-space position at the given UV: Z is the linearized
// depth, XY is the screen position scaled by ClipInfo.zw and Z.
float3 ReconstructViewPos(float2 Tex)
{
    float Depth = SceneDepthZ.SampleLevel(LinearSampler, Tex, 0).r;
    float2 ScreenCoord = ViewportUVToScreenPos(Tex);
    float Z = LinearEyeDepth(Depth, ClipInfo.x, ClipInfo.y);
    float2 XY = ScreenCoord * ClipInfo.zw * Z;
    return float3(XY, Z);
}

// Hash of the pixel position, returned as a value in [0, 1).
float InterleavedGradientNoise(float2 iPos)
{
    return frac(52.9829189f * frac((iPos.x * 0.06711056) + (iPos.y * 0.00583715)));
}

// Builds the per-pixel random vector:
//   xy - unit direction from the gradient noise, rotated by the (cos, sin) pair
//        in GTAOParams.xy
//   z  - fractional offset from a 4-entry pixel pattern plus GTAOParams.z
float3 GetRandomVector(uint2 iPos)
{
    iPos.y = 16384 - iPos.y;

    float3 RandomVec = float3(0, 0, 0);
    float3 RandomTexVec = float3(0, 0, 0);
    float ScaleOffset;

    float TemporalCos = GTAOParams.x;
    float TemporalSin = GTAOParams.y;

    float GradientNoise = InterleavedGradientNoise(float2(iPos));

    RandomTexVec.x = cos((GradientNoise * PI));
    RandomTexVec.y = sin((GradientNoise * PI));

    ScaleOffset = (1.0 / 4.0) * ((iPos.y - iPos.x) & 3);

    RandomVec.x = dot(RandomTexVec.xy, float2(TemporalCos, -TemporalSin));
    RandomVec.y = dot(RandomTexVec.xy, float2(TemporalSin, TemporalCos));
    RandomVec.z = frac(ScaleOffset + GTAOParams.z);

    return RandomVec;
}

// Marches NumSteps taps in +ScreenDir and -ScreenDir from BaseUV and returns the
// two horizon angles in radians (x: positive side h1, y: negative side h2),
// measured from ViewDir.
float2 SearchForLargestAngleDual(uint NumSteps, float2 BaseUV, float2 ScreenDir, float3 ViewPos, float3 ViewDir)
{
    float LenDsSquare, LenDsInv, Ang, FallOff;
    float3 Ds;

    // Tracked as cosines during the search; -1 is the lowest possible horizon.
    float2 BestAng = float2(-1, -1);

    const float WorldRadius = 30.0f;
    float AttenFactor = 2.0 / (WorldRadius * WorldRadius);
    float Thickness = 0.9f;

    for (uint i = 0; i < NumSteps; i++)
    {
        float fi = (float)i;

        float2 UVOffset = ScreenDir * (fi + 1.0) * Resolution.zw;
        // NOTE(review): presumably flipped because texture V grows downward while
        // view-space Y grows upward - confirm against ViewportUVToScreenPos.
        UVOffset.y *= -1;
        float4 UV2 = BaseUV.xyxy + float4(UVOffset.xy, -UVOffset.xy);

        // h1
        Ds = ReconstructViewPos(UV2.xy) - ViewPos;
        LenDsSquare = dot(Ds, Ds);
        LenDsInv = rsqrt(LenDsSquare + 0.0001);
        Ang = dot(Ds, ViewDir) * LenDsInv;

        // Distant taps are blended back toward the current horizon.
        FallOff = saturate(LenDsSquare * AttenFactor);
        Ang = lerp(Ang, BestAng.x, FallOff);
        // A higher tap replaces the horizon; a lower one only pulls it down slightly.
        BestAng.x = (Ang > BestAng.x) ? Ang : lerp(Ang, BestAng.x, Thickness);

        // h2
        Ds = ReconstructViewPos(UV2.zw) - ViewPos;
        LenDsSquare = dot(Ds, Ds);
        LenDsInv = rsqrt(LenDsSquare + 0.0001);
        Ang = dot(Ds, ViewDir) * LenDsInv;

        FallOff = saturate(LenDsSquare * AttenFactor);
        // Blend against this side's own horizon (BestAng.y), not h1's.
        Ang = lerp(Ang, BestAng.y, FallOff);
        BestAng.y = (Ang > BestAng.y) ? Ang : lerp(Ang, BestAng.y, Thickness);
    }

    // Cosines -> angles.
    BestAng.x = acos(clamp(BestAng.x, -1.0, 1.0));
    BestAng.y = acos(clamp(BestAng.y, -1.0, 1.0));

    return BestAng;
}

// Evaluates the inner integral for one slice from its two horizon angles.
float ComputeInnerIntegral(float2 Angles, float2 ScreenDir, float3 ViewDir, float3 ViewSpaceNormal)
{
    // Given the angles found in the search plane
    // we need to project the View Space Normal onto the plane
    // defined by the search axis and the View Direction and perform the inner integrate
    float3 PlaneNormal = normalize(cross(float3(ScreenDir, 0), ViewDir));
    float3 Perp = cross(ViewDir, PlaneNormal);
    float3 ProjNormal = ViewSpaceNormal - PlaneNormal * dot(ViewSpaceNormal, PlaneNormal);

    // The small epsilon avoids a division by zero when the projected normal vanishes.
    float LenProjNormal = length(ProjNormal) + 0.000001f;
    float RecipMag = 1.0f / (LenProjNormal);

    float CosAng = dot(ProjNormal, Perp) * RecipMag;
    float Gamma = acos(CosAng) - HALF_PI;
    float CosGamma = dot(ProjNormal, ViewDir) * RecipMag;
    float SinGamma = CosAng * -2.0f;

    // clamp to normal hemisphere
    Angles.x = Gamma + max(-Angles.x - Gamma, -(HALF_PI));
    Angles.y = Gamma + min(Angles.y - Gamma, (HALF_PI));

    float AO = ((LenProjNormal) * 0.25 *
        ((Angles.x * SinGamma + CosGamma - cos((2.0 * Angles.x) - Gamma)) +
         (Angles.y * SinGamma + CosGamma - cos((2.0 * Angles.y) - Gamma))));

    return AO;
}

// GTAO pixel shader: averages the slice integral over c_NumAngles slices, each
// rotated by PI / c_NumAngles from the previous one, starting from a per-pixel
// random direction. Writes the result to RGB, alpha = 1.
PixelOutput PS_GTAO(float2 Tex : TEXCOORD, float4 ScreenPos : SV_Position)
{
    PixelOutput Out;
    Out.Target0 = float4(0, 0, 0, 0);

    // view normal
    float3 WorldNormal = GBufferA.SampleLevel(LinearSampler, Tex, 0).rgb;
    WorldNormal = WorldNormal * 2 - 1;
    float3 ViewSpaceNormal = normalize(mul(WorldNormal, (float3x3)ViewMatrix));

    // view position
    float3 ViewSpacePos = ReconstructViewPos(Tex);
    float3 ViewDir = normalize(-ViewSpacePos);

    int2 iPos = int2(ScreenPos.xy);
    float3 RandomAndOffset = GetRandomVector(iPos);
    float2 RandomVec = RandomAndOffset.xy;

    float2 ScreenDir = float2(RandomVec.x, RandomVec.y);

    uint NumAngles = (uint)c_NumAngles;
    float SinDeltaAngle = sin(PI / c_NumAngles);
    float CosDeltaAngle = cos(PI / c_NumAngles);

    float Sum = 0.0;
    for (uint Angle = 0; Angle < NumAngles; Angle++)
    {
        float2 horizons = SearchForLargestAngleDual(GTAO_NUMTAPS, Tex, ScreenDir, ViewSpacePos, ViewDir);
        Sum += ComputeInnerIntegral(horizons, ScreenDir, ViewDir, ViewSpaceNormal);

        // Rotate for the next angle
        float2 TempScreenDir = ScreenDir.xy;
        ScreenDir.x = (TempScreenDir.x * CosDeltaAngle) + (TempScreenDir.y * -SinDeltaAngle);
        ScreenDir.y = (TempScreenDir.x * SinDeltaAngle) + (TempScreenDir.y * CosDeltaAngle);
    }

    float AO = Sum;
    AO = AO / ((float)NumAngles);

    Out.Target0 = float4(AO, AO, AO, 1);
    return Out;
}
