#include "VolcanoSim.h" #include "Terrain.h" #include #include #include #include #include #include #include #include namespace { struct VolcanoParticleSIMDParams { float dt; float gravity; float plumeBuoyancy; float ambientTemperature; float damping; float windX; float windZ; float particleCooling; }; extern "C" float volcanoFlowFromDropAsm(float drop, float k); extern "C" void volcanoParticleStepAsm( float* posX, float* posY, float* posZ, float* velX, float* velY, float* velZ, float dt, float gravity, float plumeBuoyancy, float tempNorm, float damping, float windX, float windZ, float invR, float ventPush); extern "C" void volcanoParticleStepSIMDAsm( float* posX, float* posY, float* posZ, float* velX, float* velY, float* velZ, float* life, float* temp, std::size_t count, const VolcanoParticleSIMDParams* params); extern "C" void volcanoParticleStepBatchAsm( float* posX, float* posY, float* posZ, float* velX, float* velY, float* velZ, float* life, float* temp, std::size_t count, const VolcanoParticleSIMDParams* params); extern "C" void volcanoUpdateLavaFluxSIMDAsm( float* lavaHeightNext, const float* lavaHeight, const float* terrainHeight, const float* temperature, int width, int height, float viscDt); extern "C" void volcanoDiffuseHeatSIMDAsm( float* temperatureNext, const float* temperature, const float* lavaHeight, int width, int height, float alphaDt, float dt, float lavaCooling, float ambientTemperature); static inline float max0(float v) { return (v > 0.0f) ? v : 0.0f; } static inline float flowFromDropScalar(float drop, float k) { const float pos = max0(drop); const float shape = 0.35f + 0.65f * std::sqrt(pos + 1.0e-4f); return pos * k * shape; } static inline float ventPulseAtTime(float t) { const float base = 0.76f + 0.19f * std::sin(t * 0.37f + 0.6f * std::sin(t * 0.11f)); const float carrier = max0(std::sin(t * 1.45f + 0.85f * std::sin(t * 0.22f))); const float burst = carrier * carrier * (0.65f + 0.35f * carrier); return std::clamp(base + 0.52f * burst, 0.45f, 1.85f); } static inline float gaussian2D(float x, float y, float sigma) { const float inv = 1.0f / std::max(sigma, 1.0e-4f); const float nx = x * inv; const float ny = y * inv; return std::exp(-(nx * nx + ny * ny)); } static float maxAbsDiff(const float* a, const float* b, std::size_t count) { float maxDiff = 0.0f; for (std::size_t i = 0; i < count; ++i) { maxDiff = std::max(maxDiff, std::fabs(a[i] - b[i])); } return maxDiff; } static std::uint64_t hashFloatArray(const float* data, std::size_t count) { const std::uint64_t fnvOffset = 1469598103934665603ull; const std::uint64_t fnvPrime = 1099511628211ull; std::uint64_t hash = fnvOffset; for (std::size_t i = 0; i < count; ++i) { std::uint32_t bits = 0u; std::memcpy(&bits, data + i, sizeof(float)); hash ^= static_cast(bits); hash *= fnvPrime; } return hash; } static void printReferenceIteration(const VolcanoSimulation& sim, int frame) { const std::size_t particleCount = sim.getParticleCount(); const std::size_t gridCount = static_cast(sim.getGridWidth()) * static_cast(sim.getGridHeight()); const float* posX = sim.getParticlePosX(); const float* posY = sim.getParticlePosY(); const float* posZ = sim.getParticlePosZ(); const float* pTemp = sim.getParticleTemperature(); const float* lava = sim.getLavaHeightGrid(); const float* gTemp = sim.getTemperatureGrid(); const float p0x = (particleCount > 0) ? posX[0] : 0.0f; const float p0y = (particleCount > 0) ? posY[0] : 0.0f; const float p0z = (particleCount > 0) ? posZ[0] : 0.0f; const float p0t = (particleCount > 0) ? pTemp[0] : 0.0f; std::cout << "REF frame=" << frame << " avgLava=" << sim.getAverageLavaHeight() << " avgTemp=" << sim.getAverageTemperature() << " p0=(" << p0x << "," << p0y << "," << p0z << ")" << " p0Temp=" << p0t << " lavaHash=" << hashFloatArray(lava, gridCount) << " tempHash=" << hashFloatArray(gTemp, gridCount) << '\n'; } } // namespace VolcanoSimulation::AlignedFloatArray::AlignedFloatArray() : ptr(nullptr), count(0) {} VolcanoSimulation::AlignedFloatArray::~AlignedFloatArray() { if (ptr != nullptr) { #ifdef _WIN32 _aligned_free(ptr); #else std::free(ptr); #endif } } VolcanoSimulation::AlignedFloatArray::AlignedFloatArray(const AlignedFloatArray& other) : ptr(nullptr), count(0) { if (other.count > 0 && resize(other.count)) { std::memcpy(ptr, other.ptr, other.count * sizeof(float)); } } VolcanoSimulation::AlignedFloatArray& VolcanoSimulation::AlignedFloatArray::operator=(const AlignedFloatArray& other) { if (this == &other) { return *this; } if (!resize(other.count)) { return *this; } if (count > 0) { std::memcpy(ptr, other.ptr, count * sizeof(float)); } return *this; } VolcanoSimulation::AlignedFloatArray::AlignedFloatArray(AlignedFloatArray&& other) noexcept : ptr(other.ptr), count(other.count) { other.ptr = nullptr; other.count = 0; } VolcanoSimulation::AlignedFloatArray& VolcanoSimulation::AlignedFloatArray::operator=(AlignedFloatArray&& other) noexcept { if (this == &other) { return *this; } if (ptr != nullptr) { std::free(ptr); } ptr = other.ptr; count = other.count; other.ptr = nullptr; other.count = 0; return *this; } bool VolcanoSimulation::AlignedFloatArray::resize(std::size_t newCount) { if (ptr != nullptr) { std::free(ptr); ptr = nullptr; } count = 0; if (newCount == 0) { return true; } void* raw = nullptr; #ifdef _WIN32 raw = _aligned_malloc(newCount * sizeof(float), 32); if (!raw) { return false; } #else if (posix_memalign(&raw, 32, newCount * sizeof(float)) != 0) { return false; } #endif ptr = static_cast(raw); count = newCount; std::memset(ptr, 0, count * sizeof(float)); return true; } float* VolcanoSimulation::AlignedFloatArray::data() { return ptr; } const float* VolcanoSimulation::AlignedFloatArray::data() const { return ptr; } std::size_t VolcanoSimulation::AlignedFloatArray::size() const { return count; } bool VolcanoSimulation::ParticleSoA::resize(std::size_t count) { return posX.resize(count) && posY.resize(count) && posZ.resize(count) && velX.resize(count) && velY.resize(count) && velZ.resize(count) && life.resize(count) && temperature.resize(count); } std::size_t VolcanoSimulation::ParticleSoA::size() const { return posX.size(); } VolcanoSimulation::VolcanoSimulation() : width(0), height(0), gravity(-9.81f), damping(0.992f), particleCooling(1.2f), lavaViscosity(1.18f), lavaCooling(0.06f), diffusionAlpha(0.17f), ambientTemperature(295.0f), ventTemperature(1360.0f), lavaSourceRate(3.4f), windX(0.08f), windZ(-0.03f), plumeBuoyancy(7.5f), // (plumeBuoyancy 16.0 -> 7.5) eruptionClock(0.0f), ventPulseLevel(2.0f), renderConfig({28.0f, 0.4f, 24.0f, 13.0f, 12.0f, 22.0f, 1.7f}), averageLavaHeight(0.0f), averageTemperature(295.0f), collisionTerrainSize(0), collisionTerrainOriginX(0.0f), collisionTerrainOriginZ(0.0f), collisionTerrainSpacing(1.0f), collisionWaterLevelWorld(0.0f), rngState(1337u) {} VolcanoSimulation::~VolcanoSimulation() = default; bool VolcanoSimulation::init(std::size_t particleCount, int gridWidth, int gridHeight, unsigned int seed) { if (gridWidth < 3 || gridHeight < 3) { return false; } width = gridWidth; height = gridHeight; rngState = seed; if (!particles.resize(particleCount)) { return false; } const std::size_t gridSize = static_cast(width) * static_cast(height); if (!terrainHeight.resize(gridSize) || !lavaHeight.resize(gridSize) || !lavaHeightNext.resize(gridSize) || !temperature.resize(gridSize) || !temperatureNext.resize(gridSize)) { return false; } for (std::size_t i = 0; i < particles.size(); ++i) { respawnParticle(i); } initTerrainAndLava(); updateStats(); return true; } std::size_t VolcanoSimulation::idx(int x, int y) const { return static_cast(y) * static_cast(width) + static_cast(x); } float VolcanoSimulation::random01() { rngState = 1664525u * rngState + 1013904223u; return static_cast((rngState >> 8) & 0x00FFFFFFu) / 16777215.0f; } void VolcanoSimulation::setCollisionTerrain(const Terrain& terrain, float waterLevelWorld) { collisionTerrainSize = terrain.getGridSize(); collisionTerrainSpacing = terrain.getGridSpacing(); collisionWaterLevelWorld = waterLevelWorld; collisionTerrainHeights.clear(); if (collisionTerrainSize <= 1 || collisionTerrainSpacing <= 0.0f) { collisionTerrainSize = 0; return; } const glm::vec3 origin = terrain.getVertex(0, 0); collisionTerrainOriginX = origin.x; collisionTerrainOriginZ = origin.z; const std::size_t count = static_cast(collisionTerrainSize) * static_cast(collisionTerrainSize); collisionTerrainHeights.resize(count, collisionWaterLevelWorld); for (int x = 0; x < collisionTerrainSize; ++x) { for (int z = 0; z < collisionTerrainSize; ++z) { const std::size_t id = static_cast(x) * static_cast(collisionTerrainSize) + static_cast(z); collisionTerrainHeights[id] = terrain.getVertex(x, z).y; } } } float VolcanoSimulation::sampleCollisionTerrainHeightWorld(float worldX, float worldZ) const { if (collisionTerrainSize <= 1 || collisionTerrainHeights.empty() || collisionTerrainSpacing <= 0.0f) { return collisionWaterLevelWorld; } const float fx = (worldX - collisionTerrainOriginX) / collisionTerrainSpacing; const float fz = (worldZ - collisionTerrainOriginZ) / collisionTerrainSpacing; if (!std::isfinite(fx) || !std::isfinite(fz)) { return collisionWaterLevelWorld; } const float maxIdx = static_cast(collisionTerrainSize - 1); const float clampedX = std::clamp(fx, 0.0f, maxIdx); const float clampedZ = std::clamp(fz, 0.0f, maxIdx); const int x0 = static_cast(std::floor(clampedX)); const int z0 = static_cast(std::floor(clampedZ)); const int x1 = std::min(x0 + 1, collisionTerrainSize - 1); const int z1 = std::min(z0 + 1, collisionTerrainSize - 1); const float tx = clampedX - static_cast(x0); const float tz = clampedZ - static_cast(z0); const auto at = [&](int x, int z) { return collisionTerrainHeights[static_cast(x) * static_cast(collisionTerrainSize) + static_cast(z)]; }; const float h00 = at(x0, z0); const float h10 = at(x1, z0); const float h01 = at(x0, z1); const float h11 = at(x1, z1); const float hx0 = h00 + tx * (h10 - h00); const float hx1 = h01 + tx * (h11 - h01); return hx0 + tz * (hx1 - hx0); } void VolcanoSimulation::sampleCollisionTerrainGradientWorld(float worldX, float worldZ, float& dHdX, float& dHdZ) const { const float eps = std::max(0.25f, collisionTerrainSpacing); const float hx0 = sampleCollisionTerrainHeightWorld(worldX - eps, worldZ); const float hx1 = sampleCollisionTerrainHeightWorld(worldX + eps, worldZ); const float hz0 = sampleCollisionTerrainHeightWorld(worldX, worldZ - eps); const float hz1 = sampleCollisionTerrainHeightWorld(worldX, worldZ + eps); dHdX = (hx1 - hx0) / (2.0f * eps); dHdZ = (hz1 - hz0) / (2.0f * eps); } void VolcanoSimulation::respawnParticle(std::size_t i) { const float pulse = std::clamp(ventPulseLevel, 0.45f, 2.0f); const float swirl = eruptionClock * (0.42f + 0.28f * pulse); const int jetCount = 3; const int jetId = std::min(jetCount - 1, static_cast(random01() * static_cast(jetCount))); const float jetBaseAngle = swirl + (6.28318530718f * static_cast(jetId) / static_cast(jetCount)); const float angle = jetBaseAngle + (random01() - 0.5f) * (0.62f - 0.16f * std::min(1.0f, pulse)); const float radius = (0.09f + 0.22f * std::sqrt(random01())) * (0.80f + 0.26f * pulse); const float jitterMag = 0.06f + 0.08f * random01(); const float jitterAngle = 6.28318530718f * random01(); const float dx = std::cos(angle) * radius + std::cos(jitterAngle) * jitterMag; const float dz = std::sin(angle) * radius + std::sin(jitterAngle) * jitterMag; particles.posX.data()[i] = dx; particles.posY.data()[i] = 0.08f + random01() * (0.22f + 0.18f * pulse); particles.posZ.data()[i] = dz; const float radialSpeed = (2.2f + random01() * 3.8f) * (0.85f + 0.55f * pulse); const float crossBias = (random01() - 0.5f) * (1.10f + 0.65f * pulse); particles.velX.data()[i] = std::cos(angle) * radialSpeed + std::cos(angle + 1.57079632679f) * crossBias + windX * 0.9f; particles.velY.data()[i] = (8.0f + random01() * 5.5f) * (0.88f + 0.52f * pulse); particles.velZ.data()[i] = std::sin(angle) * radialSpeed + std::sin(angle + 1.57079632679f) * crossBias + windZ * 0.9f; particles.life.data()[i] = (3.2f + random01() * 2.8f) * (0.92f + 0.16f * pulse); particles.temperature.data()[i] = 900.0f + random01() * (340.0f + 170.0f * std::min(1.0f, pulse)); } void VolcanoSimulation::initTerrainAndLava() { const int cx = width / 2; const int cy = height / 2; for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { const float nx = (2.0f * static_cast(x) / static_cast(width - 1)) - 1.0f; const float ny = (2.0f * static_cast(y) / static_cast(height - 1)) - 1.0f; const float r = std::sqrt(nx * nx + ny * ny); const float rClamped = std::min(1.0f, r); const float cone = std::exp(-2.7f * r * r); const float crater = 0.30f * std::exp(-220.0f * r * r); const float ridge = 0.03f * std::sin(11.0f * nx) * std::cos(9.0f * ny) * (1.0f - rClamped); const std::size_t id = idx(x, y); terrainHeight.data()[id] = std::max(0.0f, 0.12f + 1.1f * cone - crater + ridge); const float ventMask = std::exp(-190.0f * r * r); lavaHeight.data()[id] = 0.52f * ventMask; temperature.data()[id] = ambientTemperature + (ventTemperature - ambientTemperature) * ventMask; } } std::memcpy(lavaHeightNext.data(), lavaHeight.data(), lavaHeight.size() * sizeof(float)); std::memcpy(temperatureNext.data(), temperature.data(), temperature.size() * sizeof(float)); // Keep cell exactly at crater hot to avoid delayed startup. const std::size_t c = idx(cx, cy); temperature.data()[c] = ventTemperature; lavaHeight.data()[c] = std::max(lavaHeight.data()[c], 0.70f); } void VolcanoSimulation::injectVentSources(float dt) { eruptionClock += dt; const int cx = width / 2; const int cy = height / 2; const float targetPulse = ventPulseAtTime(eruptionClock); const float stochasticPulse = 1.0f + 0.16f * (random01() - 0.5f); const float desiredPulse = targetPulse * stochasticPulse; const float relax = std::clamp(dt * 2.2f, 0.0f, 1.0f); ventPulseLevel += (desiredPulse - ventPulseLevel) * relax; const float pulse = std::clamp(ventPulseLevel, 0.45f, 1.95f); const float rot = eruptionClock * 0.34f; const float c = std::cos(rot); const float s = std::sin(rot); const float nozzleR = 1.28f; const float j0x = c * nozzleR; const float j0y = s * nozzleR; const float j1x = std::cos(rot + 2.09439510239f) * nozzleR; const float j1y = std::sin(rot + 2.09439510239f) * nozzleR; const float j2x = std::cos(rot + 4.18879020479f) * nozzleR; const float j2y = std::sin(rot + 4.18879020479f) * nozzleR; const float ventTempPeak = std::min(1540.0f, ventTemperature + 80.0f * (pulse - 1.0f)); for (int dy = -3; dy <= 3; ++dy) { for (int dx = -3; dx <= 3; ++dx) { const int gx = cx + dx; const int gy = cy + dy; if (gx < 1 || gy < 1 || gx >= width - 1 || gy >= height - 1) { continue; } const float fx = static_cast(dx); const float fy = static_cast(dy); const float core = gaussian2D(fx, fy, 1.25f); const float jet0 = gaussian2D(fx - j0x, fy - j0y, 0.95f); const float jet1 = gaussian2D(fx - j1x, fy - j1y, 0.95f); const float jet2 = gaussian2D(fx - j2x, fy - j2y, 0.95f); const float jetLobes = std::max(jet0, std::max(jet1, jet2)); const float skirt = gaussian2D(fx, fy, 2.35f) * 0.28f; const float w = std::clamp(0.50f * core + 0.68f * jetLobes + skirt, 0.0f, 1.18f); const std::size_t id = idx(gx, gy); lavaHeight.data()[id] += lavaSourceRate * dt * w * (0.74f + 0.92f * pulse); temperature.data()[id] = std::min(1540.0f, temperature.data()[id] + (ventTempPeak - temperature.data()[id]) * w * dt * (1.1f + 0.9f * pulse)); } } // Low frequency directional drift + turbulent wobble keeps vent ejection believable. const float windTargetX = 0.16f * std::sin(eruptionClock * 0.19f + 0.7f * std::sin(eruptionClock * 0.05f)); const float windTargetZ = 0.14f * std::cos(eruptionClock * 0.17f + 0.6f * std::sin(eruptionClock * 0.07f)); windX += (windTargetX - windX) * std::clamp(dt * 0.8f, 0.0f, 1.0f); windZ += (windTargetZ - windZ) * std::clamp(dt * 0.8f, 0.0f, 1.0f); windX = std::clamp(windX + (random01() - 0.5f) * dt * 0.14f, -0.30f, 0.30f); windZ = std::clamp(windZ + (random01() - 0.5f) * dt * 0.14f, -0.30f, 0.30f); } void VolcanoSimulation::updateStats() { const std::size_t n = lavaHeight.size(); if (n == 0) { averageLavaHeight = 0.0f; averageTemperature = ambientTemperature; return; } float lavaSum = 0.0f; float tempSum = 0.0f; const float* lava = lavaHeight.data(); const float* temp = temperature.data(); for (std::size_t i = 0; i < n; ++i) { const float l = std::isfinite(lava[i]) ? lava[i] : 0.0f; const float t = std::isfinite(temp[i]) ? temp[i] : ambientTemperature; lavaSum += l; tempSum += t; } const float invN = 1.0f / static_cast(n); averageLavaHeight = lavaSum * invN; averageTemperature = tempSum * invN; } void VolcanoSimulation::updateParticlesScalar(float dt) { const std::size_t n = particles.size(); float* posX = particles.posX.data(); float* posY = particles.posY.data(); float* posZ = particles.posZ.data(); float* velX = particles.velX.data(); float* velY = particles.velY.data(); float* velZ = particles.velZ.data(); float* life = particles.life.data(); float* temp = particles.temperature.data(); const float invTempRange = 1.0f / 900.0f; const float emitterBaseWorldY = renderConfig.worldY + renderConfig.coneHeight + 0.25f; const float minLocalY = (collisionWaterLevelWorld - emitterBaseWorldY) - 2.0f; for (std::size_t i = 0; i < n; ++i) { if (!std::isfinite(posX[i]) || !std::isfinite(posY[i]) || !std::isfinite(posZ[i]) || !std::isfinite(velX[i]) || !std::isfinite(velY[i]) || !std::isfinite(velZ[i]) || !std::isfinite(life[i]) || !std::isfinite(temp[i])) { respawnParticle(i); continue; } const float tempNorm = std::clamp((temp[i] - ambientTemperature) * invTempRange, 0.0f, 1.6f); const float radius2 = posX[i] * posX[i] + posZ[i] * posZ[i]; const float invR = 1.0f / std::sqrt(std::max(radius2, 0.04f)); const float ventPush = max0(1.2f - radius2) * tempNorm * 5.2f; volcanoParticleStepAsm( posX + i, posY + i, posZ + i, velX + i, velY + i, velZ + i, dt, gravity, plumeBuoyancy, tempNorm, damping, windX, windZ, invR, ventPush); life[i] -= dt * (0.14f + 0.10f * tempNorm); temp[i] -= particleCooling * dt * (0.55f + 0.6f * std::sqrt(radius2 + 0.1f)); const float worldX = renderConfig.worldX + posX[i] * renderConfig.particleSpreadScale; const float worldZ = renderConfig.worldZ + posZ[i] * renderConfig.particleSpreadScale; const float terrainWorldY = sampleCollisionTerrainHeightWorld(worldX, worldZ); const float hitWorldY = std::max(terrainWorldY, collisionWaterLevelWorld); const float hitLocalY = hitWorldY - emitterBaseWorldY + 0.08f; if (velY[i] < 0.0f && posY[i] <= hitLocalY) { if (terrainWorldY > collisionWaterLevelWorld + 0.03f) { float dHdX = 0.0f; float dHdZ = 0.0f; sampleCollisionTerrainGradientWorld(worldX, worldZ, dHdX, dHdZ); float downX = -dHdX; float downZ = -dHdZ; float downLen = std::sqrt(downX * downX + downZ * downZ); if (downLen < 1.0e-5f) { downX = posX[i]; downZ = posZ[i]; downLen = std::sqrt(downX * downX + downZ * downZ); } if (downLen < 1.0e-5f) { downX = 1.0f; downZ = 0.0f; downLen = 1.0f; } downX /= downLen; downZ /= downLen; const float rollSpeedWorld = 1.4f + 2.6f * std::clamp((temp[i] - ambientTemperature) / 1100.0f, 0.0f, 1.0f); const float invSpread = 1.0f / std::max(renderConfig.particleSpreadScale, 1.0e-4f); velX[i] = downX * rollSpeedWorld * invSpread; velZ[i] = downZ * rollSpeedWorld * invSpread; velY[i] = 0.0f; posY[i] = hitLocalY + 0.03f; } else { respawnParticle(i); } continue; } if (!std::isfinite(posX[i]) || !std::isfinite(posY[i]) || !std::isfinite(posZ[i]) || !std::isfinite(velX[i]) || !std::isfinite(velY[i]) || !std::isfinite(velZ[i]) || !std::isfinite(life[i]) || !std::isfinite(temp[i]) || life[i] <= 0.0f || posY[i] < minLocalY || posY[i] > 64.0f) { respawnParticle(i); } } } void VolcanoSimulation::updateParticlesSIMD(float dt) { const std::size_t n = particles.size(); float* posX = particles.posX.data(); float* posY = particles.posY.data(); float* posZ = particles.posZ.data(); float* velX = particles.velX.data(); float* velY = particles.velY.data(); float* velZ = particles.velZ.data(); float* life = particles.life.data(); float* temp = particles.temperature.data(); const float emitterBaseWorldY = renderConfig.worldY + renderConfig.coneHeight + 0.25f; const float minLocalY = (collisionWaterLevelWorld - emitterBaseWorldY) - 2.0f; const VolcanoParticleSIMDParams params = { dt, gravity, plumeBuoyancy, ambientTemperature, damping, windX, windZ, particleCooling }; volcanoParticleStepBatchAsm(posX, posY, posZ, velX, velY, velZ, life, temp, n, ¶ms); for (std::size_t i = 0; i < n; ++i) { const float worldX = renderConfig.worldX + posX[i] * renderConfig.particleSpreadScale; const float worldZ = renderConfig.worldZ + posZ[i] * renderConfig.particleSpreadScale; const float terrainWorldY = sampleCollisionTerrainHeightWorld(worldX, worldZ); const float hitWorldY = std::max(terrainWorldY, collisionWaterLevelWorld); const float hitLocalY = hitWorldY - emitterBaseWorldY + 0.08f; if (velY[i] < 0.0f && posY[i] <= hitLocalY) { if (terrainWorldY > collisionWaterLevelWorld + 0.03f) { float dHdX = 0.0f; float dHdZ = 0.0f; sampleCollisionTerrainGradientWorld(worldX, worldZ, dHdX, dHdZ); float downX = -dHdX; float downZ = -dHdZ; float downLen = std::sqrt(downX * downX + downZ * downZ); if (downLen < 1.0e-5f) { downX = posX[i]; downZ = posZ[i]; downLen = std::sqrt(downX * downX + downZ * downZ); } if (downLen < 1.0e-5f) { downX = 1.0f; downZ = 0.0f; downLen = 1.0f; } downX /= downLen; downZ /= downLen; const float rollSpeedWorld = 1.4f + 2.6f * std::clamp((temp[i] - ambientTemperature) / 1100.0f, 0.0f, 1.0f); const float invSpread = 1.0f / std::max(renderConfig.particleSpreadScale, 1.0e-4f); velX[i] = downX * rollSpeedWorld * invSpread; velZ[i] = downZ * rollSpeedWorld * invSpread; velY[i] = 0.0f; posY[i] = hitLocalY + 0.03f; } else { respawnParticle(i); } continue; } if (life[i] <= 0.0f || posY[i] < minLocalY || posY[i] > 64.0f) { respawnParticle(i); } } } void VolcanoSimulation::updateLavaFluxScalar(float dt) { std::memcpy(lavaHeightNext.data(), lavaHeight.data(), lavaHeight.size() * sizeof(float)); for (int y = 1; y < height - 1; ++y) { for (int x = 1; x < width - 1; ++x) { const std::size_t id = idx(x, y); const std::size_t l = id - 1; const std::size_t r = id + 1; const std::size_t u = id - width; const std::size_t d = id + width; const float hC = terrainHeight.data()[id] + lavaHeight.data()[id]; const float hL = terrainHeight.data()[l] + lavaHeight.data()[l]; const float hR = terrainHeight.data()[r] + lavaHeight.data()[r]; const float hU = terrainHeight.data()[u] + lavaHeight.data()[u]; const float hD = terrainHeight.data()[d] + lavaHeight.data()[d]; const float fluidC = std::clamp((temperature.data()[id] - 650.0f) / 650.0f, 0.0f, 1.0f); const float fluidL = std::clamp((temperature.data()[l] - 650.0f) / 650.0f, 0.0f, 1.0f); const float fluidR = std::clamp((temperature.data()[r] - 650.0f) / 650.0f, 0.0f, 1.0f); const float fluidU = std::clamp((temperature.data()[u] - 650.0f) / 650.0f, 0.0f, 1.0f); const float fluidD = std::clamp((temperature.data()[d] - 650.0f) / 650.0f, 0.0f, 1.0f); const float kC = lavaViscosity * dt * (0.18f + 1.22f * fluidC); const float kL = lavaViscosity * dt * (0.18f + 1.22f * fluidL); const float kR = lavaViscosity * dt * (0.18f + 1.22f * fluidR); const float kU = lavaViscosity * dt * (0.18f + 1.22f * fluidU); const float kD = lavaViscosity * dt * (0.18f + 1.22f * fluidD); float outL = volcanoFlowFromDropAsm(hC - hL, kC); float outR = volcanoFlowFromDropAsm(hC - hR, kC); float outU = volcanoFlowFromDropAsm(hC - hU, kC); float outD = volcanoFlowFromDropAsm(hC - hD, kC); const float inL = volcanoFlowFromDropAsm(hL - hC, kL); const float inR = volcanoFlowFromDropAsm(hR - hC, kR); const float inU = volcanoFlowFromDropAsm(hU - hC, kU); const float inD = volcanoFlowFromDropAsm(hD - hC, kD); float outSum = outL + outR + outU + outD; const float inSum = inL + inR + inU + inD; const float maxOut = lavaHeight.data()[id] * 0.94f; if (outSum > maxOut && outSum > 1.0e-6f) { const float scale = maxOut / outSum; outL *= scale; outR *= scale; outU *= scale; outD *= scale; outSum = outL + outR + outU + outD; } const float next = std::max(0.0f, lavaHeight.data()[id] + inSum - outSum); lavaHeightNext.data()[id] = std::isfinite(next) ? std::min(6.0f, next) : 0.0f; } } std::memcpy(lavaHeight.data(), lavaHeightNext.data(), lavaHeight.size() * sizeof(float)); } void VolcanoSimulation::updateLavaFluxSIMD(float dt) { std::memcpy(lavaHeightNext.data(), lavaHeight.data(), lavaHeight.size() * sizeof(float)); const float viscDt = lavaViscosity * dt; volcanoUpdateLavaFluxSIMDAsm( lavaHeightNext.data(), lavaHeight.data(), terrainHeight.data(), temperature.data(), width, height, viscDt); const int scalarStartX = 1 + ((width - 2) / 8) * 8; for (int y = 1; y < height - 1; ++y) { const std::size_t row = static_cast(y) * static_cast(width); for (int x = scalarStartX; x < width - 1; ++x) { const std::size_t id = row + static_cast(x); const std::size_t l = id - 1; const std::size_t r = id + 1; const std::size_t u = id - width; const std::size_t d = id + width; const float hC = terrainHeight.data()[id] + lavaHeight.data()[id]; const float hL = terrainHeight.data()[l] + lavaHeight.data()[l]; const float hR = terrainHeight.data()[r] + lavaHeight.data()[r]; const float hU = terrainHeight.data()[u] + lavaHeight.data()[u]; const float hD = terrainHeight.data()[d] + lavaHeight.data()[d]; const float fluidC = std::clamp((temperature.data()[id] - 650.0f) / 650.0f, 0.0f, 1.0f); const float fluidL = std::clamp((temperature.data()[l] - 650.0f) / 650.0f, 0.0f, 1.0f); const float fluidR = std::clamp((temperature.data()[r] - 650.0f) / 650.0f, 0.0f, 1.0f); const float fluidU = std::clamp((temperature.data()[u] - 650.0f) / 650.0f, 0.0f, 1.0f); const float fluidD = std::clamp((temperature.data()[d] - 650.0f) / 650.0f, 0.0f, 1.0f); const float kC = viscDt * (0.18f + 1.22f * fluidC); const float kL = viscDt * (0.18f + 1.22f * fluidL); const float kR = viscDt * (0.18f + 1.22f * fluidR); const float kU = viscDt * (0.18f + 1.22f * fluidU); const float kD = viscDt * (0.18f + 1.22f * fluidD); float outL = volcanoFlowFromDropAsm(hC - hL, kC); float outR = volcanoFlowFromDropAsm(hC - hR, kC); float outU = volcanoFlowFromDropAsm(hC - hU, kC); float outD = volcanoFlowFromDropAsm(hC - hD, kC); const float inL = volcanoFlowFromDropAsm(hL - hC, kL); const float inR = volcanoFlowFromDropAsm(hR - hC, kR); const float inU = volcanoFlowFromDropAsm(hU - hC, kU); const float inD = volcanoFlowFromDropAsm(hD - hC, kD); float outSum = outL + outR + outU + outD; const float inSum = inL + inR + inU + inD; const float maxOut = lavaHeight.data()[id] * 0.94f; if (outSum > maxOut && outSum > 1.0e-6f) { const float scale = maxOut / outSum; outL *= scale; outR *= scale; outU *= scale; outD *= scale; outSum = outL + outR + outU + outD; } float next = lavaHeight.data()[id] + inSum - outSum; if (!std::isfinite(next)) { next = 0.0f; } lavaHeightNext.data()[id] = std::max(0.0f, std::min(6.0f, next)); } } std::memcpy(lavaHeight.data(), lavaHeightNext.data(), lavaHeight.size() * sizeof(float)); } void VolcanoSimulation::diffuseHeatScalar(float dt) { std::memcpy(temperatureNext.data(), temperature.data(), temperature.size() * sizeof(float)); for (int y = 1; y < height - 1; ++y) { for (int x = 1; x < width - 1; ++x) { const std::size_t id = idx(x, y); const float c = temperature.data()[id]; const float l = temperature.data()[id - 1]; const float r = temperature.data()[id + 1]; const float u = temperature.data()[id - width]; const float d = temperature.data()[id + width]; const float lap = l + r + u + d - 4.0f * c; const float fluid = std::clamp((c - 650.0f) / 650.0f, 0.0f, 1.0f); const float lavaHeat = lavaHeight.data()[id] * (420.0f + 100.0f * fluid); const float cooling = lavaCooling * (c - ambientTemperature); float next = c + dt * (diffusionAlpha * lap + lavaHeat - cooling); next = std::clamp(next, ambientTemperature, 1520.0f); temperatureNext.data()[id] = std::isfinite(next) ? next : ambientTemperature; } } const int cx = width / 2; const int cy = height / 2; for (int dy = -2; dy <= 2; ++dy) { for (int dx = -2; dx <= 2; ++dx) { const int gx = cx + dx; const int gy = cy + dy; if (gx < 1 || gy < 1 || gx >= width - 1 || gy >= height - 1) { continue; } const float r2 = static_cast(dx * dx + dy * dy); const float w = std::exp(-0.75f * r2); const std::size_t id = idx(gx, gy); const float target = ambientTemperature + (ventTemperature - ambientTemperature) * w * 0.95f; temperatureNext.data()[id] = std::max(temperatureNext.data()[id], target); } } std::memcpy(temperature.data(), temperatureNext.data(), temperature.size() * sizeof(float)); } void VolcanoSimulation::diffuseHeatSIMD(float dt) { std::memcpy(temperatureNext.data(), temperature.data(), temperature.size() * sizeof(float)); const float alphaDt = diffusionAlpha * dt; volcanoDiffuseHeatSIMDAsm( temperatureNext.data(), temperature.data(), lavaHeight.data(), width, height, alphaDt, dt, lavaCooling, ambientTemperature); const int scalarStartX = 1 + ((width - 2) / 8) * 8; for (int y = 1; y < height - 1; ++y) { const std::size_t row = static_cast(y) * static_cast(width); for (int x = scalarStartX; x < width - 1; ++x) { const std::size_t id = row + static_cast(x); const float c = temperature.data()[id]; const float l = temperature.data()[id - 1]; const float r = temperature.data()[id + 1]; const float u = temperature.data()[id - width]; const float d = temperature.data()[id + width]; const float lap = l + r + u + d - 4.0f * c; const float fluid = std::clamp((c - 650.0f) / 650.0f, 0.0f, 1.0f); const float lavaHeat = lavaHeight.data()[id] * (420.0f + 100.0f * fluid); const float cooling = lavaCooling * (c - ambientTemperature); float next = c + dt * (diffusionAlpha * lap + lavaHeat - cooling); temperatureNext.data()[id] = std::isfinite(next) ? std::clamp(next, ambientTemperature, 1520.0f) : ambientTemperature; } } const int cx = width / 2; const int cy = height / 2; for (int dy = -2; dy <= 2; ++dy) { for (int dx = -2; dx <= 2; ++dx) { const int gx = cx + dx; const int gy = cy + dy; if (gx < 1 || gy < 1 || gx >= width - 1 || gy >= height - 1) { continue; } const float r2 = static_cast(dx * dx + dy * dy); const float w = std::exp(-0.75f * r2); const std::size_t id = idx(gx, gy); const float target = ambientTemperature + (ventTemperature - ambientTemperature) * w * 0.95f; temperatureNext.data()[id] = std::max(temperatureNext.data()[id], target); } } std::memcpy(temperature.data(), temperatureNext.data(), temperature.size() * sizeof(float)); } void VolcanoSimulation::stepScalar(float dt) { dt = std::clamp(dt, 1.0f / 300.0f, 0.05f); updateParticlesScalar(dt); injectVentSources(dt); updateLavaFluxScalar(dt); diffuseHeatScalar(dt); updateStats(); } void VolcanoSimulation::stepSIMD(float dt) { dt = std::clamp(dt, 1.0f / 300.0f, 0.05f); updateParticlesSIMD(dt); injectVentSources(dt); updateLavaFluxSIMD(dt); diffuseHeatSIMD(dt); updateStats(); } void runVolcanoReference(int frames) { const std::size_t particleCount = 65536; const int gridWidth = 192; const int gridHeight = 192; const float dt = 1.0f / 60.0f; VolcanoSimulation scalarSim; if (!scalarSim.init(particleCount, gridWidth, gridHeight, 1337u)) { std::cerr << "Volcano reference initialization failed." << std::endl; return; } std::cout << std::fixed << std::setprecision(6); std::cout << "Volcano reference sequence (" << frames << " frames)\n"; for (int i = 0; i < frames; ++i) { scalarSim.stepScalar(dt); printReferenceIteration(scalarSim, i + 1); } } void runVolcanoBenchmark() { const std::size_t particleCount = 65536; const int gridWidth = 192; const int gridHeight = 192; const int frames = 600; const int verifyFrames = 120; const int referenceFrames = 8; const float dt = 1.0f / 60.0f; VolcanoSimulation scalarSim; VolcanoSimulation simdSim; if (!scalarSim.init(particleCount, gridWidth, gridHeight, 1337u) || !simdSim.init(particleCount, gridWidth, gridHeight, 1337u)) { std::cerr << "Volcano benchmark initialization failed." << std::endl; return; } float maxPosXDiff = 0.0f; float maxPosYDiff = 0.0f; float maxPosZDiff = 0.0f; float maxParticleTempDiff = 0.0f; float maxLavaDiff = 0.0f; float maxGridTempDiff = 0.0f; float maxAvgLavaDiff = 0.0f; float maxAvgTempDiff = 0.0f; std::cout << std::fixed << std::setprecision(6); std::cout << "Volcano correctness check (" << verifyFrames << " frames)\n"; std::cout << "Reference scalar iterations for golden checks:\n"; for (int i = 0; i < verifyFrames; ++i) { scalarSim.stepScalar(dt); simdSim.stepSIMD(dt); if (i < referenceFrames) { printReferenceIteration(scalarSim, i + 1); } const std::size_t particleN = scalarSim.getParticleCount(); const std::size_t gridN = static_cast(scalarSim.getGridWidth()) * static_cast(scalarSim.getGridHeight()); maxPosXDiff = std::max(maxPosXDiff, maxAbsDiff(scalarSim.getParticlePosX(), simdSim.getParticlePosX(), particleN)); maxPosYDiff = std::max(maxPosYDiff, maxAbsDiff(scalarSim.getParticlePosY(), simdSim.getParticlePosY(), particleN)); maxPosZDiff = std::max(maxPosZDiff, maxAbsDiff(scalarSim.getParticlePosZ(), simdSim.getParticlePosZ(), particleN)); maxParticleTempDiff = std::max( maxParticleTempDiff, maxAbsDiff(scalarSim.getParticleTemperature(), simdSim.getParticleTemperature(), particleN)); maxLavaDiff = std::max(maxLavaDiff, maxAbsDiff(scalarSim.getLavaHeightGrid(), simdSim.getLavaHeightGrid(), gridN)); maxGridTempDiff = std::max(maxGridTempDiff, maxAbsDiff(scalarSim.getTemperatureGrid(), simdSim.getTemperatureGrid(), gridN)); maxAvgLavaDiff = std::max(maxAvgLavaDiff, std::fabs(scalarSim.getAverageLavaHeight() - simdSim.getAverageLavaHeight())); maxAvgTempDiff = std::max(maxAvgTempDiff, std::fabs(scalarSim.getAverageTemperature() - simdSim.getAverageTemperature())); } std::cout << "Max abs diff over verification window:\n"; std::cout << " posX=" << maxPosXDiff << " posY=" << maxPosYDiff << " posZ=" << maxPosZDiff << '\n'; std::cout << " particleTemp=" << maxParticleTempDiff << " lavaGrid=" << maxLavaDiff << " tempGrid=" << maxGridTempDiff << '\n'; std::cout << " avgLava=" << maxAvgLavaDiff << " avgTemp=" << maxAvgTempDiff << '\n'; auto startScalar = std::chrono::high_resolution_clock::now(); for (int i = 0; i < frames; ++i) { scalarSim.stepScalar(dt); } auto endScalar = std::chrono::high_resolution_clock::now(); auto startSimd = std::chrono::high_resolution_clock::now(); for (int i = 0; i < frames; ++i) { simdSim.stepSIMD(dt); } auto endSimd = std::chrono::high_resolution_clock::now(); const double scalarMs = std::chrono::duration(endScalar - startScalar).count(); const double simdMs = std::chrono::duration(endSimd - startSimd).count(); const double speedup = (simdMs > 0.0) ? (scalarMs / simdMs) : 0.0; std::cout << "Volcano simulation benchmark (" << frames << " frames)\n"; std::cout << "Scalar: " << scalarMs << " ms\n"; std::cout << "SIMD (AVX2): " << simdMs << " ms\n"; std::cout << "Speedup: " << speedup << "x\n"; }