Eccentric Developments


Bringing Back The Triangles

As mentioned in the previous article, I want to bring back the triangle support into the renderer, but I'll do it with a twist: removing support for spheres. Meaning that we will have to say goodbye to the smooth spheres, but hopefully not forever.

The reason behind this, is that I want to streamline and optimize the code as much as possible, similar with the spheres in the previous entry. This way there is no need for branching or type checks.

For this first entry I made the following changes:

  • Re-added triangles support
  • Removed spheres support
  • Removed the isSphere and isTriangle flags
  • Took out the branches that used the previous flags
  • Extended the createScene with createTorus and createPlane functions, among a couple of other support methods.

I added the torus creating function to be able to generate aun object with a high amount of triangles, which could help characterize the perfromance of the path tracer.

function wasmWorker() {
  function createScene(args) {
    const {
      vector: { sub, unit, cross },
      memory: { allocStaticFloat32Array, set },
    } = args;

    function rotatePoint([x, y, z], [ax, ay, az]) {
      let sinax = Math.sin(ax);
      let cosax = Math.cos(ax);

      [y, z] = [y * cosax - z * sinax, y * sinax + z * cosax];

      let sinay = Math.sin(ay);
      let cosay = Math.cos(ay);

      [x, z] = [x * cosay + z * sinay, -x * sinay + z * cosay];

      let sinaz = Math.sin(az);
      let cosaz = Math.cos(az);

      [x, y] = [x * cosaz - y * sinaz, x * sinaz + y * cosaz];

      return [x, y, z];
    }

    function translatePoint(point, [x, y, z]) {
      return [point[0] + x, point[1] + y, point[2] + z];
    }

    function createTorus(radius = 1.0, tubeRadius = 0.3, center, rotation, props) {
      const triangles = [];
      const rings = [];
      const ringSegments = 12;
      const tubeSegments = 24;
      const sourceRing = [];

      const ringSegmentRad = (2 * Math.PI) / ringSegments;
      for (let i = 0; i < ringSegments; i++) {
        sourceRing.push(rotatePoint([tubeRadius, 0, 0], [0, 0, ringSegmentRad * i]));
      }

      const tubeSegmentRad = (2 * Math.PI) / tubeSegments;
      for (let i = 0; i < tubeSegments; i++) {
        const ring = structuredClone(sourceRing)
          .map((pt) => translatePoint(pt, [radius, 0, 0]))
          .map((pt) => rotatePoint(pt, [0, tubeSegmentRad * i, 0]))
          .map((pt) => translatePoint(pt, center))
          .map((pt) => rotatePoint(pt, rotation));
        rings.push(ring);
      }

      for (let i = 0; i < tubeSegments; i++) {
        const ni = (i + 1) % tubeSegments;
        for (let j = 0; j < ringSegments; j++) {
          let pt0 = rings[i][j];
          let pt1 = rings[ni][j];
          let pt2 = rings[i][(j + 1) % ringSegments];
          triangles.push({
            pt0,
            pt1,
            pt2,
            ...props,
          });
          pt0 = rings[i][(j + 1) % ringSegments];
          pt1 = rings[ni][j];
          pt2 = rings[ni][(j + 1) % ringSegments];
          triangles.push({
            pt0,
            pt1,
            pt2,
            ...props,
          });
        }
      }

      return triangles;
    }

    function createPlane(width, height, center, rotation, props) {
      const triangles = [];
      let pt0 = [-width / 2, height / 2, 0];
      let pt1 = [width / 2, height / 2, 0];
      let pt2 = [-width / 2, -height / 2, 0];

      pt0 = translatePoint(rotatePoint(pt0, rotation), center);
      pt1 = translatePoint(rotatePoint(pt1, rotation), center);
      pt2 = translatePoint(rotatePoint(pt2, rotation), center);
      triangles.push({
        pt0,
        pt1,
        pt2,
        ...props,
      });

      pt0 = [width / 2, height / 2, 0];
      pt1 = [width / 2, -height / 2, 0];
      pt2 = [-width / 2, -height / 2, 0];
      pt0 = translatePoint(rotatePoint(pt0, rotation), center);
      pt1 = translatePoint(rotatePoint(pt1, rotation), center);
      pt2 = translatePoint(rotatePoint(pt2, rotation), center);
      triangles.push({
        pt0,
        pt1,
        pt2,
        ...props,
      });

      return triangles;
    }

    const scene1 = [
      ...createTorus(4, 2, [0, 0, 0], [Math.PI / 2, 0, 0], { color: [0, 1, 0], isLight: false }),
      ...createPlane(20, 20, [0, 10, 0], [Math.PI / 2, 0, 0], { color: [3, 3, 3], isLight: true }),
      ...createPlane(1000, 1000, [0, -5, 0], [Math.PI / 2, 0, 0], { color: [1, 1, 1], isLight: false }),
    ];

    const scene = scene1.map((obj, id) => {
      const edge0 = sub(obj.pt1, obj.pt0);
      const edge1 = sub(obj.pt2, obj.pt0);
      obj.normal = unit(cross(edge0, edge1));

      obj.id = id;
      return obj;
    });

    return {
      scene,
    };
  }

  function createMemoryFunctions(args) {
    const { wasm } = args;
    const totalAvailable = 1024 * 4;
    const memPtr = wasm.alloc(totalAvailable);
    const memView = new DataView(wasm.memory.buffer, memPtr, totalAvailable);
    let staOffset = 0;
    let dynOffset = 2048; // half of totalAvailable

    function get(A, idx) {
      return memView.getFloat32(A + idx * 4 - memPtr, true);
    }
    function set(A, idx, v) {
      memView.setFloat32(A + idx * 4 - memPtr, v, true);
    }
    const allocFloat32Array = (size) => {
      const byteOffset = memPtr + dynOffset;
      dynOffset += size * 4;
      return byteOffset;
    };

    const allocStaticFloat32Array = (size) => {
      const byteOffset = memPtr + staOffset;
      staOffset += size * 4;
      return byteOffset;
    };
    const free = () => (dynOffset = 2048);
    return {
      memory: {
        allocFloat32Array,
        allocStaticFloat32Array,
        free,
        get,
        set,
      },
    };
  }

  function createAspectRatioFunction() {
    return {
      aspectRatio: (width, height) => {
        let gcd = width;
        let reminder = height;
        while (reminder != 0) {
          const temp = reminder;
          reminder = gcd % reminder;
          gcd = temp;
        }
        return [width / gcd, height / gcd];
      },
    };
  }

  function createCamera(args) {
    const { width, height, aspectRatio } = args;
    const [w, h] = aspectRatio(width, height);
    return {
      camera: {
        leftTop: [-w, h + 1, -50.0],
        rightTop: [w, h + 1, -50.0],
        leftBottom: [-w, -h + 1, -50.0],
        eye: [0.0, 0.0, -65.0],
      },
    };
  }

  function createImageGeometry({ width, height }) {
    return {
      imageGeometry: {
        width,
        height,
      },
    };
  }

  function createVectorFunctions() {
    const sub = (A, B) => [A[0] - B[0], A[1] - B[1], A[2] - B[2]];
    const add = (A, B) => [A[0] + B[0], A[1] + B[1], A[2] + B[2]];
    const mul = (A, B) => [A[0] * B[0], A[1] * B[1], A[2] * B[2]];
    const dot = (A, B) => A[0] * B[0] + A[1] * B[1] + A[2] * B[2];
    // 135ms
    const scale = (A, s) => [A[0] * s, A[1] * s, A[2] * s];
    const norm = (A) => Math.sqrt(dot(A, A));
    const unit = (A) => scale(A, 1.0 / norm(A));
    const abs = (A) => [Math.abs(A[0]), Math.abs(A[1]), Math.abs(A[2])];
    const maxDimension = (A) => {
      if (A[0] > A[1] && A[0] > A[2]) return 0;
      if (A[1] > A[0] && A[1] > A[3]) return 1;
      return 2;
    };
    const permute = (A, i, j, k) => [A[i], A[j], A[k]];
    const cross = (A, B) => {
      const j = A[1] * B[2] - B[1] * A[2];
      const k = A[2] * B[0] - A[0] * B[2];
      const l = A[0] * B[1] - A[1] * B[0];
      return [j, k, l];
    };
    const vector = {
      sub,
      add,
      mul,
      dot,
      scale,
      norm,
      unit,
      abs,
      maxDimension,
      permute,
      cross,
    };

    return {
      vector,
    };
  }

  function calculatePrimaryRays(args) {
    const {
      camera: { rightTop, leftTop, leftBottom, eye },
      imageGeometry: { width, height },
      vector: { scale, add, sub, unit },
    } = args;
    const vdu = scale(sub(rightTop, leftTop), 1.0 / width);
    const vdv = scale(sub(leftBottom, leftTop), 1.0 / height);
    const primaryRays = [];
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const pixel = y * width + x;
        const origin = eye;
        const direction = unit(sub(add(add(scale(vdu, x), scale(vdv, y)), leftTop), origin));
        primaryRays[pixel] = {
          pixel,
          origin,
          direction,
        };
      }
    }

    return {
      primaryRays,
    };
  }

  function createRandomDirectionFunction(args) {
    const {
      vector: { dot, norm },
    } = args;

    const randomDirection = (normal) => {
      const p = [0, 0, 0];
      while (true) {
        p[0] = Math.random() - 0.5;
        p[1] = Math.random() - 0.5;
        p[2] = Math.random() - 0.5;
        const n = 1.0 / norm(p);
        p[0] *= n;
        p[1] *= n;
        p[2] *= n;
        if (dot(p, normal) >= 0) {
          return p;
        }
      }
    };
    return {
      randomDirection,
    };
  }

  function createTriangleIntersectFunction(args) {
    const { sub, abs, maxDimension, permute, scale, add } = args.vector;
    const triangleIntersect = (ray, triangle) => {
      const { pt0, pt1, pt2, normal } = triangle;
      let pt0_t = sub(pt0, ray.origin);
      let pt1_t = sub(pt1, ray.origin);
      let pt2_t = sub(pt2, ray.origin);
      const k = maxDimension(abs(ray.origin));
      const i = (k + 1) % 3;
      const j = (i + 1) % 3;

      const [pdx, pdy, pdz] = permute(ray.direction, i, j, k);
      const sz = 1.0 / pdz;
      const sx = -pdx * sz;
      const sy = -pdy * sz;

      pt0_t = permute(pt0_t, i, j, k);
      pt1_t = permute(pt1_t, i, j, k);
      pt2_t = permute(pt2_t, i, j, k);

      const pt0_t_0 = pt0_t[0] + sx * pt0_t[2];
      const pt0_t_1 = pt0_t[1] + sy * pt0_t[2];
      const pt0_t_2 = pt0_t[2] * sz;
      const pt1_t_0 = pt1_t[0] + sx * pt1_t[2];
      const pt1_t_1 = pt1_t[1] + sy * pt1_t[2];
      const pt1_t_2 = pt1_t[2] * sz;
      const pt2_t_0 = pt2_t[0] + sx * pt2_t[2];
      const pt2_t_1 = pt2_t[1] + sy * pt2_t[2];
      const pt2_t_2 = pt2_t[2] * sz;

      const e0 = pt1_t_0 * pt2_t_1 - pt1_t_1 * pt2_t_0;
      const e1 = pt2_t_0 * pt0_t_1 - pt2_t_1 * pt0_t_0;
      const e2 = pt0_t_0 * pt1_t_1 - pt0_t_1 * pt1_t_0;

      if ((e0 < 0.0 || e1 < 0.0 || e2 < 0.0) && (e0 > 0.0 || e1 > 0.0 || e2 > 0.0)) {
        return { hit: false };
      }

      const det = e0 + e1 + e2;
      if (det == 0.0) {
        return { hit: false };
      }

      const t_scaled = e0 * pt0_t_2 + e1 * pt1_t_2 + e2 * pt2_t_2;

      if (det < 0.0 && t_scaled >= 0.0) {
        return { hit: false };
      }

      if (det > 0.0 && t_scaled <= 0.0) {
        return { hit: false };
      }

      const t = t_scaled / det;

      if (t > 0.007) {
        const point = add(scale(ray.direction, t), ray.origin);
        return {
          hit: true,
          distance: t,
          point,
          normal,
        };
      }

      return {
        hit: false,
      };
    };

    return {
      triangleIntersect,
    };
  }

  function createTraceFunction(args) {
    const { scene, triangleIntersect } = args;
    const trace = (ray) =>
      scene
        .map((obj) => ({ ...triangleIntersect(ray, obj), obj }))
        .filter(({ hit }) => hit)
        .reduce((acc, intersection) => (intersection.distance < acc.distance ? intersection : acc), {
          hit: false,
          distance: Number.MAX_VALUE,
        });
    return {
      trace,
    };
  }

  function createTracePrimaryRaysFunction(args) {
    const { trace, primaryRays, width } = args;
    const traceResults = [];
    const tracePrimaryRays = (section, tileSize) => {
      let idx = 0;
      const startPixel = section;
      const endPixel = section + width * tileSize;
      for (let i = startPixel; i < endPixel; i++) {
        const ray = primaryRays[i];
        traceResults[idx++] = trace(ray);
      }
      return traceResults;
    };
    return {
      tracePrimaryRays,
    };
  }

  function createGenerateBitmapFunction(args) {
    const {
      shading,
      vector: { mul },
    } = args;

    const generateBitmap = (traceResults, section, bbp) => {
      let idx = section * 4 * 3;
      for (const it of traceResults) {
        let pixel = [0, 0, 0];
        if (it.hit) {
          pixel = it.obj.color;
          if (!it.obj.isLight) {
            const intensity = shading(it.point, it.normal, 0);
            pixel = mul(pixel, intensity);
          }
        }
        bbp.setFloat32(idx, pixel[0], true);
        bbp.setFloat32(idx + 4, pixel[1], true);
        bbp.setFloat32(idx + 8, pixel[2], true);
        idx += 12;
      }
    };
    return { generateBitmap };
  }

  function createRenderFunction(args) {
    const { tracePrimaryRays, generateBitmap, width, height } = args;
    const totalPixels = width * height;
    const render = (tileSize, bitmap, syncBuffer) => {
      const sync = new Uint32Array(syncBuffer);
      const sectionSize = tileSize * width;
      let section = Atomics.add(sync, 0, sectionSize);
      const bpp = new DataView(bitmap, 0, bitmap.length);
      while (section < totalPixels) {
        const traceResults = tracePrimaryRays(section, tileSize);
        generateBitmap(traceResults, section, bpp);
        section = Atomics.add(sync, 0, sectionSize);
      }
    };
    return { render };
  }

  function pipeline(fns) {
    return (args) => {
      let acc = { ...args };
      for (const fn of fns) {
        const result = fn(acc);
        acc = { ...acc, ...result };
      }
      return acc;
    };
  }

  function createShadingFunction(args) {
    const {
      vector: { dot },
      trace,
      randomDirection,
    } = args;
    const shading = (shadingPoint, pointNormal, depth) => {
      const color = [0, 0, 0];
      if (depth === 5) {
        return color;
      }
      const origin = [];
      origin[0] = pointNormal[0] * 0.1 + shadingPoint[0];
      origin[1] = pointNormal[1] * 0.1 + shadingPoint[1];
      origin[2] = pointNormal[2] * 0.1 + shadingPoint[2];

      const direction = randomDirection(pointNormal);
      const d = dot(pointNormal, direction);
      const ray = { origin, direction };
      const tr = trace(ray);
      if (!tr.hit) {
        return color; //black up to this point;
      }
      color[0] = tr.obj.color[0] * d;
      color[1] = tr.obj.color[0] * d;
      color[2] = tr.obj.color[0] * d;
      if (!tr.obj.isLight) {
        const ncolor = shading(tr.point, tr.normal, depth + 1);
        color[0] *= ncolor[0];
        color[1] *= ncolor[1];
        color[2] *= ncolor[2];
      }
      return color;
    };

    return {
      shading,
    };
  }

  var renderingPipeline = pipeline([
    createMemoryFunctions,
    createVectorFunctions,
    createAspectRatioFunction,
    createScene,
    createCamera,
    createImageGeometry,
    createRandomDirectionFunction,
    calculatePrimaryRays,
    createTriangleIntersectFunction,
    createTraceFunction,
    createShadingFunction,
    createTracePrimaryRaysFunction,
    createGenerateBitmapFunction,
    createRenderFunction,
  ]);

  async function loadWasm(wasmFile) {
    const {
      instance: { exports: wasm },
    } = await WebAssembly.instantiate(wasmFile, {});
    return wasm;
  }

  let render = null;

  async function init(wasmFile, width, height) {
    const wasm = await loadWasm(wasmFile);
    render = renderingPipeline({
      wasm,
      width,
      height,
      sceneSelector: 1,
    }).render;
  }

  this.onmessage = async (msg) => {
    const { data } = msg;
    if (data.operation === 'init') {
      const { wasmFile, width, height } = data;
      await init(wasmFile, width, height);
      this.postMessage(0);
    } else {
      render(data.tileSize, data.bitmap, data.syncBuffer);
      this.postMessage(1);
    }
  };
}

class WorkersPool {
  #blobUrl;
  poolSize;
  maxPoolSize;
  #pool = [];
  results = [];
  constructor(poolSize, maxPoolSize, workerFunction) {
    const wasmWorkerString = `(${workerFunction.toString()})()`;
    const blob = new Blob([wasmWorkerString], {
      type: 'application/javascript',
    });
    this.#blobUrl = URL.createObjectURL(blob);
    this.poolSize = poolSize;
    this.maxPoolSize = maxPoolSize;
  }

  init(initPayload) {
    this.#pool = [];
    this.poolSize = Math.max(1, this.poolSize);
    return new Promise((resolve) => {
      let workersDone = 0;
      for (let i = 0; i < this.maxPoolSize; i++) {
        const worker = new Worker(this.#blobUrl);
        this.#pool.push(worker);
        worker.onmessage = () => {
          if (++workersDone === this.maxPoolSize) {
            resolve();
          }
        };
        worker.postMessage(initPayload);
      }
    });
  }

  resize(newSize) {
    this.poolSize = Math.max(1, newSize);
  }

  async process(payload) {
    return new Promise((resolve) => {
      let currentJob = 0;
      for (let i = 0; i < this.poolSize; i++) {
        let wrk = this.#pool[i];
        wrk.onmessage = async (msg) => {
          if (++currentJob === this.poolSize) {
            resolve();
          }
        };
        wrk.postMessage(payload);
      }
    });
  }
}

const workersPool = new WorkersPool(1, 1 /* navigator.hardwareConcurrency */, wasmWorker);

async function renderAndPresent(canvas, frameCount, framesAcc, syncBuffer, sharedBuffer, finalBitmap) {
  const ctx = canvas.getContext('2d');
  const width = canvas.width;
  const renderStart = performance.now();
  const bitmap = new Float32Array(sharedBuffer);
  const sync = new Uint32Array(syncBuffer);
  const tileSize = 8;

  sync[0] = 0;
  await workersPool.process({ tileSize, bitmap: sharedBuffer, syncBuffer });

  for (let i = 0; i < bitmap.length; i += 3) {
    const r = bitmap[i] + (framesAcc[i] ?? 0);
    const g = bitmap[i + 1] + (framesAcc[i + 1] ?? 0);
    const b = bitmap[i + 2] + (framesAcc[i + 2] ?? 0);
    framesAcc[i] = r;
    framesAcc[i + 1] = g;
    framesAcc[i + 2] = b;
    finalBitmap[i / 3] =
      (255 << 24) | (((b / frameCount) * 255) << 16) | (((g / frameCount) * 255) << 8) | ((r / frameCount) * 255);
  }
  const imageData = new ImageData(new Uint8ClampedArray(finalBitmap.buffer), width);
  ctx.putImageData(imageData, 0, 0);
  const elapsed = Math.floor(performance.now() - renderStart);
  const elapsedMs = `${elapsed}ms|${(Math.round(10000 / elapsed) / 10).toFixed(1)}fps|${workersPool.poolSize}/${workersPool.maxPoolSize}threads(${window.adjustPool ? '?' : '!'})`;
  ctx.font = '20px monospace';
  ctx.textBaseline = 'top';
  const measure = ctx.measureText(elapsedMs);
  ctx.fillStyle = '#000000';
  ctx.fillRect(0, 0, measure.width, measure.fontBoundingBoxDescent);
  ctx.fillStyle = '#999999';
  ctx.fillText(elapsedMs, 0, 0);
}

window.running = false;
(async () => {
  const canvas = document.getElementById('canvas-1');
  const width = canvas.width;
  const height = canvas.height;
  const wasmFile = await (await fetch('wasm/vector_simd.wasm')).arrayBuffer();
  await workersPool.init({ operation: 'init', wasmFile, width, height });
  let frameCount = 0;
  const framesAcc = new Array(width * height * 3);
  const sharedBuffer = new SharedArrayBuffer(width * height * 3 * 4);
  const syncBuffer = new SharedArrayBuffer(4);
  const finalBitmap = new Uint32Array(width * height);
  window.running = true;

  /* START: Auto adjust poolSize */
  const renderTimes = {};
  let renderStart = performance.now();
  window.adjustPool = true;
  /* END: Auto adjust poolSize */

  const animation = async () => {
    frameCount++;
    await renderAndPresent(canvas, frameCount, framesAcc, syncBuffer, sharedBuffer, finalBitmap);
    /* START: Auto adjust poolSize */
    renderTimes[workersPool.poolSize] += performance.now();
    if (window.adjustPool && frameCount % 10 === 0) {
      renderTimes[workersPool.poolSize] = performance.now() - renderStart;
      if (workersPool.poolSize < workersPool.maxPoolSize) {
        await workersPool.resize(workersPool.poolSize + 1);
        renderStart = performance.now();
      } else {
        window.adjustPool = false;
        let fastest = Number.MAX_VALUE;
        let poolSize = 0;
        for (const [size, time] of Object.entries(renderTimes)) {
          if (time < fastest) {
            fastest = time;
            poolSize = size;
          }
        }
        await workersPool.resize(poolSize);
      }
    }
    /* END: Auto adjust poolSize */
    window.running && window.requestAnimationFrame(animation);
  };
  window.requestAnimationFrame(animation);
})();

Now lets talk about the big performance issue, this thing is SLOOOOWWWWWwww spending over one minute per frame. Such bad performance is mainly a consequence of the change in the numbers of objects rendered, which increased from 8 to 580.

In this case SIMD nor multithreads would make much of a difference, that is why rendereres of this type use acceleration structures, and I'll leave it at this for now.