/***************************************************************************
 *   Copyright (C) 1998-2009 by David Bucciarelli (davibu@interfree.it)    *
 *                                                                         *
 *   This file is part of SmallLuxGPU.                                     *
 *                                                                         *
 *   SmallLuxGPU is free software; you can redistribute it and/or modify   *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *  SmallLuxGPU is distributed in the hope that it will be useful,         *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
 *                                                                         *
 *   This project is based on PBRT ; see http://www.pbrt.org               *
 *   and Lux Renderer website : http://www.luxrender.net                   *
 ***************************************************************************/

// Position in 3D space, stored as three consecutive floats (x, y, z).
typedef struct {
	float x;
	float y;
	float z;
} Point;

// Three-component float vector (x, y, z); used for the ray direction.
typedef struct {
	float x;
	float y;
	float z;
} Vector;

// One ray to trace: origin, direction and the accepted range of the
// ray parameter t.
typedef struct {
	// Ray origin
	Point o;
	// Ray direction
	Vector d;
	// Hits with t below mint are rejected
	float mint;
	// Hits with t above maxt are rejected
	float maxt;
} Ray;

// Per-ray result record written by the Intersect kernel.
typedef struct {
	// Index of the accepted triangle; the kernel leaves it at 0xffffffff
	// when no triangle was accepted
	unsigned int index;
	// Ray parameter of the accepted hit; when nothing was hit this is the
	// ray's unchanged maxt
	float t;
} RayHit;

// Indexed triangle.
typedef struct {
	// Indices of the three corners in the vertex array
	unsigned int v[3];
} Triangle;

// Axis-aligned bounding box given by its two extreme corners.
typedef struct {
	// Corner with the smallest coordinates
	Point pMin;
	// Corner with the largest coordinates
	Point pMax;
} BBox;

// One entry of the BVH stored as a flat array; the kernel walks the array
// front to back and uses skipIndex to jump over entries.
typedef struct {
	// Bounds tested against the ray before anything else in this node
	BBox bbox;
	// Triangle index to test when the bounds are hit; 0xffffffff means
	// there is no triangle to test for this node
	unsigned int primitive;
	// Index of the node to continue with when the ray misses bbox
	unsigned int skipIndex;
} BVHAccelArrayNode;

/*
 * Tests a single triangle against a ray and, when it is hit inside the
 * accepted parameter range, records it as the current nearest hit.
 *
 * rayOrig, rayDir  ray origin and direction as float4
 * minT             hits with a ray parameter below this are rejected
 * maxT             in: hits with a ray parameter above *maxT are rejected;
 *                  out: overwritten with the hit parameter on acceptance
 * hitIndex         out: set to currentIndex on acceptance
 * currentIndex     index into tris of the triangle to test
 * verts            vertex positions
 * tris             per-triangle vertex indices into verts
 *
 * *maxT and *hitIndex are left untouched when the triangle is rejected.
 */
static void TriangleIntersect(
		const float4 rayOrig,
		const float4 rayDir,
		const float minT,
		float *maxT,
		unsigned int *hitIndex,
		const unsigned int currentIndex,
		__constant Point *verts,
		__constant Triangle *tris) {

	// Resolve the three corners through the triangle's vertex indices
	__constant Triangle *tri = &tris[currentIndex];
	__constant Point *pa = &verts[tri->v[0]];
	__constant Point *pb = &verts[tri->v[1]];
	__constant Point *pc = &verts[tri->v[2]];

	const float4 corner0 = (float4) (pa->x, pa->y, pa->z, 0.f);
	const float4 corner1 = (float4) (pb->x, pb->y, pb->z, 0.f);
	const float4 corner2 = (float4) (pc->x, pc->y, pc->z, 0.f);

	// The two edges that share corner0
	const float4 edge1 = corner1 - corner0;
	const float4 edge2 = corner2 - corner0;

	const float4 pvec = cross(rayDir, edge2);
	const float det = dot(pvec, edge1);

	// A zero determinant means the direction and both edges are coplanar,
	// so no single intersection point exists
	if (det == 0.f)
		return;

	const float invDet = 1.f / det;

	// Barycentric weight of corner1
	const float4 tvec = rayOrig - corner0;
	const float baryB = dot(tvec, pvec) * invDet;
	if (baryB < 0.f)
		return;

	// Barycentric weight of corner2
	const float4 qvec = cross(tvec, edge1);
	const float baryC = dot(rayDir, qvec) * invDet;
	if (baryC < 0.f)
		return;

	// Remaining weight (corner0); a negative value puts the point outside
	const float baryA = 1.f - baryB - baryC;
	if (baryA < 0.f)
		return;

	// Ray parameter of the intersection point
	const float tHit = dot(edge2, qvec) * invDet;
	if (tHit < minT || tHit > *maxT)
		return;

	// Accepted: shrink the search range and remember the triangle
	*maxT = tHit;
	*hitIndex = currentIndex;
}

// Some interesting reading about ray/AABB intersection:
//  http://www.uni-koblenz.de/~cg/publikationen/cp_raytrace.pdf
//  http://graphics.tu-bs.de/publications/Eisemann07RS.pdf
//  http://www.flipcode.com/archives/SSE_RayBox_Intersection_Test.shtml
// The commented-out variant below is a lot slower than the usual one
/*static int BBoxIntersectP(
	const float4 rayOrig, const float4 invRayDir,
	const float mint, const float maxt,
	const float4 pMin, const float4 pMax) {
	const float4 plusInf = (float4)INFINITY;
	const float4 minusInf = (float4)-INFINITY;

	const float4 l1 = (pMin - rayOrig) * invRayDir;
	const float4 l2 = (pMax - rayOrig) * invRayDir;

	const float4 filtered_l1a = fmin(l1, plusInf);
	const float4 filtered_l2a = fmin(l2, plusInf);
	const float4 filtered_l1b = fmax(l1, minusInf);
	const float4 filtered_l2b = fmax(l2, minusInf);

	float4 lmax = fmax(filtered_l1a, filtered_l2a);
	float4 lmin = fmin(filtered_l1b, filtered_l2b);

	const float4 lmax0 = lmax.yzwx;
	const float4 lmin0 = lmin.yzwx;
	lmax = fmin(lmax, lmax0);
	lmin = fmax(lmin, lmin0);

	const float4 lmax1 = lmax.zwzw;
	const float4 lmin1 = lmin.zwzw;
	lmax = fmin(lmax, lmax1);
	lmin = fmax(lmin, lmin1);

	return all((lmin <= lmax) && (lmax >= (float4)0.f));
}*/

/*
 * Slab test of a ray against an axis-aligned box.
 *
 * rayOrig     ray origin
 * invRayDir   component-wise reciprocal of the ray direction
 * mint, maxt  ray parameter interval the overlap is clipped against
 * pMin, pMax  box corners
 *
 * Only the x, y and z components take part in the test.
 * Returns 1 when the clipped interval is still non-empty after all three
 * axes, 0 as soon as one axis empties it.
 */
static int BBoxIntersectP(
		const float4 rayOrig, const float4 invRayDir,
		const float mint, const float maxt,
		const float4 pMin, const float4 pMax) {
	// Ray parameters at which the ray crosses the pMin / pMax planes
	const float4 tAtMin = (pMin - rayOrig) * invRayDir;
	const float4 tAtMax = (pMax - rayOrig) * invRayDir;

	// Per-axis entry and exit parameters, whatever the direction's sign
	const float4 tEnter = fmin(tAtMin, tAtMax);
	const float4 tExit = fmax(tAtMin, tAtMax);

	float lo = mint;
	float hi = maxt;

	// X slab
	if (tEnter.x > lo)
		lo = tEnter.x;
	if (tExit.x < hi)
		hi = tExit.x;
	if (lo > hi)
		return 0;

	// Y slab
	if (tEnter.y > lo)
		lo = tEnter.y;
	if (tExit.y < hi)
		hi = tExit.y;
	if (lo > hi)
		return 0;

	// Z slab
	if (tEnter.z > lo)
		lo = tEnter.z;
	if (tExit.z < hi)
		hi = tExit.z;

	return !(lo > hi);
}

/*
 * Traces one ray per work-item through the flattened BVH and stores the
 * nearest accepted triangle.
 *
 * rays           input rays, indexed by get_global_id(0)
 * rayHits        output records, indexed by get_global_id(0)
 * verts, tris    triangle mesh (vertex positions and vertex indices)
 * triangleCount  not read by this kernel
 * nodeCount      not read by this kernel
 * bvhTree        BVH nodes in traversal order
 *
 * NOTE(review): there is no bounds check on the work-item id, so the
 * global work size presumably must not exceed the length of rays/rayHits;
 * confirm on the host side.
 */
__kernel void Intersect(
		__global Ray *rays,
		__global RayHit *rayHits,
		__constant Point *verts,
		__constant Triangle *tris,
		const unsigned int triangleCount,
		const unsigned int nodeCount,
		__constant BVHAccelArrayNode *bvhTree) {
	// Each work-item handles the ray at its own global index
	const int gid = get_global_id(0);
	__global Ray *ray = &rays[gid];

	const float4 rayOrig = (float4) (ray->o.x, ray->o.y, ray->o.z, 0.f);
	const float4 rayDir = (float4) (ray->d.x, ray->d.y, ray->d.z, 0.f);
	const float4 invRayDir = (float4) 1.f / rayDir;

	const float minT = ray->mint;
	// Shrinks every time a closer triangle is accepted
	float maxT = ray->maxt;
	// Stays at 0xffffffff when no triangle is accepted
	unsigned int hitIndex = 0xffffffffu;

	// The root's skipIndex doubles as the end-of-traversal marker
	const unsigned int stopNode = bvhTree[0].skipIndex;

	for (unsigned int currentNode = 0; currentNode < stopNode; ) {
		__constant BVHAccelArrayNode *node = &bvhTree[currentNode];

		const float4 boxMin = (float4) (node->bbox.pMin.x,
				node->bbox.pMin.y,
				node->bbox.pMin.z,
				0.f);
		const float4 boxMax = (float4) (node->bbox.pMax.x,
				node->bbox.pMax.y,
				node->bbox.pMax.z,
				0.f);

		if (!BBoxIntersectP(rayOrig, invRayDir, minT, maxT, boxMin, boxMax)) {
			// Bounds missed: jump to the node named by skipIndex
			currentNode = node->skipIndex;
			continue;
		}

		// Bounds hit: test the node's triangle, if it has one
		const unsigned int triIndex = node->primitive;
		if (triIndex != 0xffffffffu)
			TriangleIntersect(rayOrig, rayDir, minT, &maxT, &hitIndex, triIndex, verts, tris);

		// Continue with the next node in array order
		currentNode++;
	}

	// Publish the result for this ray
	__global RayHit *hit = &rayHits[gid];
	hit->index = hitIndex;
	hit->t = maxT;
}
