/***************************************************************************
 *   Copyright (C) 1998-2009 by David Bucciarelli (davibu@interfree.it)    *
 *                                                                         *
 *   This file is part of SmallLuxGPU.                                     *
 *                                                                         *
 *   SmallLuxGPU is free software; you can redistribute it and/or modify   *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *  SmallLuxGPU is distributed in the hope that it will be useful,         *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
 *                                                                         *
 *   This project is based on PBRT ; see http://www.pbrt.org               *
 *   and Lux Renderer website : http://www.luxrender.net                   *
 ***************************************************************************/

#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <stdexcept>

#include "smalllux.h"
#include "path.h"
#include "renderconfig.h"

// Builds a path integrator working on the given scene and ray buffer and
// performs the whole OpenCL set up through SetUpOpenCL().
//
// s      : scene to trace; only the pointer is stored.
// rb     : ray buffer to fill and trace; only the pointer is stored.
// useGPU : forwarded to SetUpOpenCL() to choose between a GPU and a CPU device.
//
// Any exception raised by SetUpOpenCL() is propagated to the caller after the
// OpenCL objects created so far have been released.
PathIntegrator::PathIntegrator(Scene *s, RayBuffer *rb, const bool useGPU) :
	statsTotalRayTime(0.0), statsTotalRayCount(0.0), maxPathDepth(6),
	scene(s), rayBuffer(rb), currentPath(0) {
	// Start from a well defined state so that a partially completed set up
	// can be released safely (deleting a NULL pointer is a no-op)
	bvhKernel = NULL;
	bruteforceKernel = NULL;
	raysBuff = NULL;
	hitsBuff = NULL;
	vertsBuff = NULL;
	trisBuff = NULL;
	bvhBuff = NULL;
	queue = NULL;
	context = NULL;

	try {
		SetUpOpenCL(useGPU);
	} catch (...) {
		// The destructor is not invoked when a constructor throws, so the
		// objects allocated before the failure must be released here
		delete bvhKernel;
		delete bruteforceKernel;

		delete raysBuff;
		delete hitsBuff;
		delete vertsBuff;
		delete trisBuff;
		delete bvhBuff;

		delete queue;
		delete context;

		throw;
	}
}

// Releases every OpenCL object owned by the integrator: kernels first, then
// the memory buffers and finally the command queue and the context.
// The scene and the ray buffer are not deleted here.
PathIntegrator::~PathIntegrator() {
	// Kernels
	delete bvhKernel;
	delete bruteforceKernel;

	// Buffers (reverse order of creation)
	delete bvhBuff;
	delete trisBuff;
	delete vertsBuff;
	delete hitsBuff;
	delete raysBuff;

	// Command queue and context
	delete queue;
	delete context;
}

// Reads the whole content of fileName (opened in binary mode) and returns it
// as a string; the size of the text read is logged on cerr.
//
// The stream has eofbit, failbit and badbit enabled in its exception mask, so
// a stream failure (e.g. while opening the file) is reported as an exception.
string PathIntegrator::ReadSources(const string &fileName) {
	ifstream kernelFile;
	kernelFile.exceptions(ifstream::eofbit | ifstream::failbit | ifstream::badbit);
	kernelFile.open(fileName.c_str(), ifstream::in | ifstream::binary);

	// Copy everything from the stream buffer up to the end of the file
	istreambuf_iterator<char> first(kernelFile);
	istreambuf_iterator<char> last;
	string sources(first, last);

	cerr << "Kernel file size " << sources.length() << "bytes" << endl;

	return sources;
}

// Loads the OpenCL program stored in kernelFileName, builds it for the
// selected device(s) and returns a newly allocated kernel object for its
// "Intersect" entry point. The caller owns the returned pointer.
//
// On a build failure the compiler log of the first build device is printed
// on cerr and the original cl::Error is rethrown.
cl::Kernel *PathIntegrator::SetUpKernel(const string &kernelFileName) {
	// src must outlive 'source', which only keeps a pointer to the text
	const string src = ReadSources(kernelFileName);

	// Compile sources
	cl::Program::Sources source(1, make_pair(src.c_str(), src.length()));
	cl::Program program(*context, source);
	try {
		program.build(buildDevice, "-I.");
	} catch (const cl::Error &) {
		cl::string strError = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(buildDevice[0]);
		cerr << "Compilation error:" << endl << strError.c_str() << endl;

		// Rethrow the exception currently being handled instead of a copy
		throw;
	}

	return new cl::Kernel(program, "Intersect");
}

void PathIntegrator::SetUpOpenCL(const bool useGPU) {
	// Platform info
	cl::vector<cl::Platform> platforms;
	cl::Platform::get(&platforms);
	for (size_t i = 0; i < platforms.size(); ++i)
		cerr << "OpenCL Platform " << i << ": " <<
				platforms[i].getInfo<CL_PLATFORM_VENDOR>().c_str() << endl;

	cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](), 0};
	cl_int err;
	context = new cl::Context(CL_DEVICE_TYPE_ALL, cps, NULL, NULL, &err);

	// Device info
	cl::vector<cl::Device> devices = context->getInfo<CL_CONTEXT_DEVICES>();
	for (size_t i = 0; i < devices.size(); ++i) {
		cl_int type = devices[i].getInfo<CL_DEVICE_TYPE>();
		string stype;
		switch (type) {
			case CL_DEVICE_TYPE_ALL:
				stype = "TYPE_ALL";
				break;
			case CL_DEVICE_TYPE_DEFAULT:
				stype = "TYPE_DEFAULT";
				break;
			case CL_DEVICE_TYPE_CPU:
				stype = "TYPE_CPU";
				if (!useGPU && buildDevice.size() == 0)
					buildDevice.push_back(devices[i]);
				break;
			case CL_DEVICE_TYPE_GPU:
				stype = "TYPE_GPU";
				if (useGPU && buildDevice.size() == 0)
					buildDevice.push_back(devices[i]);
				break;
			default:
				stype = "TYPE_UNKNOWN";
				break;
		}

		cerr << "OpenCL Device type " << i << ": " << stype << endl;
		cerr << "OpenCL Device name " << i << ": " <<
				devices[i].getInfo<CL_DEVICE_NAME>().c_str() << endl;
		cerr << "OpenCL Device units " << i << ": " <<
				devices[i].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << endl;
	}

	if (buildDevice.size() == 0)
		throw  runtime_error("Unable to find an appriopiate OpenCL device");
	else
		cerr << "OpenCL Device used: " << buildDevice[0].getInfo<CL_DEVICE_NAME>().c_str() << endl;

	cl_command_queue_properties prop = CL_QUEUE_PROFILING_ENABLE;
	queue = new cl::CommandQueue(*context, buildDevice[0], prop, &err);

	// Buffers

	raysBuff = new cl::Buffer(*context,
			CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
			sizeof(Ray) * RAY_BUFFER_SIZE,
			rayBuffer->rays);
	cerr << "OpenCL rays buffer size: " << (sizeof(Ray) * RAY_BUFFER_SIZE / 1024) << "Kb" <<endl;
	hitsBuff = new cl::Buffer(*context,
			CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
			sizeof(RayHit) * RAY_BUFFER_SIZE,
			rayBuffer->rayHits);
	cerr << "OpenCL ray hits buffer size: " << (sizeof(RayHit) * RAY_BUFFER_SIZE / 1024) << "Kb" <<endl;
	vertsBuff = new cl::Buffer(*context,
			CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
			sizeof(Point) * scene->mesh->vertexCount,
			scene->mesh->vertices);
	cerr << "OpenCL vertices buffer size: " << (sizeof(Point) * scene->mesh->vertexCount / 1024) << "Kb" <<endl;
	trisBuff = new cl::Buffer(*context,
			CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
			sizeof(Triangle) * scene->mesh->triangleCount,
			scene->mesh->triangles);
	cerr << "OpenCL triangle indices buffer size: " << (sizeof(Triangle) * scene->mesh->triangleCount / 1024) << "Kb" <<endl;
	bvhBuff = new cl::Buffer(*context,
			CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
			sizeof(BVHAccelArrayNode) * scene->bvh->nNodes,
			scene->bvh->bvhTree);
	cerr << "OpenCL BVH buffer size: " << (sizeof(BVHAccelArrayNode) * scene->bvh->nNodes / 1024) << "Kb" <<endl;

	// Kernels

	// Bruteforce kernel
	bruteforceKernel = SetUpKernel("bruteforce_kernel.cl");
	bruteforceKernel->getWorkGroupInfo<size_t>(buildDevice[0], CL_KERNEL_WORK_GROUP_SIZE, &bruteforceWorkGroupSize);
	cerr << "OpenCL BruteForce kernel work group size: " << bruteforceWorkGroupSize << endl;
	cl_ulong memSize;
	bruteforceKernel->getWorkGroupInfo<cl_ulong>(buildDevice[0], CL_KERNEL_LOCAL_MEM_SIZE, &memSize);
	cerr << "OpenCL BruteForce kernel memory footprint: " << memSize << endl;
	// Set Arguments
	bruteforceKernel->setArg(0, *raysBuff);
	bruteforceKernel->setArg(1, *hitsBuff);
	bruteforceKernel->setArg(2, *vertsBuff);
	bruteforceKernel->setArg(3, *trisBuff);
	bruteforceKernel->setArg(4, scene->mesh->triangleCount);

	// BVH kernel
	bvhKernel = SetUpKernel("bvh_kernel.cl");
	bvhKernel->getWorkGroupInfo<size_t>(buildDevice[0], CL_KERNEL_WORK_GROUP_SIZE, &bvhWorkGroupSize);
	cerr << "OpenCL BVH kernel work group size: " << bvhWorkGroupSize << endl;
	bvhKernel->getWorkGroupInfo<cl_ulong>(buildDevice[0], CL_KERNEL_LOCAL_MEM_SIZE, &memSize);
	cerr << "OpenCL BVH kernel memory footprint: " << memSize << endl;
	// Set Arguments
	bvhKernel->setArg(0, *raysBuff);
	bvhKernel->setArg(1, *hitsBuff);
	bvhKernel->setArg(2, *vertsBuff);
	bvhKernel->setArg(3, *trisBuff);
	bvhKernel->setArg(4, scene->mesh->triangleCount);
	bvhKernel->setArg(5, scene->bvh->nNodes);
	bvhKernel->setArg(6, *bvhBuff);
}

// Lets the paths, starting from the current one, add their rays to the ray
// buffer until currentFreeRay reaches RAY_BUFFER_SIZE. A new path is appended
// and initialized every time the existing ones have all been used.
void PathIntegrator::FillRayBuffer() {
	while (rayBuffer->currentFreeRay < RAY_BUFFER_SIZE) {
		// All the existing paths have been visited: a new one is required
		const bool isNewPath = (currentPath == paths.size());
		if (isNewPath)
			paths.push_back(Path());

		Path &path = paths[currentPath++];
		if (isNewPath)
			path.Init(scene, maxPathDepth);

		path.FillRayBuffer(rayBuffer);
	}
}

// Intersects the rays stored in the ray buffer with the scene and writes the
// results in rayBuffer->rayHits.
//
// type selects the intersection code:
//  - OPENCL_BVH        : the BVH OpenCL kernel;
//  - OPENCL_BRUTEFORCE : the brute force OpenCL kernel;
//  - NO_OPENCL_BVH     : scene->Intersect() on the host; a miss is marked by
//                        setting the hit index to 0xffffffff.
// Any other value traces nothing.
//
// The elapsed wall clock time and the number of rays are added to the
// statistics and the ray buffer is marked as empty before returning.
void PathIntegrator::TraceRayBuffer(AccelleratorType type) {
	const double t1 = WallClockTime();

	if ((type == OPENCL_BVH) || (type == OPENCL_BRUTEFORCE)) {
		// Use the kernel (and its work group size) matching the requested
		// accelerator
		const bool useBVH = (type == OPENCL_BVH);
		cl::Kernel *kernel = useBVH ? bvhKernel : bruteforceKernel;
		const size_t workGroupSize = useBVH ? bvhWorkGroupSize : bruteforceWorkGroupSize;

		// Upload the rays (non blocking)
		cl::Event beginWorkEvent;
		queue->enqueueWriteBuffer(
				*raysBuff,
				CL_FALSE,
				0,
				sizeof(Ray) * RAY_BUFFER_SIZE,
				rayBuffer->rays,
				NULL, &beginWorkEvent);

		// Run the kernel once the upload is done
		VECTOR_CLASS<cl::Event> ve0;
		ve0.push_back(beginWorkEvent);
		cl::Event traceWorkEvent;
		queue->enqueueNDRangeKernel(*kernel, cl::NullRange,
				cl::NDRange(RAY_BUFFER_SIZE), cl::NDRange(workGroupSize), &ve0, &traceWorkEvent);

		// Download the hits once the kernel is done and wait for the result
		VECTOR_CLASS<cl::Event> ve1;
		ve1.push_back(traceWorkEvent);
		cl::Event endWorkEvent;
		queue->enqueueReadBuffer(
				*hitsBuff,
				CL_FALSE,
				0,
				sizeof(RayHit) * RAY_BUFFER_SIZE,
				rayBuffer->rayHits,
				&ve1, &endWorkEvent);
		endWorkEvent.wait();
	} else if (type == NO_OPENCL_BVH) {
		for (unsigned int i = 0; i < rayBuffer->currentFreeRay; ++i) {
			RayHit &rh = rayBuffer->rayHits[i];
			if (!scene->Intersect(rayBuffer->rays[i], &rh.t, &rh.index))
				rh.index = 0xffffffffu;
		}
	}

	const double t2 = WallClockTime();
	statsTotalRayTime += t2 - t1;
	statsTotalRayCount += rayBuffer->currentFreeRay;
	rayBuffer->currentFreeRay = 0;
}

// Calls AdvancePath() on every stored path, in order, and then restarts the
// path cursor from the first one.
void PathIntegrator::AdvancePaths() {
	for (size_t i = 0; i < paths.size(); ++i)
		paths[i].AdvancePath(scene, rayBuffer, maxPathDepth);

	currentPath = 0;
}
