#include "CalculateCanvas.h"

#include <cstdio>

/*
 * Computes the intensity of one canvas pixel per thread.
 *
 * Launch layout: a 2D grid of 2D blocks; the thread's global (x, y) index is
 * the pixel coordinate it shades.
 *
 * canvas  - device buffer with 3 bytes per pixel and a row stride of
 *           width * 3 bytes. Byte 0 of each pixel receives the intensity,
 *           bytes 1 and 2 are cleared (presumably an RGB image with only the
 *           first channel in use - confirm against the consumer).
 * width   - number of pixels per canvas row; also the row stride in pixels.
 * points  - device array of nPoints points; each contributes
 *           2e6 / squared_distance to the pixel.
 * nPoints - number of entries in points.
 *
 * Preconditions: the kernel is not given the canvas height, so rows cannot be
 * bounds-checked here. The launch must not cover more rows than the buffer
 * holds.
 */
__global__
void calculate(unsigned char* canvas, int width, Point* points, int nPoints)
{
    const int canvasX = blockIdx.x * blockDim.x + threadIdx.x;
    const int canvasY = blockIdx.y * blockDim.y + threadIdx.y;

    // A column at or beyond `width` would alias into the next row (or run past
    // the end of the buffer on the last row), so such threads do nothing.
    if (canvasX >= width) return;

    const double pointStrength = 1e5 * 20;
    double intensity = 0;

    for (int i = 0; i < nPoints; i++)
    {
        const int pathX = points[i].x - canvasX;
        const int pathY = points[i].y - canvasY;

        // Square in 64 bits: the 32-bit product overflows once a point is
        // more than ~46k pixels away on either axis.
        const long long lengthSquared =
            (long long) pathX * pathX + (long long) pathY * pathY;

        // A point lying exactly on this pixel yields +inf here, which the
        // clamp below turns into full intensity.
        intensity += pointStrength / (float) lengthSquared;
    }

    // Clamp while still in floating point: converting +inf or a value above
    // INT_MAX to int is undefined.
    const int level = (intensity >= 255.0) ? 255 : (int) intensity;

    // 64-bit offset so large canvases do not overflow the index arithmetic.
    const size_t pixel = ((size_t) canvasY * width + canvasX) * 3;
    canvas[pixel + 0] = level;
    canvas[pixel + 1] = 0;
    canvas[pixel + 2] = 0;
}

// Reports a failed CUDA runtime call on stderr; returns true when `status` is cudaSuccess.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) return true;

    fprintf(stderr, "generate_canvas: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Renders the point field into a host canvas using the `calculate` kernel.
 *
 * width, height - canvas size in pixels; nothing is done if either is <= 0.
 * canvas        - host buffer of width * height * 3 bytes, rows tightly
 *                 packed. Every pixel is overwritten on success; on a CUDA
 *                 failure the error is printed to stderr and the buffer is
 *                 left untouched.
 * points        - host array of nPoints points; a null pointer or a
 *                 non-positive count is treated as "no points".
 * nPoints       - number of entries in points.
 *
 * The kernel is launched in 16x16 blocks. `calculate` receives no canvas
 * height and therefore cannot skip rows past the image, so instead of relying
 * on the image size being a multiple of 16 the device buffer is padded up to
 * whole blocks in both directions. The kernel is given the padded width as
 * its row stride, and only the visible width x height region is copied back.
 */
void generate_canvas(int width, int height, unsigned char* canvas, Point* points, int nPoints)
{
    if (width <= 0 || height <= 0 || !canvas) return;

    const int pointCount = (points && nPoints > 0) ? nPoints : 0;

    // Round the grid up so the right and bottom edges are covered even when
    // the canvas size is not a multiple of the block size.
    const dim3 threadsPerBlock(16, 16);
    const dim3 numBlocks((width + threadsPerBlock.x - 1) / threadsPerBlock.x,
                         (height + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Sizes are kept in size_t: width * height * 3 overflows int for large canvases.
    const size_t paddedWidth = (size_t) numBlocks.x * threadsPerBlock.x;
    const size_t paddedHeight = (size_t) numBlocks.y * threadsPerBlock.y;
    const size_t hostRowBytes = (size_t) width * 3;
    const size_t gpuRowBytes = paddedWidth * 3;
    const size_t gpuCanvasBytes = gpuRowBytes * paddedHeight;
    const size_t pointsBytes = (size_t) pointCount * sizeof(Point);

    unsigned char* gpuCanvas = 0;
    Point* gpuPoints = 0;

    bool ok = cudaSucceeded(cudaMalloc((void**) &gpuCanvas, gpuCanvasBytes),
                            "cudaMalloc(canvas)");

    if (ok && pointCount > 0)
    {
        ok = cudaSucceeded(cudaMalloc((void**) &gpuPoints, pointsBytes),
                           "cudaMalloc(points)")
          && cudaSucceeded(cudaMemcpy(gpuPoints, points, pointsBytes, cudaMemcpyHostToDevice),
                           "cudaMemcpy(points)");
    }

    if (ok)
    {
        // The canvas is not uploaded first: the kernel overwrites every pixel.
        calculate<<<numBlocks, threadsPerBlock>>>(gpuCanvas, (int) paddedWidth, gpuPoints, pointCount);

        // A kernel launch returns nothing; configuration errors only show up here.
        ok = cudaSucceeded(cudaGetLastError(), "calculate launch");
    }

    if (ok)
    {
        // Blocking strided copy: waits for the kernel (surfacing any fault it
        // raised) and drops the padding columns and rows.
        cudaSucceeded(cudaMemcpy2D(canvas, hostRowBytes,
                                   gpuCanvas, gpuRowBytes,
                                   hostRowBytes, (size_t) height,
                                   cudaMemcpyDeviceToHost),
                      "cudaMemcpy2D(canvas)");
    }

    // cudaFree accepts null pointers, so this is safe on every path.
    cudaFree(gpuCanvas);
    cudaFree(gpuPoints);
}