1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
#include "CalculateCanvas.h"

#include <cstdio>
/*
 * Shades one canvas pixel per thread.
 *
 * Each thread derives its pixel coordinates from a 2D grid/block layout,
 * accumulates 2e6 / (squared distance to the point) over all `nPoints`
 * entries of `points`, truncates the sum to an integer clamped at 255, and
 * stores it in the first of the pixel's three bytes. The other two bytes are
 * set to 0.
 *
 * canvas  - device-accessible buffer with 3 bytes per pixel and `width`
 *           pixels per row
 * width   - number of pixels per canvas row
 * points  - device-accessible array of `nPoints` points (not read when
 *           nPoints <= 0)
 * nPoints - number of entries in `points`
 *
 * Launch preconditions: the kernel is not given the canvas height, so the
 * caller must make sure gridDim.y * blockDim.y does not exceed the number of
 * rows allocated in `canvas`. Threads whose x index is >= width do nothing.
 */
__global__
void calculate(unsigned char* canvas, int width, Point* points, int nPoints)
{
    const int canvasX = blockIdx.x * blockDim.x + threadIdx.x;
    const int canvasY = blockIdx.y * blockDim.y + threadIdx.y;

    // A thread past the end of a row would otherwise write into the next row.
    if (canvasX >= width) return;

    double f = 0;
    for (int i = 0; i < nPoints; i++)
    {
        const int pathX = points[i].x - canvasX;
        const int pathY = points[i].y - canvasY;
        const int length_squared = pathX * pathX + pathY * pathY;
        // A point lying exactly on this pixel gives a zero denominator and
        // therefore +inf; the clamp below turns that into full intensity.
        f += (1e5 * 20) / (float) length_squared;
    }

    // Clamp while still in floating point: converting +inf or a sum larger
    // than INT_MAX to int is not a well-defined conversion.
    const int l = (f >= 255.0) ? 255 : (int) f;

    // Compute the byte offset once, in size_t, so large canvases do not
    // overflow int arithmetic.
    const size_t pixel = ((size_t) canvasY * width + canvasX) * 3;
    canvas[pixel + 0] = l;
    canvas[pixel + 1] = 0;
    canvas[pixel + 2] = 0;
}
// Reports a failed CUDA runtime call on stderr; returns true when `status` is cudaSuccess.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) return true;
    fprintf(stderr, "generate_canvas: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Renders the canvas on the GPU with the `calculate` kernel and copies the
 * result into `canvas`.
 *
 * width, height - canvas size in pixels; nothing is done unless both are > 0
 * canvas        - host buffer of width * height * 3 bytes; every pixel is
 *                 overwritten, its previous contents are not used
 * points        - host array of `nPoints` points (may be NULL when
 *                 nPoints <= 0)
 * nPoints       - number of entries in `points`; values <= 0 mean "no points"
 *
 * The kernel runs in 16x16 blocks and has no way to know the canvas height,
 * so the device buffer is padded up to a whole number of blocks in both
 * directions. The kernel is told the padded row width, and only the real
 * width x height region is copied back. This way canvases whose size is not
 * a multiple of 16 are rendered completely instead of losing their right and
 * bottom edges.
 *
 * On any CUDA failure a message is written to stderr, device memory is
 * released and `canvas` is left as it was.
 */
void generate_canvas(int width, int height, unsigned char* canvas, Point* points, int nPoints)
{
    if (width <= 0 || height <= 0 || canvas == NULL) return;
    if (nPoints > 0 && points == NULL) return;

    const dim3 threadsPerBlock(16, 16);
    // Round up so partially covered blocks at the right/bottom edges still run.
    const dim3 numBlocks((width + threadsPerBlock.x - 1) / threadsPerBlock.x,
                         (height + threadsPerBlock.y - 1) / threadsPerBlock.y);

    const int paddedWidth = (int) (numBlocks.x * threadsPerBlock.x);
    const size_t paddedHeight = (size_t) numBlocks.y * threadsPerBlock.y;

    // Sizes are computed in size_t so large canvases do not overflow int.
    const size_t hostRowBytes = (size_t) width * 3 * sizeof(unsigned char);
    const size_t deviceRowBytes = (size_t) paddedWidth * 3 * sizeof(unsigned char);
    const size_t deviceCanvasSize = deviceRowBytes * paddedHeight;
    const size_t pointsSize = (nPoints > 0) ? (size_t) nPoints * sizeof(Point) : 0;

    unsigned char* gpuCanvas = NULL;
    Point* gpuPoints = NULL;

    bool ok = cudaSucceeded(cudaMalloc((void**) &gpuCanvas, deviceCanvasSize),
                            "cudaMalloc(canvas)");

    if (ok && pointsSize > 0)
    {
        ok = cudaSucceeded(cudaMalloc((void**) &gpuPoints, pointsSize),
                           "cudaMalloc(points)")
          && cudaSucceeded(cudaMemcpy(gpuPoints, points, pointsSize, cudaMemcpyHostToDevice),
                           "cudaMemcpy(points)");
    }

    if (ok)
    {
        calculate<<<numBlocks, threadsPerBlock>>>(gpuCanvas, paddedWidth, gpuPoints,
                                                  nPoints > 0 ? nPoints : 0);
        // Launch-configuration errors are only visible through cudaGetLastError().
        ok = cudaSucceeded(cudaGetLastError(), "calculate kernel launch");
    }

    if (ok)
    {
        // Blocking copy: also surfaces errors raised while the kernel was running.
        // Strips the padding by copying `height` rows of `hostRowBytes` each.
        ok = cudaSucceeded(cudaMemcpy2D(canvas, hostRowBytes,
                                        gpuCanvas, deviceRowBytes,
                                        hostRowBytes, (size_t) height,
                                        cudaMemcpyDeviceToHost),
                           "cudaMemcpy2D(canvas)");
    }

    // cudaFree accepts NULL, so this is safe on every path above.
    cudaFree(gpuCanvas);
    cudaFree(gpuPoints);
}
|