vibespatial.spatial.distance_metrics_kernels¶
NVRTC kernel sources for Hausdorff and discrete Frechet distance.
Attributes¶
Module Contents¶
- vibespatial.spatial.distance_metrics_kernels.MAX_FRECHET_B = 2048¶
- vibespatial.spatial.distance_metrics_kernels.HAUSDORFF_FP64 = Multiline-String¶
Show Value
"""#define VS_SPATIAL_EPSILON 9.9999999999999998e-13 typedef double compute_t; /* Centered coordinate read: subtract center in fp64, then cast to compute_t. When compute_t is double, this is a no-op identity. When compute_t is float, centering reduces absolute magnitude before the lossy cast. */ #define CX(val) ((compute_t)((val) - center_x)) #define CY(val) ((compute_t)((val) - center_y)) /* Kahan summation helper -- add `val` to `sum` with compensation `c`. */ #define KAHAN_ADD(sum, val, c) do { \ const compute_t _y = (val) - (c); \ const compute_t _t = (sum) + _y; \ (c) = (_t - (sum)) - _y; \ (sum) = _t; \ } while(0) /* Warp-level Kahan reduction for a (sum, compensation) pair. */ #define VS_WARP_FULL_MASK 0xFFFFFFFFu #define WARP_KAHAN_REDUCE(sum, c) do { \ for (int _vs_offset = 16; _vs_offset > 0; _vs_offset >>= 1) { \ const compute_t _vs_shfl_sum = __shfl_down_sync(VS_WARP_FULL_MASK, (sum), _vs_offset); \ const compute_t _vs_shfl_c = __shfl_down_sync(VS_WARP_FULL_MASK, (c), _vs_offset); \ KAHAN_ADD((sum), _vs_shfl_sum - _vs_shfl_c, (c)); \ } \ } while(0) extern "C" __global__ __launch_bounds__(256, 4) void hausdorff_distance( const double* __restrict__ x_a, const double* __restrict__ y_a, const int* __restrict__ offsets_a, const double* __restrict__ x_b, const double* __restrict__ y_b, const int* __restrict__ offsets_b, double* __restrict__ result, double center_x, double center_y, int pair_count ) { const int pair = blockIdx.x; if (pair >= pair_count) return; const int a_start = offsets_a[pair]; const int a_end = offsets_a[pair + 1]; const int b_start = offsets_b[pair]; const int b_end = offsets_b[pair + 1]; const int na = a_end - a_start; const int nb = b_end - b_start; if (na == 0 || nb == 0) { if (threadIdx.x == 0) result[pair] = 0.0 / 0.0; /* NaN */ return; } const int tid = threadIdx.x; const int stride = blockDim.x; /* Forward direction: max over A of { min over B of dist(a,b) } */ compute_t local_max_forward = (compute_t)0.0; for (int i = tid; i < na; i += stride) { const compute_t ax = CX(x_a[a_start + i]); const compute_t ay = CY(y_a[a_start + i]); compute_t min_dist_sq = (compute_t)1e38; for (int j = 0; j < nb; j++) { const compute_t dx = ax - CX(x_b[b_start + j]); const compute_t dy = ay - CY(y_b[b_start + j]); const compute_t d_sq = dx * dx + dy * dy; if (d_sq < min_dist_sq) min_dist_sq = d_sq; } const compute_t min_dist = sqrt((double)min_dist_sq); if (min_dist > local_max_forward) local_max_forward = min_dist; } /* Backward direction: max over B of { min over A of dist(b,a) } */ compute_t local_max_backward = (compute_t)0.0; for (int j = tid; j < nb; j += stride) { const compute_t bx = CX(x_b[b_start + j]); const compute_t by = CY(y_b[b_start + j]); compute_t min_dist_sq = (compute_t)1e38; for (int i = 0; i < na; i++) { const compute_t dx = bx - CX(x_a[a_start + i]); const compute_t dy = by - CY(y_a[a_start + i]); const compute_t d_sq = dx * dx + dy * dy; if (d_sq < min_dist_sq) min_dist_sq = d_sq; } const compute_t min_dist = sqrt((double)min_dist_sq); if (min_dist > local_max_backward) local_max_backward = min_dist; } double local_max = (double)((local_max_forward > local_max_backward) ? local_max_forward : local_max_backward); /* Block-wide max reduction via shared memory */ __shared__ double sdata[256]; sdata[tid] = local_max; __syncthreads(); for (int s = blockDim.x >> 1; s > 0; s >>= 1) { if (tid < s && sdata[tid + s] > sdata[tid]) { sdata[tid] = sdata[tid + s]; } __syncthreads(); } if (tid == 0) result[pair] = sdata[0]; } """
- vibespatial.spatial.distance_metrics_kernels.HAUSDORFF_FP32 = Multiline-String¶
Show Value
"""#define VS_SPATIAL_EPSILON 9.9999999999999998e-13 typedef float compute_t; /* Centered coordinate read: subtract center in fp64, then cast to compute_t. When compute_t is double, this is a no-op identity. When compute_t is float, centering reduces absolute magnitude before the lossy cast. */ #define CX(val) ((compute_t)((val) - center_x)) #define CY(val) ((compute_t)((val) - center_y)) /* Kahan summation helper -- add `val` to `sum` with compensation `c`. */ #define KAHAN_ADD(sum, val, c) do { \ const compute_t _y = (val) - (c); \ const compute_t _t = (sum) + _y; \ (c) = (_t - (sum)) - _y; \ (sum) = _t; \ } while(0) /* Warp-level Kahan reduction for a (sum, compensation) pair. */ #define VS_WARP_FULL_MASK 0xFFFFFFFFu #define WARP_KAHAN_REDUCE(sum, c) do { \ for (int _vs_offset = 16; _vs_offset > 0; _vs_offset >>= 1) { \ const compute_t _vs_shfl_sum = __shfl_down_sync(VS_WARP_FULL_MASK, (sum), _vs_offset); \ const compute_t _vs_shfl_c = __shfl_down_sync(VS_WARP_FULL_MASK, (c), _vs_offset); \ KAHAN_ADD((sum), _vs_shfl_sum - _vs_shfl_c, (c)); \ } \ } while(0) extern "C" __global__ __launch_bounds__(256, 4) void hausdorff_distance( const double* __restrict__ x_a, const double* __restrict__ y_a, const int* __restrict__ offsets_a, const double* __restrict__ x_b, const double* __restrict__ y_b, const int* __restrict__ offsets_b, double* __restrict__ result, double center_x, double center_y, int pair_count ) { const int pair = blockIdx.x; if (pair >= pair_count) return; const int a_start = offsets_a[pair]; const int a_end = offsets_a[pair + 1]; const int b_start = offsets_b[pair]; const int b_end = offsets_b[pair + 1]; const int na = a_end - a_start; const int nb = b_end - b_start; if (na == 0 || nb == 0) { if (threadIdx.x == 0) result[pair] = 0.0 / 0.0; /* NaN */ return; } const int tid = threadIdx.x; const int stride = blockDim.x; /* Forward direction: max over A of { min over B of dist(a,b) } */ compute_t local_max_forward = (compute_t)0.0; for (int i = tid; i < na; i += stride) { const compute_t ax = CX(x_a[a_start + i]); const compute_t ay = CY(y_a[a_start + i]); compute_t min_dist_sq = (compute_t)1e38; for (int j = 0; j < nb; j++) { const compute_t dx = ax - CX(x_b[b_start + j]); const compute_t dy = ay - CY(y_b[b_start + j]); const compute_t d_sq = dx * dx + dy * dy; if (d_sq < min_dist_sq) min_dist_sq = d_sq; } const compute_t min_dist = sqrt((double)min_dist_sq); if (min_dist > local_max_forward) local_max_forward = min_dist; } /* Backward direction: max over B of { min over A of dist(b,a) } */ compute_t local_max_backward = (compute_t)0.0; for (int j = tid; j < nb; j += stride) { const compute_t bx = CX(x_b[b_start + j]); const compute_t by = CY(y_b[b_start + j]); compute_t min_dist_sq = (compute_t)1e38; for (int i = 0; i < na; i++) { const compute_t dx = bx - CX(x_a[a_start + i]); const compute_t dy = by - CY(y_a[a_start + i]); const compute_t d_sq = dx * dx + dy * dy; if (d_sq < min_dist_sq) min_dist_sq = d_sq; } const compute_t min_dist = sqrt((double)min_dist_sq); if (min_dist > local_max_backward) local_max_backward = min_dist; } double local_max = (double)((local_max_forward > local_max_backward) ? local_max_forward : local_max_backward); /* Block-wide max reduction via shared memory */ __shared__ double sdata[256]; sdata[tid] = local_max; __syncthreads(); for (int s = blockDim.x >> 1; s > 0; s >>= 1) { if (tid < s && sdata[tid + s] > sdata[tid]) { sdata[tid] = sdata[tid + s]; } __syncthreads(); } if (tid == 0) result[pair] = sdata[0]; } """
- vibespatial.spatial.distance_metrics_kernels.FRECHET_FP64 = Multiline-String¶
Show Value
"""#define VS_SPATIAL_EPSILON 9.9999999999999998e-13 typedef double compute_t; /* Centered coordinate read: subtract center in fp64, then cast to compute_t. When compute_t is double, this is a no-op identity. When compute_t is float, centering reduces absolute magnitude before the lossy cast. */ #define CX(val) ((compute_t)((val) - center_x)) #define CY(val) ((compute_t)((val) - center_y)) /* Kahan summation helper -- add `val` to `sum` with compensation `c`. */ #define KAHAN_ADD(sum, val, c) do { \ const compute_t _y = (val) - (c); \ const compute_t _t = (sum) + _y; \ (c) = (_t - (sum)) - _y; \ (sum) = _t; \ } while(0) /* Warp-level Kahan reduction for a (sum, compensation) pair. */ #define VS_WARP_FULL_MASK 0xFFFFFFFFu #define WARP_KAHAN_REDUCE(sum, c) do { \ for (int _vs_offset = 16; _vs_offset > 0; _vs_offset >>= 1) { \ const compute_t _vs_shfl_sum = __shfl_down_sync(VS_WARP_FULL_MASK, (sum), _vs_offset); \ const compute_t _vs_shfl_c = __shfl_down_sync(VS_WARP_FULL_MASK, (c), _vs_offset); \ KAHAN_ADD((sum), _vs_shfl_sum - _vs_shfl_c, (c)); \ } \ } while(0) #define MAX_FRECHET_B 2048 extern "C" __global__ void frechet_distance( const double* __restrict__ x_a, const double* __restrict__ y_a, const int* __restrict__ offsets_a, const double* __restrict__ x_b, const double* __restrict__ y_b, const int* __restrict__ offsets_b, double* __restrict__ result, double center_x, double center_y, int pair_count, int max_b_len ) { const int pair = blockIdx.x * blockDim.x + threadIdx.x; if (pair >= pair_count) return; const int a_start = offsets_a[pair]; const int a_end = offsets_a[pair + 1]; const int b_start = offsets_b[pair]; const int b_end = offsets_b[pair + 1]; const int na = a_end - a_start; const int nb = b_end - b_start; if (na == 0 || nb == 0) { result[pair] = 0.0 / 0.0; /* NaN */ return; } if (nb > MAX_FRECHET_B) { /* Too large for local arrays -- return NaN as safe fallback */ result[pair] = 0.0 / 0.0; return; } /* Rolling DP: previous and current rows, each of length nb. For large nb these spill to local memory (L1/L2 cached). */ double prev_row[MAX_FRECHET_B]; double curr_row[MAX_FRECHET_B]; /* Initialize row 0: ca[0][j] = max(ca[0][j-1], d(a_0, b_j)) */ { const compute_t ax = CX(x_a[a_start]); const compute_t ay = CY(y_a[a_start]); { const compute_t dx = ax - CX(x_b[b_start]); const compute_t dy = ay - CY(y_b[b_start]); prev_row[0] = sqrt((double)(dx * dx + dy * dy)); } for (int j = 1; j < nb; j++) { const compute_t dx = ax - CX(x_b[b_start + j]); const compute_t dy = ay - CY(y_b[b_start + j]); const double d = sqrt((double)(dx * dx + dy * dy)); prev_row[j] = (d > prev_row[j - 1]) ? d : prev_row[j - 1]; } } /* Fill rows 1..na-1 */ for (int i = 1; i < na; i++) { const compute_t ax = CX(x_a[a_start + i]); const compute_t ay = CY(y_a[a_start + i]); /* curr_row[0] = max(prev_row[0], d(a_i, b_0)) */ { const compute_t dx = ax - CX(x_b[b_start]); const compute_t dy = ay - CY(y_b[b_start]); const double d = sqrt((double)(dx * dx + dy * dy)); curr_row[0] = (d > prev_row[0]) ? d : prev_row[0]; } for (int j = 1; j < nb; j++) { const compute_t dx = ax - CX(x_b[b_start + j]); const compute_t dy = ay - CY(y_b[b_start + j]); const double d = sqrt((double)(dx * dx + dy * dy)); double min_prev = prev_row[j]; /* ca[i-1][j] */ if (curr_row[j - 1] < min_prev) min_prev = curr_row[j - 1]; /* ca[i][j-1] */ if (prev_row[j - 1] < min_prev) min_prev = prev_row[j - 1]; /* ca[i-1][j-1] */ curr_row[j] = (d > min_prev) ? d : min_prev; } /* Swap: prev_row = curr_row */ for (int j = 0; j < nb; j++) prev_row[j] = curr_row[j]; } result[pair] = prev_row[nb - 1]; } """
- vibespatial.spatial.distance_metrics_kernels.FRECHET_FP32 = Multiline-String¶
Show Value
"""#define VS_SPATIAL_EPSILON 9.9999999999999998e-13 typedef float compute_t; /* Centered coordinate read: subtract center in fp64, then cast to compute_t. When compute_t is double, this is a no-op identity. When compute_t is float, centering reduces absolute magnitude before the lossy cast. */ #define CX(val) ((compute_t)((val) - center_x)) #define CY(val) ((compute_t)((val) - center_y)) /* Kahan summation helper -- add `val` to `sum` with compensation `c`. */ #define KAHAN_ADD(sum, val, c) do { \ const compute_t _y = (val) - (c); \ const compute_t _t = (sum) + _y; \ (c) = (_t - (sum)) - _y; \ (sum) = _t; \ } while(0) /* Warp-level Kahan reduction for a (sum, compensation) pair. */ #define VS_WARP_FULL_MASK 0xFFFFFFFFu #define WARP_KAHAN_REDUCE(sum, c) do { \ for (int _vs_offset = 16; _vs_offset > 0; _vs_offset >>= 1) { \ const compute_t _vs_shfl_sum = __shfl_down_sync(VS_WARP_FULL_MASK, (sum), _vs_offset); \ const compute_t _vs_shfl_c = __shfl_down_sync(VS_WARP_FULL_MASK, (c), _vs_offset); \ KAHAN_ADD((sum), _vs_shfl_sum - _vs_shfl_c, (c)); \ } \ } while(0) #define MAX_FRECHET_B 2048 extern "C" __global__ void frechet_distance( const double* __restrict__ x_a, const double* __restrict__ y_a, const int* __restrict__ offsets_a, const double* __restrict__ x_b, const double* __restrict__ y_b, const int* __restrict__ offsets_b, double* __restrict__ result, double center_x, double center_y, int pair_count, int max_b_len ) { const int pair = blockIdx.x * blockDim.x + threadIdx.x; if (pair >= pair_count) return; const int a_start = offsets_a[pair]; const int a_end = offsets_a[pair + 1]; const int b_start = offsets_b[pair]; const int b_end = offsets_b[pair + 1]; const int na = a_end - a_start; const int nb = b_end - b_start; if (na == 0 || nb == 0) { result[pair] = 0.0 / 0.0; /* NaN */ return; } if (nb > MAX_FRECHET_B) { /* Too large for local arrays -- return NaN as safe fallback */ result[pair] = 0.0 / 0.0; return; } /* Rolling DP: previous and current rows, each of length nb. For large nb these spill to local memory (L1/L2 cached). */ double prev_row[MAX_FRECHET_B]; double curr_row[MAX_FRECHET_B]; /* Initialize row 0: ca[0][j] = max(ca[0][j-1], d(a_0, b_j)) */ { const compute_t ax = CX(x_a[a_start]); const compute_t ay = CY(y_a[a_start]); { const compute_t dx = ax - CX(x_b[b_start]); const compute_t dy = ay - CY(y_b[b_start]); prev_row[0] = sqrt((double)(dx * dx + dy * dy)); } for (int j = 1; j < nb; j++) { const compute_t dx = ax - CX(x_b[b_start + j]); const compute_t dy = ay - CY(y_b[b_start + j]); const double d = sqrt((double)(dx * dx + dy * dy)); prev_row[j] = (d > prev_row[j - 1]) ? d : prev_row[j - 1]; } } /* Fill rows 1..na-1 */ for (int i = 1; i < na; i++) { const compute_t ax = CX(x_a[a_start + i]); const compute_t ay = CY(y_a[a_start + i]); /* curr_row[0] = max(prev_row[0], d(a_i, b_0)) */ { const compute_t dx = ax - CX(x_b[b_start]); const compute_t dy = ay - CY(y_b[b_start]); const double d = sqrt((double)(dx * dx + dy * dy)); curr_row[0] = (d > prev_row[0]) ? d : prev_row[0]; } for (int j = 1; j < nb; j++) { const compute_t dx = ax - CX(x_b[b_start + j]); const compute_t dy = ay - CY(y_b[b_start + j]); const double d = sqrt((double)(dx * dx + dy * dy)); double min_prev = prev_row[j]; /* ca[i-1][j] */ if (curr_row[j - 1] < min_prev) min_prev = curr_row[j - 1]; /* ca[i][j-1] */ if (prev_row[j - 1] < min_prev) min_prev = prev_row[j - 1]; /* ca[i-1][j-1] */ curr_row[j] = (d > min_prev) ? d : min_prev; } /* Swap: prev_row = curr_row */ for (int j = 0; j < nb; j++) prev_row[j] = curr_row[j]; } result[pair] = prev_row[nb - 1]; } """