vibespatial.spatial.distance_metrics_kernels

NVRTC kernel sources for Hausdorff and discrete Frechet distance.

Attributes

Module Contents

vibespatial.spatial.distance_metrics_kernels.MAX_FRECHET_B = 2048
vibespatial.spatial.distance_metrics_kernels.HAUSDORFF_FP64 = Multiline-String
Show Value
"""#define VS_SPATIAL_EPSILON 9.9999999999999998e-13

typedef double compute_t;

/* Centered coordinate read: subtract center in fp64, then cast to compute_t.
   When compute_t is double, this is a no-op identity.  When compute_t is float,
   centering reduces absolute magnitude before the lossy cast. */
#define CX(val) ((compute_t)((val) - center_x))
#define CY(val) ((compute_t)((val) - center_y))

/* Kahan summation helper -- add `val` to `sum` with compensation `c`. */
#define KAHAN_ADD(sum, val, c) do { \
    const compute_t _y = (val) - (c); \
    const compute_t _t = (sum) + _y; \
    (c) = (_t - (sum)) - _y; \
    (sum) = _t; \
} while(0)

/* Warp-level Kahan reduction for a (sum, compensation) pair. */
#define VS_WARP_FULL_MASK 0xFFFFFFFFu
#define WARP_KAHAN_REDUCE(sum, c) do { \
    for (int _vs_offset = 16; _vs_offset > 0; _vs_offset >>= 1) { \
        const compute_t _vs_shfl_sum = __shfl_down_sync(VS_WARP_FULL_MASK, (sum), _vs_offset); \
        const compute_t _vs_shfl_c = __shfl_down_sync(VS_WARP_FULL_MASK, (c), _vs_offset); \
        KAHAN_ADD((sum), _vs_shfl_sum - _vs_shfl_c, (c)); \
    } \
} while(0)

extern "C" __global__ __launch_bounds__(256, 4)
void hausdorff_distance(
    const double* __restrict__ x_a,
    const double* __restrict__ y_a,
    const int* __restrict__ offsets_a,
    const double* __restrict__ x_b,
    const double* __restrict__ y_b,
    const int* __restrict__ offsets_b,
    double* __restrict__ result,
    double center_x,
    double center_y,
    int pair_count
) {
    const int pair = blockIdx.x;
    if (pair >= pair_count) return;

    const int a_start = offsets_a[pair];
    const int a_end   = offsets_a[pair + 1];
    const int b_start = offsets_b[pair];
    const int b_end   = offsets_b[pair + 1];
    const int na = a_end - a_start;
    const int nb = b_end - b_start;

    if (na == 0 || nb == 0) {
        if (threadIdx.x == 0) result[pair] = 0.0 / 0.0;  /* NaN */
        return;
    }

    const int tid = threadIdx.x;
    const int stride = blockDim.x;

    /* Forward direction: max over A of { min over B of dist(a,b) } */
    compute_t local_max_forward = (compute_t)0.0;
    for (int i = tid; i < na; i += stride) {
        const compute_t ax = CX(x_a[a_start + i]);
        const compute_t ay = CY(y_a[a_start + i]);
        compute_t min_dist_sq = (compute_t)1e38;
        for (int j = 0; j < nb; j++) {
            const compute_t dx = ax - CX(x_b[b_start + j]);
            const compute_t dy = ay - CY(y_b[b_start + j]);
            const compute_t d_sq = dx * dx + dy * dy;
            if (d_sq < min_dist_sq) min_dist_sq = d_sq;
        }
        const compute_t min_dist = sqrt((double)min_dist_sq);
        if (min_dist > local_max_forward) local_max_forward = min_dist;
    }

    /* Backward direction: max over B of { min over A of dist(b,a) } */
    compute_t local_max_backward = (compute_t)0.0;
    for (int j = tid; j < nb; j += stride) {
        const compute_t bx = CX(x_b[b_start + j]);
        const compute_t by = CY(y_b[b_start + j]);
        compute_t min_dist_sq = (compute_t)1e38;
        for (int i = 0; i < na; i++) {
            const compute_t dx = bx - CX(x_a[a_start + i]);
            const compute_t dy = by - CY(y_a[a_start + i]);
            const compute_t d_sq = dx * dx + dy * dy;
            if (d_sq < min_dist_sq) min_dist_sq = d_sq;
        }
        const compute_t min_dist = sqrt((double)min_dist_sq);
        if (min_dist > local_max_backward) local_max_backward = min_dist;
    }

    double local_max = (double)((local_max_forward > local_max_backward)
                                ? local_max_forward : local_max_backward);

    /* Block-wide max reduction via shared memory */
    __shared__ double sdata[256];
    sdata[tid] = local_max;
    __syncthreads();

    for (int s = blockDim.x >> 1; s > 0; s >>= 1) {
        if (tid < s && sdata[tid + s] > sdata[tid]) {
            sdata[tid] = sdata[tid + s];
        }
        __syncthreads();
    }

    if (tid == 0) result[pair] = sdata[0];
}
"""
vibespatial.spatial.distance_metrics_kernels.HAUSDORFF_FP32 = Multiline-String
Show Value
"""#define VS_SPATIAL_EPSILON 9.9999999999999998e-13

typedef float compute_t;

/* Centered coordinate read: subtract center in fp64, then cast to compute_t.
   When compute_t is double, this is a no-op identity.  When compute_t is float,
   centering reduces absolute magnitude before the lossy cast. */
#define CX(val) ((compute_t)((val) - center_x))
#define CY(val) ((compute_t)((val) - center_y))

/* Kahan summation helper -- add `val` to `sum` with compensation `c`. */
#define KAHAN_ADD(sum, val, c) do { \
    const compute_t _y = (val) - (c); \
    const compute_t _t = (sum) + _y; \
    (c) = (_t - (sum)) - _y; \
    (sum) = _t; \
} while(0)

/* Warp-level Kahan reduction for a (sum, compensation) pair. */
#define VS_WARP_FULL_MASK 0xFFFFFFFFu
#define WARP_KAHAN_REDUCE(sum, c) do { \
    for (int _vs_offset = 16; _vs_offset > 0; _vs_offset >>= 1) { \
        const compute_t _vs_shfl_sum = __shfl_down_sync(VS_WARP_FULL_MASK, (sum), _vs_offset); \
        const compute_t _vs_shfl_c = __shfl_down_sync(VS_WARP_FULL_MASK, (c), _vs_offset); \
        KAHAN_ADD((sum), _vs_shfl_sum - _vs_shfl_c, (c)); \
    } \
} while(0)

extern "C" __global__ __launch_bounds__(256, 4)
void hausdorff_distance(
    const double* __restrict__ x_a,
    const double* __restrict__ y_a,
    const int* __restrict__ offsets_a,
    const double* __restrict__ x_b,
    const double* __restrict__ y_b,
    const int* __restrict__ offsets_b,
    double* __restrict__ result,
    double center_x,
    double center_y,
    int pair_count
) {
    const int pair = blockIdx.x;
    if (pair >= pair_count) return;

    const int a_start = offsets_a[pair];
    const int a_end   = offsets_a[pair + 1];
    const int b_start = offsets_b[pair];
    const int b_end   = offsets_b[pair + 1];
    const int na = a_end - a_start;
    const int nb = b_end - b_start;

    if (na == 0 || nb == 0) {
        if (threadIdx.x == 0) result[pair] = 0.0 / 0.0;  /* NaN */
        return;
    }

    const int tid = threadIdx.x;
    const int stride = blockDim.x;

    /* Forward direction: max over A of { min over B of dist(a,b) } */
    compute_t local_max_forward = (compute_t)0.0;
    for (int i = tid; i < na; i += stride) {
        const compute_t ax = CX(x_a[a_start + i]);
        const compute_t ay = CY(y_a[a_start + i]);
        compute_t min_dist_sq = (compute_t)1e38;
        for (int j = 0; j < nb; j++) {
            const compute_t dx = ax - CX(x_b[b_start + j]);
            const compute_t dy = ay - CY(y_b[b_start + j]);
            const compute_t d_sq = dx * dx + dy * dy;
            if (d_sq < min_dist_sq) min_dist_sq = d_sq;
        }
        const compute_t min_dist = sqrt((double)min_dist_sq);
        if (min_dist > local_max_forward) local_max_forward = min_dist;
    }

    /* Backward direction: max over B of { min over A of dist(b,a) } */
    compute_t local_max_backward = (compute_t)0.0;
    for (int j = tid; j < nb; j += stride) {
        const compute_t bx = CX(x_b[b_start + j]);
        const compute_t by = CY(y_b[b_start + j]);
        compute_t min_dist_sq = (compute_t)1e38;
        for (int i = 0; i < na; i++) {
            const compute_t dx = bx - CX(x_a[a_start + i]);
            const compute_t dy = by - CY(y_a[a_start + i]);
            const compute_t d_sq = dx * dx + dy * dy;
            if (d_sq < min_dist_sq) min_dist_sq = d_sq;
        }
        const compute_t min_dist = sqrt((double)min_dist_sq);
        if (min_dist > local_max_backward) local_max_backward = min_dist;
    }

    double local_max = (double)((local_max_forward > local_max_backward)
                                ? local_max_forward : local_max_backward);

    /* Block-wide max reduction via shared memory */
    __shared__ double sdata[256];
    sdata[tid] = local_max;
    __syncthreads();

    for (int s = blockDim.x >> 1; s > 0; s >>= 1) {
        if (tid < s && sdata[tid + s] > sdata[tid]) {
            sdata[tid] = sdata[tid + s];
        }
        __syncthreads();
    }

    if (tid == 0) result[pair] = sdata[0];
}
"""
vibespatial.spatial.distance_metrics_kernels.FRECHET_FP64 = Multiline-String
Show Value
"""#define VS_SPATIAL_EPSILON 9.9999999999999998e-13

typedef double compute_t;

/* Centered coordinate read: subtract center in fp64, then cast to compute_t.
   When compute_t is double, this is a no-op identity.  When compute_t is float,
   centering reduces absolute magnitude before the lossy cast. */
#define CX(val) ((compute_t)((val) - center_x))
#define CY(val) ((compute_t)((val) - center_y))

/* Kahan summation helper -- add `val` to `sum` with compensation `c`. */
#define KAHAN_ADD(sum, val, c) do { \
    const compute_t _y = (val) - (c); \
    const compute_t _t = (sum) + _y; \
    (c) = (_t - (sum)) - _y; \
    (sum) = _t; \
} while(0)

/* Warp-level Kahan reduction for a (sum, compensation) pair. */
#define VS_WARP_FULL_MASK 0xFFFFFFFFu
#define WARP_KAHAN_REDUCE(sum, c) do { \
    for (int _vs_offset = 16; _vs_offset > 0; _vs_offset >>= 1) { \
        const compute_t _vs_shfl_sum = __shfl_down_sync(VS_WARP_FULL_MASK, (sum), _vs_offset); \
        const compute_t _vs_shfl_c = __shfl_down_sync(VS_WARP_FULL_MASK, (c), _vs_offset); \
        KAHAN_ADD((sum), _vs_shfl_sum - _vs_shfl_c, (c)); \
    } \
} while(0)

#define MAX_FRECHET_B 2048

extern "C" __global__
void frechet_distance(
    const double* __restrict__ x_a,
    const double* __restrict__ y_a,
    const int* __restrict__ offsets_a,
    const double* __restrict__ x_b,
    const double* __restrict__ y_b,
    const int* __restrict__ offsets_b,
    double* __restrict__ result,
    double center_x,
    double center_y,
    int pair_count,
    int max_b_len
) {
    const int pair = blockIdx.x * blockDim.x + threadIdx.x;
    if (pair >= pair_count) return;

    const int a_start = offsets_a[pair];
    const int a_end   = offsets_a[pair + 1];
    const int b_start = offsets_b[pair];
    const int b_end   = offsets_b[pair + 1];
    const int na = a_end - a_start;
    const int nb = b_end - b_start;

    if (na == 0 || nb == 0) {
        result[pair] = 0.0 / 0.0;  /* NaN */
        return;
    }

    if (nb > MAX_FRECHET_B) {
        /* Too large for local arrays -- return NaN as safe fallback */
        result[pair] = 0.0 / 0.0;
        return;
    }

    /* Rolling DP: previous and current rows, each of length nb.
       For large nb these spill to local memory (L1/L2 cached). */
    double prev_row[MAX_FRECHET_B];
    double curr_row[MAX_FRECHET_B];

    /* Initialize row 0: ca[0][j] = max(ca[0][j-1], d(a_0, b_j)) */
    {
        const compute_t ax = CX(x_a[a_start]);
        const compute_t ay = CY(y_a[a_start]);
        {
            const compute_t dx = ax - CX(x_b[b_start]);
            const compute_t dy = ay - CY(y_b[b_start]);
            prev_row[0] = sqrt((double)(dx * dx + dy * dy));
        }
        for (int j = 1; j < nb; j++) {
            const compute_t dx = ax - CX(x_b[b_start + j]);
            const compute_t dy = ay - CY(y_b[b_start + j]);
            const double d = sqrt((double)(dx * dx + dy * dy));
            prev_row[j] = (d > prev_row[j - 1]) ? d : prev_row[j - 1];
        }
    }

    /* Fill rows 1..na-1 */
    for (int i = 1; i < na; i++) {
        const compute_t ax = CX(x_a[a_start + i]);
        const compute_t ay = CY(y_a[a_start + i]);

        /* curr_row[0] = max(prev_row[0], d(a_i, b_0)) */
        {
            const compute_t dx = ax - CX(x_b[b_start]);
            const compute_t dy = ay - CY(y_b[b_start]);
            const double d = sqrt((double)(dx * dx + dy * dy));
            curr_row[0] = (d > prev_row[0]) ? d : prev_row[0];
        }

        for (int j = 1; j < nb; j++) {
            const compute_t dx = ax - CX(x_b[b_start + j]);
            const compute_t dy = ay - CY(y_b[b_start + j]);
            const double d = sqrt((double)(dx * dx + dy * dy));

            double min_prev = prev_row[j];          /* ca[i-1][j]   */
            if (curr_row[j - 1] < min_prev) min_prev = curr_row[j - 1]; /* ca[i][j-1]   */
            if (prev_row[j - 1] < min_prev) min_prev = prev_row[j - 1]; /* ca[i-1][j-1] */

            curr_row[j] = (d > min_prev) ? d : min_prev;
        }

        /* Swap: prev_row = curr_row */
        for (int j = 0; j < nb; j++) prev_row[j] = curr_row[j];
    }

    result[pair] = prev_row[nb - 1];
}
"""
vibespatial.spatial.distance_metrics_kernels.FRECHET_FP32 = Multiline-String
Show Value
"""#define VS_SPATIAL_EPSILON 9.9999999999999998e-13

typedef float compute_t;

/* Centered coordinate read: subtract center in fp64, then cast to compute_t.
   When compute_t is double, this is a no-op identity.  When compute_t is float,
   centering reduces absolute magnitude before the lossy cast. */
#define CX(val) ((compute_t)((val) - center_x))
#define CY(val) ((compute_t)((val) - center_y))

/* Kahan summation helper -- add `val` to `sum` with compensation `c`. */
#define KAHAN_ADD(sum, val, c) do { \
    const compute_t _y = (val) - (c); \
    const compute_t _t = (sum) + _y; \
    (c) = (_t - (sum)) - _y; \
    (sum) = _t; \
} while(0)

/* Warp-level Kahan reduction for a (sum, compensation) pair. */
#define VS_WARP_FULL_MASK 0xFFFFFFFFu
#define WARP_KAHAN_REDUCE(sum, c) do { \
    for (int _vs_offset = 16; _vs_offset > 0; _vs_offset >>= 1) { \
        const compute_t _vs_shfl_sum = __shfl_down_sync(VS_WARP_FULL_MASK, (sum), _vs_offset); \
        const compute_t _vs_shfl_c = __shfl_down_sync(VS_WARP_FULL_MASK, (c), _vs_offset); \
        KAHAN_ADD((sum), _vs_shfl_sum - _vs_shfl_c, (c)); \
    } \
} while(0)

#define MAX_FRECHET_B 2048

extern "C" __global__
void frechet_distance(
    const double* __restrict__ x_a,
    const double* __restrict__ y_a,
    const int* __restrict__ offsets_a,
    const double* __restrict__ x_b,
    const double* __restrict__ y_b,
    const int* __restrict__ offsets_b,
    double* __restrict__ result,
    double center_x,
    double center_y,
    int pair_count,
    int max_b_len
) {
    const int pair = blockIdx.x * blockDim.x + threadIdx.x;
    if (pair >= pair_count) return;

    const int a_start = offsets_a[pair];
    const int a_end   = offsets_a[pair + 1];
    const int b_start = offsets_b[pair];
    const int b_end   = offsets_b[pair + 1];
    const int na = a_end - a_start;
    const int nb = b_end - b_start;

    if (na == 0 || nb == 0) {
        result[pair] = 0.0 / 0.0;  /* NaN */
        return;
    }

    if (nb > MAX_FRECHET_B) {
        /* Too large for local arrays -- return NaN as safe fallback */
        result[pair] = 0.0 / 0.0;
        return;
    }

    /* Rolling DP: previous and current rows, each of length nb.
       For large nb these spill to local memory (L1/L2 cached). */
    double prev_row[MAX_FRECHET_B];
    double curr_row[MAX_FRECHET_B];

    /* Initialize row 0: ca[0][j] = max(ca[0][j-1], d(a_0, b_j)) */
    {
        const compute_t ax = CX(x_a[a_start]);
        const compute_t ay = CY(y_a[a_start]);
        {
            const compute_t dx = ax - CX(x_b[b_start]);
            const compute_t dy = ay - CY(y_b[b_start]);
            prev_row[0] = sqrt((double)(dx * dx + dy * dy));
        }
        for (int j = 1; j < nb; j++) {
            const compute_t dx = ax - CX(x_b[b_start + j]);
            const compute_t dy = ay - CY(y_b[b_start + j]);
            const double d = sqrt((double)(dx * dx + dy * dy));
            prev_row[j] = (d > prev_row[j - 1]) ? d : prev_row[j - 1];
        }
    }

    /* Fill rows 1..na-1 */
    for (int i = 1; i < na; i++) {
        const compute_t ax = CX(x_a[a_start + i]);
        const compute_t ay = CY(y_a[a_start + i]);

        /* curr_row[0] = max(prev_row[0], d(a_i, b_0)) */
        {
            const compute_t dx = ax - CX(x_b[b_start]);
            const compute_t dy = ay - CY(y_b[b_start]);
            const double d = sqrt((double)(dx * dx + dy * dy));
            curr_row[0] = (d > prev_row[0]) ? d : prev_row[0];
        }

        for (int j = 1; j < nb; j++) {
            const compute_t dx = ax - CX(x_b[b_start + j]);
            const compute_t dy = ay - CY(y_b[b_start + j]);
            const double d = sqrt((double)(dx * dx + dy * dy));

            double min_prev = prev_row[j];          /* ca[i-1][j]   */
            if (curr_row[j - 1] < min_prev) min_prev = curr_row[j - 1]; /* ca[i][j-1]   */
            if (prev_row[j - 1] < min_prev) min_prev = prev_row[j - 1]; /* ca[i-1][j-1] */

            curr_row[j] = (d > min_prev) ? d : min_prev;
        }

        /* Swap: prev_row = curr_row */
        for (int j = 0; j < nb; j++) prev_row[j] = curr_row[j];
    }

    result[pair] = prev_row[nb - 1];
}
"""