ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
                                          Intersection *isect,
                                          float3 P,
                                          float3 dir,
                                          uint visibility,
                                          int object,
                                          int prim_addr)
{
  const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
  const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex];
#else
  const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
               tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
               tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
#endif
  float t, u, v;
  if (ray_triangle_intersect(P,
                             dir,
                             isect->t,
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
                             ssef_verts,
#else
                             float4_to_float3(tri_a),
                             float4_to_float3(tri_b),
                             float4_to_float3(tri_c),
#endif
                             &u,
                             &v,
                             &t)) {
#ifdef __VISIBILITY_FLAG__
    /* Visibility flag test. we do it here under the assumption
     * that most triangles are culled by node flags.
     */
    if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
#endif
    {
      isect->prim = prim_addr;
      isect->object = object;
      isect->type = PRIMITIVE_TRIANGLE;
      isect->u = u;
      isect->v = v;
      isect->t = t;
      return true;
    }
  }
  return false;
}