|
| class | AlignedArray |
| | Aligned array type. More...
|
| |
| struct | AlignedBuffer |
| | Modifies semantics of cutlass::Array<> to provide guaranteed alignment. More...
|
| |
| class | Array< T, N, false > |
| | Statically sized array for any data type. More...
|
| |
| class | Array< T, N, true > |
| | Statically sized array for any data type. More...
|
| |
| struct | CommandLine |
| |
| class | complex |
| |
| class | ConstSubbyteReference |
| |
| struct | Coord |
| | Statically-sized array specifying Coords within a tensor. More...
|
| |
| class | cuda_exception |
| | C++ exception wrapper for CUDA cudaError_t. More...
|
| |
| struct | Distribution |
| | Distribution type. More...
|
| |
| struct | divide_assert |
| |
| struct | divides |
| |
| struct | divides< Array< half_t, N > > |
| |
| struct | divides< Array< T, N > > |
| |
| struct | FloatType |
| | Defines a floating-point type based on the number of exponent and mantissa bits. More...
|
| |
| struct | FloatType< 11, 52 > |
| |
| struct | FloatType< 5, 10 > |
| |
| struct | FloatType< 8, 23 > |
| |
| struct | half_t |
| | IEEE half-precision floating-point type. More...
|
| |
| class | HostTensor |
| | Host tensor. More...
|
| |
| class | IdentityTensorLayout |
| |
| struct | integer_subbyte |
| | 4-bit signed integer type More...
|
| |
| struct | IntegerType |
| | Defines integers based on size and whether they are signed. More...
|
| |
| struct | IntegerType< 1, false > |
| |
| struct | IntegerType< 1, true > |
| |
| struct | IntegerType< 16, false > |
| |
| struct | IntegerType< 16, true > |
| |
| struct | IntegerType< 32, false > |
| |
| struct | IntegerType< 32, true > |
| |
| struct | IntegerType< 4, false > |
| |
| struct | IntegerType< 4, true > |
| |
| struct | IntegerType< 64, false > |
| |
| struct | IntegerType< 64, true > |
| |
| struct | IntegerType< 8, false > |
| |
| struct | IntegerType< 8, true > |
| |
| struct | is_pow2 |
| |
| struct | KernelLaunchConfiguration |
| | Structure containing the basic launch configuration of a CUDA kernel. More...
|
| |
| struct | log2_down |
| |
| struct | log2_down< N, 1, Count > |
| |
| struct | log2_up |
| |
| struct | log2_up< N, 1, Count > |
| |
| struct | MatrixCoord |
| |
| struct | MatrixShape |
| | Describes the size of a matrix tile. More...
|
| |
| struct | Max |
| |
| struct | maximum |
| |
| struct | maximum< Array< T, N > > |
| |
| struct | maximum< float > |
| |
| struct | Min |
| |
| struct | minimum |
| |
| struct | minimum< Array< T, N > > |
| |
| struct | minimum< float > |
| |
| struct | minus |
| |
| struct | minus< Array< half_t, N > > |
| |
| struct | minus< Array< T, N > > |
| |
| struct | multiplies |
| |
| struct | multiplies< Array< half_t, N > > |
| |
| struct | multiplies< Array< T, N > > |
| |
| struct | multiply_add |
| | Fused multiply-add. More...
|
| |
| struct | multiply_add< Array< half_t, N >, Array< half_t, N >, Array< half_t, N > > |
| | Fused multiply-add. More...
|
| |
| struct | multiply_add< Array< T, N >, Array< T, N >, Array< T, N > > |
| | Fused multiply-add. More...
|
| |
| struct | multiply_add< complex< T >, complex< T >, complex< T > > |
| | Fused multiply-add. More...
|
| |
| struct | multiply_add< complex< T >, T, complex< T > > |
| | Fused multiply-add. More...
|
| |
| struct | multiply_add< T, complex< T >, complex< T > > |
| | Fused multiply-add. More...
|
| |
| struct | negate |
| |
| struct | negate< Array< half_t, N > > |
| |
| struct | negate< Array< T, N > > |
| |
| struct | NumericArrayConverter |
| | Conversion operator for Array. More...
|
| |
| struct | NumericArrayConverter< float, half_t, 2, Round > |
| | Partial specialization for Array<float, 2> <= Array<half_t, 2>, round to nearest. More...
|
| |
| struct | NumericArrayConverter< float, half_t, N, Round > |
| | Partial specialization for Array<float> <= Array<half> More...
|
| |
| struct | NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest > |
| | Partial specialization for Array<half, 2> <= Array<float, 2>, round to nearest. More...
|
| |
| struct | NumericArrayConverter< half_t, float, N, Round > |
| | Partial specialization for Array<half> <= Array<float> More...
|
| |
| struct | NumericConverter |
| |
| struct | NumericConverter< float, half_t, Round > |
| | Partial specialization for float <= half_t. More...
|
| |
| struct | NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest > |
| | Specialization for round-to-nearest. More...
|
| |
| struct | NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero > |
| | Specialization for round-toward-zero. More...
|
| |
| struct | NumericConverter< int8_t, float, Round > |
| |
| struct | NumericConverter< T, T, Round > |
| | Partial specialization for T <= T (source and destination types are identical). More...
|
| |
| struct | NumericConverterClamp |
| |
| struct | plus |
| |
| struct | plus< Array< half_t, N > > |
| |
| struct | plus< Array< T, N > > |
| |
| struct | PredicateVector |
| | Statically sized array of bits. More...
|
| |
| struct | RealType |
| | Used to determine the real-valued underlying type of a numeric type T. More...
|
| |
| struct | RealType< complex< T > > |
| | Partial specialization for complex-valued type. More...
|
| |
| struct | ReferenceFactory |
| |
| struct | ReferenceFactory< Element, false > |
| |
| struct | ReferenceFactory< Element, true > |
| |
| struct | ScalarIO |
| | Helper to enable formatted printing of CUTLASS scalar types to an ostream. More...
|
| |
| class | Semaphore |
| | CTA-wide semaphore for inter-CTA synchronization. More...
|
| |
| struct | sizeof_bits |
| | Defines the size of an element in bits. More...
|
| |
| struct | sizeof_bits< Array< T, N, RegisterSized > > |
| | Defines the size of an element in bits - specialized for Array<T, N, RegisterSized>. More...
|
| |
| struct | sizeof_bits< bin1_t > |
| | Defines the size of an element in bits - specialized for bin1_t. More...
|
| |
| struct | sizeof_bits< int4b_t > |
| | Defines the size of an element in bits - specialized for int4b_t. More...
|
| |
| struct | sizeof_bits< uint1b_t > |
| | Defines the size of an element in bits - specialized for uint1b_t. More...
|
| |
| struct | sizeof_bits< uint4b_t > |
| | Defines the size of an element in bits - specialized for uint4b_t. More...
|
| |
| struct | sqrt_est |
| |
| class | SubbyteReference |
| |
| struct | Tensor4DCoord |
| | Defines a canonical 4D coordinate used by tensor operations. More...
|
| |
| class | TensorRef |
| |
| class | TensorView |
| |
| struct | TypeTraits |
| |
| struct | TypeTraits< complex< double > > |
| |
| struct | TypeTraits< complex< float > > |
| |
| struct | TypeTraits< complex< half > > |
| |
| struct | TypeTraits< complex< half_t > > |
| |
| struct | TypeTraits< double > |
| |
| struct | TypeTraits< float > |
| |
| struct | TypeTraits< half_t > |
| |
| struct | TypeTraits< int > |
| |
| struct | TypeTraits< int64_t > |
| |
| struct | TypeTraits< int8_t > |
| |
| struct | TypeTraits< uint64_t > |
| |
| struct | TypeTraits< uint8_t > |
| |
| struct | TypeTraits< unsigned > |
| |
| struct | xor_add |
| | Fused XOR-add. More...
|
| |
|
| CUTLASS_HOST_DEVICE constexpr bool | ispow2 (unsigned x) |
| | Returns true if the argument is a power of 2. More...
|
| |
| CUTLASS_HOST_DEVICE constexpr unsigned | floor_pow_2 (unsigned x) |
| | Returns the largest power of two not greater than the argument. More...
|
| |
| CUTLASS_HOST_DEVICE float const & | real (cuFloatComplex const &z) |
| | Returns the real part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE float & | real (cuFloatComplex &z) |
| | Returns the real part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE double const & | real (cuDoubleComplex const &z) |
| | Returns the real part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE double & | real (cuDoubleComplex &z) |
| | Returns the real part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE float const & | imag (cuFloatComplex const &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE float & | imag (cuFloatComplex &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE double const & | imag (cuDoubleComplex const &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| CUTLASS_HOST_DEVICE double & | imag (cuDoubleComplex &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T const & | real (complex< T > const &z) |
| | Returns the real part of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T & | real (complex< T > &z) |
| | Returns the real part of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T const & | imag (complex< T > const &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T & | imag (complex< T > &z) |
| | Returns the imaginary part of the complex number. More...
|
| |
| template<typename T > |
| std::ostream & | operator<< (std::ostream &out, complex< T > const &z) |
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T | abs (complex< T > const &z) |
| | Returns the magnitude of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T | arg (complex< T > const &z) |
| | Returns the argument (phase angle) of the complex number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE T | norm (T const &z) |
| | Returns the squared magnitude of a real number. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE int8_t | norm (int8_t const &z) |
| | Returns the squared magnitude of a real number. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE double | norm (complex< T > const &z) |
| | Returns the squared magnitude of a complex number. More...
|
| |
| template<typename T , typename R > |
| CUTLASS_HOST_DEVICE R | norm_accumulate (T const &x, R const &accumulator) |
| | Norm-accumulate calculation. More...
|
| |
| template<typename T , typename R > |
| CUTLASS_HOST_DEVICE R | norm_accumulate (complex< T > const &z, R const &accumulator) |
| | Norm accumulate specialized for complex types. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | conj (complex< T > const &z) |
| | Returns the complex conjugate. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | proj (complex< T > const &z) |
| | Projects the complex number z onto the Riemann sphere. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | polar (T const &r, T const &theta=T()) |
| | Returns a complex number with magnitude r and phase theta. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | exp (complex< T > const &z) |
| | Computes the complex exponential of z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | log (complex< T > const &z) |
| | Computes the complex logarithm of z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | log10 (complex< T > const &z) |
| | Computes the complex exponential of z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | sqrt (complex< T > const &z) |
| | Computes the square root of complex number z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | cos (complex< T > const &z) |
| | Computes the cosine of complex z. More...
|
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE complex< T > | sin (complex< T > const &z) |
| | Computes the sine of complex z. More...
|
| |
| template<> |
| CUTLASS_HOST_DEVICE cutlass::complex< half_t > | from_real< cutlass::complex< half_t > > (double r) |
| |
| template<> |
| CUTLASS_HOST_DEVICE cutlass::complex< float > | from_real< cutlass::complex< float > > (double r) |
| |
| template<> |
| CUTLASS_HOST_DEVICE cutlass::complex< double > | from_real< cutlass::complex< double > > (double r) |
| |
| template<int Rank, typename Index > |
| CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (Index s, Coord< Rank, Index > coord) |
| | Scalar division. More...
|
| |
| template<int Rank, typename Index > |
| CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (Coord< Rank, Index > coord, Index s) |
| | Scalar division. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 1 > | make_Coord (int _0) |
| | Helper to make a 1-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 2 > | make_Coord (int _0, int _1) |
| | Helper to make a 2-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 3 > | make_Coord (int _0, int _1, int _2) |
| | Helper to make a 3-element coordinate. More...
|
| |
| CUTLASS_HOST_DEVICE Coord< 4 > | make_Coord (int _0, int _1, int _2, int _3) |
| | Helper to make a 4-element coordinate. More...
|
| |
| template<int Rank> |
| std::ostream & | operator<< (std::ostream &out, Coord< Rank > const &coord) |
| |
| std::istream & | operator>> (std::istream &stream, half_t &x) |
| |
| std::ostream & | operator<< (std::ostream &out, half_t const &x) |
| |
| template<typename T > |
| std::ostream & | operator<< (std::ostream &out, ScalarIO< T > const &scalar) |
| | Default printing to ostream. More...
|
| |
| template<> |
| std::ostream & | operator<< (std::ostream &out, ScalarIO< int8_t > const &scalar) |
| | Printing to ostream of int8_t as integer rather than character. More...
|
| |
| template<> |
| std::ostream & | operator<< (std::ostream &out, ScalarIO< uint8_t > const &scalar) |
| | Printing to ostream of uint8_t as integer rather than character. More...
|
| |
| template<typename Operator > |
| __global__ void | Kernel (typename Operator::Params params) |
| | Generic CUTLASS kernel template. More...
|
| |
| template<typename dividend_t , typename divisor_t > |
| CUTLASS_HOST_DEVICE dividend_t | round_nearest (dividend_t dividend, divisor_t divisor) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | gcd (value_t a, value_t b) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | lcm (value_t a, value_t b) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | clz (value_t x) |
| |
| template<typename value_t > |
| CUTLASS_HOST_DEVICE value_t | find_log2 (value_t x) |
| |
| CUTLASS_HOST_DEVICE void | find_divisor (unsigned int &mul, unsigned int &shr, unsigned int denom) |
| |
| CUTLASS_HOST_DEVICE void | fast_divmod (int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr) |
| |
| CUTLASS_HOST_DEVICE void | fast_divmod (int &quo, int64_t &rem, int64_t src, int div, unsigned int mul, unsigned int shr) |
| |
| CUTLASS_HOST_DEVICE constexpr int | const_min (int a, int b) |
| |
| CUTLASS_HOST_DEVICE constexpr int | const_max (int a, int b) |
| |
| CUTLASS_HOST_DEVICE bool | signbit (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE cutlass::half_t | abs (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE bool | isnan (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE bool | isfinite (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE cutlass::half_t | nanh (const char *) |
| |
| CUTLASS_HOST_DEVICE bool | isinf (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE bool | isnormal (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE int | fpclassify (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE cutlass::half_t | sqrt (cutlass::half_t const &h) |
| |
| CUTLASS_HOST_DEVICE half_t | copysign (half_t const &a, half_t const &b) |
| |
| CUTLASS_HOST_DEVICE bool | operator== (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator!= (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator< (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator<= (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator> (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE bool | operator>= (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator+ (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator- (half_t const &lhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator- (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator* (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator/ (half_t const &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator+= (half_t &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator-= (half_t &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator*= (half_t &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator/= (half_t &lhs, half_t const &rhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator++ (half_t &lhs) |
| |
| CUTLASS_HOST_DEVICE half_t & | operator-- (half_t &lhs) |
| |
| CUTLASS_HOST_DEVICE half_t | operator++ (half_t &lhs, int) |
| |
| CUTLASS_HOST_DEVICE half_t | operator-- (half_t &lhs, int) |
| |
| template<typename T > |
| CUTLASS_HOST_DEVICE bool | relatively_equal (T a, T b, T epsilon, T nonzero_floor) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint1b_t > (uint1b_t a, uint1b_t b, uint1b_t, uint1b_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int4b_t > (int4b_t a, int4b_t b, int4b_t, int4b_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint4b_t > (uint4b_t a, uint4b_t b, uint4b_t, uint4b_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int8_t > (int8_t a, int8_t b, int8_t, int8_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint8_t > (uint8_t a, uint8_t b, uint8_t, uint8_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int16_t > (int16_t a, int16_t b, int16_t, int16_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint16_t > (uint16_t a, uint16_t b, uint16_t, uint16_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int32_t > (int32_t a, int32_t b, int32_t, int32_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint32_t > (uint32_t a, uint32_t b, uint32_t, uint32_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< int64_t > (int64_t a, int64_t b, int64_t, int64_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< uint64_t > (uint64_t a, uint64_t b, uint64_t, uint64_t) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< half_t > (half_t a, half_t b, half_t epsilon, half_t nonzero_floor) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< float > (float a, float b, float epsilon, float nonzero_floor) |
| |
| template<> |
| CUTLASS_HOST_DEVICE bool | relatively_equal< double > (double a, double b, double epsilon, double nonzero_floor) |
| |
| template<typename Element , typename Layout > |
| CUTLASS_HOST_DEVICE TensorRef< Element, Layout > | make_TensorRef (Element *ptr, Layout const &layout) |
| | Constructs a TensorRef, deducing types from arguments. More...
|
| |
| template<typename Element , typename Layout > |
| bool | TensorRef_aligned (TensorRef< Element, Layout > const &ref, int alignment) |
| |
| template<typename Element , typename Layout > |
| CUTLASS_HOST_DEVICE TensorView< Element, Layout > | make_TensorView (Element *ptr, Layout const &layout, typename Layout::TensorCoord const &extent) |
| | Constructs a TensorView, deducing types from arguments. More...
|
| |
| __host__ CUTLASS_DEVICE cudaError_t | cuda_perror_impl (cudaError_t error, const char *filename, int line) |
| | The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
|
| |
| std::ostream & | operator<< (std::ostream &out, cudaError_t result) |
| | Writes a cudaError_t to an output stream. More...
|
| |
| std::ostream & | operator<< (std::ostream &out, cuda_exception const &e) |
| | Writes a cuda_exception instance to an output stream. More...
|
| |
| template<int Interleaved, typename Element , typename Layout > |
| void | reorder_column (TensorRef< Element, Layout > dest, TensorRef< Element, Layout > src, cutlass::gemm::GemmCoord problem_size) |
| |
| template<typename Element , typename Layout > |
| std::ostream & | TensorViewWrite (std::ostream &out, TensorView< Element, Layout > const &view) |
| | Prints human-readable representation of a TensorView to an ostream. More...
|
| |
| template<typename Element , typename Layout > |
| std::ostream & | operator<< (std::ostream &out, TensorView< Element, Layout > const &view) |
| | Prints human-readable representation of a TensorView to an ostream. More...
|
| |