#include <grouped_gemm_quant_kernel.hpp>
|
| static CK_TILE_HOST const std::string | GetName () |
| static CK_TILE_HOST auto | GetWorkSpaceSize (const std::vector< QuantGroupedGemmHostArgs > &gemm_descs) -> std::size_t |
| static CK_TILE_HOST auto | GetWorkSpaceSize (index_t group_count) -> std::size_t |
| static CK_TILE_HOST auto | BlockSize () -> dim3 |
| static CK_TILE_HOST auto | MaxOccupancyGridSize (const stream_config &s) -> dim3 |
| | Get the maximum occupancy grid size for the persistent kernel on the current device.
|
| static CK_TILE_HOST auto | GridSize (const std::vector< QuantGroupedGemmHostArgs > &gemm_descs) |
| static CK_TILE_HOST auto | MakeKargs (const std::vector< QuantGroupedGemmHostArgs > &gemm_descs) -> std::vector< QuantGemmTransKernelArg > |
| static CK_TILE_HOST bool | IsSupportedArgument (const std::vector< QuantGemmTransKernelArg > &kargs) |
| static CK_TILE_HOST_DEVICE constexpr auto | GetSmemSize () -> index_t |
| template<memory_operation_enum DstInMemOp = memory_operation_enum::set> |
| static CK_TILE_DEVICE void | RunGemmWithPipelineSelection2LDS (const ADataType *a_ptr, const BDataType *b_ptr, const AQDataType *aq_ptr, const BQDataType *bq_ptr, CDataType *c_ptr, void *smem_ptr_0, void *smem_ptr_1, const QuantGroupedGemmKernelArgs &kargs, const typename Base::SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n) |
| static CK_TILE_DEVICE void | RunGemmWithPipelineSelection (const ADataType *a_ptr, const BDataType *b_ptr, const AQDataType *aq_ptr, const BQDataType *bq_ptr, CDataType *c_ptr, void *smem_ptr_0, const QuantGroupedGemmKernelArgs &kargs, const typename Base::SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n) |
| | Runs single GEMM problem cooperatively by whole workgroup.
|
◆ AccDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ ADataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
Specify the data type configurations for A, B, C/E.
◆ ALayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ AQDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
Initial value:
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
◆ Base
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ BDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ BLayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ BQDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ CDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ CLayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ EpiloguePipeline
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ GemmPipeline
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ Kernel
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
Initial value:
Definition grouped_gemm_quant_kernel.hpp:117
◆ OffsetTile1DPartitioner
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
ALayout and ADataType are expected to be scalars, not a tuple.
BLayout and BDataType are expected to be scalars, not a tuple.
C/ELayout and C/EDataType are expected to be scalars, not a tuple.
◆ TilePartitioner
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ BlockSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ GetName()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ GetSmemSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ GetWorkSpaceSize() [1/2]
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ GetWorkSpaceSize() [2/2]
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ GridSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ IsSupportedArgument()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ MakeKargs()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ MaxOccupancyGridSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
Get the maximum occupancy grid size for the persistent kernel on the current device.
- Returns
- The maximum occupancy grid size.
- Note
- This function queries the maximum occupancy of the kernel using hipOccupancyMaxActiveBlocksPerMultiprocessor.
◆ operator()()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
template<bool U = UsePersistentKernel, typename = std::enable_if_t<U>, typename = void>
◆ Run()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ RunGemmWithPipelineSelection()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
| CK_TILE_DEVICE void ck_tile::QuantGroupedGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::RunGemmWithPipelineSelection |
( |
const ADataType * | a_ptr, |
|
|
const BDataType * | b_ptr, |
|
|
const AQDataType * | aq_ptr, |
|
|
const BQDataType * | bq_ptr, |
|
|
CDataType * | c_ptr, |
|
|
void * | smem_ptr_0, |
|
|
const QuantGroupedGemmKernelArgs & | kargs, |
|
|
const typename Base::SplitKBatchOffset & | splitk_batch_offset, |
|
|
const index_t | block_idx_m, |
|
|
const index_t | block_idx_n ) |
|
inline static |
Runs single GEMM problem cooperatively by whole workgroup.
- Note
- The GEMM pipeline is selected in-kernel based on the number of K-loops and the tail-number. This is needed for the persistent tile-loop, where we do not have access to the K dimension on the host.
- Parameters
-
| a_ptr | input A pointer |
| b_ptr | input B pointer |
| aq_ptr | input AQ pointer |
| bq_ptr | input BQ pointer |
| c_ptr | output C pointer |
| smem_ptr_0 | The start memory pointer of the shared memory block. |
| kargs | GEMM kernel arguments |
| splitk_batch_offset | Utility structure used to calculate k batch. |
| block_idx_m | The GEMM's output M dimension tile index processed by this workgroup. |
| block_idx_n | The GEMM's output N dimension tile index processed by this workgroup. |
◆ RunGemmWithPipelineSelection2LDS()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
| CK_TILE_DEVICE void ck_tile::QuantGroupedGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::RunGemmWithPipelineSelection2LDS |
( |
const ADataType * | a_ptr, |
|
|
const BDataType * | b_ptr, |
|
|
const AQDataType * | aq_ptr, |
|
|
const BQDataType * | bq_ptr, |
|
|
CDataType * | c_ptr, |
|
|
void * | smem_ptr_0, |
|
|
void * | smem_ptr_1, |
|
|
const QuantGroupedGemmKernelArgs & | kargs, |
|
|
const typename Base::SplitKBatchOffset & | splitk_batch_offset, |
|
|
const index_t | block_idx_m, |
|
|
const index_t | block_idx_n ) |
|
inline static |
◆ kBlockSize
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ kQuantType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
◆ UsePersistentKernel
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_,
QuantType QuantType_>
| bool ck_tile::QuantGroupedGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::UsePersistentKernel = GemmPipeline::UsePersistentKernel |
|
static constexpr |
The documentation for this struct was generated from the following file: