| // The MIT License (MIT) |
| // |
| // Copyright (c) 2016 Northeastern University |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining a copy |
| // of this software and associated documentation files (the "Software"), to deal |
| // in the Software without restriction, including without limitation the rights |
| // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| // copies of the Software, and to permit persons to whom the Software is |
| // furnished to do so, subject to the following conditions: |
| // |
| // The above copyright notice and this permission notice shall be included in |
| // all copies or substantial portions of the Software. |
| // |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| // SOFTWARE. |
| |
| #ifndef CORE_INCLUDE_DNN_UTILITY_H_ |
| #define CORE_INCLUDE_DNN_UTILITY_H_ |
| |
#include <cctype>
#include <iostream>
#include <map>
#include <string>
| |
| #ifdef NVIDIA_CUDNN |
| #include "cudnn.h" |
| #endif |
| |
| #ifdef AMD_MIOPEN |
| #include <miopen/miopen.h> |
| #include <rocblas.h> |
| #endif |
| |
| #include "common.h" |
| #include "dnn_param.h" |
| #include "timer.h" |
| |
| namespace dnnmark { |
| |
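// Handle owns the vendor library contexts (cuDNN/cuBLAS on NVIDIA,
// MIOpen/rocBLAS on AMD) that all benchmark calls are issued against.
// A rough usage sketch, assuming an NVIDIA_CUDNN build; an AMD_MIOPEN
// build mirrors it with GetMIOpen():
//
//   Handle handle(2);                          // two handle sets
//   cudnnHandle_t cudnn = handle.GetCudnn(1);  // context for index 1
//   cublasHandle_t blas = handle.GetBlas(0);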
| class Handle { |
| #ifdef NVIDIA_CUDNN |
| private: |
| cudnnHandle_t *cudnn_handles_; |
| cublasHandle_t *blas_handles_; |
| int num_cudnn_handles_; |
| int num_blas_handles_; |
| public: |
| Handle(); |
| Handle(int num); |
| ~Handle(); |
| cudnnHandle_t GetCudnn() const; |
| cudnnHandle_t GetCudnn(int index) const; |
| cublasHandle_t GetBlas() const; |
| cublasHandle_t GetBlas(int index) const; |
| int num_cudnn() const { return num_cudnn_handles_; } |
| int num_blas() const { return num_blas_handles_; } |
| #endif |
| #ifdef AMD_MIOPEN |
| private: |
| miopenHandle_t *miopen_handles_; |
| rocblas_handle *rocblas_handles_; |
| int num_miopen_handles_; |
| int num_rocblas_handles_; |
| public: |
| Handle(); |
| Handle(int num); |
| ~Handle(); |
| miopenHandle_t GetMIOpen() const; |
| miopenHandle_t GetMIOpen(int index) const; |
| rocblas_handle GetBlas() const; |
| rocblas_handle GetBlas(int index) const; |
| int num_miopen() const { return num_miopen_handles_; } |
| int num_blas() const { return num_rocblas_handles_; } |
| #endif |
| }; |
| |
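// Descriptor is the common base of the typed descriptor wrappers below.
// It only tracks whether Set() has been called: each wrapper's Get()
// returns the native descriptor once configured and nullptr before,
// and any Set() call after the first is ignored.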
| class Descriptor { |
| protected: |
| bool set_; |
| public: |
| Descriptor(); |
| ~Descriptor(); |
| bool isSet(); |
| }; |
| |
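// DataTensor wraps a 4D tensor descriptor (NCHW layout on cuDNN).
// A minimal sketch, assuming float data:
//
//   DataTensor<float> bottom;
//   bottom.Set(32, 3, 224, 224);   // N, C, H, W
//   // bottom.Get() now returns a valid native descriptor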
| template <typename T> |
| class DataTensor : public Descriptor { |
| private: |
| #ifdef NVIDIA_CUDNN |
| cudnnTensorDescriptor_t desc_; |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenTensorDescriptor_t desc_; |
| #endif |
| |
| public: |
| DataTensor() |
| : Descriptor() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnCreateTensorDescriptor(&desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenCreateTensorDescriptor(&desc_)); |
| #endif |
| } |
| |
| ~DataTensor() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnDestroyTensorDescriptor(desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenDestroyTensorDescriptor(desc_)); |
| #endif |
| } |
| |
| void Set(int n, int c, int h, int w) { |
| if (!set_) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnSetTensor4dDescriptor(desc_, |
| CUDNN_TENSOR_NCHW, |
| DataType<T>::type, |
| n, c, h, w)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenSet4dTensorDescriptor(desc_, |
| DataType<T>::type, |
| n, c, h, w)); |
| #endif |
| } |
| set_ = true; |
| } |
| |
| #ifdef NVIDIA_CUDNN |
| cudnnTensorDescriptor_t Get() const { |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenTensorDescriptor_t Get() const { |
| #endif |
| if (set_) |
| return desc_; |
| return nullptr; |
| } |
| |
| }; |
| |
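// ConvolutionDesc pairs the convolution descriptor with its filter
// descriptor so both are configured from a single ConvolutionParam.
// A sketch, assuming a populated ConvolutionParam named conv_param and
// a 3-channel input:
//
//   ConvolutionDesc<float> conv;
//   conv.Set(conv_param, 3);   // 3 = input channel count
//   // conv.GetConv() and conv.GetFilter() feed the convolution calls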
| template <typename T> |
| class ConvolutionDesc : public Descriptor { |
| private: |
| #ifdef NVIDIA_CUDNN |
| cudnnFilterDescriptor_t filter_desc_; |
| cudnnConvolutionDescriptor_t conv_desc_; |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenTensorDescriptor_t filter_desc_; |
| miopenConvolutionDescriptor_t conv_desc_; |
| #endif |
| |
| public: |
| ConvolutionDesc() |
| : Descriptor() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc_)); |
| CUDNN_CALL(cudnnCreateFilterDescriptor(&filter_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenCreateConvolutionDescriptor(&conv_desc_)); |
| MIOPEN_CALL(miopenCreateTensorDescriptor(&filter_desc_)); |
| #endif |
| } |
| |
| ~ConvolutionDesc() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc_)); |
| CUDNN_CALL(cudnnDestroyFilterDescriptor(filter_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenDestroyConvolutionDescriptor(conv_desc_)); |
| MIOPEN_CALL(miopenDestroyTensorDescriptor(filter_desc_)); |
| #endif |
| } |
| |
| void Set(const ConvolutionParam ¶m, int num_channel) { |
| if (!set_) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnSetConvolution2dDescriptor(conv_desc_, |
| param.pad_h_, param.pad_w_, |
| param.stride_u_, param.stride_v_, |
| param.upscale_x_, param.upscale_y_, |
| param.mode_, DataType<T>::type)); |
| |
| CUDNN_CALL(cudnnSetFilter4dDescriptor(filter_desc_, |
| DataType<T>::type, CUDNN_TENSOR_NCHW, |
| param.output_num_, num_channel, |
| param.kernel_size_h_, param.kernel_size_w_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenInitConvolutionDescriptor(conv_desc_, |
| param.mode_, |
| param.pad_h_, param.pad_w_, |
| param.stride_u_, param.stride_v_, |
| param.upscale_x_, param.upscale_y_)); |
| |
| MIOPEN_CALL(miopenSet4dTensorDescriptor(filter_desc_, |
| DataType<T>::type, |
| param.output_num_, num_channel, |
| param.kernel_size_h_, param.kernel_size_w_)); |
| #endif |
| } |
| set_ = true; |
| } |
| |
| #ifdef NVIDIA_CUDNN |
| cudnnFilterDescriptor_t GetFilter() const { |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenTensorDescriptor_t GetFilter() const { |
| #endif |
| if (set_) |
| return filter_desc_; |
| return nullptr; |
| } |
| |
| #ifdef NVIDIA_CUDNN |
| cudnnConvolutionDescriptor_t GetConv() const { |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenConvolutionDescriptor_t GetConv() const { |
| #endif |
| if (set_) |
| return conv_desc_; |
| return nullptr; |
| } |
| |
| |
| }; |
| |
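// PoolingDesc wraps a 2D pooling descriptor. A sketch, assuming a
// populated PoolingParam named pool_param and a configured output
// tensor top_desc; the workspace query only fills a size on MIOpen:
//
//   PoolingDesc<float> pool;
//   pool.Set(pool_param);
//   size_t ws = 0;
//   pool.GetWorkspaceSize(top_desc, &ws);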
| template <typename T> |
| class PoolingDesc : public Descriptor { |
| private: |
| #ifdef NVIDIA_CUDNN |
| cudnnPoolingDescriptor_t pooling_desc_; |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenPoolingDescriptor_t pooling_desc_; |
| #endif |
| public: |
| PoolingDesc() |
| : Descriptor() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnCreatePoolingDescriptor(&pooling_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenCreatePoolingDescriptor(&pooling_desc_)); |
| #endif |
| } |
| |
| ~PoolingDesc() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnDestroyPoolingDescriptor(pooling_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenDestroyPoolingDescriptor(pooling_desc_)); |
| #endif |
| } |
| |
| void Set(const PoolingParam ¶m) { |
| if (!set_) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnSetPooling2dDescriptor(pooling_desc_, |
| param.mode_, CUDNN_PROPAGATE_NAN, |
| param.kernel_size_h_, param.kernel_size_w_, |
| param.pad_h_, param.pad_w_, |
| param.stride_h_, param.stride_w_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenSet2dPoolingDescriptor(pooling_desc_, |
| param.mode_, |
| param.kernel_size_h_, param.kernel_size_w_, |
| param.pad_h_, param.pad_w_, |
| param.stride_h_, param.stride_w_)); |
| #endif |
| } |
| |
| set_ = true; |
| } |
| |
| #ifdef NVIDIA_CUDNN |
| cudnnPoolingDescriptor_t Get() const { |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenPoolingDescriptor_t Get() const { |
| #endif |
| if (set_) |
| return pooling_desc_; |
| return nullptr; |
| } |
| |
| void GetWorkspaceSize(const DataTensor<T> &y_desc, |
| size_t *workspace_size) { |
| #ifdef AMD_MIOPEN |
| if (set_) |
| MIOPEN_CALL(miopenPoolingGetWorkSpaceSize(y_desc.Get(), workspace_size)); |
| else |
| LOG(FATAL) << "Pooling descriptor NOT set"; |
| #endif |
| } |
| }; |
| |
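// LRNDesc wraps a local response normalization descriptor. A sketch,
// assuming a populated LRNParam named lrn_param and a configured output
// tensor top_desc; as with pooling, the workspace query is MIOpen-only:
//
//   LRNDesc<float> lrn;
//   lrn.Set(lrn_param);
//   size_t ws = 0;
//   lrn.GetWorkspaceSize(top_desc, &ws);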
| template <typename T> |
| class LRNDesc : public Descriptor { |
| private: |
| #ifdef NVIDIA_CUDNN |
| cudnnLRNDescriptor_t lrn_desc_; |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenLRNDescriptor_t lrn_desc_; |
| #endif |
| public: |
| LRNDesc() |
| : Descriptor() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnCreateLRNDescriptor(&lrn_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenCreateLRNDescriptor(&lrn_desc_)); |
| #endif |
| } |
| |
| ~LRNDesc() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnDestroyLRNDescriptor(lrn_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenDestroyLRNDescriptor(lrn_desc_)); |
| #endif |
| } |
| |
| void Set(const LRNParam ¶m) { |
| if (!set_) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnSetLRNDescriptor(lrn_desc_, |
| param.local_size_, |
| param.alpha_, param.beta_, |
| param.k_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenSetLRNDescriptor(lrn_desc_, |
| param.mode_, |
| param.local_size_, |
| param.alpha_, param.beta_, |
| param.k_)); |
| #endif |
| } |
| |
| set_ = true; |
| } |
| |
| #ifdef NVIDIA_CUDNN |
| cudnnLRNDescriptor_t Get() const { |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenLRNDescriptor_t Get() const { |
| #endif |
| if (set_) |
| return lrn_desc_; |
| return nullptr; |
| } |
| |
| void GetWorkspaceSize(const DataTensor<T> &y_desc, |
| size_t *workspace_size) { |
| #ifdef AMD_MIOPEN |
| if (set_) |
| MIOPEN_CALL(miopenLRNGetWorkSpaceSize(y_desc.Get(), workspace_size)); |
| else |
| LOG(FATAL) << "LRN descriptor NOT set"; |
| #endif |
| } |
| |
| }; |
| |
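// ActivationDesc wraps an activation descriptor. On cuDNN the
// coefficient argument is fixed at 0.0, while MIOpen takes alpha, beta,
// and power from the parameters. A sketch, assuming a populated
// ActivationParam named activation_param:
//
//   ActivationDesc<float> act;
//   act.Set(activation_param);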
| template <typename T> |
| class ActivationDesc : public Descriptor { |
| private: |
| #ifdef NVIDIA_CUDNN |
| cudnnActivationDescriptor_t activation_desc_; |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenActivationDescriptor_t activation_desc_; |
| #endif |
| public: |
| ActivationDesc() |
| : Descriptor() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnCreateActivationDescriptor(&activation_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenCreateActivationDescriptor(&activation_desc_)); |
| #endif |
| } |
| |
| ~ActivationDesc() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnDestroyActivationDescriptor(activation_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenDestroyActivationDescriptor(activation_desc_)); |
| #endif |
| } |
| |
| void Set(const ActivationParam ¶m) { |
| if (!set_) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnSetActivationDescriptor(activation_desc_, |
| param.mode_, |
| CUDNN_PROPAGATE_NAN, |
                                           0.0));
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenSetActivationDescriptor(activation_desc_, |
| param.mode_, |
| param.alpha_, |
| param.beta_, |
| param.power_)); |
| #endif |
| } |
| |
| set_ = true; |
| } |
| |
| #ifdef NVIDIA_CUDNN |
| cudnnActivationDescriptor_t Get() const { |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenActivationDescriptor_t Get() const { |
| #endif |
| if (set_) |
| return activation_desc_; |
| return nullptr; |
| } |
| |
| }; |
| |
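// BypassDesc models a pass-through layer: on cuDNN it merely records
// the data dimensions, while on MIOpen it is realized as a PASTHRU
// activation. A sketch, assuming a populated DataDim named dim:
//
//   BypassDesc<float> bypass;
//   bypass.Set(dim);   // shapes flow through unchanged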
| template <typename T> |
| class BypassDesc : public Descriptor { |
| private: |
| #ifdef NVIDIA_CUDNN |
| DataDim dim_; |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenActivationDescriptor_t activation_desc_; |
| #endif |
| public: |
| BypassDesc() |
| : Descriptor() { |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenCreateActivationDescriptor(&activation_desc_)); |
| #endif |
| } |
| |
| ~BypassDesc() { |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenDestroyActivationDescriptor(activation_desc_)); |
| #endif |
| } |
| |
| void Set(DataDim dim) { |
| if (!set_) { |
| #ifdef NVIDIA_CUDNN |
| dim_.n_ = dim.n_; |
| dim_.c_ = dim.c_; |
| dim_.h_ = dim.h_; |
| dim_.w_ = dim.w_; |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenSetActivationDescriptor(activation_desc_, |
| miopenActivationPASTHRU, |
| 0, 0, 0)); |
| #endif |
| } |
| |
| set_ = true; |
| } |
| |
| #ifdef NVIDIA_CUDNN |
| DataDim Get() const { |
| return dim_; |
| } |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenActivationDescriptor_t Get() const { |
| if (set_) |
| return activation_desc_; |
| return nullptr; |
| } |
| #endif |
| }; |
| |
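// DropoutDesc is cuDNN-only; the AMD_MIOPEN branches are left empty on
// purpose. The RNG states buffer must be sized and allocated before
// Set(). A sketch, where states is an assumed device allocation of
// state_size bytes:
//
//   DropoutDesc<float> dropout;
//   size_t state_size = 0;
//   dropout.SetStatesSize(handle, mode, idx, &state_size);
//   // ... allocate state_size bytes on the device as states ...
//   dropout.Set(handle, mode, idx, dropout_param, states, state_size);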
| template <typename T> |
| class DropoutDesc : public Descriptor { |
| private: |
| #ifdef NVIDIA_CUDNN |
| cudnnDropoutDescriptor_t dropout_desc_; |
| #endif |
| #ifdef AMD_MIOPEN |
| #endif |
| public: |
| DropoutDesc() |
| : Descriptor() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnCreateDropoutDescriptor(&dropout_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| #endif |
| } |
| |
| ~DropoutDesc() { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnDestroyDropoutDescriptor(dropout_desc_)); |
| #endif |
| #ifdef AMD_MIOPEN |
| #endif |
| } |
| |
| void SetStatesSize(const Handle &handle, RunMode mode, int idx, |
| size_t *state_size) { |
| #ifdef NVIDIA_CUDNN |
    CUDNN_CALL(cudnnDropoutGetStatesSize(mode == COMPOSED ?
                                 handle.GetCudnn(idx) :
                                 handle.GetCudnn(),
                                 state_size));
| #endif |
| #ifdef AMD_MIOPEN |
| #endif |
| } |
| |
| void SetReserveSpaceSize(DataTensor<T> &bottom_desc, |
| size_t *reserve_space_size) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnDropoutGetReserveSpaceSize(bottom_desc.Get(), |
| reserve_space_size)); |
| #endif |
| #ifdef AMD_MIOPEN |
| #endif |
| } |
| |
| void Set(const Handle &handle, RunMode mode, int idx, |
| const DropoutParam &dropout_param, |
| void *states, size_t state_size) { |
| if (!set_) { |
| #ifdef NVIDIA_CUDNN |
| if (state_size > 0) |
| CUDNN_CALL(cudnnSetDropoutDescriptor(dropout_desc_, |
| mode == COMPOSED ? |
| handle.GetCudnn(idx): |
| handle.GetCudnn(), |
| dropout_param.dropout_p_, |
| states, |
| state_size, |
| dropout_param.random_seed_)); |
| else |
| LOG(FATAL) << "The size is ZERO"; |
| #endif |
| #ifdef AMD_MIOPEN |
| #endif |
| } |
| |
| set_ = true; |
| } |
| |
| #ifdef NVIDIA_CUDNN |
| cudnnDropoutDescriptor_t Get() const { |
| return dropout_desc_; |
| } |
| #endif |
| #ifdef AMD_MIOPEN |
| #endif |
| }; |
| |
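// ConvAlgo selects and caches the convolution algorithms (forward,
// backward filter, backward data) and exposes their workspace sizes.
// A rough forward-path sketch, assuming an NVIDIA_CUDNN build with
// descriptors bottom, conv, and top already configured:
//
//   ConvAlgo<float> algo;
//   algo.SetFwdAlgo(handle, mode, idx, bottom, conv, top,
//                   CUDNN_CONVOLUTION_FWD_PREFER_FASTEST);
//   size_t ws = 0;
//   algo.GetFwdWorkspaceSize(handle, mode, idx, bottom, top, conv, &ws);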
| template <typename T> |
| class ConvAlgo { |
| #ifdef NVIDIA_CUDNN |
| private: |
| cudnnConvolutionFwdAlgo_t fwd_algo_; |
| cudnnConvolutionBwdFilterAlgo_t bwd_filter_algo_; |
| cudnnConvolutionBwdDataAlgo_t bwd_data_algo_; |
| std::string bwd_filter_algo_par; |
| |
| public: |
| ConvAlgo() |
| : fwd_algo_(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM), |
| bwd_filter_algo_(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0), |
| bwd_data_algo_(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0), |
| bwd_filter_algo_par("") {} |

  // Same value as GetBwdDataAlgo(); kept for existing callers.
  cudnnConvolutionBwdDataAlgo_t getDataAlgo() {
    return bwd_data_algo_;
  }

| void SetFwdAlgo(std::string algo) { |
| if (!algo.compare("fft")) { |
| fwd_algo_ = CUDNN_CONVOLUTION_FWD_ALGO_FFT; |
| } else if (!algo.compare("winograd")) { |
| fwd_algo_ = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD; |
| } |
| } |
| void SetFwdAlgo(cudnnConvolutionFwdAlgo_t fwd_algo) { |
| fwd_algo_ = fwd_algo; |
| } |
| void SetFwdAlgo(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| const DataTensor<T> &top_desc, |
| cudnnConvolutionFwdPreference_t pref) { |
| CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| bottom_desc.Get(), |
| conv_desc.GetFilter(), |
| conv_desc.GetConv(), |
| top_desc.Get(), |
| pref, |
| -1, |
| &fwd_algo_)); |
| } |
| void FindFwdAlgo(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| const DataTensor<T> &top_desc) { |
    // Stack storage for the query results; cuDNN writes into both.
    cudnnConvolutionFwdAlgoPerf_t perf_results;
    int returned_algo_count = 0;
    CUDNN_CALL(cudnnFindConvolutionForwardAlgorithm(
              mode == COMPOSED ?
              handle.GetCudnn(idx) : handle.GetCudnn(),
              bottom_desc.Get(),
              conv_desc.GetFilter(),
              conv_desc.GetConv(),
              top_desc.Get(),
              1, &returned_algo_count,
              &perf_results));
    if (returned_algo_count > 0) {
      fwd_algo_ = perf_results.algo;
| } else { |
| fwd_algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; |
| } |
| } |
| void SetBwdFilterAlgo(cudnnConvolutionBwdFilterAlgo_t bwd_filter_algo) { |
| bwd_filter_algo_ = bwd_filter_algo; |
| } |
  void SetBwdFilterAlgo(std::string algo) {
    if (algo.empty()) {
      return;
    }
    if (!algo.compare("fft")) {
      bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT;
    } else if (!algo.compare("winograd")) {
      bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD;
    } else if (std::isdigit(static_cast<unsigned char>(algo[0]))) {
      // Numeric IDs follow the cuDNN enum ordering. stoi is only reached
      // for strings that start with a digit, so it cannot throw here.
      switch (stoi(algo)) {
        case 0: bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0; break;
        case 1: bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1; break;
        case 2: bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT; break;
        case 3: bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3; break;
        case 4: bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD; break;
        case 5: bwd_filter_algo_ =
                CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED; break;
        case 6: bwd_filter_algo_ =
                CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING; break;
        default: LOG(WARNING) << "Unknown bwd filter algo ID: " << algo;
      }
    } else {
      // Any other non-empty string is stored as a parameter for the
      // caller to interpret.
      LOG(INFO) << "Using algo " << algo;
      bwd_filter_algo_par = algo;
    }
    LOG(INFO) << "Set Bwd Filter Algo to " << bwd_filter_algo_
              << " with " << algo;
  }
| void SetBwdFilterAlgo(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const DataTensor<T> &top_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| cudnnConvolutionBwdFilterPreference_t pref) { |
| CUDNN_CALL(cudnnGetConvolutionBackwardFilterAlgorithm( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| bottom_desc.Get(), |
| top_desc.Get(), |
| conv_desc.GetConv(), |
| conv_desc.GetFilter(), |
| pref, |
| -1, |
| &bwd_filter_algo_)); |
| } |
| void FindBwdFilterAlgo(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| const DataTensor<T> &top_desc) { |
    // Query up to three candidates; cuDNN writes into the array and
    // count below, which must be valid storage.
    cudnnConvolutionBwdFilterAlgoPerf_t perf_results[3];
    int returned_algo_count = 0;
    CUDNN_CALL(cudnnFindConvolutionBackwardFilterAlgorithm(
              mode == COMPOSED ?
              handle.GetCudnn(idx) : handle.GetCudnn(),
              bottom_desc.Get(),
              top_desc.Get(),
              conv_desc.GetConv(),
              conv_desc.GetFilter(),
              3, &returned_algo_count,
              perf_results));
    if (returned_algo_count > 0) {
      bwd_filter_algo_ = perf_results[0].algo;
      LOG(INFO) << "cuDNN returned " << returned_algo_count
                << " bwd filter algos; fastest: " << bwd_filter_algo_;
    } else {
      bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
    }
| } |
| |
  void FindBwdFilterAlgoEx(const Handle &handle, RunMode mode, int idx,
                        const DataTensor<T> &bottom_desc,
                        const ConvolutionDesc<T> &conv_desc,
                        const DataTensor<T> &top_desc,
                        const void *x,
                        const void *dy,
                        void *dw,
                        void *workspace,
                        size_t workspace_size) {
    // The Ex variant benchmarks with real buffers: the input tensor x,
    // the output gradient dy, and the filter gradient dw.
    cudnnConvolutionBwdFilterAlgoPerf_t perf_results;
    int returned_algo_count = 0;
    CUDNN_CALL(cudnnFindConvolutionBackwardFilterAlgorithmEx(
              mode == COMPOSED ?
              handle.GetCudnn(idx) : handle.GetCudnn(),
              bottom_desc.Get(), x,
              top_desc.Get(), dy,
              conv_desc.GetConv(),
              conv_desc.GetFilter(), dw,
              1, &returned_algo_count,
              &perf_results,
              workspace, workspace_size));
    if (returned_algo_count > 0) {
      bwd_filter_algo_ = perf_results.algo;
    } else {
      bwd_filter_algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
    }
  }

| void SetBwdDataAlgo(cudnnConvolutionBwdDataAlgo_t bwd_data_algo) { |
| bwd_data_algo_ = bwd_data_algo; |
| } |
| void SetBwdDataAlgo(std::string algo) { |
| if (algo.empty()) { |
| return; |
| } |
| if (!algo.compare("fft")) { |
| bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT; |
| } else if (!algo.compare("winograd")) { |
| bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD; |
| } else if (!algo.compare("0")) { |
| bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; |
| } else if (!algo.compare("1")) { |
| bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1; |
| } else if (!algo.compare("winograd_nonfused")) { |
| bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED; |
| } else if (!algo.compare("fft_tiling")) { |
| bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING; |
| } |
| } |
| void SetBwdDataAlgo(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const DataTensor<T> &top_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| cudnnConvolutionBwdDataPreference_t pref) { |
| CUDNN_CALL(cudnnGetConvolutionBackwardDataAlgorithm( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| conv_desc.GetFilter(), |
| top_desc.Get(), |
| conv_desc.GetConv(), |
| bottom_desc.Get(), |
| pref, |
| -1, |
| &bwd_data_algo_)); |
| } |
| void FindBwdDataAlgo(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| const DataTensor<T> &top_desc) { |
    // Stack storage for the query results; cuDNN writes into both.
    cudnnConvolutionBwdDataAlgoPerf_t perf_results;
    int returned_algo_count = 0;
    CUDNN_CALL(cudnnFindConvolutionBackwardDataAlgorithm(
              mode == COMPOSED ?
              handle.GetCudnn(idx) : handle.GetCudnn(),
              conv_desc.GetFilter(),
              top_desc.Get(),
              conv_desc.GetConv(),
              bottom_desc.Get(),
              1, &returned_algo_count,
              &perf_results));
    if (returned_algo_count > 0) {
      bwd_data_algo_ = perf_results.algo;
    } else {
      bwd_data_algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
    }
| } |
| |
| cudnnConvolutionFwdAlgo_t GetFwdAlgo() const { |
| return fwd_algo_; |
| } |
| void GetFwdWorkspaceSize(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const DataTensor<T> &top_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| size_t *workspace_size) { |
| CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| bottom_desc.Get(), |
| conv_desc.GetFilter(), |
| conv_desc.GetConv(), |
| top_desc.Get(), |
| fwd_algo_, |
| workspace_size)); |
| } |
| |
| std::string GetBwdFilterAlgoParameter() { |
| return bwd_filter_algo_par; |
| } |
| |
| |
| cudnnConvolutionBwdFilterAlgo_t GetBwdFilterAlgo() const { |
| return bwd_filter_algo_; |
| } |
| void GetBwdFilterWorkspaceSize(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const DataTensor<T> &top_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| size_t *workspace_size) { |
| CUDNN_CALL(cudnnGetConvolutionBackwardFilterWorkspaceSize( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| bottom_desc.Get(), |
| top_desc.Get(), |
| conv_desc.GetConv(), |
| conv_desc.GetFilter(), |
| bwd_filter_algo_, |
| workspace_size)); |
| } |
| cudnnConvolutionBwdDataAlgo_t GetBwdDataAlgo() const { |
| return bwd_data_algo_; |
| } |
| void GetBwdDataWorkspaceSize(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const DataTensor<T> &top_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| size_t *workspace_size) { |
| CUDNN_CALL(cudnnGetConvolutionBackwardDataWorkspaceSize( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| conv_desc.GetFilter(), |
| top_desc.Get(), |
| conv_desc.GetConv(), |
| bottom_desc.Get(), |
| bwd_data_algo_, |
| workspace_size)); |
| } |
| |
  // Cache of the best backward convolution algorithm per data shape
  std::map<std::string, int> Algo4DataShape;

  // Logs the shapes of the tensors involved in a backward filter
  // convolution; useful when correlating algorithm choices with shapes.
  void checkAlgo4DataShape(const cudnnTensorDescriptor_t x,
                           const cudnnTensorDescriptor_t dy,
                           const cudnnFilterDescriptor_t dw) {
    int n, c, h, w, k, nStride, cStride, hStride, wStride;
    cudnnDataType_t data_type;
    cudnnTensorFormat_t format;
    CUDNN_CALL(cudnnGetTensor4dDescriptor(x,
              &data_type,
              &n, &c, &h, &w,
              &nStride, &cStride, &hStride, &wStride));
    LOG(INFO) << "x shape: " << n << " " << c << " " << h << "x" << w;
    CUDNN_CALL(cudnnGetTensor4dDescriptor(dy,
              &data_type,
              &n, &c, &h, &w,
              &nStride, &cStride, &hStride, &wStride));
    LOG(INFO) << "dy shape: " << n << " " << c << " " << h << "x" << w;
    CUDNN_CALL(cudnnGetFilter4dDescriptor(dw,
              &data_type, &format,
              &k, &c, &h, &w));
    LOG(INFO) << "dw shape: " << k << " " << c << " " << h << "x" << w;
  }
| |
| |
| #endif |
| #ifdef AMD_MIOPEN |
| private: |
| miopenConvFwdAlgorithm_t fwd_algo_; |
| miopenConvBwdWeightsAlgorithm_t bwd_filter_algo_; |
| miopenConvBwdDataAlgorithm_t bwd_data_algo_; |
| |
| public: |
| ConvAlgo() |
| : fwd_algo_(miopenConvolutionFwdAlgoGEMM), |
| bwd_filter_algo_(miopenConvolutionBwdWeightsAlgoGEMM), |
| bwd_data_algo_(miopenConvolutionBwdDataAlgoGEMM) {} |
| |
| void SetFwdAlgo(miopenConvFwdAlgorithm_t fwd_algo) { |
| fwd_algo_ = fwd_algo; |
| } |
  void SetFwdAlgo(std::string algo) {
    // Selecting a forward algorithm by name is not supported on MIOpen.
  }
| void FindFwdAlgo(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| const DataTensor<T> &top_desc, |
| const void *x, |
| const void *w, |
| void *y, |
| void *workspace, |
| size_t workspace_size) { |
    // Default to GEMM; the miopenFind* query below executes real kernels
    // and is compiled only for NOSIM builds.
    fwd_algo_ = miopenConvolutionFwdAlgoGEMM;
#ifdef NOSIM
| miopenConvAlgoPerf_t perf_results; |
| int returned_algo_count; |
| MIOPEN_CALL(miopenFindConvolutionForwardAlgorithm( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| bottom_desc.Get(), x, |
| conv_desc.GetFilter(), w, |
| conv_desc.GetConv(), |
| top_desc.Get(), y, |
| 1, &returned_algo_count, |
| &perf_results, workspace, workspace_size, false)); |
| if (returned_algo_count > 0) { |
| fwd_algo_ = perf_results.fwd_algo; |
| } |
| #endif |
| } |
| void SetBwdFilterAlgo(miopenConvBwdWeightsAlgorithm_t bwd_filter_algo) { |
| bwd_filter_algo_ = bwd_filter_algo; |
| } |
  void SetBwdFilterAlgo(std::string algo) {
    // Selecting a backward weights algorithm by name is not supported
    // on MIOpen.
  }
| void FindBwdFilterAlgo(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| const DataTensor<T> &top_desc, |
| const void *x, |
| const void *dy, |
| void *dw, |
| void *workspace, |
| size_t workspace_size) { |
| bwd_filter_algo_ = miopenConvolutionBwdWeightsAlgoGEMM; |
| #ifdef NOSIM |
| miopenConvAlgoPerf_t perf_results; |
| int returned_algo_count; |
| MIOPEN_CALL(miopenFindConvolutionBackwardWeightsAlgorithm( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| top_desc.Get(), dy, |
| bottom_desc.Get(), x, |
| conv_desc.GetConv(), |
| conv_desc.GetFilter(), dw, |
| 1, &returned_algo_count, |
| &perf_results, workspace, workspace_size, false)); |
| if (returned_algo_count > 0) { |
| bwd_filter_algo_ = perf_results.bwd_weights_algo; |
| } |
| #endif |
| } |
| void SetBwdDataAlgo(miopenConvBwdDataAlgorithm_t bwd_data_algo) { |
| bwd_data_algo_ = bwd_data_algo; |
| } |
  void SetBwdDataAlgo(std::string algo) {
    // Selecting a backward data algorithm by name is not supported on
    // MIOpen.
  }
| void FindBwdDataAlgo(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| const DataTensor<T> &top_desc, |
| const void *dy, |
| const void *w, |
| void *dx, |
| void *workspace, |
| size_t workspace_size) { |
| bwd_data_algo_ = miopenConvolutionBwdDataAlgoGEMM; |
| #ifdef NOSIM |
| miopenConvAlgoPerf_t perf_results; |
| int returned_algo_count; |
| MIOPEN_CALL(miopenFindConvolutionBackwardDataAlgorithm( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| top_desc.Get(), dy, |
| conv_desc.GetFilter(), w, |
| conv_desc.GetConv(), |
| bottom_desc.Get(), dx, |
| 1, &returned_algo_count, |
| &perf_results, workspace, workspace_size, false)); |
| if (returned_algo_count > 0) { |
| bwd_data_algo_ = perf_results.bwd_data_algo; |
| } |
| #endif |
| } |
| |
| miopenConvFwdAlgorithm_t GetFwdAlgo() const { |
| return fwd_algo_; |
| } |
| void GetFwdWorkspaceSize(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const DataTensor<T> &top_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| size_t *workspace_size) { |
| MIOPEN_CALL(miopenConvolutionForwardGetWorkSpaceSize( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| conv_desc.GetFilter(), |
| bottom_desc.Get(), |
| conv_desc.GetConv(), |
| top_desc.Get(), |
| workspace_size)); |
| } |
| miopenConvBwdWeightsAlgorithm_t GetBwdFilterAlgo() const { |
| return bwd_filter_algo_; |
| } |
| void GetBwdFilterWorkspaceSize(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const DataTensor<T> &top_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| size_t *workspace_size) { |
| MIOPEN_CALL(miopenConvolutionBackwardWeightsGetWorkSpaceSize( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| top_desc.Get(), |
| bottom_desc.Get(), |
| conv_desc.GetConv(), |
| conv_desc.GetFilter(), |
| workspace_size)); |
| } |
| miopenConvBwdDataAlgorithm_t GetBwdDataAlgo() const { |
| return bwd_data_algo_; |
| } |
| void GetBwdDataWorkspaceSize(const Handle &handle, RunMode mode, int idx, |
| const DataTensor<T> &bottom_desc, |
| const DataTensor<T> &top_desc, |
| const ConvolutionDesc<T> &conv_desc, |
| size_t *workspace_size) { |
| MIOPEN_CALL(miopenConvolutionBackwardDataGetWorkSpaceSize( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| top_desc.Get(), |
| conv_desc.GetFilter(), |
| conv_desc.GetConv(), |
| bottom_desc.Get(), |
| workspace_size)); |
| } |
| #endif |
| }; |
| |
| // Profiling marker |
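// A typical call pattern brackets one layer invocation, so the timer
// entry "<layer>_<idx>" captures exactly that layer's kernels:
//
//   ProfilerStart(handle, mode, idx, &timer, "conv");
//   // ... launch the layer's forward/backward kernels ...
//   ProfilerStop(handle, mode, idx, &timer, "conv");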
| inline void ProfilerStart(const Handle &handle, RunMode mode, int idx, |
| Timer *timer, const std::string &layer) { |
| #ifdef NVIDIA_CUDNN |
| cudaProfilerStart(); |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenEnableProfiling(mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), true); |
| #endif |
| timer->Start(layer + "_" + std::to_string(idx)); |
| } |
| inline void ProfilerStop(const Handle &handle, RunMode mode, int idx, |
| Timer *timer, const std::string &layer) { |
| #ifdef NVIDIA_CUDNN |
| cudaProfilerStop(); |
| CUDA_CALL(cudaDeviceSynchronize()); |
| #endif |
| #ifdef AMD_MIOPEN |
| miopenEnableProfiling(mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), false); |
| HIP_CALL(hipDeviceSynchronize()); |
| #endif |
| timer->Stop(layer + "_" + std::to_string(idx)); |
| } |
| |
| } // namespace dnnmark |
| |
| #endif // CORE_INCLUDE_DNN_UTILITY_H_ |