| // The MIT License (MIT) |
| // |
| // Copyright (c) 2016 Northeastern University |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining a copy |
| // of this software and associated documentation files (the "Software"), to deal |
| // in the Software without restriction, including without limitation the rights |
| // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| // copies of the Software, and to permit persons to whom the Software is |
| // furnished to do so, subject to the following conditions: |
| // |
| // The above copyright notice and this permission notice shall be included in |
| // all copies or substantial portions of the Software. |
| // |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| // SOFTWARE. |
| |
| #ifndef CORE_INCLUDE_DNN_WRAPPER_H_ |
| #define CORE_INCLUDE_DNN_WRAPPER_H_ |
| |
| #include <iostream> |
| #include <string> |
| #include "common.h" |
| #include "dnn_utility.h" |
| #include "data_manager.h" |
| #include "timer.h" |
| |
| namespace dnnmark { |
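
// Each wrapper below dispatches to the DNN backend selected at compile time:
// cuDNN when NVIDIA_CUDNN is defined, MIOpen when AMD_MIOPEN is defined.
// In COMPOSED mode a call uses the handle at index `idx` (one handle per
// layer); otherwise the shared default handle is used. The `alpha`/`beta`
// arguments point to the usual scaling factors applied by the libraries
// (roughly y = alpha * op(x) + beta * y).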
| |
| // |
| // Convolution forward/backward functions |
| // |
| |
| template <typename T> |
| inline void dnnmarkConvolutionForward(const Handle &handle, |
| RunMode mode, int idx, Timer *timer, |
| const void *alpha, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| const ConvolutionDesc<T> &conv_desc, |
| const void *w, |
| ConvAlgo<T> *conv_algo, |
| void *workspace, |
| size_t workspace_in_bytes, |
| const void *beta, |
| const DataTensor<T> &top_desc, |
| void *y) { |
| #ifdef NVIDIA_CUDNN |
| ProfilerStart(handle, mode, idx, timer, "ConvFwd"); |
| CUDNN_CALL(cudnnConvolutionForward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| alpha, |
| bottom_desc.Get(), x, |
| conv_desc.GetFilter(), w, |
| conv_desc.GetConv(), |
| conv_algo->GetFwdAlgo(), workspace, workspace_in_bytes, |
| beta, |
| top_desc.Get(), y)); |
| ProfilerStop(handle, mode, idx, timer, "ConvFwd"); |
| #endif |
#ifdef AMD_MIOPEN
  conv_algo->FindFwdAlgo(handle, mode, idx,
| bottom_desc, |
| conv_desc, |
| top_desc, |
| x, w, y, |
| workspace, workspace_in_bytes); |
| ProfilerStart(handle, mode, idx, timer, "ConvFwd"); |
| MIOPEN_CALL(miopenConvolutionForward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| alpha, |
| bottom_desc.Get(), x, |
| conv_desc.GetFilter(), w, |
| conv_desc.GetConv(), |
| conv_algo->GetFwdAlgo(), |
| beta, |
| top_desc.Get(), y, |
| workspace, workspace_in_bytes)); |
| ProfilerStop(handle, mode, idx, timer, "ConvFwd"); |
#endif
}
| |
| template <typename T> |
| inline void dnnmarkConvolutionBackwardData(const Handle &handle, |
| RunMode mode, int idx, Timer *timer, |
| const void *alpha, |
| const DataTensor<T> &top_desc, |
| const void *dy, |
| const ConvolutionDesc<T> &conv_desc, |
| const void *w, |
| ConvAlgo<T> *conv_algo, |
| void *workspace, |
| size_t workspace_in_bytes, |
| const void *beta, |
| const DataTensor<T> &bottom_desc, |
| void *dx) { |
| #ifdef NVIDIA_CUDNN |
| ProfilerStart(handle, mode, idx, timer, "ConvBwdData"); |
| CUDNN_CALL(cudnnConvolutionBackwardData( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| alpha, |
| conv_desc.GetFilter(), w, |
| top_desc.Get(), dy, |
| conv_desc.GetConv(), |
| conv_algo->GetBwdDataAlgo(), |
| workspace, workspace_in_bytes, |
| beta, |
| bottom_desc.Get(), dx)); |
| ProfilerStop(handle, mode, idx, timer, "ConvBwdData"); |
| #endif |
| #ifdef AMD_MIOPEN |
| conv_algo->FindBwdDataAlgo(handle, mode, idx, |
| bottom_desc, |
| conv_desc, |
| top_desc, |
| dy, w, dx, |
| workspace, workspace_in_bytes); |
| ProfilerStart(handle, mode, idx, timer, "ConvBwdData"); |
| MIOPEN_CALL(miopenConvolutionBackwardData( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| alpha, |
| top_desc.Get(), dy, |
| conv_desc.GetFilter(), w, |
| conv_desc.GetConv(), |
| conv_algo->GetBwdDataAlgo(), |
| beta, |
| bottom_desc.Get(), dx, |
| workspace, workspace_in_bytes)); |
| ProfilerStop(handle, mode, idx, timer, "ConvBwdData"); |
| #endif |
| } |
| |
| template <typename T> |
| inline void dnnmarkConvolutionBackwardFilter(const Handle &handle, |
| RunMode mode, int idx, Timer *timer, |
| const void *alpha, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| const DataTensor<T> &top_desc, |
| const void *dy, |
| const ConvolutionDesc<T> &conv_desc, |
| ConvAlgo<T> *conv_algo, |
| void *workspace, |
| size_t workspace_in_bytes, |
| const void *beta, |
| void *dw) { |
| #ifdef NVIDIA_CUDNN |
  cudnnFilterDescriptor_t filter_t = conv_desc.GetFilter();
  ProfilerStart(handle, mode, idx, timer, "ConvBwdFilter");
  // Disabled exhaustive auto-tuning path: when the backward-filter algorithm
  // parameter is "autoex", search for the best algorithm for this data shape
  // before running the convolution.
  // std::string conv_algo_param = conv_algo->GetBwdFilterAlgoParameter();
  // if (conv_algo_param == "autoex") {
  //   conv_algo->checkAlgo4DataShape(bottom_desc.Get(), top_desc.Get(), filter_t);
  //   conv_algo->FindBwdFilterAlgoEx(handle, mode, idx,
  //                                  bottom_desc, conv_desc, top_desc,
  //                                  x, dy, dw,
  //                                  workspace, workspace_in_bytes);
  //   LOG(INFO) << "cuDNN AUTO selected conv. bwd filter alg. to "
  //             << conv_algo->GetBwdFilterAlgo();
  // }
| CUDNN_CALL(cudnnConvolutionBackwardFilter( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| alpha, |
| bottom_desc.Get(), x, |
| top_desc.Get(), dy, |
| conv_desc.GetConv(), |
| conv_algo->GetBwdFilterAlgo(), |
| workspace, workspace_in_bytes, |
| beta, |
| filter_t, dw)); |
| ProfilerStop(handle, mode, idx, timer, "ConvBwdFilter"); |
| #endif |
| #ifdef AMD_MIOPEN |
| conv_algo->FindBwdFilterAlgo(handle, mode, idx, |
| bottom_desc, |
| conv_desc, |
| top_desc, |
| x, dy, dw, |
| workspace, workspace_in_bytes); |
| ProfilerStart(handle, mode, idx, timer, "ConvBwdFilter"); |
| MIOPEN_CALL(miopenConvolutionBackwardWeights( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| alpha, |
| top_desc.Get(), dy, |
| bottom_desc.Get(), x, |
| conv_desc.GetConv(), |
| conv_algo->GetBwdFilterAlgo(), |
| beta, |
| conv_desc.GetFilter(), dw, |
| workspace, workspace_in_bytes)); |
| ProfilerStop(handle, mode, idx, timer, "ConvBwdFilter"); |
| #endif |
| } |
| |
| // |
| // Pooling forward/backward functions |
| // |
| |
| template <typename T> |
| inline void dnnmarkPoolingForward(const Handle &handle, |
| RunMode mode, int idx, |
| const PoolingDesc<T> &pooling_desc, |
| const void *alpha, |
| const DataTensor<T> &x_desc, |
| const void *x, |
| const void *beta, |
| const DataTensor<T> &y_desc, |
| void * y, |
| Data<T> *workspace, |
| size_t workspace_in_bytes) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnPoolingForward( |
| mode == COMPOSED ? handle.GetCudnn(idx) : handle.GetCudnn(), |
| pooling_desc.Get(), |
| alpha, |
| x_desc.Get(), x, |
| beta, |
| y_desc.Get(), y)); |
| #endif |
| #ifdef AMD_MIOPEN |
| LOG(INFO) << "Before MIOpen call"; |
| MIOPEN_CALL(miopenPoolingForward( |
              mode == COMPOSED ? handle.GetMIOpen(idx) : handle.GetMIOpen(),
| pooling_desc.Get(), |
| alpha, |
| x_desc.Get(), x, |
| beta, |
| y_desc.Get(), y, |
| false, |
| workspace->Get(), workspace_in_bytes)); |
| LOG(INFO) << "Done with MIOpen call"; |
| #endif |
| } |
| |
| template <typename T> |
| inline void dnnmarkPoolingBackward(const Handle &handle, |
| RunMode mode, int idx, |
| const PoolingDesc<T> &pooling_desc, |
| const void *alpha, |
| const DataTensor<T> &y_desc, |
| const void *y, |
| const DataTensor<T> &dy_desc, |
| const void *dy, |
| const DataTensor<T> &x_desc, |
| const void *x, |
| const void *beta, |
| const DataTensor<T> &dx_desc, |
| void *dx, |
| Data<T> *workspace) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnPoolingBackward( |
| mode == COMPOSED ? handle.GetCudnn(idx) : handle.GetCudnn(), |
| pooling_desc.Get(), |
| alpha, |
| y_desc.Get(), y, |
| dy_desc.Get(), dy, |
| x_desc.Get(), x, |
| beta, |
| dx_desc.Get(), dx)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenPoolingBackward( |
              mode == COMPOSED ? handle.GetMIOpen(idx) : handle.GetMIOpen(),
| pooling_desc.Get(), |
| alpha, |
| y_desc.Get(), y, |
| dy_desc.Get(), dy, |
| x_desc.Get(), x, |
| beta, |
| dx_desc.Get(), dx, |
| workspace->Get())); |
| #endif |
| } |
| |
| // |
| // Activation forward/backward functions |
| // |
| |
| template <typename T> |
| inline void dnnmarkActivationForward(const Handle &handle, |
| RunMode mode, int idx, |
| const ActivationDesc<T> &activation_desc, |
| const void *alpha, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| const void *beta, |
| const DataTensor<T> &top_desc, |
| void *y) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnActivationForward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| activation_desc.Get(), |
| alpha, |
| bottom_desc.Get(), x, |
| beta, |
| top_desc.Get(), y)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenActivationForward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| activation_desc.Get(), |
| alpha, |
| bottom_desc.Get(), x, |
| beta, |
| top_desc.Get(), y)); |
| #endif |
| } |
| |
| template <typename T> |
| inline void dnnmarkActivationBackward(const Handle &handle, |
| RunMode mode, int idx, |
| const ActivationDesc<T> &activation_desc, |
| const void *alpha, |
| const DataTensor<T> &top_desc, |
| const void *y, |
| const void *dy, |
| const void *beta, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| void *dx) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnActivationBackward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| activation_desc.Get(), |
| alpha, |
| top_desc.Get(), y, |
| top_desc.Get(), dy, |
| bottom_desc.Get(), x, |
| beta, |
| bottom_desc.Get(), dx)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenActivationBackward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| activation_desc.Get(), |
| alpha, |
| top_desc.Get(), y, |
| top_desc.Get(), dy, |
| bottom_desc.Get(), x, |
| beta, |
| bottom_desc.Get(), dx)); |
| #endif |
| } |
| |
| // |
| // LRN forward/backward functions |
| // |
| |
| template <typename T> |
| inline void dnnmarkLRNForward(const Handle &handle, |
| RunMode mode, int idx, |
| const LRNDesc<T> &lrn_desc, |
| const LRNParam &lrn_param, |
| const void *alpha, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| const void *beta, |
| const DataTensor<T> &top_desc, |
| void *y, |
| Data<T> *workspace) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnLRNCrossChannelForward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| lrn_desc.Get(), |
| lrn_param.mode_, |
| alpha, |
| bottom_desc.Get(), x, |
| beta, |
| top_desc.Get(), y)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenLRNForward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| lrn_desc.Get(), |
| alpha, |
| bottom_desc.Get(), x, |
| beta, |
| top_desc.Get(), y, |
| true, workspace->Get())); |
| #endif |
| } |
| |
| template <typename T> |
| inline void dnnmarkLRNBackward(const Handle &handle, |
| RunMode mode, int idx, |
| const LRNDesc<T> &lrn_desc, |
| const LRNParam &lrn_param, |
| const void *alpha, |
| const DataTensor<T> &top_desc, |
| const void *y, |
| const void *dy, |
| const void *beta, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| void *dx, |
| Data<T> *workspace) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnLRNCrossChannelBackward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| lrn_desc.Get(), |
| lrn_param.mode_, |
| alpha, |
| top_desc.Get(), y, |
| top_desc.Get(), dy, |
| bottom_desc.Get(), x, |
| beta, |
| bottom_desc.Get(), dx)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenLRNBackward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| lrn_desc.Get(), |
| alpha, |
| top_desc.Get(), y, |
| top_desc.Get(), dy, |
| bottom_desc.Get(), x, |
| beta, |
| bottom_desc.Get(), dx, |
| workspace->Get())); |
| #endif |
| } |
| |
| // |
| // Fully Connected forward/backward functions |
| // |
| |
| // |
| // Softmax forward/backward functions |
| // |
| |
| template <typename T> |
| inline void dnnmarkSoftmaxForward(const Handle &handle, |
| RunMode mode, int idx, |
| const SoftmaxParam &softmax_param, |
| const void *alpha, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| const void *beta, |
| const DataTensor<T> &top_desc, |
| void *y) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnSoftmaxForward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| softmax_param.algo_, |
| softmax_param.mode_, |
| alpha, |
| bottom_desc.Get(), x, |
| beta, |
| top_desc.Get(), y)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenSoftmaxForward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| alpha, |
| bottom_desc.Get(), x, |
| beta, |
| top_desc.Get(), y)); |
| #endif |
| } |
| |
| template <typename T> |
| inline void dnnmarkSoftmaxBackward(const Handle &handle, |
| RunMode mode, int idx, |
| const SoftmaxParam &softmax_param, |
| const void *alpha, |
| const DataTensor<T> &top_desc, |
| const void *y, |
| const void *dy, |
| const void *beta, |
| const DataTensor<T> &bottom_desc, |
| void *dx) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnSoftmaxBackward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| softmax_param.algo_, |
| softmax_param.mode_, |
| alpha, |
| top_desc.Get(), y, |
| top_desc.Get(), dy, |
| beta, |
| bottom_desc.Get(), dx)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenSoftmaxBackward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| alpha, |
| top_desc.Get(), y, |
| top_desc.Get(), dy, |
| beta, |
              bottom_desc.Get(), dx));
#endif
| } |
| |
| // |
| // Batch Normalization forward/backward functions |
| // |
| |
| template <typename T> |
| inline void dnnmarkBatchNormalizationForwardTraining( |
| const Handle &handle, |
| RunMode mode, int idx, |
| const BatchNormParam &bn_param, |
| void *alpha, |
| void *beta, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| const DataTensor<T> &top_desc, |
| void *y, |
| const DataTensor<T> &scale_bias_mean_var_desc, |
| void *bn_scale, |
| void *bn_bias, |
| double exp_avg_factor, |
| void *result_running_mean, |
| void *result_running_var, |
| double epsilon, |
| void *result_save_mean, |
| void *result_save_var) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnBatchNormalizationForwardTraining( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| bn_param.mode_, |
| alpha, |
| beta, |
| bottom_desc.Get(), x, |
| top_desc.Get(), y, |
| scale_bias_mean_var_desc.Get(), |
| bn_scale, bn_bias, |
| exp_avg_factor, |
| result_running_mean, result_running_var, |
| epsilon, |
| result_save_mean, result_save_var)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenBatchNormalizationForwardTraining( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| bn_param.mode_, |
| alpha, |
| beta, |
| bottom_desc.Get(), x, |
| top_desc.Get(), y, |
| scale_bias_mean_var_desc.Get(), |
| bn_scale, bn_bias, |
| exp_avg_factor, |
| result_running_mean, result_running_var, |
| epsilon, |
| result_save_mean, result_save_var)); |
| #endif |
| } |
| |
| template <typename T> |
| inline void dnnmarkBatchNormalizationBackward( |
| const Handle &handle, |
| RunMode mode, int idx, |
| const BatchNormParam &bn_param, |
| const void *alpha_data_diff, |
| const void *beta_data_diff, |
| const void *alpha_param_diff, |
| const void *beta_param_diff, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| void *dx, |
| const DataTensor<T> &top_desc, |
| const void *dy, |
| const DataTensor<T> &scale_bias_mean_var_desc, |
| const void *bn_scale, |
| void *result_bn_scale_diff, |
| void *result_bn_bias_diff, |
| double epsilon, |
| const void *saved_mean, |
| const void *saved_var) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnBatchNormalizationBackward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| bn_param.mode_, |
| alpha_data_diff, |
| beta_data_diff, |
| alpha_param_diff, |
| beta_param_diff, |
| bottom_desc.Get(), x, |
| top_desc.Get(), dy, |
| bottom_desc.Get(), dx, |
| scale_bias_mean_var_desc.Get(), |
| bn_scale, |
| result_bn_scale_diff, result_bn_bias_diff, |
| epsilon, |
| saved_mean, saved_var)); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenBatchNormalizationBackward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| bn_param.mode_, |
| alpha_data_diff, |
| beta_data_diff, |
| alpha_param_diff, |
| beta_param_diff, |
| bottom_desc.Get(), x, |
| top_desc.Get(), dy, |
| bottom_desc.Get(), dx, |
| scale_bias_mean_var_desc.Get(), |
| bn_scale, |
| result_bn_scale_diff, result_bn_bias_diff, |
| epsilon, |
| saved_mean, saved_var)); |
| #endif |
| } |
| |
| // |
| // Bypass layer |
| // |
| |
| template <typename T> |
| inline void dnnmarkBypassForward(const Handle &handle, |
| RunMode mode, int idx, |
| const BypassDesc<T> &bypass_desc, |
| const void *alpha, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| const void *beta, |
| const DataTensor<T> &top_desc, |
| void *y) { |
| #ifdef NVIDIA_CUDNN |
| CUDA_CALL(cudaMemcpy(y, |
| x, |
| sizeof(T) * bypass_desc.Get().n_ |
| * bypass_desc.Get().c_ |
| * bypass_desc.Get().h_ |
| * bypass_desc.Get().w_, |
| cudaMemcpyDeviceToDevice |
| )); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenActivationForward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| bypass_desc.Get(), |
| alpha, |
| bottom_desc.Get(), x, |
| beta, |
| top_desc.Get(), y)); |
| #endif |
| } |
| |
| template <typename T> |
| inline void dnnmarkBypassBackward(const Handle &handle, |
| RunMode mode, int idx, |
| const BypassDesc<T> &bypass_desc, |
| const void *alpha, |
| const DataTensor<T> &top_desc, |
| const void *y, |
| const void *dy, |
| const void *beta, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| void *dx) { |
| #ifdef NVIDIA_CUDNN |
| CUDA_CALL(cudaMemcpy(dx, |
| dy, |
| sizeof(T) * bypass_desc.Get().n_ |
| * bypass_desc.Get().c_ |
| * bypass_desc.Get().h_ |
| * bypass_desc.Get().w_, |
| cudaMemcpyDeviceToDevice |
| )); |
| #endif |
| #ifdef AMD_MIOPEN |
| MIOPEN_CALL(miopenActivationBackward( |
| mode == COMPOSED ? |
| handle.GetMIOpen(idx) : handle.GetMIOpen(), |
| bypass_desc.Get(), |
| alpha, |
| top_desc.Get(), y, |
| top_desc.Get(), dy, |
| bottom_desc.Get(), x, |
| beta, |
| bottom_desc.Get(), dx)); |
| #endif |
| } |
| |
| // |
| // Dropout layer |
| // |
| |
| template <typename T> |
| inline void dnnmarkDropoutForward(const Handle &handle, |
| RunMode mode, int idx, |
| const DropoutDesc<T> &dropout_desc, |
| const DataTensor<T> &bottom_desc, |
| const void *x, |
| const DataTensor<T> &top_desc, |
| void *y, |
| void *reserve_space, size_t reserve_space_size) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnDropoutForward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| dropout_desc.Get(), |
| bottom_desc.Get(), x, |
| top_desc.Get(), y, |
| reserve_space, |
| reserve_space_size |
| )); |
| #endif |
| #ifdef AMD_MIOPEN |
| #endif |
| } |
| |
| template <typename T> |
| inline void dnnmarkDropoutBackward(const Handle &handle, |
| RunMode mode, int idx, |
| const DropoutDesc<T> &dropout_desc, |
| const DataTensor<T> &top_desc, |
| const void *dy, |
| const DataTensor<T> &bottom_desc, |
| void *dx, |
| void *reserve_space, size_t reserve_space_size) { |
| #ifdef NVIDIA_CUDNN |
| CUDNN_CALL(cudnnDropoutBackward( |
| mode == COMPOSED ? |
| handle.GetCudnn(idx) : handle.GetCudnn(), |
| dropout_desc.Get(), |
| top_desc.Get(), dy, |
| bottom_desc.Get(), dx, |
| reserve_space, |
| reserve_space_size |
| )); |
| #endif |
| #ifdef AMD_MIOPEN |
| #endif |
| } |
| |
| } // namespace dnnmark |
| |
| #endif // CORE_INCLUDE_DNN_WRAPPER_H_ |