!{\src2tex{textfont=tt}}
!!****m* ABINIT/m_dfti
!! NAME
!! m_dfti
!!
!! FUNCTION
!!  This module provides wrappers for the MKL DFTI routines: in-place and out-of-place version.
!!
!! COPYRIGHT
!! Copyright (C) 2009-2012 ABINIT group (MG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!!
!! NOTES
!!  1) MPI parallelism is not supported 
!!  2) For better performance the FFT divisions should contain small factors  (/2, 3, 5, 7, 11, 13/)
!!     see http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_userguide_lnx/index.htm
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

MODULE m_dfti

 use defs_basis
 use m_profiling
 use m_errors
 use m_timer
 use m_xomp

#ifdef HAVE_FFT_MKL
 use MKL_DFTI

#ifdef HAVE_FC_ISO_C_BINDING
 use iso_c_binding
#else
#error "Compiler does not provide the module iso_c_binding, cannot use m_dfti.F90"
#endif

#endif

 use m_fstrings,  only : basename

 implicit none

 private

! Entry points for client code
 public :: dfti_fourdp         ! Driver for 3D FFT of lengths nx, ny, nz. Mainly used for densities or potentials.

! Low-level routines.
 public :: dfti_r2c_op         ! Real to complex transform (out-of-place version).
 public :: dfti_c2r_op         ! Complex to real transform (out-of-place version).
 public :: dfti_c2c_op         ! complex to complex transform (out-of-place version).
 public :: dfti_c2c_ip         ! complex to complex transform (in-place version).
 public :: dfti_many_dft_op    ! Driver routine for many out-of-place 3D complex-to-complex FFTs.
 public :: dfti_many_dft_ip    ! Driver routine for many in-place 3D complex-to-complex FFTs.
 public :: dfti_fftpad         ! Driver routines for zero-padded FFT of wavefunctions.
 public :: dfti_fftpad_cplx    ! Driver routines for zero-padded FFT of wavefunctions.

 !public :: dfti_fftpad_tr     ! Still under development.
!!***

!----------------------------------------------------------------------

!!****t* m_dfti/my_zpad_t
!! NAME
!! 
!! FUNCTION
!! 
!! SOURCE

 type,public :: my_zpad_t

   integer :: nlinex
   ! Total number of 1D transforms along x

   integer :: n_zplanes
   ! Number of z-planes intersecting the sphere.

   integer,pointer :: zplane(:,:) => null()
   ! zplane(3,n_zplanes)
   ! zplane(1,zpl) : mapping z-plane index -> FFT index_z
   ! zplane(2,zpl) : mapping z-plane index -> igb index in array gboud 

   integer,pointer :: linex2ifft_yz(:,:) => null()
   ! linex2ifft_yz(2,nlinex)
   ! mapping 1D-FFT -> (FFT_index_y, FFT index_z)

 end type my_zpad_t
!!***

!----------------------------------------------------------------------

CONTAINS  !===========================================================

!!****f* m_dfti/dfti_fourdp
!! NAME
!!  dfti_fourdp
!!
!! FUNCTION
!! Driver routine for 3D FFT of lengths nx, ny, nz. Mainly used for densities or potentials.
!! FFT Transform is out-of-place
!!
!! INPUTS
!! cplex=1 if fofr is real, 2 if fofr is complex
!! nx,ny,nz=Number of point along the three directions.
!! ldx,ldy,ldz=Leading dimensions of the array.
!! ndat = Number of FFTS
!! isign= +1 : fofg(G) => fofr(R); 
!!        -1 : fofr(R) => fofg(G)
!! fofg(2,ldx*ldy*ldz*ndat)=The array to be transformed.
!!
!! OUTPUT 
!! fofr(cplex,ldx*ldy*ldz*ndat)=The FFT of fofg
!!
!! PARENTS
!!      fourdp
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_fourdp(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofg,fofr)

 use defs_basis
 use m_profiling
 use m_errors

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_fourdp'
!End of the abilint section

 implicit none 

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: cplex,nx,ny,nz,ldx,ldy,ldz,ndat,isign
!arrays
 real(dp),intent(inout) :: fofg(2*ldx*ldy*ldz*ndat)
 real(dp),intent(inout) :: fofr(cplex*ldx*ldy*ldz*ndat)

!Local variables-------------------------------

! *************************************************************************

 select case (cplex)
 case (2) ! Complex to Complex.

   select case (isign)
   case (+1)
     call dfti_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofg,fofr)

   case (-1) ! -1
     call dfti_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fofr,fofg)

   case default
     MSG_BUG("Wrong isign")
   end select 

 case (1) ! Real case.

   select case (isign)
   case (-1) ! R --> G 
     call dfti_r2c_op(nx,ny,nz,ldx,ldy,ldz,ndat,fofr,fofg)

   case (+1) ! G --> R
     call dfti_c2r_op(nx,ny,nz,ldx,ldy,ldz,ndat,fofg,fofr)

   case default
     MSG_BUG("Wrong isign")
   end select

 case default 
   MSG_BUG(" Wrong value for cplex")
 end select 

end subroutine dfti_fourdp
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_c2c_ip
!! NAME
!!  dfti_c2c_ip
!!
!! FUNCTION
!! Driver routine for in-place 3D complex-complex FFT.
!!
!! INPUTS
!! nx,ny,nz=Number of points along the three directions.
!! ldx,ldy,ldz=Physical dimensions of the array.
!! ndat=Number of FFTs to be done.
!! isign= +1 : ff(G) => ff(R); -1 : ff(R) => ff(G)
!!
!! SIDE EFFECTS
!!  ff(ldx*ldy*ldz*ndat)=
!!    In input: the complex array to be transformed.
!!    In output: the Fourier transformed in the space specified by isign.
!!
!! PARENTS
!!      fftw3_fourdp,m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_c2c_ip(nx,ny,nz,ldx,ldy,ldz,ndat,isign,ff)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_c2c_ip'
!End of the abilint section

 implicit none 

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign
!arrays
 complex(dpc),intent(inout) :: ff(ldx*ldy*ldz*ndat)

#ifdef HAVE_FFT_MKL
!Local variables-------------------------------
!scalars
 integer :: status 
 type(DFTI_DESCRIPTOR),pointer :: Desc1

! *************************************************************************

 status = DftiCreateDescriptor(Desc1, DFTI_DOUBLE, DFTI_COMPLEX, 3, (/nx ,ny ,nz/) )
 DFTI_CHECK(status)

 status = DftiSetValue(Desc1, DFTI_NUMBER_OF_TRANSFORMS, ndat)
 status = DftiSetValue(Desc1, DFTI_INPUT_DISTANCE,  ldx*ldy*ldz)
 status = DftiSetValue(Desc1, DFTI_INPUT_STRIDES,  (/0,1,ldx,ldx*ldy/) )

 if (isign==-1) then
   status = DftiSetValue(Desc1, DFTI_FORWARD_SCALE, one / DBLE(nx*ny*nz) )
 end if

 status = DftiCommitDescriptor(Desc1)
 DFTI_CHECK(status)

 if (isign==-1) then
   status = DftiComputeForward(Desc1, ff)
 else if (isign==+1) then
   status = DftiComputeBackward(Desc1, ff)
 else 
   MSG_ERROR("Wrong isign")
 end if
 DFTI_CHECK(status)

 status = DftiFreeDescriptor(Desc1)
 DFTI_CHECK(status)

#else 
 MSG_ERROR("FFT MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/))
 ABI_UNUSED(ff)
#endif

end subroutine dfti_c2c_ip
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_c2c_op
!! NAME
!!  dfti_c2c_op
!!
!! FUNCTION
!! Driver routine for out-of-place 3D complex-complex FFT of lengths nx, ny, nz.
!!
!! INPUTS
!! nx,ny,nz=Number of points along the three directions.
!! ldx,ldy,ldz=Physical dimensions of the array.
!! ndat=Number of FFTs to be done.
!! isign= +1 : ff(G) => gg(R); -1 : ff(R) => gg(G)
!! ff(ldx*ldy*ldz*ndat)=The array to be transformed.
!!
!! OUTPUT 
!!   gg(ldx*ldy*ldz*ndat)=The FFT of ff.
!!
!! PARENTS
!!      fftw3_fourdp,m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_c2c_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,ff,gg)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_c2c_op'
!End of the abilint section

 implicit none 

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,isign,ndat
!arrays
 complex(dpc),intent(in) :: ff(ldx*ldy*ldz*ndat)
 complex(dpc),intent(out) :: gg(ldx*ldy*ldz*ndat)

#ifdef HAVE_FFT_MKL
!Local variables-------------------------------
!scalars
 integer :: status 
 type(DFTI_DESCRIPTOR),pointer :: Desc1

! *************************************************************************

 status = DftiCreateDescriptor(Desc1, DFTI_DOUBLE, DFTI_COMPLEX, 3, (/nx ,ny ,nz/) )
 DFTI_CHECK(status)
                                                                                     
 status = DftiSetValue(Desc1, DFTI_NUMBER_OF_TRANSFORMS, ndat)
 status = DftiSetValue(Desc1, DFTI_INPUT_DISTANCE,  ldx*ldy*ldz)
 status = DftiSetValue(Desc1, DFTI_INPUT_STRIDES,  (/0,1,ldx,ldx*ldy/) )
 status = DftiSetValue(Desc1, DFTI_OUTPUT_DISTANCE,  ldx*ldy*ldz)
 status = DftiSetValue(Desc1, DFTI_OUTPUT_STRIDES,  (/0,1,ldx,ldx*ldy/) )
 status = DftiSetValue(Desc1, DFTI_PLACEMENT, DFTI_NOT_INPLACE)

 if (isign==-1) then
   status = DftiSetValue(Desc1, DFTI_FORWARD_SCALE, one/DBLE(nx*ny*nz) )
 end if
                                                                                     
 status = DftiCommitDescriptor(Desc1)
 DFTI_CHECK(status)

 if (isign==-1) then
   status = DftiComputeForward(Desc1, ff, gg)
 else if (isign==+1) then
   status = DftiComputeBackward(Desc1, ff, gg)
 else 
   MSG_ERROR("Wrong isign")
 end if

 DFTI_CHECK(status)

 status = DftiFreeDescriptor(Desc1)
 DFTI_CHECK(status)

#else 
 MSG_ERROR("FFTW MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/))
 ABI_UNUSED(ff)
 ABI_UNUSED(gg)
#endif

end subroutine dfti_c2c_op
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_many_dft_op
!! NAME
!!  dfti_many_dft_op
!!
!! FUNCTION
!! Driver routine for many out-of-place 3D complex-to-complex FFTs of lengths nx, ny, nz. 
!!
!! INPUTS
!! nx,ny,nz=Number of points along the three directions.
!! ldx,ldy,ldz=Physical dimension of the fin and fout arrays (to avoid cache conflicts).
!! ndat=Number of FFTs to be done.
!! fin(2*ldx*ldy*ldz*ndat)=The complex array to be transformed.
!! isign=sign of Fourier transform exponent: current convention uses
!!   +1 for transforming from G to r, 
!!   -1 for transforming from r to G.
!!
!! OUTPUT 
!! fout(2,ldx*ldy*ldz*ndat)=The Fourier transform of fin.
!!
!! PARENTS
!!      m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_many_dft_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fin,fout)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_many_dft_op'
!End of the abilint section

 implicit none 

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign
!arrays
 real(dp),target,intent(in) :: fin(2*ldx*ldy*ldz*ndat)
 real(dp),target,intent(out) :: fout(2*ldx*ldy*ldz*ndat)

#ifdef HAVE_FFT_MKL
!Local variables-------------------------------
!scalars
 type(C_ptr) :: fin_cptr, fout_cptr
!arrays
 complex(dpc),pointer :: fin_fptr(:),fout_fptr(:)

! *************************************************************************

 fin_cptr = C_loc(fin)
 call C_F_pointer(fin_cptr,fin_fptr, shape=(/ldx*ldy*ldz*ndat/))

 fout_cptr = C_loc(fout)
 call C_F_pointer(fout_cptr,fout_fptr, shape=(/ldx*ldy*ldz*ndat/))

 call dfti_c2c_op(nx,ny,nz,ldx,ldy,ldz,ndat,isign,fin_fptr,fout_fptr)

#else 
 MSG_ERROR("FFTW MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/))
 ABI_UNUSED(fin(1))
 ABI_UNUSED(fout(1))
#endif

end subroutine dfti_many_dft_op
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_many_dft_ip
!! NAME
!!  dfti_many_dft_ip
!!
!! FUNCTION
!! Driver routine for many in-place 3D complex-to-complex FFTs of lengths nx, ny, nz. 
!!
!! INPUTS
!! nx,ny,nz=Number of points along the three directions.
!! ldx,ldy,ldz=Physical dimension of the finout array (to avoid cache conflicts).
!! ndat=Number of FFTs to be done.
!! isign=sign of Fourier transform exponent: current convention uses
!!   +1 for transforming from G to r, 
!!   -1 for transforming from r to G.
!!
!! OUTPUT 
!! finout(2,ldx*ldy*ldz*ndat)=
!!   In input: The complex array to be transformed.
!!   In output: The FFT results.
!!
!! PARENTS
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_many_dft_ip(nx,ny,nz,ldx,ldy,ldz,ndat,isign,finout)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_many_dft_ip'
!End of the abilint section

 implicit none 

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,isign
!arrays
 real(dp),target,intent(inout) :: finout(2*ldx*ldy*ldz*ndat)

#ifdef HAVE_FFT_MKL
!Local variables-------------------------------
!scalars
 type(C_ptr) :: finout_cptr
!arrays
 complex(dpc),pointer :: finout_fptr(:)

! *************************************************************************

 finout_cptr = C_loc(finout)
 call C_F_pointer(finout_cptr,finout_fptr, shape=(/ldx*ldy*ldz*ndat/))

 call dfti_c2c_ip(nx,ny,nz,ldx,ldy,ldz,ndat,isign,finout_fptr)

#else 
 MSG_ERROR("FFTW MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,isign/))
 ABI_UNUSED(finout(1))
#endif

end subroutine dfti_many_dft_ip
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_fftpad
!! NAME
!!  dfti_fftpad
!!
!! FUNCTION
!!  This routine transforms wavefunctions using 3D zero-padded FFTs with DFTI. 
!!  The 3D ffts are computed only on lines and planes which have non zero elements (see zpad_init)
!!  FFT transform is in-place.
!!  
!! INPUTS
!!   nx,ny,nz=Logical dimensions of the FFT mesh.
!!   ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
!!   ndat=Numer of FFTs
!!   mgfft=MAX(nx,ny,nz), only used to dimension gbound
!!   isign=The sign of the transform.
!!   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
!!     See sphereboundary for more info.
!!
!! SIDE EFFECTS
!!   ff(2*ldx*ldy*ldz*ndat)=
!!     input: The array with the data to be transformed.
!!     output: The results of the FFT.
!!
!! PARENTS
!!      dfti_fourwf
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_fftpad(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_fftpad'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
!arrays
 integer,intent(in) :: gbound(2*mgfft+8,2)
 real(dp),target,intent(inout) :: ff(2*ldx*ldy*ldz*ndat)

!Local variables-------------------------------
#ifdef HAVE_FFT_MKL
!scalars
 type(C_ptr) :: cptr
!arrays
 complex(dpc),pointer :: fptr(:)

! *************************************************************************

 cptr = C_loc(ff)
 call C_F_pointer(cptr,fptr, shape=(/ldx*ldy*ldz*ndat/))

 call dfti_fftpad_cplx(fptr,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)

#else
 MSG_ERROR("FFT_MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign/))
 ABI_UNUSED(gbound(1,1))
 ABI_UNUSED(ff(1))
#endif

end subroutine dfti_fftpad     
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_fftpad_cplx
!! NAME
!!  dfti_fftpad_cplx
!!
!! FUNCTION
!!  This routine transforms wavefunctions using 3D zero-padded FFTs with DFTI. 
!!  The 3D ffts are computed only on lines and planes which have non zero elements (see zpad_init)
!!  FFT transform is in-place. Target: complex arrays.
!!  
!! INPUTS
!!   nx,ny,nz=Logical dimensions of the FFT mesh.
!!   ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
!!   ndat=Number of FFTs.
!!   mgfft=MAX(nx,ny,nz), only used to dimension gbound
!!   isign=The sign of the transform.
!!   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
!!     See sphereboundary for more info.
!!
!! SIDE EFFECTS
!!   ff(ldx*ldy*ldz*ndat)=
!!     input: The array with the data to be transformed.
!!     output: The results of the FFT.
!!
!! PARENTS
!!      fftw3_fourwf,m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_fftpad_cplx(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_fftpad_cplx'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign
!arrays
 integer,intent(in) :: gbound(2*mgfft+8,2)
 complex(dpc),intent(inout) :: ff(ldx*ldy*ldz*ndat)

!Local variables-------------------------------
!scalars
#ifdef HAVE_FFT_MKL
 integer :: dat,nthreads,nt_here,nt_nest,pt
 character(len=500) :: msg

! *************************************************************************

 call dfti_get_nt(ndat,nt_here, nt_nest)
 write(std_out,*)"DFTI: ndat, nt_here, nt_nest",ndat,nt_here,nt_nest

 if (nt_here > 1) then
!$OMP PARALLEL DO PRIVATE(pt) NUM_THREADS(nt_here)
   do dat=1,ndat
     pt = 1 + (dat-1) * ldx*ldy*ldz
     call dfti_fftpad_cplx_nt(ff(pt),nx,ny,nz,ldx,ldy,ldz,1,mgfft,isign,gbound,nt_nest)
   end do
 else
   call dfti_fftpad_cplx_nt(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound,nt_nest)
 end if

#else
 MSG_ERROR("FFT_MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign/))
 ABI_UNUSED(gbound(1,1))
 ABI_UNUSED(ff(1))
#endif

end subroutine dfti_fftpad_cplx     
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_fftpad_cplx_nt
!! NAME
!!  dfti_fftpad_cplx_nt
!!
!! FUNCTION
!!  This routine transforms wavefunctions using 3D zero-padded FFTs with DFTI. 
!!  The 3D ffts are computed only on lines and planes which have non zero elements (see zpad_init)
!!  FFT transform is in-place. Target: complex arrays.
!!  
!! INPUTS
!!   nx,ny,nz=Logical dimensions of the FFT mesh.
!!   ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
!!   ndat=Number of FFTs.
!!   mgfft=MAX(nx,ny,nz), only used to dimension gbound
!!   isign=The sign of the transform.
!!   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
!!     See sphereboundary for more info.
!!
!! SIDE EFFECTS
!!   ff(ldx*ldy*ldz*ndat)=
!!     input: The array with the data to be transformed.
!!     output: The results of the FFT.
!!
!! PARENTS
!!      m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_fftpad_cplx_nt(ff,nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,gbound,nthreads)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_fftpad_cplx_nt'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,nthreads
!arrays
 integer,intent(in) :: gbound(2*mgfft+8,2)
 complex(dpc),intent(inout) :: ff(ldx*ldy*ldz*ndat)

!Local variables-------------------------------
!scalars
#ifdef HAVE_FFT_MKL
 integer :: kk,ii,jj,sidx,cnt,dat,line,zplane,status
 character(len=500) :: msg
 type(my_zpad_t) :: zpad
 type(DFTI_DESCRIPTOR),pointer :: Desc1,Desc2,Desc3
!arrays

! *************************************************************************

 call my_zpad_init(zpad,nx,ny,nz,ldx,ldy,ldz,mgfft,gbound)

 SELECT CASE (isign)

 CASE (+1) ! G --> R
   !
   ! The prototype for dfftw_plan_many_dft is:
   ! dfftw_plan_many_dft(rank, n, howmany, 
   !   fin,  iembed, istride, idist, 
   !   fout, oembed, ostride, odist, isign, my_flags)
   !
   ! 1) Transform along x.
   write(msg,'(a,i3)')"FFT_X: ",nthreads
   DEV_TIMER_START(msg)

!   bw_plan1 = dplan_many_dft(1, (/nx/), 1, &   ! Single 1D transform of f(Gx,Gy,Gz) along Gx.
!&         ff, (/ldx, ldy, ldz/), 1, ldx,   &
!&         ff, (/ldx, ldy, ldz/), 1, ldx, FFTW_BACKWARD, FFTW_ESTIMATE)

   status = DftiCreateDescriptor(Desc1, DFTI_DOUBLE, DFTI_COMPLEX, 1, nx)
   DFTI_CHECK(status)
   !
   ! TODO
   ! this value was added in mkl version 11.0.
   ! It's the only way to avoid side effect in mkl_set_num_threads 
   ! thus making this routine thread safe 
   call mkl_set_num_threads(1)
   !status = DftiSetValue(Desc1, DFTI_THREAD_LIMIT, 1)

   status = DftiSetValue(Desc1, DFTI_NUMBER_OF_USER_THREADS, nthreads)
   DFTI_CHECK(status)

   status = DftiCommitDescriptor(Desc1)
   DFTI_CHECK(status)

!$omp parallel do private(dat,line,jj,kk,sidx,status) NUM_THREADS (nthreads)
   do cnt=0,(zpad%nlinex*ndat)-1 
     dat  = 1 + cnt / zpad%nlinex
     line = 1 + MOD(cnt, zpad%nlinex)
     jj   = zpad%linex2ifft_yz(1,line)
     kk   = zpad%linex2ifft_yz(2,line)
     sidx = 1+ (jj-1)*ldx + (kk-1)*ldx*ldy + (dat-1) * ldx*ldy*ldz
     status = DftiComputeBackward(Desc1, ff(sidx:)) ! Pass the base address.
     DFTI_CHECK(status)
   end do

   status = DftiFreeDescriptor(Desc1)
   DFTI_CHECK(status)

   DEV_TIMER_STOP(msg)
   !
   ! 2) Transform along y: nx 1D transforms of f(x,Gy,Gz) along Gy.

!   bw_plan2 = dplan_many_dft(1, (/ny/), nx,  &   
!&         ff, (/ldx, ldy, ldz/), ldx, 1,     &
!&         ff, (/ldx, ldy, ldz/), ldx, 1, FFTW_BACKWARD, FFTW_ESTIMATE)

   call mkl_set_num_threads(1)
   !status = DftiSetValue(Desc2, DFTI_THREAD_LIMIT, 1)

   status = DftiCreateDescriptor(Desc2, DFTI_DOUBLE, DFTI_COMPLEX, 1, ny)
   DFTI_CHECK(status)

   status = DftiSetValue(Desc2, DFTI_NUMBER_OF_TRANSFORMS, nx)
   status = DftiSetValue(Desc2, DFTI_INPUT_DISTANCE,  1)
   status = DftiSetValue(Desc2, DFTI_INPUT_STRIDES,  (/0,ldx/))
   status = DftiSetValue(Desc2, DFTI_NUMBER_OF_USER_THREADS, nthreads)

   status = DftiCommitDescriptor(Desc2)
   DFTI_CHECK(status)

!$omp parallel do private(dat,zplane,kk,sidx,status) NUM_THREADS(nthreads)
   do cnt=0,(zpad%n_zplanes*ndat)-1 
     dat    = 1 + cnt / zpad%n_zplanes
     zplane = 1 + MOD(cnt, zpad%n_zplanes)
     kk     = zpad%zplane(1,zplane)
     sidx   = 1 + (kk-1)*ldx*ldy + (dat-1) *ldx*ldy*ldz
     status = DftiComputeBackward(Desc2, ff(sidx:))
     DFTI_CHECK(status)
   end do

   status = DftiFreeDescriptor(Desc2)
   DFTI_CHECK(status)
   !
   ! 3) Transform along z.
   ! ldx*ldy 1D transforms of f(x,y,Gz) along Gz.
   ! Note that we have to visit the entire augmented x-y plane!
   write(msg,'(a,i3)')"A_Z: ",nthreads
   DEV_TIMER_START(msg)

!   bw_plan3  = dplan_many_dft(1, (/nz/), ldx*ldy, & 
!&         ff, (/ldx, ldy, ldz/), ldx*ldy, 1,      & 
!&         ff, (/ldx, ldy, ldz/), ldx*ldy, 1, FFTW_BACKWARD, FFTW_ESTIMATE)

   if (ndat==1 .or. nthreads==1 .or. MOD(ndat,nthreads)/=0) then
     !
     ! Use MKL internal threading if single FFT or ndat is not divisible by nthreads
     call mkl_set_num_threads(nthreads)
     status = DftiCreateDescriptor(Desc3, DFTI_DOUBLE, DFTI_COMPLEX, 1, nz)
     DFTI_CHECK(status)
                                                                            
     status = DftiSetValue(Desc3, DFTI_NUMBER_OF_TRANSFORMS, ldx*ldy)
     status = DftiSetValue(Desc3, DFTI_INPUT_DISTANCE,  1)
     status = DftiSetValue(Desc3, DFTI_INPUT_STRIDES,  (/0,ldx*ldy/))

     status = DftiCommitDescriptor(Desc3)
     DFTI_CHECK(status)

     do dat=1,ndat
       sidx = 1 + (dat-1) *ldx*ldy*ldz
       status = DftiComputeBackward(Desc3, ff(sidx:))
       DFTI_CHECK(status)
     end do

     status = DftiFreeDescriptor(Desc3)
     DFTI_CHECK(status)

   else 
     ! Split ndat transforms among the threads.
     call mkl_set_num_threads(1)
     status = DftiCreateDescriptor(Desc3, DFTI_DOUBLE, DFTI_COMPLEX, 1, nz)
     DFTI_CHECK(status)
                                                                            
     status = DftiSetValue(Desc3, DFTI_NUMBER_OF_TRANSFORMS, ldx*ldy)
     status = DftiSetValue(Desc3, DFTI_INPUT_DISTANCE,  1)
     status = DftiSetValue(Desc3, DFTI_INPUT_STRIDES,  (/0,ldx*ldy/))
     status = DftiSetValue(Desc3, DFTI_NUMBER_OF_USER_THREADS, nthreads)
     !status = DftiSetValue(Desc2, DFTI_THREAD_LIMIT, 1)
                                                                             
     status = DftiCommitDescriptor(Desc3)
     DFTI_CHECK(status)

!$omp parallel do private(sidx,status) NUM_THREADS(nthreads)
     do dat=1,ndat
       sidx = 1 + (dat-1) *ldx*ldy*ldz
       status = DftiComputeBackward(Desc3, ff(sidx:))
       DFTI_CHECK(status)
     end do
                                                                             
     status = DftiFreeDescriptor(Desc3)
     DFTI_CHECK(status)
   end if

   DEV_TIMER_STOP(msg)
 
 CASE (-1) ! R --> G
   !
   ! The prototype for dfftw_plan_many_dft is:
   ! dfftw_plan_many_dft(rank, n, howmany, 
   !   fin,  iembed, istride, idist, 
   !   fout, oembed, ostride, odist, isign, my_flags)
   !
   ! 1) Transform along z.
!   fw_plan3 = dplan_many_dft(1, (/nz/), ldx*ldy, & ! We have to visit the entire augmented x-y plane!
!&         ff, (/ldx, ldy, ldz/), ldx*ldy, 1,     &
!&         ff, (/ldx, ldy, ldz/), ldx*ldy, 1, FFTW_FORWARD, FFTW_ESTIMATE)

   if (ndat==1 .or. nthreads==1 .or. MOD(ndat,nthreads)/=0) then
     !
     ! Use MKL internal threading if single FFT or ndat is not divisible by nthreads
     call mkl_set_num_threads(nthreads)
     status = DftiCreateDescriptor(Desc3, DFTI_DOUBLE, DFTI_COMPLEX, 1, nz)
     DFTI_CHECK(status)

     status = DftiSetValue(Desc3, DFTI_NUMBER_OF_TRANSFORMS, ldx*ldy)
     status = DftiSetValue(Desc3, DFTI_INPUT_DISTANCE,  1)
     status = DftiSetValue(Desc3, DFTI_INPUT_STRIDES,  (/0,ldx*ldy/))
     !status = DftiSetValue(Desc3, DFTI_THREAD_LIMIT, nthreads)

     status = DftiCommitDescriptor(Desc3)
     DFTI_CHECK(status)

     do dat=1,ndat
       sidx = 1 + (dat-1) *ldx*ldy*ldz
       status = DftiComputeForward(Desc3, ff(sidx:))
       DFTI_CHECK(status)
     end do

     status = DftiFreeDescriptor(Desc3)
     DFTI_CHECK(status)
   else 
     ! Split ndat transforms among the threads.
     call mkl_set_num_threads(1)
     status = DftiCreateDescriptor(Desc3, DFTI_DOUBLE, DFTI_COMPLEX, 1, nz)
     DFTI_CHECK(status)
                                                                            
     status = DftiSetValue(Desc3, DFTI_NUMBER_OF_TRANSFORMS, ldx*ldy)
     status = DftiSetValue(Desc3, DFTI_INPUT_DISTANCE,  1)
     status = DftiSetValue(Desc3, DFTI_INPUT_STRIDES,  (/0,ldx*ldy/))
     status = DftiSetValue(Desc3, DFTI_NUMBER_OF_USER_THREADS, nthreads)
     !status = DftiSetValue(Desc3, DFTI_THREAD_LIMIT, 1)
                                                                            
     status = DftiCommitDescriptor(Desc3)
     DFTI_CHECK(status)
                                                                            
!$omp parallel do private(sidx,status) NUM_THREADS(nthreads)
     do dat=1,ndat
       sidx = 1 + (dat-1) *ldx*ldy*ldz
       status = DftiComputeForward(Desc3, ff(sidx:))
       DFTI_CHECK(status)
     end do
                                                                            
     status = DftiFreeDescriptor(Desc3)
     DFTI_CHECK(status)
   end if
   !
   ! 2) Transform along y.

!   fw_plan2 = dplan_many_dft(1, (/ny/), nx, &
!&         ff, (/ldx, ldy, ldz/), ldx, 1,    &
!&         ff, (/ldx, ldy, ldz/), ldx, 1, FFTW_FORWARD, FFTW_ESTIMATE)

   call mkl_set_num_threads(1)
   status = DftiCreateDescriptor(Desc2, DFTI_DOUBLE, DFTI_COMPLEX, 1, ny)
   DFTI_CHECK(status)

   !status = DftiSetValue(Desc2, DFTI_THREAD_LIMIT, 1)
   status = DftiSetValue(Desc2, DFTI_NUMBER_OF_TRANSFORMS, nx)
   status = DftiSetValue(Desc2, DFTI_INPUT_DISTANCE,  1)
   status = DftiSetValue(Desc2, DFTI_INPUT_STRIDES,  (/0,ldx/))

   status = DftiSetValue(Desc2, DFTI_NUMBER_OF_USER_THREADS, nthreads)

   status = DftiCommitDescriptor(Desc2)
   DFTI_CHECK(status)

!$omp parallel do private(dat,zplane,kk,sidx,status) NUM_THREADS(nthreads)
   do cnt=0,(zpad%n_zplanes*ndat)-1 
     dat    = 1 + cnt / zpad%n_zplanes
     zplane = 1 + MOD(cnt, zpad%n_zplanes)
     kk     = zpad%zplane(1,zplane)
     sidx   = 1 + ldx*ldy*(kk-1) + (dat-1) *ldx*ldy*ldz
     status = DftiComputeForward(Desc2, ff(sidx:))
     DFTI_CHECK(status)
   end do

   status = DftiFreeDescriptor(Desc2)
   DFTI_CHECK(status)
   !
   ! 3) Transform along x. 

!   fw_plan1 = dplan_many_dft(1, (/nx/), 1,  &
!&         ff, (/ldx, ldy, ldz/), 1, ldx,    &
!&         ff, (/ldx, ldy, ldz/), 1, ldx, FFTW_FORWARD, FFTW_ESTIMATE)

   call mkl_set_num_threads(1)
   status = DftiCreateDescriptor(Desc1, DFTI_DOUBLE, DFTI_COMPLEX, 1, nx)
   DFTI_CHECK(status)

   !status = DftiSetValue(Desc1, DFTI_THREAD_LIMIT, 1)
   status = DftiSetValue(Desc1, DFTI_NUMBER_OF_USER_THREADS, nthreads)
   !status = DftiSetValue(Desc1, DFTI_FORWARD_SCALE, one/(nx*ny*nz))

   status = DftiCommitDescriptor(Desc1)
   DFTI_CHECK(status)

!$omp parallel do private(dat,line,jj,kk,sidx,status) NUM_THREADS(nthreads)
   do cnt=0,(zpad%nlinex*ndat)-1 
     dat  = 1 + cnt / zpad%nlinex
     line = 1 + MOD(cnt, zpad%nlinex)
     jj   = zpad%linex2ifft_yz(1,line)
     kk   = zpad%linex2ifft_yz(2,line)
     sidx = 1+ (jj-1)*ldx + (kk-1)*ldx*ldy + (dat-1) * ldx*ldy*ldz
     status = DftiComputeForward(Desc1, ff(sidx:)) ! Pass the base address.
     DFTI_CHECK(status)
   end do

   status = DftiFreeDescriptor(Desc1)
   DFTI_CHECK(status)

   call mkl_set_num_threads(nthreads)
   !
   ! 4) Normalize the transform.
   call ZDSCAL(ldx*ldy*ldz*ndat, one/(nx*ny*nz), ff, 1)
 
 CASE DEFAULT 
   MSG_BUG("Wrong isign")
 END SELECT

 call my_zpad_free(zpad)

 call mkl_set_num_threads(nthreads)

#else
 MSG_ERROR("FFT_MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz,ndat,mgfft,isign,nthreads/))
 ABI_UNUSED(gbound(1,1))
 ABI_UNUSED(ff(1))
#endif

end subroutine dfti_fftpad_cplx_nt     
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_get_nt
!! NAME
!!  dfti_get_nt
!!
!! FUNCTION
!!  
!! INPUTS
!!
!! CHILDREN
!!
!! PARENTS
!!      m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_get_nt(ndat,nt_here,nt_nest)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_get_nt'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: ndat
 integer,intent(out) :: nt_here,nt_nest

!Local variables-------------------------------
!scalars
 integer :: nthreads
!arrays

! *************************************************************************

 nthreads = xomp_get_max_threads()

 if (ndat==1 .or. MOD(ndat,nthreads)/=0) then
   nt_here = 1
   nt_nest = nthreads
 else
   nt_here = ndat
   nt_nest = MAX(nthreads/ndat,1)
 end if

 !nt_here = 1
 !nt_nest = nthreads

 nt_here = nthreads
 nt_nest = 1

end subroutine dfti_get_nt
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_r2c_op_cplx
!! NAME
!!  dfti_r2c_op_cplx
!!
!! FUNCTION
!! Driver routine for out-of-place 3D real-to-complex FFT of lengths nx, ny, nz. 
!!
!! INPUTS
!! nx,ny,nz=Number of points along the three directions.
!! ldx,ldy,ldz=Physical dimensions of the f array (to avoid cache conflicts).
!! ff(ldx*ldy*ldz*ndat)=The real array to be transformed.
!! ndat=Number of FFTs to be done.
!!
!! OUTPUT 
!! gg(ldx*ldy*ldz*ndat)=The forward FFT of ff (complex valued)
!!
!! NOTES
!!  FIXME For the time-being. No augmentation of the mesh to reduce memory conflicts, as MKL crashes 
!!  if the advanced interface is used.
!!
!! PARENTS
!!      m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_r2c_op_cplx(nx,ny,nz,ldx,ldy,ldz,ndat,ff,gg)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_r2c_op_cplx'
!End of the abilint section

 implicit none 

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
!arrays
 real(dp),intent(in) :: ff(ldx*ldy*ldz*ndat)
 complex(dpc),intent(out) :: gg(ldx*ldy*ldz*ndat)

#ifdef HAVE_FFT_MKL
!Local variables-------------------------------
!scalars
 integer :: status,nhp,padx,i1,i2,i3,igp,igf,imgf,ii
 integer :: i1inv,i2inv,i3inv,idat,padatf     
 type(DFTI_DESCRIPTOR),pointer :: Desc
!arrays
 integer,allocatable :: i1inver(:),i2inver(:),i3inver(:)
 complex(dpc),allocatable :: gg_hp(:)

! *************************************************************************

 ! TODO
 if (ndat/=1) then
   MSG_ERROR("ndat/=1 + MKL not coded")
 end if

 if (ANY( (/nx,ny,nz/) /= (/ldx,ldy,ldz/) )) then
   MSG_ERROR("Augmentation not supported")
 end if

 padx = (nx/2+1)
 nhp = (nx/2+1)*ny*nz
 ABI_MALLOC(gg_hp,(nhp*ndat))

 status = DftiCreateDescriptor(Desc, DFTI_DOUBLE, DFTI_REAL, 3, (/nx,ny,nz/) )
 DFTI_CHECK(status)

 status = DftiSetValue(Desc, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX)
 status = DftiSetValue(Desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE )
 status = DftiSetValue(Desc, DFTI_INPUT_STRIDES,  (/0, 1, ldx,  ldx*ldy/))
 status = DftiSetValue(Desc, DFTI_OUTPUT_STRIDES, (/0, 1, padx, padx*ny/))
 status = DftiSetValue(Desc, DFTI_NUMBER_OF_TRANSFORMS, ndat)
 status = DftiSetValue(Desc, DFTI_FORWARD_SCALE, one / DBLE(nx*ny*nz) )
 DFTI_CHECK(status)

 status = DftiCommitDescriptor(Desc)
 DFTI_CHECK(status)

 status = DftiComputeForward(Desc, ff, gg_hp)
 DFTI_CHECK(status)

 status = DftiFreeDescriptor(Desc)
 DFTI_CHECK(status)

 ! Reconstruct full FFT: Hermitian redundancy: out[i] is the conjugate of out[n-i]
 ABI_MALLOC(i1inver,(padx))
 ABI_MALLOC(i2inver,(ny))
 ABI_MALLOC(i3inver,(nz))

 i1inver(1)=1
 do i1=2,padx
   i1inver(i1)=nx+2-i1
 end do

 i2inver(1)=1
 do i2=2,ny
   i2inver(i2)=ny+2-i2
 end do

 i3inver(1)=1
 do i3=2,nz
   i3inver(i3)=nz+2-i3
 end do

 igp=0
 do idat=1,ndat
   padatf=(idat-1)*ldx*ldy*ldz
   do i3=1,nz
     i3inv = i3inver(i3)
     do i2=1,ny
       i2inv = i2inver(i2)
       do i1=1,padx
         igp=igp+1
         igf = i1 + (i3-1)*ldx*ldy + (i2-1)*ldx + padatf
         gg(igf) =  gg_hp(igp)
         i1inv = i1inver(i1)
         if (i1inv/=i1) then
           imgf = i1inv + (i3inv-1)*ldx*ldy + (i2inv-1)*ldx + padatf
           gg(imgf) = DCONJG(gg_hp(igp))
         end if
       end do
     end do
   end do
 end do

 ABI_FREE(i1inver)
 ABI_FREE(i2inver)
 ABI_FREE(i3inver)

 ABI_FREE(gg_hp)

#else
 MSG_ERROR("FFTW MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
 ABI_UNUSED(ff)
 ABI_UNUSED(gg(1))
#endif

end subroutine dfti_r2c_op_cplx
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_r2c_op
!! NAME
!!  dfti_r2c_op
!!
!! FUNCTION
!! Driver routine for out-of-place 3D real-to-complex FFT of lengths nx, ny, nz. 
!!
!! INPUTS
!! nx,ny,nz=Number of points along the three directions.
!! ldx,ldy,ldz=Physical dimensions of the f array (to avoid cache conflicts).
!! ndat=Number of FFTs to be done.
!! ff(ldx*ldy*ldz*ndat)=The real array to be transformed.
!!
!! OUTPUT 
!! gg(2*ldx*ldy*ldz*ndat)=The forward FFT of ff (real valued)
!!
!! NOTES
!!  FIXME For the time-being. No augmentation of the mesh to reduce memory conflicts, as MKL crashes 
!!  if the advanced interface is used.
!!
!! PARENTS
!!      m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_r2c_op(nx,ny,nz,ldx,ldy,ldz,ndat,ff,gg)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_r2c_op'
!End of the abilint section

 implicit none 

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
!arrays
 real(dp),intent(in) :: ff(ldx*ldy*ldz*ndat)
 real(dp),target,intent(out) :: gg(2*ldx*ldy*ldz*ndat)

#ifdef HAVE_FFT_MKL
!Local variables-------------------------------
!scalars
 type(C_ptr) :: gg_cptr
!arrays
 complex(dpc),pointer :: gg_fptr(:)

! *************************************************************************

 gg_cptr = C_loc(gg)
 call C_F_pointer(gg_cptr,gg_fptr, shape=(/ldx*ldy*ldz*ndat/))

 call dfti_r2c_op_cplx(nx,ny,nz,ldx,ldy,ldz,ndat,ff,gg_fptr)

#else
 MSG_ERROR("FFTW MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
 ABI_UNUSED(ff)
 ABI_UNUSED(gg(1))
#endif

end subroutine dfti_r2c_op
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_c2r_op_cplx
!! NAME
!!  dfti_c2r_op_cplx
!!
!! FUNCTION
!! Driver routine for out-of-place 3D complex-to-real FFT of lengths nx, ny, nz. 
!!
!! INPUTS
!! nx,ny,nz=Number of point along the three directions.
!! ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
!! ndat=Number of FFTs to be done.
!! ff(2,ldx*ldy*ldz*ndat)=The complex array to be transformed.
!!
!! OUTPUT 
!! gg(2,ldx*ldy*ldz*ndat)=The backwards real FFT of ff.
!!
!! PARENTS
!!      m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_c2r_op_cplx(nx,ny,nz,ldx,ldy,ldz,ndat,ff,gg)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_c2r_op_cplx'
!End of the abilint section

 implicit none 

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
!arrays
 complex(dpc),intent(in) :: ff(ldx*ldy*ldz*ndat)
 real(dp),intent(out) :: gg(ldx*ldy*ldz*ndat)

#ifdef HAVE_FFT_MKL
!Local variables-------------------------------
!scalars
 integer :: status,nhp,padx,i2,i3,igp,igf,idat,padatf,padatp,ii
 type(DFTI_DESCRIPTOR),pointer :: Desc
!arrays
 complex(dpc),allocatable :: ff_hp(:)

! *************************************************************************

 if (ndat/=1) then 
   MSG_ERROR("ndat/=1 + MKL not coded")
 end if

 if (ANY( (/nx,ny,nz/) /= (/ldx,ldy,ldz/) )) then
   MSG_ERROR("Augmentation not supported")
 end if

 !stride  = 1
 !idist   = nhp
 !odist   = nx*ny*nz
 !n       = (/nx,ny,nz/)
 !inembed = (/(nx/2+1),ny,nz/)
 !onembed = (/nx,ny,nz/) ! check this
 !my_plan = retrieve_plan3(n,ndat,inembed,stride,idist,onembed,stride,odist,FFTW_BACKWARD,my_flags,Saved_plans)

 ! Fill the Hermitian part: Hermitian redundancy: out[i] is the conjugate of out[n-i]
 padx    = (nx/2+1)
 nhp     = (nx/2+1)*ny*nz
 ABI_MALLOC(ff_hp,(nhp*ndat))

 do idat=1,ndat
   padatf=(idat-1)*ldx*ldy*ldz
   padatp=(idat-1)*padx*ny*nz
!$omp parallel do private(igf,igp)
   do i3=1,nz
     do i2=1,ny
       igf = (i3-1)*ldx*ldy + (i2-1)*ldx   + padatf
       igp = (i3-1)*padx*ny + (i2-1)*padx  + padatp
       ff_hp(igp+1:igp+padx) = ff(igf+1:igf+padx)
     end do
   end do
 end do

 status = DftiCreateDescriptor(Desc, DFTI_DOUBLE, DFTI_REAL, 3, (/nx,ny,nz/) )
 DFTI_CHECK(status)

 status = DftiSetValue(Desc, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX)
 status = DftiSetValue(Desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE )
 status = DftiSetValue(Desc, DFTI_INPUT_STRIDES, (/0, 1, padx, padx*ny/))
 status = DftiSetValue(Desc, DFTI_OUTPUT_STRIDES, (/0, 1, ldx, ldx*ldy/))
 status = DftiSetValue(Desc, DFTI_NUMBER_OF_TRANSFORMS, ndat)
 DFTI_CHECK(status)

 status = DftiCommitDescriptor(Desc)
 DFTI_CHECK(status)

 status = DftiComputeBackward(Desc, ff_hp, gg)
 DFTI_CHECK(status)

 status = DftiFreeDescriptor(Desc)
 DFTI_CHECK(status)

 ABI_FREE(ff_hp)

#else 
 MSG_ERROR("FFT MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
 ABI_UNUSED(ff(1))
 ABI_UNUSED(gg(1))
#endif

end subroutine dfti_c2r_op_cplx
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/dfti_c2r_op
!! NAME
!!  dfti_c2r_op
!!
!! FUNCTION
!! Driver routine for out-of-place 3D complex-to-real FFT of lengths nx, ny, nz. 
!!
!! INPUTS
!! nx,ny,nz=Number of point along the three directions.
!! ldx,ldy,ldz=Physical dimension of the f array (to avoid cache conflicts).
!! ndat=Number of FFTs to be done.
!! ff(2,ldx*ldy*ldz*ndat)=The complex array to be transformed.
!!
!! OUTPUT 
!! gg(ldx*ldy*ldz*ndat)=The backwards real FFT of ff.
!!
!! PARENTS
!!      m_dfti
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

subroutine dfti_c2r_op(nx,ny,nz,ldx,ldy,ldz,ndat,ff,gg)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'dfti_c2r_op'
!End of the abilint section

 implicit none 

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
!arrays
 real(dp),target,intent(in) :: ff(2*ldx*ldy*ldz*ndat)
 real(dp),intent(out) :: gg(ldx*ldy*ldz*ndat)

#ifdef HAVE_FFT_MKL
!Local variables-------------------------------
!scalars
 type(C_ptr) :: ff_cptr
!arrays
 complex(dpc),pointer :: ff_fptr(:)

! *************************************************************************

 ff_cptr = C_loc(ff)
 call C_F_pointer(ff_cptr,ff_fptr, shape=(/ldx*ldy*ldz*ndat/))

 call dfti_c2r_op_cplx(nx,ny,nz,ldx,ldy,ldz,ndat,ff_fptr,gg)

#else 
 MSG_ERROR("FFT MKL support not activated")
 ABI_UNUSED((/nx,ny,nz,ldx,ldy,ldz/))
 ABI_UNUSED((/ff(1),gg(1)/))
#endif

end subroutine dfti_c2r_op
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/my_zpad_init
!! NAME
!!  zpad_init
!!
!! FUNCTION
!!  
!! INPUTS
!!   mgfft=MAX(nx,ny,nz), only used to dimension gbound
!!   gbound(2*mgfft+8,2)= The boundaries of the basis sphere of G vectors at a given k-point.
!!     See sphereboundary for more info.
!!
!! OUTPUT
!!  zpad<type(zpad_t)>
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

subroutine my_zpad_init(zpad,nx,ny,nz,ldx,ldy,ldz,mgfft,gbound)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'my_zpad_init'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,mgfft
 type(my_zpad_t),intent(out) :: zpad
!arrays
 integer,intent(in) :: gbound(2*mgfft+8,2)

!Local variables-------------------------------
!scalars
 integer :: jj,g3_max,g3_min,gg3,ifft_g3,igb,g2min,g2max,nlinex

! *************************************************************************

 g3_min = gbound(3,2)
 g3_max = gbound(4,2)

 zpad%n_zplanes = g3_max - g3_min + 1

 ABI_MALLOC(zpad%zplane,      (2,nz))
 ABI_MALLOC(zpad%linex2ifft_yz, (2,nx*ny*nz))
 !
 ! Loop over the z-planes intersecting the G-sphere.
 nlinex = 0
 do gg3=1,zpad%n_zplanes 
   !
   if (gg3<=g3_max+1) then
     ifft_g3 = gg3
   else 
     ifft_g3 = gg3 + nz - zpad%n_zplanes ! Wrap around for negative gg3.
   end if
   !
   ! Select the set of y for this z-plane.
   igb=2*gg3+3
   g2min = gbound(igb  ,2) 
   g2max = gbound(igb+1,2)

   zpad%zplane(1,gg3) = ifft_g3
   zpad%zplane(2,gg3) = igb

   !(1:g2max+1,ifft_g3)     ! Positive g_y.
   !(g2min+ny+1:ny,ifft_g3) ! Negative g_y.

   do jj=1,g2max+1
     nlinex = nlinex + 1
     zpad%linex2ifft_yz(1,nlinex) = jj  
     zpad%linex2ifft_yz(2,nlinex) = ifft_g3  
   end do

   do jj=g2min+ny+1,ny
     nlinex = nlinex + 1
     zpad%linex2ifft_yz(1,nlinex) = jj  
     zpad%linex2ifft_yz(2,nlinex) = ifft_g3  
   end do
 end do

 zpad%nlinex = nlinex

 RETURN
 ABI_UNUSED((/ldx,ldy,ldz/))

end subroutine my_zpad_init
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/zpad_free
!! NAME
!!  zpad_free
!!
!! FUNCTION
!!  
!! INPUTS
!!
!! OUTPUT
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

subroutine my_zpad_free(zpad)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'my_zpad_free'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 type(my_zpad_t),intent(inout) :: zpad

!Local variables-------------------------------

! *************************************************************************

 if (associated(zpad%zplane)) then
   ABI_FREE(zpad%zplane)
 end if

 if (associated(zpad%linex2ifft_yz)) then
   ABI_FREE(zpad%linex2ifft_yz)
 end if

end subroutine my_zpad_free
!!***

!----------------------------------------------------------------------

!!****f* m_dfti/check_status
!! NAME
!!  check_status
!!
!! FUNCTION
!!  Error handler for DFTI wrappers. Print error message and abort.
!!  
!! INPUTS
!!  status = status error reported by dfti. 
!!  [file] = file name
!!  [line] = line number
!!
!! PARENTS
!!
!! CHILDREN
!!      int2char
!!
!! SOURCE

#ifdef HAVE_FFT_MKL

subroutine check_status(status,file,line)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'check_status'
 use interfaces_27_toolbox_oop
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: status
 integer,optional,intent(in) :: line
 character(len=*),optional,intent(in) :: file

!Local variables-------------------------------
 integer :: f90line
 character(len=10) :: lnum
 character(len=500) :: f90name
 character(len=500) :: my_msg
 character(len=DFTI_MAX_MESSAGE_LENGTH+500) :: err_msg

! *************************************************************************

 if (PRESENT(line)) then
   f90line=line
 else 
   f90line=0
 end if
 call int2char(f90line,lnum)

 if (PRESENT(file)) then 
   f90name = basename(file)
 else
   f90name='Subroutine Unknown'
 end if

 my_msg=TRIM(f90name)//":"//TRIM(lnum)//":"

 if (status /= 0) then
   if (.not. DftiErrorClass(status, DFTI_NO_ERROR)) then
     err_msg = TRIM(my_msg)//" Error: "//DftiErrorMessage(status)
     MSG_ERROR(err_msg)
   end if
 end if

end subroutine check_status
!!***

#endif

END MODULE m_dfti
!!***
