!{\src2tex{textfont=tt}}
!!****f* ABINIT/pawgrnl
!!
!! NAME
!! pawgrnl
!!
!! FUNCTION
!! PAW: Add to GRadients of total energy due to non-local term of Hamiltonian
!!      the contribution due to Dij derivatives
!! In particular, compute contribution to forces, stresses, dyn. matrix
!! Remember: Vnl=Sum_ij[|p_i>Dij<p_j|]
!!
!! COPYRIGHT
!! Copyright (C) 1998-2012 ABINIT group (FJ, MT)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.
!!
!! INPUTS
!!  atindx1(natom)=index table for atoms, inverse of atindx
!!  dimnhat=second dimension of array nhat (0 or # of spin components)
!!  dyfr_cplex=1 if dyfrnl is real, 2 if it is complex
!!  gsqcut=Fourier cutoff on G^2 for "large sphere" of radius double that of the basis sphere
!!  mgfft=maximum size of 1D FFTs
!!  me_g0=--optional-- 1 if the current process treats the g=0 plane-wave (only needed when mpi_comm_fft is present)
!!  mpi_atmtab(:)=--optional-- indexes of the atoms treated by current proc
!!  mpi_comm_atom=--optional-- MPI communicator over atoms
!!  mpi_comm_fft=--optional-- MPI communicator over FFT components (=mpi_comm_grid if not present)
!!  mpi_comm_grid=--optional-- MPI communicator over real space grid components (=mpi_comm_fft if not present)
!!  my_natom=number of atoms treated by current processor
!!  natom=total number of atoms in cell
!!  nattyp(ntypat)=array describing how many atoms of each type in cell
!!  nfft=(effective) number of FFT grid points (for this processor)
!!  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/input_variables/vargs.htm#ngfft
!!  nhat(nfft,dimnhat)=compensation charge density on rectangular grid in real space
!!  nspden=number of spin-density components
!!  nsym=number of symmetries in space group
!!  ntypat=number of types of atoms
!!  optgr= 1 if gradients with respect to atomic position(s) have to be computed
!!  optgr2= 1 if 2nd gradients with respect to atomic position(s) have to be computed
!!  optstr= 1 if gradients with respect to strain(s) have to be computed
!!  paral_kgb=--optional-- 1 if "band-FFT" parallelism is activated (only needed when mpi_comm_fft is present)
!!  pawang <type(pawang_type)>=paw angular mesh and related data
!!  pawfgrtab(my_natom) <type(pawfgrtab_type)>=atomic data given on fine rectangular grid
!!  pawrhoij(my_natom) <type(pawrhoij_type)>= paw rhoij occupancies and related data
!!  pawtab(ntypat) <type(pawtab_type)>=paw tabulated starting data
!!  ph1d(2,3*(2*mgfft+1)*natom)=1-dim phase (structure factor) information
!!  psps <type(pseudopotential_type)>=variables related to pseudopotentials
!!  qphon(3)=wavevector of the phonon
!!  rprimd(3,3)=dimensional primitive translations in real space (bohr)
!!  symrec(3,3,nsym)=symmetries in reciprocal space, reduced coordinates
!!  typat(natom)=types of atoms
!!  vtrial(nfft,nspden)= total local potential
!!  vxc(nfft,nspden)=XC potential
!!  xred(3,natom)=reduced dimensionless atomic coordinates
!!
!! SIDE EFFECTS
!!  At input, these terms contain the contribution from the non-local projector derivatives
!!  At output, they are updated with the contribution of the Dij derivatives
!!  ==== if optgr=1 ====
!!   grnl(3*natom) =gradients of NL energy wrt atomic coordinates
!!  ==== if optstr=1 ====
!!   nlstr(6) =gradients of NL energy wrt strains
!!  ==== if optgr2=1 ====
!!   dyfrnl(dyfr_cplex,3,3,natom,natom) =2nd gradients of NL energy wrt atomic coordinates
!!
!! NOTES
!!   In the case of parallelisation over atoms and calculation of the dynamical matrix (optgr2=1),
!!   several data are gathered and are no longer distributed inside this routine.
!!
!! PARENTS
!!      dyfnl3,etotfor,forstr
!!
!! CHILDREN
!!      atm2fft3,free_my_atmtab,get_my_atmtab,metric,pawexpiqr,pawfgrtab_free
!!      pawfgrtab_gather,pawgylm,rhoij_free,rhoij_gather,rhoij_nullify
!!      stresssym,xsum_mpi
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine pawgrnl(atindx1,dimnhat,dyfrnl,dyfr_cplex,grnl,gsqcut,mgfft,my_natom,natom,&
&                  nattyp,nfft,ngfft,nhat,nlstr,nspden,nsym,ntypat,optgr,optgr2,optstr,&
&                  pawang,pawfgrtab,pawrhoij,pawtab,ph1d,psps,qphon,rprimd,symrec,typat,vtrial,vxc,xred, &
&                  mpi_atmtab,mpi_comm_atom,mpi_comm_fft,mpi_comm_grid,me_g0,paral_kgb) ! optional arguments (parallelism)

 use m_profiling

 use defs_basis
 use defs_datatypes
 use defs_abitypes
 use m_xmpi
 use m_errors
 use m_paral_atom
 use m_pawrhoij, only : pawrhoij_type, rhoij_free, rhoij_gather, rhoij_nullify
 use m_paw_toolbox, only : pawfgrtab_free,pawfgrtab_gather

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'pawgrnl'
 use interfaces_41_geometry
 use interfaces_65_psp
 use interfaces_66_paw, except_this_one => pawgrnl
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: dimnhat,dyfr_cplex,mgfft,my_natom,natom,nfft,nspden,nsym,ntypat
 integer,intent(in) :: optgr,optgr2,optstr
 integer,optional,intent(in) :: me_g0,mpi_comm_atom,mpi_comm_fft,mpi_comm_grid,paral_kgb
 real(dp),intent(in) :: gsqcut
 type(pawang_type),intent(in) :: pawang
 type(pseudopotential_type),intent(in) :: psps
!arrays
 integer,intent(in) :: atindx1(natom),nattyp(ntypat),ngfft(18)
 integer,intent(in) :: symrec(3,3,nsym),typat(natom)
 integer,optional,target,intent(in) :: mpi_atmtab(:)
 real(dp),intent(in) :: nhat(nfft,dimnhat),ph1d(2,3*(2*mgfft+1)*natom),qphon(3)
 real(dp),intent(in) :: rprimd(3,3),vxc(nfft,nspden),xred(3,natom)
 real(dp),intent(in),target :: vtrial(nfft,nspden)
 real(dp),intent(inout) :: dyfrnl(dyfr_cplex,3,3,natom,natom*optgr2),grnl(3*natom*optgr)
 real(dp),intent(inout) :: nlstr(6*optstr)
 type(pawfgrtab_type),target,intent(inout) :: pawfgrtab(my_natom)
 type(pawrhoij_type),target,intent(inout) ::  pawrhoij(my_natom)
 type(pawtab_type),intent(in) :: pawtab(ntypat)

!Local variables-------------------------------
!scalars
 integer :: bufind,bufsiz,cplex,dimvtrial,iatm,iatom,iatom_tot,iatshft,ic,idiag,idir,ier,ilm,irhoij,isel
 integer :: ishift_gr,ishift_gr2,ishift_str,ispden,ispvtr,itypat,jatom,jatom_tot,jatm,jc
 integer :: jrhoij,jtypat,klm,klmn,klmn1,ll,lm_size,lm_sizej,lmax,lmin,lmn2_size,mu,mua,mub,mushift
 integer :: my_me_g0,my_mpi_comm_fft,my_mpi_comm_grid,my_paral_kgb,nfftot,nfgd,ngrad,ngrad_nondiag
 integer :: ngradp,ngradp_nondiag,ngrhat,nsploop,opt1,opt2,opt3,optv,optn,optn2,qne0,usexcnhat
 logical :: has_phase,my_atmtab_allocated,paral_atom,paral_grid
 real(dp) :: dlt_tmp,fact_ucvol,grhat_x,hatstr_diag,ro,ro_d,ucvol
 character(len=500) :: msg
!arrays
 integer,parameter :: alpha(9)=(/1,2,3,3,3,2,2,1,1/),beta(9)=(/1,2,3,2,1,1,3,3,2/)
 integer,parameter :: mu9(9)=(/1,2,3,4,5,6,4,5,6/)
 integer,allocatable :: atindx(:),atm_indx(:)
 integer,pointer :: my_atmtab(:)
 real(dp) :: gmet(3,3),gprimd(3,3),hatstr(6),rdum(1),rmet(3,3),tmp(6)
 real(dp) :: work1(dyfr_cplex,3,3),work2(dyfr_cplex,3,3)
 real(dp),allocatable :: buf(:,:),buf1(:),dum_atmrho1(:,:),dum_gauss(:),dyfr(:,:,:,:,:)
 real(dp),allocatable :: grhat_tmp(:,:),hatgr(:),prod(:,:),prodp(:,:),vloc(:),vpsp1(:,:)
 real(dp),pointer :: vtrial_(:,:)
 type(coeff2_type),allocatable :: prod_nondiag(:),prodp_nondiag(:)
 type(pawfgrtab_type),pointer :: pawfgrtab_tot(:)
 type(pawrhoij_type),pointer :: pawrhoij_tot(:)

! *************************************************************************

 DBG_ENTER("COLL")

!Compatibility tests
 qne0=0;if (qphon(1)**2+qphon(2)**2+qphon(3)**2>=1.d-15) qne0=1
 if (my_natom>0) then
   if (optgr2==1.and.pawrhoij(1)%ngrhoij==0) then
     msg='Inconsistency between variables optgr2 and ngrhoij !'
     MSG_BUG(msg)
   end if
   if (optgr2==1.and.pawfgrtab(1)%rfgd_allocated==0.and.qne0==1) then
     MSG_BUG('  pawfgrtab()%rfgd array must be allocated  !')
   end if
 end if

!Set up parallelism over atoms
 paral_atom=(present(mpi_comm_atom).and.(my_natom/=natom))
 nullify(my_atmtab);if (present(mpi_atmtab)) my_atmtab => mpi_atmtab
 call get_my_atmtab(mpi_comm_atom,my_atmtab,my_atmtab_allocated,paral_atom,natom,my_natom_ref=my_natom)
 if (paral_atom) then
   ABI_ALLOCATE(atm_indx,(natom))
   atm_indx=-1
   do iatom=1,my_natom
     atm_indx(my_atmtab(iatom))=iatom
   end do
 end if

!Set up parallelism over real space grid and/or FFT
 my_mpi_comm_grid=xmpi_self;my_mpi_comm_fft=xmpi_self
 my_me_g0=1;my_paral_kgb=0;paral_grid=.false.
 if (present(mpi_comm_grid).or.present(mpi_comm_fft)) then
   if (present(mpi_comm_grid)) my_mpi_comm_grid=mpi_comm_grid
   if (present(mpi_comm_fft)) my_mpi_comm_fft=mpi_comm_fft
   if (.not.present(mpi_comm_grid)) my_mpi_comm_grid=mpi_comm_fft
   if (.not.present(mpi_comm_fft)) my_mpi_comm_fft=mpi_comm_grid
   paral_grid=(xcomm_size(my_mpi_comm_grid)>1)
   if (optgr2==1.and.present(mpi_comm_fft)) then
     if ((.not.present(paral_kgb)).or.(.not.present(me_g0))) then
       MSG_BUG(' Need paral_kgb and me_g0 with mpi_comm_fft !')
     end if
     my_me_g0=me_g0;my_paral_kgb=paral_kgb
   end if
 end if

!Compute different geometric tensor, as well as ucvol, from rprimd
 call metric(gmet,gprimd,-1,rmet,rprimd,ucvol)

!Retrieve local potential according to the use of nhat in XC
 usexcnhat=maxval(pawtab(1:ntypat)%usexcnhat)
 if (usexcnhat==0) then
   ABI_ALLOCATE(vtrial_,(nfft,1))
   dimvtrial=1
!$OMP PARALLEL DO PRIVATE(ic) SHARED(nfft,vtrial,vtrial_,vxc)
   do ic=1,nfft
     vtrial_(ic,1)=vtrial(ic,1)-vxc(ic,1)
   end do
 else
   dimvtrial=nspden
   vtrial_ => vtrial
 end if

!Initializations and allocations
 ngrhat=0;ngrad=0;ngradp=0;ngrad_nondiag=0;ngradp_nondiag=0
 ishift_gr=0;ishift_gr2=0;ishift_str=0
 cplex=1;if (qne0==1) cplex=2
 if (optgr==1) then
   ABI_ALLOCATE(hatgr,(3*natom))
   hatgr=zero
   ngrad=ngrad+3
   ngrhat=ngrhat+3
   ishift_gr2=ishift_gr2+3
 end if
 if (optgr2==1) then
   mu=min(dyfr_cplex,cplex)
   ngrad =ngrad +9
   ngradp=ngradp+3
   ngrad_nondiag =ngrad_nondiag +9*mu
   ngradp_nondiag=ngradp_nondiag+3*mu
   ngrhat=ngrhat+9*mu
 end if
 if (optstr==1) then
   hatstr=zero
   ngrad=ngrad+6
   ngrhat=ngrhat+6
   ishift_gr=ishift_gr+6
   ishift_gr2=ishift_gr2+6
 end if

 nfftot=ngfft(1)*ngfft(2)*ngfft(3)
 fact_ucvol=ucvol/dble(nfftot)
 nsploop=nspden;if (dimvtrial<nspden) nsploop=2
 if (optgr2/=1) then
   ABI_ALLOCATE(grhat_tmp,(ngrhat,1))
 else
   ABI_ALLOCATE(dyfr,(dyfr_cplex,3,3,natom,natom))
   dyfr=zero
   ABI_ALLOCATE(grhat_tmp,(ngrhat,natom))
   ABI_DATATYPE_ALLOCATE(prod_nondiag,(natom))
   ABI_DATATYPE_ALLOCATE(prodp_nondiag,(natom))
   ABI_ALLOCATE(atindx,(natom))
   ABI_ALLOCATE(vpsp1,(cplex*nfft,3))
   atindx(:)=0
   do iatom=1,natom
     iatm=0
     do while (atindx(iatom)==0.and.iatm<natom)
       iatm=iatm+1;if (atindx1(iatm)==iatom) atindx(iatom)=iatm
     end do
   end do
 end if

!The computation of dynamical matrix requires the knowledge of
!g_l(r).Y_lm(r) and derivatives for all atoms
 if (optgr2==1) then
   do jatom=1,my_natom
     jatom_tot=jatom;if (paral_atom) jatom_tot=my_atmtab(jatom)
     opt1=0;opt2=0;opt3=0
     lm_sizej=pawfgrtab(jatom)%l_size**2
     if (pawfgrtab(jatom)%gylm_allocated==0) then
       if (associated(pawfgrtab(jatom)%gylm))  then
         ABI_DEALLOCATE(pawfgrtab(jatom)%gylm)
       end if
       ABI_ALLOCATE(pawfgrtab(jatom)%gylm,(pawfgrtab(jatom)%nfgd,lm_sizej))
       pawfgrtab(jatom)%gylm_allocated=2;opt1=1
     end if
     if (pawfgrtab(jatom)%gylmgr_allocated==0) then
       if (associated(pawfgrtab(jatom)%gylmgr))  then
         ABI_DEALLOCATE(pawfgrtab(jatom)%gylmgr)
       end if
       ABI_ALLOCATE(pawfgrtab(jatom)%gylmgr,(3,pawfgrtab(jatom)%nfgd,lm_sizej))
       pawfgrtab(jatom)%gylmgr_allocated=2;opt2=1
     end if
     call pawgylm(pawfgrtab(jatom)%gylm,pawfgrtab(jatom)%gylmgr,&
&     pawfgrtab(jatom)%gylmgr2,lm_sizej,pawfgrtab(jatom)%nfgd,&
&     opt1,opt2,opt3,pawtab(typat(jatom_tot)),pawfgrtab(jatom)%rfgd,&
&     pawfgrtab(jatom)%rfgd_allocated)
   end do
 end if

!The computation of dynamical matrix requires some communications
 if (optgr2==1.and.paral_atom) then
   ABI_DATATYPE_ALLOCATE(pawfgrtab_tot,(natom))
   call pawfgrtab_gather(pawfgrtab,pawfgrtab_tot,mpi_comm_atom,ier, &
&   mpi_atmtab=my_atmtab)
   ABI_DATATYPE_ALLOCATE(pawrhoij_tot,(natom))
   call rhoij_nullify(pawrhoij_tot)
   call rhoij_gather(-1,mpi_comm_atom,pawrhoij,pawrhoij_tot, &
&   with_rhoijres=.false.,with_rhoij_=.false.,with_lmnmix=.false.)
 else
   pawfgrtab_tot => pawfgrtab
   pawrhoij_tot => pawrhoij
 end if

!Loops over types and atoms
 iatshft=0
 do itypat=1,ntypat

   lmn2_size=pawtab(itypat)%lmn2_size
   do iatm=iatshft+1,iatshft+nattyp(itypat)
     iatom_tot=atindx1(iatm)
     iatom=iatom_tot;if (paral_atom.and.optgr2/=1) iatom=atm_indx(iatom_tot)
     if (iatom==-1) cycle

     idiag=1;if (optgr2==1) idiag=iatm
     lm_size=pawfgrtab_tot(iatom)%l_size**2
     nfgd=pawfgrtab_tot(iatom)%nfgd

     ABI_ALLOCATE(vloc,(nfgd))
     if (ngrad>0)  then
       ABI_ALLOCATE(prod,(ngrad,lm_size))
     end if
     if (ngradp>0)  then
       ABI_ALLOCATE(prodp,(ngradp,lm_size))
     end if
     if (optgr2==1) then
       do jatm=1,natom ! NOTE: Not compatible with parallelization over atoms
         lm_sizej=pawfgrtab_tot(atindx1(jatm))%l_size**2
         ABI_ALLOCATE(prod_nondiag (jatm)%value,(ngrad_nondiag ,lm_sizej))
         ABI_ALLOCATE(prodp_nondiag(jatm)%value,(ngradp_nondiag,lm_sizej))
       end do
     end if

     grhat_tmp=zero

!    Eventually compute g_l(r).Y_lm(r) derivatives for the current atom (if not already done)
     if ((optgr==1.or.optstr==1).and.(optgr2/=1)) then
       if (pawfgrtab_tot(iatom)%gylmgr_allocated==0) then
         if (associated(pawfgrtab_tot(iatom)%gylmgr))  then
           ABI_DEALLOCATE(pawfgrtab_tot(iatom)%gylmgr)
         end if
         ABI_ALLOCATE(pawfgrtab_tot(iatom)%gylmgr,(3,pawfgrtab_tot(iatom)%nfgd,lm_size))
         pawfgrtab_tot(iatom)%gylmgr_allocated=2
         call pawgylm(rdum,pawfgrtab_tot(iatom)%gylmgr,rdum,&
&         lm_size,pawfgrtab_tot(iatom)%nfgd,0,1,0,pawtab(itypat),&
&         pawfgrtab_tot(iatom)%rfgd,pawfgrtab_tot(iatom)%rfgd_allocated)
       end if
     end if
     if (optgr2==1) then
       opt1=0;opt2=0;opt3=0
       if (pawfgrtab_tot(iatom)%gylmgr_allocated==0) then
         if (associated(pawfgrtab_tot(iatom)%gylmgr))  then
           ABI_DEALLOCATE(pawfgrtab_tot(iatom)%gylmgr)
         end if
         ABI_ALLOCATE(pawfgrtab_tot(iatom)%gylmgr,(3,pawfgrtab_tot(iatom)%nfgd,lm_size))
         pawfgrtab_tot(iatom)%gylmgr_allocated=2;opt2=1
       end if
       if (pawfgrtab_tot(iatom)%gylmgr2_allocated==0) then
         if (associated(pawfgrtab_tot(iatom)%gylmgr2))  then
           ABI_DEALLOCATE(pawfgrtab_tot(iatom)%gylmgr2)
         end if
         ABI_ALLOCATE(pawfgrtab_tot(iatom)%gylmgr2,(6,pawfgrtab_tot(iatom)%nfgd,lm_size))
         pawfgrtab_tot(iatom)%gylmgr2_allocated=2;opt3=1
       end if
       call pawgylm(pawfgrtab_tot(iatom)%gylm,pawfgrtab_tot(iatom)%gylmgr,&
&       pawfgrtab_tot(iatom)%gylmgr2,lm_size,pawfgrtab_tot(iatom)%nfgd,&
&       opt1,opt2,opt3,pawtab(itypat),pawfgrtab_tot(iatom)%rfgd,&
&       pawfgrtab_tot(iatom)%rfgd_allocated)
     end if

!    DEBUG XG120523   One more crazy write, needed for have the XLF12 compiler on IBM6 execute some tests correctly. DO NOT REMOVE !
!    jmb 2012     write(std_out,*)' pawgrnl : 12b, iatm= ',iatm
!    ENDDEBUG

!    Eventually compute exp(-i.q.r) factors for the current atom (if not already done)
     if (optgr2==1.and.qne0==1.and.(pawfgrtab_tot(iatom)%expiqr_allocated==0)) then
       if (associated(pawfgrtab_tot(iatom)%expiqr))  then
         ABI_DEALLOCATE(pawfgrtab_tot(iatom)%expiqr)
       end if
       ABI_ALLOCATE(pawfgrtab_tot(iatom)%expiqr,(2,nfgd))
       pawfgrtab_tot(iatom)%expiqr_allocated=2
       call pawexpiqr(gprimd,pawfgrtab_tot(iatom),qphon,xred)
     end if
     has_phase=(optgr2==1.and.pawfgrtab_tot(iatom)%expiqr_allocated/=0)

!    Eventually compute 1st-order potential
     if (optgr2==1) then
       optv=1;optn=0;optn2=1;idir=0
       call atm2fft3(atindx,dum_atmrho1,vpsp1,cplex,dum_gauss,gmet,gsqcut,idir,iatom,&
&       mgfft,psps%mqgrid_vl,natom,3,nfft,ngfft,ntypat,optn,optn2,optv,&
&       pawtab,ph1d,psps%qgrid_vl,qphon,typat,ucvol,psps%usepaw,psps%vlspl,xred,&
       mpi_comm_fft=my_mpi_comm_fft,me_g0=my_me_g0,paral_kgb=my_paral_kgb)
       if (cplex==1) then
         do ic=1,nfft
           tmp(1:3)=vpsp1(ic,1:3)
           do mu=1,3
             vpsp1(ic,mu)=-(gprimd(mu,1)*tmp(1)+gprimd(mu,2)*tmp(2)+gprimd(mu,3)*tmp(3))
           end do
         end do
       else ! cplex=2
         do ic=1,nfft
           jc=2*ic;tmp(1:3)=vpsp1(jc-1,1:3);tmp(4:6)=vpsp1(jc,1:3)
           do mu=1,3
             vpsp1(jc-1,mu)=-(gprimd(mu,1)*tmp(1)+gprimd(mu,2)*tmp(2)+gprimd(mu,3)*tmp(3))
             vpsp1(jc  ,mu)=-(gprimd(mu,1)*tmp(4)+gprimd(mu,2)*tmp(5)+gprimd(mu,3)*tmp(6))
           end do
         end do
       end if

     end if

!    Loop over spin components
     do ispden=1,nsploop

!      ----- Retrieve potential (subtle if nspden=4 ;-)
       if (nspden/=4) then
         ispvtr=min(dimvtrial,ispden)
         do ic=1,nfgd
           vloc(ic)=vtrial_(pawfgrtab_tot(iatom)%ifftsph(ic),ispvtr)
         end do
       else
         if (ispden==1) then
           ispvtr=min(dimvtrial,2)
           do ic=1,nfgd
             jc=pawfgrtab_tot(iatom)%ifftsph(ic)
             vloc(ic)=half*(vtrial_(jc,1)+vtrial_(jc,ispvtr))
           end do
         else if (ispden==4) then
           ispvtr=min(dimvtrial,2)
           do ic=1,nfgd
             jc=pawfgrtab_tot(iatom)%ifftsph(ic)
             vloc(ic)=half*(vtrial_(jc,1)-vtrial_(jc,ispvtr))
           end do
         else if (ispden==2) then
           ispvtr=min(dimvtrial,3)
           do ic=1,nfgd
             jc=pawfgrtab_tot(iatom)%ifftsph(ic)
             vloc(ic)=vtrial_(jc,ispvtr)
           end do
         else ! ispden=3
           ispvtr=min(dimvtrial,4)
           do ic=1,nfgd
             jc=pawfgrtab_tot(iatom)%ifftsph(ic)
             vloc(ic)=-vtrial_(jc,ispvtr)
           end do
         end if
       end if

!      ----- Compute projected scalars (integrals of vloc and Q_ij^hat)
!      ----- and/or their derivatives

       if (ngrad>0) prod=zero
       if (ngradp>0) prodp=zero

!      ==== Contribution to forces ====
       if (optgr==1) then
         do ilm=1,lm_size
           do ic=1,pawfgrtab_tot(iatom)%nfgd
             do mu=1,3
               prod(mu+ishift_gr,ilm)=prod(mu+ishift_gr,ilm)-&
&               vloc(ic)*pawfgrtab_tot(iatom)%gylmgr(mu,ic,ilm)
             end do
           end do
         end do
       end if
!      ==== Contribution to stresses ====
       if (optstr==1) then
         do ilm=1,lm_size
           do ic=1,pawfgrtab_tot(iatom)%nfgd
             do mu=1,6
               mua=alpha(mu);mub=beta(mu)
               prod(mu+ishift_str,ilm)=prod(mu+ishift_str,ilm) &
&               +half*vloc(ic) &
&               *(pawfgrtab_tot(iatom)%gylmgr(mua,ic,ilm)*pawfgrtab_tot(iatom)%rfgd(mub,ic)&
&               +pawfgrtab_tot(iatom)%gylmgr(mub,ic,ilm)*pawfgrtab_tot(iatom)%rfgd(mua,ic))
             end do
           end do
         end do
       end if
!      ==== Contribution to frozen wf part of dyn. matrix ====
       if (optgr2==1) then
!        Diagonal contribution
         do ilm=1,lm_size
           do ic=1,pawfgrtab_tot(iatom)%nfgd
             do mu=1,9
               prod(ishift_gr2+mu,ilm)=prod(ishift_gr2+mu,ilm) &
&               +half*vloc(ic)*pawfgrtab_tot(iatom)%gylmgr2(mu9(mu),ic,ilm)
             end do
             do mu=1,3
               prodp(mu,ilm)=prodp(mu,ilm) &
&               -vloc(ic)*pawfgrtab_tot(iatom)%gylmgr(mu,ic,ilm)
             end do
           end do
         end do
!        Off-diagonal contribution
         do jatm=1,natom ! NOTE: Not compatible with parallelization over atoms
           jatom=atindx1(jatm)
           lm_sizej=pawfgrtab_tot(jatom)%l_size**2
           prod_nondiag (jatm)%value=zero
           prodp_nondiag(jatm)%value=zero
           if (has_phase.or.cplex==2) then
             if (dyfr_cplex==1.or.cplex==1) then
               do ilm=1,lm_sizej
                 do ic=1,pawfgrtab_tot(jatom)%nfgd
                   jc=2*pawfgrtab_tot(jatom)%ifftsph(ic)
                   tmp(1:3)=vpsp1(jc-1,1:3)*pawfgrtab_tot(jatom)%expiqr(1,ic) &
&                   -vpsp1(jc  ,1:3)*pawfgrtab_tot(jatom)%expiqr(2,ic)
                   do mu=1,9
                     mua=alpha(mu);mub=beta(mu)
                     prod_nondiag(jatm)%value(mu,ilm)=prod_nondiag(jatm)%value(mu,ilm) &
&                     +tmp(mua)*pawfgrtab_tot(jatom)%gylmgr(mub,ic,ilm)
                   end do
                   do mu=1,3
                     prodp_nondiag(jatm)%value(mu,ilm)=prodp_nondiag(jatm)%value(mu,ilm) &
&                     -tmp(mu)*pawfgrtab_tot(jatom)%gylm(ic,ilm)
                   end do
                 end do
               end do
             else
               do ilm=1,lm_sizej
                 do ic=1,pawfgrtab_tot(jatom)%nfgd
                   jc=2*pawfgrtab_tot(jatom)%ifftsph(ic)
                   tmp(1:3)=vpsp1(jc-1,1:3)*pawfgrtab_tot(jatom)%expiqr(1,ic) &
&                   -vpsp1(jc  ,1:3)*pawfgrtab_tot(jatom)%expiqr(2,ic)
                   tmp(4:6)=vpsp1(jc-1,1:3)*pawfgrtab_tot(jatom)%expiqr(2,ic) &
&                   +vpsp1(jc  ,1:3)*pawfgrtab_tot(jatom)%expiqr(1,ic)
                   do mu=1,9
                     mua=alpha(mu);mub=beta(mu)
                     prod_nondiag(jatm)%value(mu,ilm)=prod_nondiag(jatm)%value(mu,ilm) &
&                     +tmp(mua)*pawfgrtab_tot(jatom)%gylmgr(mub,ic,ilm)
                     prod_nondiag(jatm)%value(9+mu,ilm)=prod_nondiag(jatm)%value(9+mu,ilm) &
&                     +tmp(3+mua)*pawfgrtab_tot(jatom)%gylmgr(mub,ic,ilm)
                   end do
                   do mu=1,3
                     prodp_nondiag(jatm)%value(mu,ilm)=prodp_nondiag(jatm)%value(mu,ilm) &
&                     -tmp(mu)*pawfgrtab_tot(jatom)%gylm(ic,ilm)
                     prodp_nondiag(jatm)%value(3+mu,ilm)=prodp_nondiag(jatm)%value(3+mu,ilm) &
&                     -tmp(3+mu)*pawfgrtab_tot(jatom)%gylm(ic,ilm)
                   end do
                 end do
               end do
             end if
           else ! no phase
             do ilm=1,lm_sizej
               do ic=1,pawfgrtab_tot(jatom)%nfgd
                 jc=pawfgrtab_tot(jatom)%ifftsph(ic)
                 do mu=1,9
                   mua=alpha(mu);mub=beta(mu)
                   prod_nondiag(jatm)%value(mu,ilm)=prod_nondiag(jatm)%value(mu,ilm) &
&                   +vpsp1(jc,mua)*pawfgrtab_tot(jatom)%gylmgr(mub,ic,ilm)
                 end do
                 do mu=1,3
                   prodp_nondiag(jatm)%value(mu,ilm)=prodp_nondiag(jatm)%value(mu,ilm) &
&                   -vpsp1(jc,mu)*pawfgrtab_tot(jatom)%gylm(ic,ilm)
                 end do
               end do
             end do
           end if
         end do
       end if

!      --- Reduction in case of parallelization ---
       if (paral_grid) then
         if (ngrad>0) then
           call xsum_mpi(prod,my_mpi_comm_grid,ier)
         end if
         if (ngradp>0) then
           call xsum_mpi(prodp,my_mpi_comm_grid,ier)
         end if
         if (optgr2==1) then
           bufsiz=0;bufind=0
           do jatm=1,natom
             bufsiz=bufsiz+pawfgrtab_tot(atindx1(jatm))%l_size**2
           end do
           ABI_ALLOCATE(buf,(ngrad_nondiag+ngradp_nondiag,bufsiz))
           do jatm=1,natom ! NOTE: Not compatible with parallelization over atoms
             lm_sizej=pawfgrtab_tot(atindx1(jatm))%l_size**2
             if (ngrad_nondiag> 0) buf(1:ngrad_nondiag,bufind+1:bufind+lm_sizej)= &
&             prod_nondiag (jatm)%value(:,:)
             if (ngradp_nondiag>0) buf(ngrad_nondiag+1:ngrad_nondiag+ngradp_nondiag, &
&             bufind+1:bufind+lm_sizej)=prodp_nondiag(jatm)%value(:,:)
             bufind=bufind+lm_sizej*(ngrad_nondiag+ngradp_nondiag)
           end do
           call xsum_mpi(buf,my_mpi_comm_grid,ier)
           bufind=0
           do jatm=1,natom ! NOTE: Not compatible with parallelization over atoms
             lm_sizej=pawfgrtab_tot(atindx1(jatm))%l_size**2
             if (ngrad> 0) prod_nondiag (jatm)%value(:,:)= &
&             buf(1:ngrad_nondiag,bufind+1:bufind+lm_sizej)
             if (ngradp>0) prodp_nondiag(jatm)%value(:,:)= &
&             buf(ngrad_nondiag+1:ngrad_nondiag+ngradp_nondiag,bufind+1:bufind+lm_sizej)
             bufind=bufind+lm_sizej*(ngrad_nondiag+ngradp_nondiag)
           end do
           ABI_DEALLOCATE(buf)
         end if
       end if

!      ---- Compute all gradients
       jrhoij=1
       do irhoij=1,pawrhoij_tot(iatom)%nrhoijsel
         klmn=pawrhoij_tot(iatom)%rhoijselect(irhoij)
         klm =pawtab(itypat)%indklmn(1,klmn)
         lmin=pawtab(itypat)%indklmn(3,klmn)
         lmax=pawtab(itypat)%indklmn(4,klmn)
         ro  =pawrhoij_tot(iatom)%rhoijp(jrhoij,ispden)
         ro_d=ro*pawtab(itypat)%dltij(klmn)
         do ll=lmin,lmax,2
           do ilm=ll**2+1,(ll+1)**2
             isel=pawang%gntselect(ilm,klm)
             if (isel>0) then
               grhat_x=ro_d*pawtab(itypat)%qijl(ilm,klmn)
               do mu=1,ngrad
                 grhat_tmp(mu,idiag)=grhat_tmp(mu,idiag)+grhat_x*prod(mu,ilm)
               end do
             end if
           end do
         end do
         jrhoij=jrhoij+pawrhoij_tot(iatom)%cplex
       end do ! irhoij

!      ---- Add additional terms for second gradients
       if (optgr2==1) then
!        Diagonal term including rhoij derivative
         klmn1=1
         do klmn=1,lmn2_size
           klm =pawtab(itypat)%indklmn(1,klmn)
           lmin=pawtab(itypat)%indklmn(3,klmn)
           lmax=pawtab(itypat)%indklmn(4,klmn)
           dlt_tmp=pawtab(itypat)%dltij(klmn)
           do ll=lmin,lmax,2
             do ilm=ll**2+1,(ll+1)**2
               isel=pawang%gntselect(ilm,klm)
               if (isel>0) then
                 ro_d=dlt_tmp*pawtab(itypat)%qijl(ilm,klmn)
                 do mu=1,9
                   mua=alpha(mu);mub=beta(mu)
                   grhat_tmp(ishift_gr2+mu,idiag)=grhat_tmp(ishift_gr2+mu,idiag)&
&                   +ro_d*pawrhoij_tot(iatom)%grhoij(mua,klmn1,ispden)*prodp(mub,ilm)
                 end do
               end if
             end do
           end do
           klmn1=klmn1+pawrhoij_tot(iatom)%cplex
         end do ! klmn
         do jatm=1,natom ! NOTE: Not compatible with parallelization over atoms
           jatom=atindx1(jatm);jtypat=typat(jatom)
!          Off-diagonal term including rhoij
           if (dyfr_cplex==1.or.cplex==1) then
             jrhoij=1
             do irhoij=1,pawrhoij_tot(jatom)%nrhoijsel
               klmn=pawrhoij_tot(jatom)%rhoijselect(irhoij)
               klm =pawtab(jtypat)%indklmn(1,klmn)
               lmin=pawtab(jtypat)%indklmn(3,klmn)
               lmax=pawtab(jtypat)%indklmn(4,klmn)
               ro  =pawrhoij_tot(jatom)%rhoijp(jrhoij,ispden)
               ro_d=ro*pawtab(jtypat)%dltij(klmn)
               do ll=lmin,lmax,2
                 do ilm=ll**2+1,(ll+1)**2
                   isel=pawang%gntselect(ilm,klm)
                   if (isel>0) then
                     grhat_x=ro_d*pawtab(jtypat)%qijl(ilm,klmn)
                     do mu=1,9
                       grhat_tmp(ishift_gr2+mu,jatm)=grhat_tmp(ishift_gr2+mu,jatm) &
&                       +grhat_x*prod_nondiag(jatm)%value(mu,ilm)
                     end do
                   end if
                 end do
               end do
               jrhoij=jrhoij+pawrhoij_tot(jatom)%cplex
             end do ! irhoij
           else
             jrhoij=1;mushift=ishift_gr2+9
             do irhoij=1,pawrhoij_tot(jatom)%nrhoijsel
               klmn=pawrhoij_tot(jatom)%rhoijselect(irhoij)
               klm =pawtab(jtypat)%indklmn(1,klmn)
               lmin=pawtab(jtypat)%indklmn(3,klmn)
               lmax=pawtab(jtypat)%indklmn(4,klmn)
               ro  =pawrhoij_tot(jatom)%rhoijp(jrhoij,ispden)
               ro_d=ro*pawtab(jtypat)%dltij(klmn)
               do ll=lmin,lmax,2
                 do ilm=ll**2+1,(ll+1)**2
                   isel=pawang%gntselect(ilm,klm)
                   if (isel>0) then
                     grhat_x=ro_d*pawtab(jtypat)%qijl(ilm,klmn)
                     do mu=1,9
                       grhat_tmp(ishift_gr2+mu,jatm)=grhat_tmp(ishift_gr2+mu,jatm) &
&                       +grhat_x*prod_nondiag(jatm)%value(mu,ilm)
                       grhat_tmp(mushift+mu,jatm)=grhat_tmp(mushift+mu,jatm) &
&                       +grhat_x*prod_nondiag(jatm)%value(9+mu,ilm)
                     end do
                   end if
                 end do
               end do
               jrhoij=jrhoij+pawrhoij_tot(jatom)%cplex
             end do ! irhoij
           end if
!          Off-diagonal term including rhoij derivative
           if (dyfr_cplex==1.or.cplex==1) then
             klmn1=1
             do klmn=1,pawrhoij_tot(jatom)%lmn2_size
               klm =pawtab(jtypat)%indklmn(1,klmn)
               lmin=pawtab(jtypat)%indklmn(3,klmn)
               lmax=pawtab(jtypat)%indklmn(4,klmn)
               dlt_tmp=pawtab(jtypat)%dltij(klmn)
               do ll=lmin,lmax,2
                 do ilm=ll**2+1,(ll+1)**2
                   isel=pawang%gntselect(ilm,klm)
                   if (isel>0) then
                     ro_d=dlt_tmp*pawtab(jtypat)%qijl(ilm,klmn)
                     do mu=1,9
                       mua=alpha(mu);mub=beta(mu)
                       grhat_tmp(ishift_gr2+mu,jatm)=grhat_tmp(ishift_gr2+mu,jatm)&
&                       +ro_d*pawrhoij_tot(jatom)%grhoij(mub,klmn1,ispden)*prodp_nondiag(jatm)%value(mua,ilm)
                     end do
                   end if
                 end do
               end do
               klmn1=klmn1+pawrhoij_tot(jatom)%cplex
             end do ! klmn
           else ! ngradp_nondiag>=6
             klmn1=1;mushift=ishift_gr2+9
             do klmn=1,pawrhoij_tot(jatom)%lmn2_size
               klm =pawtab(jtypat)%indklmn(1,klmn)
               lmin=pawtab(jtypat)%indklmn(3,klmn)
               lmax=pawtab(jtypat)%indklmn(4,klmn)
               dlt_tmp=pawtab(jtypat)%dltij(klmn)
               do ll=lmin,lmax,2
                 do ilm=ll**2+1,(ll+1)**2
                   isel=pawang%gntselect(ilm,klm)
                   if (isel>0) then
                     ro_d=dlt_tmp*pawtab(jtypat)%qijl(ilm,klmn)
                     do mu=1,9
                       mua=alpha(mu);mub=beta(mu)
                       grhat_tmp(ishift_gr2+mu,jatm)=grhat_tmp(ishift_gr2+mu,jatm)&
&                       +ro_d*pawrhoij_tot(jatom)%grhoij(mub,klmn1,ispden)*prodp_nondiag(jatm)%value(mua,ilm)
                       grhat_tmp(mushift+mu,jatm)=grhat_tmp(mushift+mu,jatm)&
&                       +ro_d*pawrhoij_tot(jatom)%grhoij(mub,klmn1,ispden)*prodp_nondiag(jatm)%value(3+mua,ilm)
                     end do
                   end if
                 end do
               end do
               klmn1=klmn1+pawrhoij_tot(jatom)%cplex
             end do ! klmn
           end if ! prodp_nondiag
         end do ! jatm
       end if ! optgr2==1

     end do ! ispden

!    Eventually free temporary space for g_l(r).Y_lm(r) factors
     if (pawfgrtab_tot(iatom)%gylmgr_allocated==2) then
       ABI_DEALLOCATE(pawfgrtab_tot(iatom)%gylmgr)
       ABI_ALLOCATE(pawfgrtab_tot(iatom)%gylmgr,(0,0,0))
       pawfgrtab_tot(iatom)%gylmgr_allocated=0
     end if
     if (optgr2==1) then
       if (pawfgrtab_tot(iatom)%gylmgr2_allocated==2) then
         ABI_DEALLOCATE(pawfgrtab_tot(iatom)%gylmgr2)
         ABI_ALLOCATE(pawfgrtab_tot(iatom)%gylmgr2,(0,0,0))
         pawfgrtab_tot(iatom)%gylmgr2_allocated=0
       end if
     end if

!    ==== Forces ====
!    Convert from cartesian to reduced coordinates
     if (optgr==1) then
       mushift=3*(iatm-1)
       tmp(1:3)=grhat_tmp(ishift_gr+1:ishift_gr+3,idiag)
       do mu=1,3
         hatgr(mu+mushift)=fact_ucvol*(rprimd(1,mu)*tmp(1)+rprimd(2,mu)*tmp(2)+rprimd(3,mu)*tmp(3))
       end do
     end if
!    ==== Stresses ====
     if (optstr==1) then
       hatstr(1:6)=hatstr(1:6)+ grhat_tmp(ishift_str+1:ishift_str+6,idiag)
     end if
!    ==== Frozen wf part of dyn. matrix ====
     if (optgr2==1) then
       do jatm=1,natom ! NOTE: Not compatible with parallelization over atoms
         do mu=1,9
           mua=alpha(mu);mub=beta(mu)
           dyfr(1,mub,mua,jatm,iatm)=grhat_tmp(ishift_gr2+mu,jatm)
         end do
         if (dyfr_cplex==2.and.cplex==2) then
           mushift=ishift_gr2+9
           do mu=1,9
             mua=alpha(mu);mub=beta(mu)
             dyfr(2,mub,mua,jatm,iatm)=grhat_tmp(mushift+mu,jatm)
           end do
         end if
       end do
     end if

!    End loops on types and atoms
     ABI_DEALLOCATE(vloc)
     if (ngrad>0)  then
       ABI_DEALLOCATE(prod)
     end if
     if (ngradp>0)  then
       ABI_DEALLOCATE(prodp)
     end if
     if (optgr2==1) then
       do jatm=1,natom
         ABI_DEALLOCATE(prod_nondiag(jatm)%value)
         ABI_DEALLOCATE(prodp_nondiag(jatm)%value)
       end do
     end if

   end do
   iatshft=iatshft+nattyp(itypat)
 end do

!Reduction in case of parallelisation over atoms
 if (paral_atom.and.optgr2/=1) then
   bufsiz=3*natom*optgr+6*optstr
   if (bufsiz>0) then
     ABI_ALLOCATE(buf1,(bufsiz))
     if (optgr==1) buf1(1:3*natom)=hatgr(1:3*natom)
     if (optstr==1) buf1(3*natom*optgr+1:3*natom*optgr+6)=hatstr(1:6)
     call xsum_mpi(buf1,mpi_comm_atom,ier)
     if (optgr==1) hatgr(1:3*natom)=buf1(1:3*natom)
     if (optstr==1) hatstr(1:6)=buf1(3*natom*optgr+1:3*natom*optgr+6)
     ABI_DEALLOCATE(buf1)
   end if
 end if

!Deallocate additional memory
 ABI_DEALLOCATE(grhat_tmp)
 if (optgr2==1) then
   ABI_DEALLOCATE(atindx)
   ABI_DEALLOCATE(vpsp1)
   ABI_DATATYPE_DEALLOCATE(prod_nondiag)
   ABI_DATATYPE_DEALLOCATE(prodp_nondiag)

   do jatom=1,natom
     if (pawfgrtab_tot(jatom)%gylm_allocated==2) then
       ABI_DEALLOCATE(pawfgrtab_tot(jatom)%gylm)
       ABI_ALLOCATE(pawfgrtab_tot(jatom)%gylm,(0,0))
       pawfgrtab_tot(jatom)%gylm_allocated=0
     end if
     if (pawfgrtab_tot(jatom)%gylmgr_allocated==2) then
       ABI_DEALLOCATE(pawfgrtab_tot(jatom)%gylmgr)
       ABI_ALLOCATE(pawfgrtab_tot(jatom)%gylmgr,(0,0,0))
       pawfgrtab_tot(jatom)%gylmgr_allocated=0
     end if
     if (pawfgrtab_tot(jatom)%expiqr_allocated==2) then
       ABI_DEALLOCATE(pawfgrtab_tot(jatom)%expiqr)
       ABI_ALLOCATE(pawfgrtab_tot(jatom)%expiqr,(0,0))
       pawfgrtab_tot(jatom)%expiqr_allocated=0
     end if
   end do
 end if

!===== Update forces =====
 if (optgr==1) then
   grnl(1:3*natom)=grnl(1:3*natom)+hatgr(1:3*natom)
   ABI_DEALLOCATE(hatgr)
 end if

!===== Convert stresses (add diag and off-diag contributions) =====
 if (optstr==1) then
!  Need to compute int[nhat*vtrial]
   hatstr_diag=zero
   if (nspden==1.or.dimvtrial==1) then
     do ic=1,nfft
       hatstr_diag=hatstr_diag+vtrial_(ic,1)*nhat(ic,1)
     end do
   else if (nspden==2) then
     do ic=1,nfft
       hatstr_diag=hatstr_diag+vtrial_(ic,1)*nhat(ic,2)+vtrial_(ic,2)*(nhat(ic,1)-nhat(ic,2))
     end do
   else if (nspden==4) then
     do ic=1,nfft
       hatstr_diag=hatstr_diag+half*(vtrial_(ic,1)*(nhat(ic,1)+nhat(ic,4)) &
&       +vtrial_(ic,2)*(nhat(ic,1)-nhat(ic,4))) &
&       +vtrial_(ic,3)*nhat(ic,2)+vtrial_(ic,4)*nhat(ic,3)
     end do
   end if
   if (paral_grid) then
     call xsum_mpi(hatstr_diag,my_mpi_comm_grid,ier)
   end if

!  Convert hat contribution
   hatstr(1:3)=(hatstr(1:3)+hatstr_diag)/dble(nfftot)
   hatstr(4:6)= hatstr(4:6)/dble(nfftot)
!  Add to already computed NL contrib
   nlstr(1:6)=nlstr(1:6)+hatstr(1:6)
!  Apply symmetries
   call stresssym(gprimd,nsym,nlstr,symrec)
 end if

!===== Convert dynamical matrix (from cartesian to reduced coordinates) =====
 if (optgr2==1) then
   do iatm=1,natom
     do jatm=1,natom
       do mua=1,3
         do mub=1,3
           work1(1,mua,mub)=dyfr(1,mub,mua,jatm,iatm)+dyfr(1,mua,mub,iatm,jatm)
         end do
       end do
       if (dyfr_cplex==2) then
         do mua=1,3
           do mub=1,3
             work1(2,mua,mub)=dyfr(2,mub,mua,jatm,iatm)-dyfr(2,mua,mub,iatm,jatm)
           end do
         end do
       end if
       do mu=1,3
         work2(:,:,mu)=rprimd(1,mu)*work1(:,:,1)+rprimd(2,mu)*work1(:,:,2)+rprimd(3,mu)*work1(:,:,3)
       end do
       do mub=1,3
         do mua=1,3
           dyfrnl(:,mua,mub,jatm,iatm)=dyfrnl(:,mua,mub,jatm,iatm) &   ! Already contains NL projectors contribution
&          +fact_ucvol*(rprimd(1,mua)*work2(:,1,mub) &
&           +rprimd(2,mua)*work2(:,2,mub) &
&           +rprimd(3,mua)*work2(:,3,mub))
         end do
       end do
     end do
   end do
   ABI_DEALLOCATE(dyfr)
 end if

!Destroy temporary space
 if (usexcnhat==0)  then
   ABI_DEALLOCATE(vtrial_)
 end if

!Destroy atom tables used for parallelism
 if (paral_atom) then
   ABI_DEALLOCATE(atm_indx)
   if (optgr2==1) then
     call pawfgrtab_free(pawfgrtab_tot)
     call rhoij_free(pawrhoij_tot)
     ABI_DATATYPE_DEALLOCATE(pawfgrtab_tot)
     ABI_DATATYPE_DEALLOCATE(pawrhoij_tot)
   end if
 end if
 call free_my_atmtab(my_atmtab,my_atmtab_allocated)

 DBG_ENTER("COLL")


end subroutine pawgrnl
!!***
