!!{\src2tex{textfont=tt}}
!!****f* ABINIT/pawdij
!! NAME
!! pawdij
!!
!! FUNCTION
!! Compute the pseudopotential strengths Dij of the PAW non local operator.
!! Also compute several contributions to Dij.
!! Can compute first-order strenghts Dij for RF calculations.
!!
!! COPYRIGHT
!! Copyright (C) 1998-2012 ABINIT group (FJ, MT)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt.
!!
!! INPUTS
!!  cplex=(RF calculations only) - 1 if RF 1st-order quantities are REAL, 2 if COMPLEX
!!  gprimd(3,3)=dimensional primitive translations for reciprocal space
!!  dtset <type(dataset_type)>=all input variables for this dataset
!!   ! atvshift(16,nsppol,my_natom)=potential energy shift for specific lm channel, spin and atom
!!  electronpositron <type(electronpositron_type)>=quantities for the electron-positron annihilation (optional argument)
!!  enunit=choice for units of output Dij
!!  fatvshift=factor that multiplies atvshift
!!  ipert=index of perturbation (used only for RF calculation ; set ipert<=0 for GS calculations.
!!  mpi_atmtab(:)=--optional-- indexes of the atoms treated by current proc
!!  mpi_comm_atom=--optional-- MPI communicator over atoms
!!  mpi_comm_fft=--optional-- MPI communicator over FT components
!!  my_natom=number of atoms treated by current processor
!!  natom=total number of atoms in cell
!!  nfft=total number of FFt grid
!!  nspden=number of spin-density components
!!  ntypat=number of types of atoms in unit cell.
!!  paral_kgb=Flag related to the kpoint-band-fft parallelism
!!  paw_an(my_natom) <type(paw_an_type)>=paw arrays given on angular mesh
!!  paw_ij(my_natom) <type(paw_ij_type)>=paw arrays given on (i,j) channels
!!  pawang <type(pawang_type)>=paw angular mesh and related data
!!  pawfgrtab(my_natom) <type(pawfgrtab_type)>=atomic data given on fine rectangular grid
!!  pawprtvol=control print volume and debugging output for PAW
!!  pawrad(ntypat) <type(pawrad_type)>=paw radial mesh and related data
!!  pawrhoij(my_natom) <type(pawrhoij_type)>= paw rhoij occupancies and related data
!!  pawspnorb=flag: 1 if spin-orbit coupling is activated
!!  pawtab(ntypat) <type(pawtab_type)>=paw tabulated starting data
!!  pawxcdev=Choice of XC development (0=no dev. (use of angular mesh) ; 1 or 2=dev. on moments)
!!  qphon(3)=wavevector of the phonon
!!  ucvol=unit cell volume
!!  vtrial(cplex*nfft,nspden)=GS potential
!!  vxc(cplex*nfft,nspden)=XC potential (Hartree) on the fine FFT mesh
!!  xred(3,my_natom)= reduced atomic coordinates
!!
!! OUTPUT
!!  paw_ij(iatom)%dij(cplex_dij*lmn2_size,ndij)= total Dij terms (GS calculation, ipert=0)
!!                                               total 1st-order Dij terms (RF ccalc., ipert>0)
!!  May be complex if cplex_dij=2
!!        dij(:,:,1) contains Dij^up-up
!!        dij(:,:,2) contains Dij^dn-dn
!!        dij(:,:,3) contains Dij^up-dn (only if nspinor=2)
!!        dij(:,:,4) contains Dij^dn-up (only if nspinor=2)
!!  May also compute paw_ij(iatom)%dij0,paw_ij(iatom)%dijhartree,paw_ij(iatom)%dijxc,
!!                   paw_ij(iatom)%dijxc_hat,paw_ij(iatom)%dijxc_val,
!!                   paw_ij(iatom)%dijhat,paw_ij(iatom)dijso,
!!                   paw_ij(iatom)%dijU,paw_ij(iatom)%dijexxc
!!
!! NOTES
!!  Response function calculations:
!!    In order to compute first-order Dij, paw_an (resp. paw_ij) datastructures
!!    must contain first-order quantities, namely paw_an1 (resp. paw_ij1).
!!
!! PARENTS
!!      bethe_salpeter,respfn,scfcv,scfcv3,screening,sigma
!!
!! CHILDREN
!!      free_my_atmtab,get_my_atmtab,pawdijhartree,pawdijso,pawexpiqr,pawgylm
!!      pawpupot,print_paw_ij,simp_gen,timab,xsum_mpi
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine pawdij(cplex,dtset,enunit,fatvshift,gprimd,ipert,my_natom,natom,nfft,ngfft,&
&                 nspden,ntypat,paw_an,paw_ij,pawang,pawfgrtab,pawprtvol,pawrad,pawrhoij,&
&                 pawspnorb,pawtab,pawxcdev,qphon,ucvol,vtrial,vxc,xred,&
&                 electronpositron,&
&                 mpi_atmtab,mpi_comm_atom,mpi_comm_fft) ! optional arguments

 use defs_basis
 use defs_datatypes
 use defs_abitypes
 use m_xmpi
 use m_errors
 use m_profiling
 use m_paral_atom
 use m_radmesh,          only : simp_gen
 use m_pawrhoij,         only : pawrhoij_type
 use m_paw_toolbox,      only : print_paw_ij
 use m_electronpositron, only : electronpositron_type,electronpositron_calctype

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'pawdij'
 use interfaces_18_timing
 use interfaces_66_paw, except_this_one => pawdij
!End of the abilint section

 implicit none

!Arguments ---------------------------------------------
!scalars
 integer,intent(in) :: cplex,enunit,ipert,my_natom,natom,nfft,nspden,ntypat,pawprtvol,pawspnorb,pawxcdev
 integer,optional,intent(in) :: mpi_comm_atom,mpi_comm_fft
 real(dp),intent(in) :: fatvshift,ucvol
 type(electronpositron_type),pointer,optional :: electronpositron
 type(pawang_type),intent(in) :: pawang
!arrays
 integer,intent(in) :: ngfft(18)
 integer,optional,target,intent(in) :: mpi_atmtab(:)
 real(dp),intent(in) :: gprimd(3,3),qphon(3),vtrial(cplex*nfft,nspden)
 real(dp),intent(in) :: vxc(cplex*nfft,nspden),xred(3,natom)
 type(paw_an_type),intent(in) :: paw_an(my_natom)
 type(paw_ij_type),intent(inout) :: paw_ij(my_natom)
 type(pawfgrtab_type),intent(inout) :: pawfgrtab(my_natom)
 type(pawrad_type),intent(in) :: pawrad(ntypat)
 type(pawrhoij_type),intent(inout) :: pawrhoij(my_natom)
 type(pawtab_type),intent(in) :: pawtab(ntypat)
 type(dataset_type),intent(in) :: dtset

!Local variables ---------------------------------------
!scalars
 integer :: cplex_dij
 integer :: iatom,iatom_tot,ic,icount,idij,idijeff,idijend,ier,ij_size
 integer :: ilm,iln,ils,ils1,ilslm,ilslm1,im1,im2,in1,in2,ipositron,ipts,ir,ir1
 integer :: isel,ispden,itypat,j0lm,j0ln,jc,jlm,jln,klm,klm1,klmn,klmn1
 integer :: kln,l_size,lexexch,ll,lm0,lm_size,lmax,lmin
 integer :: lmn2_size,lmn_size,lpawu,mesh_size,mm
 integer :: need_dij0,need_dijhartree,need_dijxc,need_dijxc_hat,need_dijxc_val,need_dijhat
 integer :: need_dijso,need_dijU,need_dijexxc
 integer :: nfftot,nfgd,npts,nsploop,nsppol,optgr0,optgr1,usexcnhat
 logical :: has_phase,my_atmtab_allocated,need_to_print,paral_atom,qne0
 real(dp) :: vi,vr,VUKS,vxcij22,vxcij22_i,vxcij,vxcij_i,vxcijhat,vxcijhat_i,tmp
 character(len=500) :: msg
!arrays
 integer,allocatable :: indklmn(:,:)
 integer,pointer :: my_atmtab(:)
 logical,allocatable :: lmselect(:)
 real(dp) :: rdum(1),tsec(2)
 real(dp),allocatable :: coeffpawu(:)
 real(dp),allocatable :: dij0(:),dijhartree(:),dijxc(:),dijxc_hat(:),dijxc_val(:),dijhat(:)
 real(dp),allocatable :: dijhat_tmp(:),dijxchat_tmp(:),dijexxc(:),dijpawu(:),dijsymU(:,:)
 real(dp),allocatable :: ff(:),gg(:),prod(:),prodxchat(:),vpawu(:,:,:)
 real(dp),allocatable :: vxcij1(:),vxcij2(:),vxcijtot(:),vxcij1_val(:),vxcval_ij(:)
 real(dp),allocatable :: yylmr(:,:)

! *************************************************************************

 DBG_ENTER("COLL")

 call timab(561,1,tsec)

 if (nspden==4.and.cplex==2) then
   msg='  nspden=4 probably not compatible with cplex=2 !'
   MSG_BUG(msg)
 end if

 if (my_natom>0) then
   if (paw_ij(1)%ndij==4.and.paw_ij(1)%cplex_dij/=2) then
     msg='  invalid cplex size for Dij (4 Dij components) !'
     MSG_BUG(msg)
   end if
   if (paw_ij(1)%cplex/=paw_an(1)%cplex) then
     msg='  paw_ij()%cplex and paw_an()%cplex must be equal !'
     MSG_BUG(msg)
   end if
   if (ipert<=0.and.paw_ij(1)%cplex/=1) then
     msg='  cplex must be 1 for GS calculations !'
     MSG_BUG(msg)
   end if
   if (paw_ij(1)%cplex_dij<cplex) then
     msg='  cplex_dij must be >= cplex !'
     MSG_BUG(msg)
   end if
   if (paw_ij(1)%cplex/=cplex) then
     msg='  paw_ij()%cplex must be equal to cplex !'
     MSG_BUG(msg)
   end if
   if(paw_ij(1)%has_dij==0) then
     msg='  dij must be allocated !'
     MSG_BUG(msg)
   end if
   if ((paw_ij(1)%cplex==2).and.&
&   ((paw_an(1)%has_vxc   >0.and.paw_ij(1)%has_dijxc_hat==1).or.&
&   (paw_an(1)%has_vxcval>0.and.paw_ij(1)%has_dijxc_val==1))) then
     msg =' Computation of dijxchat/dijxcval not compatible with cplex=2 !'
     MSG_BUG(msg)
   end if
 end if

 ipositron=0
 if (present(electronpositron)) then
   ipositron=electronpositron_calctype(electronpositron)
   if (ipositron==1.and.pawtab(1)%has_kij/=2) then
     msg=' kij must be in memory for electronpositron%calctype=1 !'
     MSG_BUG(msg)
   end if
 end if
 if (any(pawtab(:)%useexexch>0).and.ipert<=0.and.ipositron/=1.and.nspden==4)  then
   msg=' Local exact-exch. not implemented for nspden=4 !'
   MSG_ERROR(msg)
 end if

 qne0=(qphon(1)**2+qphon(2)**2+qphon(3)**2>=1.d-15)
 if (my_natom>0) then
   if (pawfgrtab(1)%rfgd_allocated==0.and.ipert>0.and.ipert<=natom.and.qne0) then
     MSG_BUG('  pawfgrtab()%rfgd array must be allocated  !')
   end if
 end if

!Nothing to do for some perturbations (RF case)
 if (ipert==natom+1.or.ipert==natom+5) then
   do iatom=1,my_natom
     paw_ij(iatom)%dij=zero
     if (paw_ij(iatom)%has_dij0==1) paw_ij(iatom)%dij0=zero
     if (paw_ij(iatom)%has_dijhartree==1) paw_ij(iatom)%dijhartree=zero
     if (paw_ij(iatom)%has_dijxc==1) paw_ij(iatom)%dijxc=zero
     if (paw_ij(iatom)%has_dijxc_hat==1) paw_ij(iatom)%dijxc_hat=zero
     if (paw_ij(iatom)%has_dijxc_val==1) paw_ij(iatom)%dijxc_val=zero
     if (paw_ij(iatom)%has_dijhat==1) paw_ij(iatom)%dijhat=zero
     if (paw_ij(iatom)%has_dijso==1) paw_ij(iatom)%dijso=zero
     if (paw_ij(iatom)%has_dijU==1) paw_ij(iatom)%dijU=zero
     if (paw_ij(iatom)%has_dijexxc==1) paw_ij(iatom)%dijexxc=zero
   end do
   return
 end if

!Set up parallelism over atoms
 paral_atom=(present(mpi_comm_atom).and.(my_natom/=natom))
 nullify(my_atmtab);if (present(mpi_atmtab)) my_atmtab => mpi_atmtab
 call get_my_atmtab(mpi_comm_atom,my_atmtab,my_atmtab_allocated,paral_atom,natom,my_natom_ref=my_natom)

!----- Various initializations
 nfftot=ngfft(1)*ngfft(2)*ngfft(3)
 nsppol=1;nsploop=1
 if (my_natom>0) then
   nsppol=paw_ij(1)%nsppol
   nsploop=nsppol;if (paw_ij(1)%ndij==4) nsploop=4
 end if
 usexcnhat=maxval(pawtab(1:ntypat)%usexcnhat)
 VUKS=zero;npts=0

!----- Preliminary computation of Ylm,Ylpmp (only if angular mesh)
 if (pawxcdev==0) then
   npts=pawang%angl_size
   ABI_ALLOCATE(yylmr,(pawang%l_max**2*(pawang%l_max**2+1)/2,npts))
   do ipts=1,npts
     do jlm=1,pawang%l_max**2
       j0lm=jlm*(jlm-1)/2
       do ilm=1,jlm
         klm=j0lm+ilm
         yylmr(klm,ipts)=pawang%ylmr(ilm,ipts)*pawang%ylmr(jlm,ipts)
       end do
     end do
   end do
 end if

!------------------------------------------------------------------------
!----- Big loop over atoms
!------------------------------------------------------------------------

 do iatom=1,my_natom
   iatom_tot=iatom;if (paral_atom) iatom_tot=my_atmtab(iatom)

!  -----------------------------------------------------------------------
!  ----------- Allocations and initializations
!  -----------------------------------------------------------------------

   itypat=paw_ij(iatom)%itypat
   l_size=pawtab(itypat)%l_size
   mesh_size=pawrad(itypat)%mesh_size
   lmn_size=paw_ij(iatom)%lmn_size
   lmn2_size=paw_ij(iatom)%lmn2_size
   lm_size=paw_an(iatom)%lm_size
   ij_size=pawtab(itypat)%ij_size
   cplex_dij=paw_ij(iatom)%cplex_dij
   paw_ij(iatom)%dij(:,:)=zero
   nfgd=pawfgrtab(iatom)%nfgd

   need_dij0=0;need_dijhartree=0;need_dijxc=0;need_dijxc_hat=0;need_dijxc_val=0
   need_dijhat=0;need_dijso=0;need_dijU=0;need_dijexxc=0

   has_phase=.false.
   need_to_print=((abs(pawprtvol)>=1).and. &
&   (iatom_tot==1.or.iatom_tot==natom.or.pawprtvol<0))

!  === Determine which part od Dij have to be computed ===

!  if has_dij0==2, dij0 has already been stored...
   if (paw_ij(iatom)%has_dij0/=2) then
     if (paw_ij(iatom)%has_dij0==1) then
       need_dij0=1;paw_ij(iatom)%dij0(:)=zero
     else if (paw_ij(iatom)%has_dij0==0.and.need_to_print) then
       ABI_ALLOCATE(paw_ij(iatom)%dij0,(lmn2_size))
       need_dij0=1;paw_ij(iatom)%dij0(:)=zero
       paw_ij(iatom)%has_dij0=-1
     end if
   end if

!  if has_dijhartree==2, dijhartree has already been stored...
   if (paw_ij(iatom)%has_dijhartree/=2) then
     if (paw_ij(iatom)%has_dijhartree==1) then
       need_dijhartree=1;paw_ij(iatom)%dijhartree(:)=zero
     else if (paw_ij(iatom)%has_dijhartree==0) then
       ABI_ALLOCATE(paw_ij(iatom)%dijhartree,(cplex*lmn2_size))
       need_dijhartree=1;paw_ij(iatom)%dijhartree(:)=zero
       paw_ij(iatom)%has_dijhartree=-1
     end if
   end if

!  if has_dijxc==2, dijxc has already been computed...
   if (paw_an(iatom)%has_vxc>0.and.paw_ij(iatom)%has_dijxc/=2) then
     if (paw_ij(iatom)%has_dijxc==1) then
       need_dijxc=1;paw_ij(iatom)%dijxc(:,:)=zero
     else if (paw_ij(iatom)%has_dijxc==0.and.need_to_print) then
       ABI_ALLOCATE(paw_ij(iatom)%dijxc,(cplex_dij*lmn2_size,paw_ij(iatom)%ndij))
       need_dijxc=1;paw_ij(iatom)%dijxc(:,:)=zero
       paw_ij(iatom)%has_dijxc=-1
     end if
     ABI_ALLOCATE(dijxc,(cplex_dij*lmn2_size))
   end if

!  if has_dijxc_hat==2, dijxc_hat has already been computed...
   if (paw_an(iatom)%has_vxc>0.and.paw_ij(iatom)%has_dijxc_hat/=2) then
     if (paw_ij(iatom)%has_dijxc_hat==1) then
       need_dijxc_hat=1;paw_ij(iatom)%dijxc_hat(:,:)=zero
!      else if (paw_ij(iatom)%has_dijxc_hat==0.and.need_to_print) then
!      ABI_ALLOCATE(paw_ij(iatom)%dijxc_hat,(cplex_dij*lmn2_size,paw_ij(iatom)%ndij))
!      need_dijxc_hat=1;paw_ij(iatom)%dijxc_hat(:,:)=zero
!      paw_ij(iatom)%has_dijxc_hat=-1
     end if
     if (need_dijxc_hat==1) then
       ABI_ALLOCATE(dijxc_hat,(cplex_dij*lmn2_size))
     end if
   end if

!  if has_dijxc_val==2, dijxc_val has already been computed...
   if (paw_an(iatom)%has_vxcval>0.and.paw_ij(iatom)%has_dijxc_val/=2) then
     if (paw_ij(iatom)%has_dijxc_val==1) then
       need_dijxc_val=1;paw_ij(iatom)%dijxc_val(:,:)=zero
!      else if (paw_ij(iatom)%has_dijxc_val==0.and.need_to_print) then
!      ABI_ALLOCATE(paw_ij(iatom)%dijxc_val,(cplex_dij*lmn2_size,paw_ij(iatom)%ndij))
!      need_dijxc_val=1;paw_ij(iatom)%dijxc_val(:,:)=zero
!      paw_ij(iatom)%has_dijxc_val=-1
     end if
     if (need_dijxc_val==1) then
       ABI_ALLOCATE(dijxc_val,(cplex_dij*lmn2_size))
     end if
   end if

!  if has_dijhat==2, dijhat has already been computed...
   if (paw_ij(iatom)%has_dijhat/=2) then
     if (paw_ij(iatom)%has_dijhat==1) then
       need_dijhat=1;paw_ij(iatom)%dijhat(:,:)=zero
     else if (paw_ij(iatom)%has_dijhat==0.and.need_to_print) then
       ABI_ALLOCATE(paw_ij(iatom)%dijhat,(cplex_dij*lmn2_size,paw_ij(iatom)%ndij))
       need_dijhat=1;paw_ij(iatom)%dijhat(:,:)=zero
       paw_ij(iatom)%has_dijhat=-1
     end if
     ABI_ALLOCATE(dijhat,(cplex_dij*lmn2_size))
   end if

!  if has_dijso==2, dijso has already been computed...
   if (paw_ij(iatom)%has_dijso/=2.and.pawspnorb>0.and.ipert<=0.and.ipositron/=1) then
     if (paw_ij(iatom)%has_dijso==1) then
       need_dijso=1;paw_ij(iatom)%dijso(:,:)=zero
     else if (paw_ij(iatom)%has_dijso==0) then
       ABI_ALLOCATE(paw_ij(iatom)%dijso,(cplex_dij*lmn2_size,paw_ij(iatom)%ndij))
       need_dijso=1;paw_ij(iatom)%dijso(:,:)=zero
       paw_ij(iatom)%has_dijso=-1
     end if
     if (paw_an(iatom)%has_vhartree>0) then
       call pawdijso(iatom,itypat,my_natom,ntypat,paw_an,paw_ij,pawang,pawrad,pawtab,pawxcdev,dtset%spnorbscl)
     else
       paw_ij(iatom)%dijso(:,:)=zero
     end if
   end if

!  if has_dijU==2, dijU has already been computed...
   if (paw_ij(iatom)%has_dijU/=2.and.pawtab(itypat)%usepawu>0.and.ipert<=0.and.ipositron/=1) then
     if (paw_ij(iatom)%has_dijU==1) then
       need_dijU=1;paw_ij(iatom)%dijU(:,:)=zero
     else if (paw_ij(iatom)%has_dijU==0.and.need_to_print) then
       ABI_ALLOCATE(paw_ij(iatom)%dijU,(cplex_dij*lmn2_size,paw_ij(iatom)%ndij))
       need_dijU=1;paw_ij(iatom)%dijU(:,:)=zero
       paw_ij(iatom)%has_dijU=-1
     end if
     ABI_ALLOCATE(dijpawu,(cplex_dij*lmn2_size))
     if (paw_ij(iatom)%ndij==4) then
       ABI_ALLOCATE(dijsymU,(cplex_dij*lmn2_size,4))
     end if
   end if

!  if has_dijexxc==2, dijexxc has already been computed...
   if (paw_ij(iatom)%has_dijexxc/=2.and.pawtab(itypat)%useexexch>0.and.ipert<=0.and.ipositron/=1) then
     if (paw_ij(iatom)%has_dijexxc==1) then
       need_dijexxc=1;paw_ij(iatom)%dijexxc(:,:)=zero
     else if (paw_ij(iatom)%has_dijexxc==0.and.need_to_print) then
       ABI_ALLOCATE(paw_ij(iatom)%dijexxc,(cplex_dij*lmn2_size,paw_ij(iatom)%ndij))
       need_dijexxc=1;paw_ij(iatom)%dijexxc(:,:)=zero
       paw_ij(iatom)%has_dijexxc=-1
     end if
     ABI_ALLOCATE(dijexxc,(cplex_dij*lmn2_size))
   end if

   ABI_ALLOCATE(ff,(mesh_size))
   if (paw_ij(1)%cplex==2)  then
     ABI_ALLOCATE(gg,(mesh_size))
   end if
   ABI_ALLOCATE(indklmn,(6,lmn2_size))
   indklmn(:,:)=pawtab(itypat)%indklmn(:,:)

!  Eventually compute g_l(r).Y_lm(r) factors for the current atom (if not already done)
   if ((pawfgrtab(iatom)%gylm_allocated==0).or.&
&   ((ipert==iatom_tot).and.(pawfgrtab(iatom)%gylmgr_allocated==0))) then
     optgr0=0;optgr1=0
     if (pawfgrtab(iatom)%gylm_allocated==0) then
       if (associated(pawfgrtab(iatom)%gylm))  then
         ABI_DEALLOCATE(pawfgrtab(iatom)%gylm)
       end if
       ABI_ALLOCATE(pawfgrtab(iatom)%gylm,(nfgd,lm_size))
       pawfgrtab(iatom)%gylm_allocated=2;optgr0=1
     end if
     if ((ipert==iatom_tot).and.(pawfgrtab(iatom)%gylmgr_allocated==0)) then
       if (associated(pawfgrtab(iatom)%gylmgr))  then
         ABI_DEALLOCATE(pawfgrtab(iatom)%gylmgr)
       end if
       ABI_ALLOCATE(pawfgrtab(iatom)%gylmgr,(3,nfgd,lm_size))
       pawfgrtab(iatom)%gylmgr_allocated=2;optgr1=1
     end if
     call pawgylm(pawfgrtab(iatom)%gylm,pawfgrtab(iatom)%gylmgr,rdum,&
&     lm_size,nfgd,optgr0,optgr1,0,pawtab(itypat),&
&     pawfgrtab(iatom)%rfgd,pawfgrtab(iatom)%rfgd_allocated)
   end if

!  Eventually compute exp(-i.q.r) factors for the current atom (if not already done)
   if ((ipert==iatom_tot).and.qne0.and.(pawfgrtab(iatom)%expiqr_allocated==0)) then
     if (associated(pawfgrtab(iatom)%expiqr))  then
       ABI_DEALLOCATE(pawfgrtab(iatom)%expiqr)
     end if
     ABI_ALLOCATE(pawfgrtab(iatom)%expiqr,(2,nfgd))
     pawfgrtab(iatom)%expiqr_allocated=2
     call pawexpiqr(gprimd,pawfgrtab(iatom),qphon,xred(:,iatom_tot))
   end if
   has_phase=(qne0.and.ipert>0.and.pawfgrtab(iatom)%expiqr_allocated/=0)


!  ------------------------------------------------------------------------
!  ----- Loop over density components
!  ------------------------------------------------------------------------
   do idij=1,nsploop

!    ------------------------------------------------------------------------
!    ----------- Load atomic Dij0 into Dij
!    ------------------------------------------------------------------------
!    No contribution to 1st-order Dij

     if (idij<=2.and.ipert<=0) then

       ABI_ALLOCATE(dij0,(lmn2_size))
       dij0(:)=pawtab(itypat)%dij0(:)
       if (ipositron==1) dij0(:)=two*pawtab(itypat)%kij(:)-dij0(:)
       klmn1=1
       do klmn=1,lmn2_size
         paw_ij(iatom)%dij(klmn1,idij)=dij0(klmn)
         klmn1=klmn1+cplex_dij
       end do
       if (need_dij0==1.and.idij==1) paw_ij(iatom)%dij0(:)=dij0(:)
       ABI_DEALLOCATE(dij0)

     end if

!    ------------------------------------------------------------------------
!    ----------- Add Dij_Hartree to Dij
!    ------------------------------------------------------------------------

     if (idij<=2) then

       ABI_ALLOCATE(dijhartree,(cplex*lmn2_size))
       if (ipositron/=1.and.need_dijhartree==1.and.idij==1) then
         call pawdijhartree(cplex,iatom,my_natom,ntypat,paw_ij,pawrhoij,pawtab)
       end if
       dijhartree(:)=paw_ij(iatom)%dijhartree(:)
       if (cplex==1) then
         klmn1=1
         do klmn=1,lmn2_size
           paw_ij(iatom)%dij(klmn1,idij)=paw_ij(iatom)%dij(klmn1,idij)+dijhartree(klmn)
           klmn1=klmn1+cplex_dij
         end do
       else
         paw_ij(iatom)%dij(:,idij)=paw_ij(iatom)%dij(:,idij)+dijhartree(:)
       end if
       ABI_DEALLOCATE(dijhartree)

     end if

!    ------------------------------------------------------------------------
!    ----------- Add Dij_xc to Dij
!    ------------------------------------------------------------------------

!    cplex is for RF, cplex_dij is for non-collinear (nspinor==2)
     if (idij<=nsppol.or.(nspden==4.and.idij<=3).or.cplex==2) then
       ABI_ALLOCATE(vxcijtot,(cplex*lmn2_size))
       if (need_dijxc_val==1)  then
         ABI_ALLOCATE(vxcval_ij,(cplex*lmn2_size))
       end if

       idijend=idij+idij/3;if (cplex==2) idijend=idij
       do ispden=idij,idijend

         vxcijtot=zero
         if (need_dijxc_val==1) vxcval_ij=zero

!        ================================================
!        ===== First formalism: use (l,m) moments for vxc
!        ================================================
         if (pawxcdev/=0) then

           ABI_ALLOCATE(vxcij1,(cplex*ij_size))
           if (need_dijxc_val==1)  then
             ABI_ALLOCATE(vxcij1_val,(cplex*ij_size))
           end if
           ABI_ALLOCATE(lmselect,(lm_size))
           lmselect(:)=paw_an(iatom)%lmselect(:)
           if (ipositron/=0) lmselect(1:lm_size)=(lmselect(1:lm_size).or.electronpositron%lmselect_ep(1:lm_size,iatom))

           do klm=1,lm_size
!            Summing over klm moments.
             if (lmselect(klm)) then

!              ===== Vxc_ij_1 (tmp) =====
               vxcij1=zero
               if (cplex==1) then
                 do kln=1,ij_size
                   ff(1:mesh_size)= paw_an(iatom)%vxc1(1:mesh_size,klm,ispden)&
&                   *pawtab(itypat)%phiphj(1:mesh_size,kln)&
&                   - paw_an(iatom)%vxct1(1:mesh_size,klm,ispden)&
&                   *pawtab(itypat)%tphitphj(1:mesh_size,kln)
                   call simp_gen(vxcij1(kln),ff,pawrad(itypat))
                 end do
               else
                 do kln=1,ij_size
                   do ir=1,mesh_size
                     ir1=2*ir
                     ff(ir)= paw_an(iatom)%vxc1(ir1-1,klm,ispden)&
&                     *pawtab(itypat)%phiphj(ir,kln)&
&                     - paw_an(iatom)%vxct1(ir1-1,klm,ispden)&
&                     *pawtab(itypat)%tphitphj(ir,kln)
                     gg(ir)= paw_an(iatom)%vxc1(ir1,klm,ispden)&
&                     *pawtab(itypat)%phiphj(ir,kln)&
&                     - paw_an(iatom)%vxct1(ir1,klm,ispden)&
&                     *pawtab(itypat)%tphitphj(ir,kln)
                   end do
                   call simp_gen(vxcij1(2*kln-1),ff,pawrad(itypat))
                   call simp_gen(vxcij1(2*kln  ),gg,pawrad(itypat))
                 end do
               end if

!              ==== If required calculate valence-only onsite matrix elements ====
               if (need_dijxc_val==1) then ! compatible only with cplex=1
                 vxcij1_val(:)=zero
                 do kln=1,ij_size
                   ff(1:mesh_size)= &
&                   paw_an(iatom)%vxc1_val (1:mesh_size,klm,ispden)*pawtab(itypat)%phiphj  (1:mesh_size,kln)&
&                   -paw_an(iatom)%vxct1_val(1:mesh_size,klm,ispden)*pawtab(itypat)%tphitphj(1:mesh_size,kln)
                   call simp_gen(vxcij1_val(kln),ff,Pawrad(itypat))
                 end do
               end if

!              ===== Vxc_ij_2 (tmp) =====
               vxcij22=zero;vxcij22_i=zero
               if (usexcnhat/=0) then
                 ll=1+int(sqrt(dble(klm)-0.1))
                 if (cplex==1) then
                   ff(1:mesh_size)=paw_an(iatom)%vxct1(1:mesh_size,klm,ispden)&
&                   *pawtab(itypat)%shapefunc(1:mesh_size,ll)&
&                   *pawrad(itypat)%rad(1:mesh_size)**2
                   call simp_gen(vxcij22,ff,pawrad(itypat))
                 else
                   do ir=1,mesh_size
                     ir1=2*ir
                     tmp=pawtab(itypat)%shapefunc(ir,ll)*pawrad(itypat)%rad(ir)**2
                     ff(ir)=paw_an(iatom)%vxct1(ir1-1,klm,ispden)*tmp
                     gg(ir)=paw_an(iatom)%vxct1(ir1  ,klm,ispden)*tmp
                   end do
                   call simp_gen(vxcij22  ,ff,pawrad(itypat))
                   call simp_gen(vxcij22_i,gg,pawrad(itypat))
                 end if
               end if

!              ===== Accumulate over klm moments Vxc_ij_1 and Vxc_ij_2 =====
!              ===== into total Vxc_ij                                 =====
               if (cplex==1) then
                 do klmn=1,lmn2_size
                   klm1=indklmn(1,klmn);kln=indklmn(2,klmn)
                   vxcij=zero
                   isel=pawang%gntselect(klm,klm1)
                   if (isel>0) vxcij=vxcij1(kln)*pawang%realgnt(isel)
                   vxcijhat=pawtab(itypat)%qijl(klm,klmn)*vxcij22
!                  Accumulate into total Vxc_ij
                   vxcijtot(klmn)=vxcijtot(klmn)+vxcij-vxcijhat
!                  Store valence-only matrix elements
                   if (need_dijxc_val==1) then
                     if (isel>0) vxcval_ij(klmn)=vxcval_ij(klmn) + vxcij1_val(kln)*pawang%realgnt(isel)
                   end if
                 end do ! Loop klmn
               else
                 klmn1=1
                 do klmn=1,lmn2_size
                   klm1=indklmn(1,klmn);kln=indklmn(2,klmn)
                   vxcij=zero;vxcij_i=zero
                   isel=pawang%gntselect(klm,klm1)
                   if (isel>0) then
                     vxcij  =vxcij1(2*kln-1)*pawang%realgnt(isel)
                     vxcij_i=vxcij1(2*kln  )*pawang%realgnt(isel)
                   end if
                   vxcijhat  =pawtab(itypat)%qijl(klm,klmn)*vxcij22
                   vxcijhat_i=pawtab(itypat)%qijl(klm,klmn)*vxcij22_i
!                  Accumulate into total Vxc_ij
                   vxcijtot(klmn1  )=vxcijtot(klmn1   )+vxcij  -vxcijhat
                   vxcijtot(klmn1+1)=vxcijtot(klmn1+1 )+vxcij_i-vxcijhat_i
                   klmn1=klmn1+cplex
                 end do ! Loop klmn
               end if

             end if
           end do  ! Loop klm

           ABI_DEALLOCATE(lmselect)
           ABI_DEALLOCATE(vxcij1)
           if (need_dijxc_val==1)  then
             ABI_DEALLOCATE(vxcij1_val)
           end if

!          ================================================
!          ===== Second formalism: use vxc on r,theta,phi
!          ================================================
         else

           ABI_ALLOCATE(vxcij1,(cplex*ij_size))
           ABI_ALLOCATE(vxcij2,(cplex*l_size))
           if (need_dijxc_val==1)  then
             ABI_ALLOCATE(vxcij1_val,(cplex*ij_size))
           end if

!          ===== Loop on angular mesh =====
           do ipts=1,npts

!            ===== Vxc_ij_1 (tmp) =====
             vxcij1=zero
             if (cplex==1) then
               do kln=1,ij_size
                 ff(1:mesh_size)= paw_an(iatom)%vxc1(1:mesh_size,ipts,ispden)&
&                 *pawtab(itypat)%phiphj(1:mesh_size,kln)&
&                 - paw_an(iatom)%vxct1(1:mesh_size,ipts,ispden)&
&                 *pawtab(itypat)%tphitphj(1:mesh_size,kln)
                 call simp_gen(vxcij1(kln),ff,pawrad(itypat))
               end do
             else
               do kln=1,ij_size
                 do ir=1,mesh_size
                   ir1=2*ir
                   ff(ir)= paw_an(iatom)%vxc1(ir1-1,ipts,ispden)&
&                   *pawtab(itypat)%phiphj(ir,kln)&
&                   - paw_an(iatom)%vxct1(ir1-1,ipts,ispden)&
&                   *pawtab(itypat)%tphitphj(ir,kln)
                   gg(ir)= paw_an(iatom)%vxc1(ir1,ipts,ispden)&
&                   *pawtab(itypat)%phiphj(ir,kln)&
&                   - paw_an(iatom)%vxct1(ir1,ipts,ispden)&
&                   *pawtab(itypat)%tphitphj(ir,kln)
                 end do
                 call simp_gen(vxcij1(2*kln-1),ff,pawrad(itypat))
                 call simp_gen(vxcij1(2*kln  ),gg,pawrad(itypat))
               end do
             end if

!            ==== If required calculate valence-only matrix elements ====
             if (need_dijxc_val==1) then ! compatible only with cplex=1
               vxcij1_val(:)=zero
               do kln=1,ij_size
                 ff(1:mesh_size)= &
&                 paw_an(iatom)%vxc1_val (1:mesh_size,ipts,ispden)*pawtab(itypat)%phiphj  (1:mesh_size,kln)&
&                 -paw_an(iatom)%vxct1_val(1:mesh_size,ipts,ispden)*pawtab(itypat)%tphitphj(1:mesh_size,kln)
                 call simp_gen(vxcij1_val(kln),ff,Pawrad(itypat))
               end do
             end if

!            ===== Vxc_ij_2 (tmp) =====
             vxcij2=zero
             if (usexcnhat/=0) then
               if (cplex==1) then
                 do ils=1,l_size
                   ff(1:mesh_size)=paw_an(iatom)%vxct1(1:mesh_size,ipts,ispden)&
&                   *pawtab(itypat)%shapefunc(1:mesh_size,ils)&
&                   *pawrad(itypat)%rad(1:mesh_size)**2
                   call simp_gen(vxcij2(ils),ff,pawrad(itypat))
                 end do
               else
                 do ils=1,l_size
                   do ir=1,mesh_size
                     ir1=2*ir
                     tmp=pawtab(itypat)%shapefunc(ir,ils)*pawrad(itypat)%rad(ir)**2
                     ff(ir)=paw_an(iatom)%vxct1(ir1-1,ipts,ispden)*tmp
                     gg(ir)=paw_an(iatom)%vxct1(ir1  ,ipts,ispden)*tmp
                   end do
                   call simp_gen(vxcij2(2*ils-1),ff,pawrad(itypat))
                   call simp_gen(vxcij2(2*ils  ),gg,pawrad(itypat))
                 end do
               end if
             end if

!            ===== Integrate Vxc_ij_1 and Vxc_ij_2 over the angular mesh =====
!            ===== and accummulate in total Vxc_ij                       =====
             if (cplex==1) then
               do klmn=1,lmn2_size
                 klm=indklmn(1,klmn);kln=indklmn(2,klmn)
                 lmin=indklmn(3,klmn);lmax=indklmn(4,klmn)
                 vxcij=vxcij1(kln)*pawang%angwgth(ipts)*yylmr(klm,ipts)*four_pi
                 vxcijhat=zero
                 if (usexcnhat/=0) then
                   do ils=lmin,lmax,2
                     lm0=ils**2+ils+1
                     vr=four_pi*pawang%angwgth(ipts)*vxcij2(ils+1)
                     do mm=-ils,ils
                       ilslm=lm0+mm;isel=pawang%gntselect(ilslm,klm)
                       tmp=zero;if (isel>0) tmp=pawang%ylmr(ilslm,ipts)*pawtab(itypat)%qijl(ilslm,klmn)
                       vxcijhat=vxcijhat+tmp*vr
                     end do
                   end do
                 end if
!                Accumulate into total Vxc_ij
                 vxcijtot(klmn)=vxcijtot(klmn)+vxcij-vxcijhat
!                Store valence-only matrix elements
                 if (need_dijxc_val==1) then
                   tmp=vxcij1_val(kln)*pawang%angwgth(ipts)*yylmr(klm,ipts)*four_pi
                   vxcval_ij(klmn)=vxcval_ij(klmn)+tmp
                 end if
               end do ! Loop klmn
             else
               klmn1=1
               do klmn=1,lmn2_size
                 klm=indklmn(1,klmn);kln=indklmn(2,klmn)
                 lmin=indklmn(3,klmn);lmax=indklmn(4,klmn)
                 vxcij  =vxcij1(2*kln-1)*pawang%angwgth(ipts)*yylmr(klm,ipts)*four_pi
                 vxcij_i=vxcij1(2*kln  )*pawang%angwgth(ipts)*yylmr(klm,ipts)*four_pi
                 vxcijhat=zero;vxcijhat_i=zero
                 if (usexcnhat/=0) then
                   do ils=lmin,lmax,2
                     lm0=ils**2+ils+1;ils1=2*(ils+1)
                     vr=four_pi*pawang%angwgth(ipts)*vxcij2(ils1-1)
                     vi=four_pi*pawang%angwgth(ipts)*vxcij2(ils1  )
                     do mm=-ils,ils
                       ilslm=lm0+mm;isel=pawang%gntselect(ilslm,klm)
                       tmp=zero;if (isel>0) tmp=pawang%ylmr(ilslm,ipts)*pawtab(itypat)%qijl(ilslm,klmn)
                       vxcijhat  =vxcijhat  +vr*tmp
                       vxcijhat_i=vxcijhat_i+vi*tmp
                     end do
                   end do
                 end if
!                Accumulate into total Vxc_ij
                 vxcijtot(klmn1  )=vxcijtot(klmn1   )+vxcij  -vxcijhat
                 vxcijtot(klmn1+1)=vxcijtot(klmn1+1 )+vxcij_i-vxcijhat_i
                 klmn1=klmn1+cplex
               end do ! Loop klmn
             end if
           end do  ! Loop ipts

           ABI_DEALLOCATE(vxcij1)
           ABI_DEALLOCATE(vxcij2)
           if (need_dijxc_val==1)  then
             ABI_DEALLOCATE(vxcij1_val)
           end if

         end if  ! choice XC

         if (ispden<3) then
           dijxc(1:cplex*lmn2_size)=vxcijtot(1:cplex*lmn2_size)
           if (need_dijxc_val==1) dijxc_val(1:cplex*lmn2_size)=vxcval_ij(1:cplex*lmn2_size)
         else
           if (cplex==cplex_dij) then
             dijxc(1:cplex*lmn2_size)=vxcijtot(1:cplex*lmn2_size)
             if (need_dijxc_val==1) dijxc_val(1:cplex*lmn2_size)=vxcval_ij(1:cplex*lmn2_size)
           else ! Note that cplex_dij>=cplex
             klmn1=max(1,ispden-2)
             do klmn=1,lmn2_size
               dijxc(klmn1)=vxcijtot(klmn)
               klmn1=klmn1+cplex_dij
             end do
           end if
           if (need_dijxc_val==1) then ! compatible only with cplex=1
             klmn1=max(1,ispden-2)
             do klmn=1,lmn2_size
               dijxc_val(klmn1)=vxcval_ij(klmn)
               klmn1=klmn1+cplex_dij
             end do
           end if
         end if

       end do ! ispden
       ABI_DEALLOCATE(vxcijtot)
       if (need_dijxc_val==1)  then
         ABI_DEALLOCATE(vxcval_ij)
       end if

     else if (nspden==4.and.idij==4) then
       klmn1=2
       do klmn=1,lmn2_size
         dijxc(klmn1)=-dijxc(klmn1)
         klmn1=klmn1+cplex_dij
       end do
       if (need_dijxc_val==1) then
         klmn1=2
         do klmn=1,lmn2_size
           dijxc_val(klmn1)=-dijxc_val(klmn1)
           klmn1=klmn1+cplex_dij
         end do
       end if
     end if
     if ((idij<=nsppol.or.idij==2).and.cplex==1) then
       klmn1=1
       do klmn=1,lmn2_size
         paw_ij(iatom)%dij(klmn1,idij)=paw_ij(iatom)%dij(klmn1,idij)+dijxc(klmn)
         klmn1=klmn1+cplex_dij
       end do
       if (need_dijxc==1) then
         klmn1=1
         do klmn=1,lmn2_size
           paw_ij(iatom)%dijxc(klmn1,idij)=dijxc(klmn)
           klmn1=klmn1+cplex_dij
         end do
       end if
       if (need_dijxc_val==1) then
         klmn1=1
         do klmn=1,lmn2_size
           paw_ij(iatom)%dijxc_val(klmn1,idij)=dijxc_val(klmn)
           klmn1=klmn1+cplex_dij
         end do
       end if
     else if (nspden==4.or.cplex==2) then  ! cplex=cplex_dij
       paw_ij(iatom)%dij(:,idij)=paw_ij(iatom)%dij(:,idij)+dijxc(:)
       if (need_dijxc    ==1) paw_ij(iatom)%dijxc    (:,idij)=dijxc(:)
       if (need_dijxc_val==1) paw_ij(iatom)%dijxc_val(:,idij)=dijxc_val(:)
     end if

!    ------------------------------------------------------------------------
!    ----------- Add Dij_hat to Dij
!    ------------------------------------------------------------------------

     if (idij<=nsppol.or.(nspden==4.and.idij<=3).or.cplex==2) then

       ABI_ALLOCATE(dijhat_tmp,(cplex*lmn2_size))
       if (need_dijxc_hat==1.and.usexcnhat/=0)  then
         ABI_ALLOCATE(dijxchat_tmp,(cplex*lmn2_size))
       end if

       idijend=idij+idij/3;if (cplex==2) idijend=idij
       do ispden=idij,idijend

         dijhat_tmp=zero
!        Compute Int[V^(alpha,beta)(r).g_l(r).Y_lm(r)]
!        Remember: if nspden=4, V is stored as : V^11, V^22, V^12, i.V^21
         ABI_ALLOCATE(prod,(cplex*lm_size))
         prod=zero
         if (usexcnhat/=0) then
           if (has_phase) then
             if (cplex==1) then
               do ilslm=1,lm_size
                 do ic=1,nfgd
                   vr=vtrial(pawfgrtab(iatom)%ifftsph(ic),ispden)
                   prod(ilslm)=prod(ilslm)+vr*pawfgrtab(iatom)%gylm(ic,ilslm)&
&                   *pawfgrtab(iatom)%expiqr(1,ic)
                 end do
               end do
             else
               ilslm1=1
               do ilslm=1,lm_size
                 do ic=1,nfgd
                   jc=2*pawfgrtab(iatom)%ifftsph(ic)
                   vr=vtrial(jc-1,ispden);vi=vtrial(jc,ispden)
                   prod(ilslm1  )=prod(ilslm1  )+pawfgrtab(iatom)%gylm(ic,ilslm)&
&                   *(vr*pawfgrtab(iatom)%expiqr(1,ic)-vi*pawfgrtab(iatom)%expiqr(2,ic))
                   prod(ilslm1+1)=prod(ilslm1+1)+pawfgrtab(iatom)%gylm(ic,ilslm)&
&                   *(vr*pawfgrtab(iatom)%expiqr(2,ic)+vi*pawfgrtab(iatom)%expiqr(1,ic))
                 end do
                 ilslm1=ilslm1+cplex
               end do
             end if
           else ! no phase
             if (cplex==1) then
               do ilslm=1,lm_size
                 do ic=1,nfgd
                   vr=vtrial(pawfgrtab(iatom)%ifftsph(ic),ispden)
                   prod(ilslm)=prod(ilslm)+vr*pawfgrtab(iatom)%gylm(ic,ilslm)
                 end do
               end do
             else
               ilslm1=1
               do ilslm=1,lm_size
                 do ic=1,nfgd
                   jc=2*pawfgrtab(iatom)%ifftsph(ic)
                   vr=vtrial(jc-1,ispden);vi=vtrial(jc,ispden)
                   prod(ilslm1  )=prod(ilslm1  )+vr*pawfgrtab(iatom)%gylm(ic,ilslm)
                   prod(ilslm1+1)=prod(ilslm1+1)+vi*pawfgrtab(iatom)%gylm(ic,ilslm)
                 end do
                 ilslm1=ilslm1+cplex
               end do
             end if
           end if
         else ! usexcnhat=0
           if (has_phase) then
             if (cplex==1) then
               do ilslm=1,lm_size
                 do ic=1,nfgd
                   jc=pawfgrtab(iatom)%ifftsph(ic)
                   vr=vtrial(jc,ispden)-vxc(jc,ispden)
                   prod(ilslm)=prod(ilslm)+vr*pawfgrtab(iatom)%gylm(ic,ilslm)&
&                   *pawfgrtab(iatom)%expiqr(1,ic)
                 end do
               end do
             else
               ilslm1=1
               do ilslm=1,lm_size
                 do ic=1,nfgd
                   jc=2*pawfgrtab(iatom)%ifftsph(ic)
                   vr=vtrial(jc-1,ispden)-vxc(jc-1,ispden)
                   vi=vtrial(jc  ,ispden)-vxc(jc  ,ispden)
                   prod(ilslm1  )=prod(ilslm1  )+pawfgrtab(iatom)%gylm(ic,ilslm)&
&                   *(vr*pawfgrtab(iatom)%expiqr(1,ic)-vi*pawfgrtab(iatom)%expiqr(2,ic))
                   prod(ilslm1+1)=prod(ilslm1+1)+pawfgrtab(iatom)%gylm(ic,ilslm)&
&                   *(vr*pawfgrtab(iatom)%expiqr(2,ic)+vi*pawfgrtab(iatom)%expiqr(1,ic))
                 end do
                 ilslm1=ilslm1+cplex
               end do
             end if
           else ! no phase
             if (cplex==1) then
               do ilslm=1,lm_size
                 do ic=1,nfgd
                   jc=pawfgrtab(iatom)%ifftsph(ic)
                   vr=vtrial(jc,ispden)-vxc(jc,ispden)
                   prod(ilslm)=prod(ilslm)+vr*pawfgrtab(iatom)%gylm(ic,ilslm)
                 end do
               end do
             else
               ilslm1=1
               do ilslm=1,lm_size
                 do ic=1,nfgd
                   jc=2*pawfgrtab(iatom)%ifftsph(ic)-1
                   vr=vtrial(jc  ,ispden)-vxc(jc  ,ispden)
                   vi=vtrial(jc+1,ispden)-vxc(jc+1,ispden)
                   prod(ilslm1  )=prod(ilslm1  )+vr*pawfgrtab(iatom)%gylm(ic,ilslm)
                   prod(ilslm1+1)=prod(ilslm1+1)+vi*pawfgrtab(iatom)%gylm(ic,ilslm)
                 end do
                 ilslm1=ilslm1+cplex
               end do
             end if
           end if
         end if ! usexcnhat
         if (present(mpi_comm_fft) ) call xsum_mpi(prod,mpi_comm_fft,ier)

         if (need_dijxc_hat==1.and.usexcnhat/=0) then
           dijxchat_tmp(:)=zero
!          === Evaluate prodxchat i.e $\sum_{lm} \int g_l Ylm v_xc[tn+nhat+tnc]dr$ on the FFT mesh ===
!          * It does not depend on ij
           ABI_ALLOCATE(prodxchat,(lm_size))
           prodxchat(:)=zero
           do ilslm=1,lm_size
             do ic=1,nfgd
               prodxchat(ilslm)=prodxchat(ilslm)&
&               +vxc(pawfgrtab(iatom)%ifftsph(ic),ispden)*pawfgrtab(iatom)%gylm(ic,ilslm)
             end do
           end do
           if (present(mpi_comm_fft) ) call xsum_mpi(prodxchat,mpi_comm_fft,ier)
         end if

!        Compute Sum_(i,j)_LM { q_ij^L Int[V^(alpha,beta)(r).g_l(r).Y_lm(r)] }
         if (cplex==1) then
           do klmn=1,lmn2_size
             klm=indklmn(1,klmn)
             lmin=indklmn(3,klmn);lmax=indklmn(4,klmn)
             do ils=lmin,lmax,2
               lm0=ils**2+ils+1
               do mm=-ils,ils
                 ilslm=lm0+mm;isel=pawang%gntselect(lm0+mm,klm)
                 if (isel>0) dijhat_tmp(klmn)=dijhat_tmp(klmn)+prod(ilslm) &
&                 *pawtab(itypat)%qijl(ilslm,klmn)
               end do
             end do
           end do
           if (need_dijxc_hat==1.and.usexcnhat/=0) then
             do klmn=1,lmn2_size
               klm=indklmn(1,klmn)
               lmin=indklmn(3,klmn);lmax=indklmn(4,klmn)
               do ils=lmin,lmax,2
                 lm0=ils**2+ils+1
                 do mm=-ils,ils
                   ilslm=lm0+mm;isel=pawang%gntselect(lm0+mm,klm)
                   if (isel>0) dijxchat_tmp(klmn)=dijxchat_tmp(klmn)+prodxchat(ilslm) &
&                   *pawtab(itypat)%qijl(ilslm,klmn)
                 end do
               end do
             end do
           end if
         else
           do klmn=1,lmn2_size
             klm=indklmn(1,klmn);klmn1=2*klmn-1
             lmin=indklmn(3,klmn);lmax=indklmn(4,klmn)
             do ils=lmin,lmax,2
               lm0=ils**2+ils+1
               do mm=-ils,ils
                 ilslm=lm0+mm;ilslm1=2*ilslm;isel=pawang%gntselect(lm0+mm,klm)
                 if (isel>0) dijhat_tmp(klmn1:klmn1+1)=dijhat_tmp(klmn1:klmn1+1) &
&                 +prod(ilslm1-1:ilslm1)*pawtab(itypat)%qijl(ilslm,klmn)
               end do
             end do
           end do
         end if
         ABI_DEALLOCATE(prod)
         if (need_dijxc_hat==1.and.usexcnhat/=0)  then
           ABI_DEALLOCATE(prodxchat)
         end if

         if (cplex==1) then
           if (ispden<3) then
             dijhat(1:cplex*lmn2_size)=dijhat_tmp(1:cplex*lmn2_size)*ucvol/dble(nfftot)
             if (need_dijxc_hat==1.and.usexcnhat/=0) &
&             dijxc_hat(1:cplex*lmn2_size)=dijxchat_tmp(1:cplex*lmn2_size)*ucvol/dble(nfftot)
           else
             klmn1=max(1,ispden-2)
             do klmn=1,lmn2_size
               dijhat(klmn1)=dijhat_tmp(klmn)*ucvol/dble(nfftot)
               klmn1=klmn1+cplex_dij
             end do
             if (need_dijxc_hat==1.and.usexcnhat/=0) then
               klmn1=max(1,ispden-2)
               do klmn=1,lmn2_size
                 dijxc_hat(klmn1)=dijxchat_tmp(klmn)*ucvol/dble(nfftot)
                 klmn1=klmn1+cplex_dij
               end do
             end if
           end if
         else !cplex=2
           if (ispden<=3) then
             dijhat(1:cplex*lmn2_size)=dijhat_tmp(1:cplex*lmn2_size)*ucvol/dble(nfftot)
           else
!            Remember V(4) contains i.V^21
             klmn1=1
             do klmn=1,lmn2_size
               dijhat(klmn1  )= dijhat_tmp(klmn+1)*ucvol/dble(nfftot)
               dijhat(klmn1+1)=-dijhat_tmp(klmn  )*ucvol/dble(nfftot)
               klmn1=klmn1+cplex_dij
             end do
           end if
         end if

       end do !ispden

       ABI_DEALLOCATE(dijhat_tmp)
       if (need_dijxc_hat==1.and.usexcnhat/=0)  then
         ABI_DEALLOCATE(dijxchat_tmp)
       end if

     else if (nspden==4.and.idij==4) then  ! Note that cplex=1 here
       klmn1=2
       do klmn=1,lmn2_size
         dijhat(klmn1)=-dijhat(klmn1)
         klmn1=klmn1+cplex_dij
       end do
       if (need_dijxc_hat==1.and.usexcnhat/=0) then
         klmn1=2
         do klmn=1,lmn2_size
           dijxc_hat(klmn1)=-dijxc_hat(klmn1)
           klmn1=klmn1+cplex_dij
         end do
       end if
     end if

     if ((idij<=nsppol.or.idij==2).and.cplex==1) then
       klmn1=1
       do klmn=1,lmn2_size
         paw_ij(iatom)%dij(klmn1,idij)=paw_ij(iatom)%dij(klmn1,idij)+dijhat(klmn)
         klmn1=klmn1+cplex_dij
       end do
       if (need_dijhat==1) then
         klmn1=1
         do klmn=1,lmn2_size
           paw_ij(iatom)%dijhat(klmn1,idij)=dijhat(klmn)
           klmn1=klmn1+cplex_dij
         end do
       end if
       if (need_dijxc_hat==1.and.usexcnhat/=0) then
         klmn1=1
         do klmn=1,lmn2_size
           paw_ij(iatom)%dijxc_hat(klmn1,idij)=dijxc_hat(klmn)
           klmn1=klmn1+cplex_dij
         end do
       end if
     else if (nspden==4.or.cplex==2) then
       paw_ij(iatom)%dij(:,idij)=paw_ij(iatom)%dij(:,idij)+dijhat(:)
       if (need_dijhat==1) paw_ij(iatom)%dijhat(:,idij)=dijhat(:)
       if (need_dijxc_hat==1.and.usexcnhat/=0) paw_ij(iatom)%dijxc_hat(:,idij)=dijxc_hat(:)
     end if

!    ------------------------------------------------------------------------
!    ----------- Add RF frozen Dij to Dij_hat
!    ------------------------------------------------------------------------
!    ----------- RF only

     if (ipert>0.and.paw_ij(iatom)%has_dijfr==2) then

       paw_ij(iatom)%dij(:,idij)=paw_ij(iatom)%dij(:,idij)+paw_ij(iatom)%dijfr(:,idij)
       if (need_dijhat==1) paw_ij(iatom)%dijhat(:,idij)=paw_ij(iatom)%dijhat(:,idij)+paw_ij(iatom)%dijfr(:,idij)

     end if

!    ------------------------------------------------------------------------
!    ----------- Add Dij spin-orbit to Dij
!    ------------------------------------------------------------------------
!    No contribution to 1st-order Dij
     if (pawspnorb>0.and.ipert<=0.and.ipositron/=1) then

       paw_ij(iatom)%dij(:,idij)=paw_ij(iatom)%dij(:,idij)+paw_ij(iatom)%dijso(:,idij)

     end if

!    ------------------------------------------------------------------------
!    ----------- Add Dij_{LDA+U} to Dij
!    ------------------------------------------------------------------------
!    Dijpawu^{\sigma}_{mi,ni,mj,nj}=
!    \sum_{m,m'} [vpawu^{\sigma}_{m,m'}*phiphjint_{ni,nj}^{m,m'}]=
!    [vpawu^{\sigma}_{mi,mj}*phiphjint_{ni,nj}]
!    ------------------------------------------------------------------------
!    No contribution to 1st-order Dij

     if (pawtab(itypat)%usepawu>0.and.ipert<=0.and.ipositron/=1) then

       lpawu=pawtab(itypat)%lpawu
       if (idij<=nsppol.or.(paw_ij(iatom)%ndij==4.and.idij<=3).or.cplex==2) then
         idijend=idij+idij/3
         do idijeff=idij,idijend ! if ndij==4, idijeff is used to compute updn and dnup contributions

           ABI_ALLOCATE(vpawu,(paw_ij(iatom)%cplex_dij,lpawu*2+1,lpawu*2+1))
           if(pawtab(itypat)%usepawu<10) then ! if dmft, do not apply U in LDA+U
             call pawpupot(idijeff,paw_ij(iatom),pawprtvol,pawtab(itypat),vpawu,VUKS) ! idij=1,max(nsppol,nspinor**2)
           else
             vpawu=zero
             VUKS=zero
           end if
           dijpawu=zero
           do klmn=1,lmn2_size
             if(cplex_dij==1) then
               klmn1=klmn
             else
               klmn1=cplex_dij*klmn-1  ! klmn1=cplex_dij*klmn-cplex_dij/2
             end if
             im1=pawtab(itypat)%klmntomn(1,klmn)
             im2=pawtab(itypat)%klmntomn(2,klmn)
             in1=pawtab(itypat)%klmntomn(3,klmn)
             in2=pawtab(itypat)%klmntomn(4,klmn)
             lmin=pawtab(itypat)%indklmn(3,klmn)
             lmax=pawtab(itypat)%indklmn(4,klmn)
             if(lmin==0.and.lmax==2*lpawu) then
               icount=in1+(in2*(in2-1))/2
               if(pawtab(itypat)%ij_proj<icount)  then
                 msg=' PAW+U: Problem in the loop for calculating dijpawu'
                 MSG_BUG(msg)
               end if
               ABI_ALLOCATE(coeffpawu,(cplex_dij))
!              coeffpawu(:)=vpawu(:,im1,im2) ! use real and imaginary part
               coeffpawu(:)=vpawu(:,im2,im1) ! because of transposition in setnoccmmp (for the cplex_dij==2)
               if(dtset%natvshift/=0.and.idij<3.and.im1==im2) then
                 coeffpawu(1)=coeffpawu(1)+fatvshift*dtset%atvshift(im1,idij,iatom_tot)
               end if
               if(cplex_dij==1) then   !cplex_dij=nspinor=1
                 dijpawu(klmn1)=pawtab(itypat)%phiphjint(icount)*coeffpawu(1) ! *dtset%userra
               elseif (cplex_dij==2) then   !cplex_dij=nspinor=2
                 dijpawu(klmn1)=pawtab(itypat)%phiphjint(icount)*coeffpawu(1)
                 dijpawu(klmn1+1)=pawtab(itypat)%phiphjint(icount)*coeffpawu(2) !  spinor==2
               end if
!              write(std_out,*) "cplex_dij",cplex_dij,im1,im2
!              write(std_out,*) "vpawu",vpawu(:,im2,im1)
!              write(std_out,'(a,2i4,f8.5,f8.5)') "dijpawu1",idij,klmn1,dijpawu(klmn1),dijpawu(klmn1+1)
               ABI_DEALLOCATE(coeffpawu)
             end if
           end do ! klmn
           ABI_DEALLOCATE(vpawu)
!          dijsymU useful for printing
           if (ipert<=0.and.paw_ij(iatom)%ndij==4) then
             do klmn=1,cplex_dij*lmn2_size,2
               dijsymU(klmn,idijeff)=dijpawu(klmn)
               dijsymU(klmn+1,idijeff)=dijpawu(klmn+1)
             end do
           end if
         end do ! idijeff

!        else if (nspden==4.and.idij==4) then
!        klmn1=2
!        do klmn=1,lmn2_size
!        dijpawu(klmn1)=-dijpawu(klmn1) !  dij(dnup)=dij(updn)^*: change sign of imaginary part, real part stays identical
!        klmn1=klmn1+cplex_dij
!        end do
       end if


!      if ((idij<=nsppol.or.idij==2).and.cplex==1) then
!      if (paw_ij(iatom)%ndij<4) then
       if (cplex_dij==1)then
         klmn1=1
         do klmn=1,lmn2_size
           paw_ij(iatom)%dij(klmn1,idij)=paw_ij(iatom)%dij(klmn1,idij)+dijpawu(klmn)
           klmn1=klmn1+cplex_dij
         end do
         if (need_dijU==1) then
           klmn1=1
           do klmn=1,lmn2_size
             paw_ij(iatom)%dijU(klmn1,idij)=dijpawu(klmn)
             klmn1=klmn1+cplex_dij
           end do
         end if
       else if (nspden==4.or.cplex==2.or.paw_ij(iatom)%ndij==4.or.cplex_dij==2) then
         if(idij<=2)  then
           paw_ij(iatom)%dij(:,idij)=paw_ij(iatom)%dij(:,idij)+dijpawu(:)
           if (need_dijU==1) paw_ij(iatom)%dijU(:,idij)=dijpawu(:)
         else if(idij>2)  then
           paw_ij(iatom)%dij(:,idij)=paw_ij(iatom)%dij(:,idij)+dijsymU(:,idij)
           if (need_dijU==1) paw_ij(iatom)%dijU(:,idij)=dijsymU(:,idij)
         end if
       end if

       do klmn=1,lmn2_size
         klmn1=cplex_dij*klmn-1  ! klmn1=cplex_dij*klmn-cplex_dij/2
       end do

     end if

!    ------------------------------------------------------------------------
!    ----------- Add Dij_{local exact-exchange} to Dij
!    ------------------------------------------------------------------------
!    No contribution to 1st-order Dij

     if (pawtab(itypat)%useexexch>0.and.ipert<=0.and.ipositron/=1) then

       if (idij<=nsppol) then

         lexexch=pawtab(itypat)%lexexch
         ABI_ALLOCATE(vxcij1,(ij_size))
         ABI_ALLOCATE(vxcijtot,(lmn2_size))
         vxcijtot=zero
         do klm=1,lm_size
           if (paw_an(iatom)%lmselect(klm)) then
!            ===== Vxc_ij_1 (tmp) =====
             vxcij1=zero
             do jln=pawtab(itypat)%lnproju(1),pawtab(itypat)%lnproju(pawtab(itypat)%nproju)
               j0ln=jln*(jln-1)/2
               do iln=pawtab(itypat)%lnproju(1),jln
                 kln=j0ln+iln
                 ff(1:mesh_size)=paw_an(iatom)%vxc_ex(1:mesh_size,klm,idij) &
&                 *pawtab(itypat)%phiphj(1:mesh_size,kln)
                 call simp_gen(vxcij1(kln),ff,pawrad(itypat))
               end do
             end do
!            ===== Contribution to total Vxc_ij =====
             do klmn=1,lmn2_size
               lmin=pawtab(itypat)%indklmn(3,klmn)
               lmax=pawtab(itypat)%indklmn(4,klmn)
               if(lmin==0.and.lmax==2*lexexch) then
                 klm1=indklmn(1,klmn);kln=indklmn(2,klmn)
                 isel=pawang%gntselect(klm,klm1)
                 if (isel>0) vxcijtot(klmn)=vxcijtot(klmn)+vxcij1(kln)*pawang%realgnt(isel)
               end if
             end do ! Loop klmn
           end if
         end do  ! Loop klm
         ABI_DEALLOCATE(vxcij1)

         dijexxc=zero
         do klmn=1,lmn2_size
           in1=pawtab(itypat)%klmntomn(3,klmn)
           in2=pawtab(itypat)%klmntomn(4,klmn)
           lmin=pawtab(itypat)%indklmn(3,klmn)
           lmax=pawtab(itypat)%indklmn(4,klmn)
           if(lmin==0.and.lmax==2*lexexch) then
             icount=in1+(in2*(in2-1))/2
             if(pawtab(itypat)%ij_proj<icount)  then
               msg=' PAW exact-exchange: Problem in the loop for calculating dijexxc !'
               MSG_BUG(msg)
             end if
             dijexxc(klmn)=(paw_ij(iatom)%vpawx(1,klmn,idij)-vxcijtot(klmn))*pawtab(itypat)%exchmix
           end if
         end do
         ABI_DEALLOCATE(vxcijtot)

       end if

       if ((idij<=nsppol.or.idij==2).and.cplex==1) then
         klmn1=1
         do klmn=1,lmn2_size
           paw_ij(iatom)%dij(klmn1,idij)=paw_ij(iatom)%dij(klmn1,idij)+dijexxc(klmn)
           if (need_dijexxc==1) paw_ij(iatom)%dijexxc(klmn1,idij)=dijexxc(klmn)
           klmn1=klmn1+cplex_dij
         end do
       else if (nspden==4.or.cplex==2) then
         paw_ij(iatom)%dij(:,idij)=paw_ij(iatom)%dij(:,idij)+dijexxc(:)
         if (need_dijexxc==1) paw_ij(iatom)%dijexxc(:,idij)=dijexxc(:)
       end if

     end if

!    ----- End loops over iatom and idij
   end do

!  Update some flags
   if (paw_ij(iatom)%has_dij>=1) paw_ij(iatom)%has_dij=2
   if (need_dij0==1.and.paw_ij(iatom)%has_dij0>=1) paw_ij(iatom)%has_dij0=2
   if (need_dijhartree==1.and.paw_ij(iatom)%has_dijhartree>=1) paw_ij(iatom)%has_dijhartree=2
   if (need_dijxc==1.and.paw_ij(iatom)%has_dijxc>=1) paw_ij(iatom)%has_dijxc=2
   if (need_dijxc_hat==1.and.paw_ij(iatom)%has_dijxc_hat>=1) paw_ij(iatom)%has_dijxc_hat=2
   if (need_dijxc_val==1.and.paw_ij(iatom)%has_dijxc_val>=1) paw_ij(iatom)%has_dijxc_val=2
   if (need_dijhat==1.and.paw_ij(iatom)%has_dijhat>=1) paw_ij(iatom)%has_dijhat=2
   if (need_dijso==1.and.paw_ij(iatom)%has_dijso>=1) paw_ij(iatom)%has_dijso=2
   if (need_dijU==1.and.paw_ij(iatom)%has_dijU>=1) paw_ij(iatom)%has_dijU=2
   if (need_dijexxc==1.and.paw_ij(iatom)%has_dijexxc>=1) paw_ij(iatom)%has_dijexxc=2

!  Free temporary storage
   ABI_DEALLOCATE(indklmn)
   ABI_DEALLOCATE(ff)
   if (paw_ij(1)%cplex==2)  then
     ABI_DEALLOCATE(gg)
   end if
   if (allocated(dijxc)) then
     ABI_DEALLOCATE(dijxc)
   end if
   if (allocated(dijxc_hat)) then
     ABI_DEALLOCATE(dijxc_hat)
   end if
   if (allocated(dijxc_val)) then
     ABI_DEALLOCATE(dijxc_val)
   end if
   if (allocated(dijhat)) then
     ABI_DEALLOCATE(dijhat)
   end if
   if (allocated(dijpawu)) then
     ABI_DEALLOCATE(dijpawu)
   end if
   if (allocated(dijsymU)) then
     ABI_DEALLOCATE(dijsymU)
   end if
   if (allocated(dijexxc)) then
     ABI_DEALLOCATE(dijexxc)
   end if
   if (pawfgrtab(iatom)%gylm_allocated==2) then
     ABI_DEALLOCATE(pawfgrtab(iatom)%gylm)
     ABI_ALLOCATE(pawfgrtab(iatom)%gylm,(0,0))
     pawfgrtab(iatom)%gylm_allocated=0
   end if
   if (pawfgrtab(iatom)%gylmgr_allocated==2) then
     ABI_DEALLOCATE(pawfgrtab(iatom)%gylmgr)
     ABI_ALLOCATE(pawfgrtab(iatom)%gylmgr,(0,0,0))
     pawfgrtab(iatom)%gylmgr_allocated=0
   end if
   if (pawfgrtab(iatom)%expiqr_allocated==2) then
     ABI_DEALLOCATE(pawfgrtab(iatom)%expiqr)
     ABI_ALLOCATE(pawfgrtab(iatom)%expiqr,(0,0))
     pawfgrtab(iatom)%expiqr_allocated=0
   end if

 end do ! iatom

!Final printing
 if (paral_atom) then
   call print_paw_ij(paw_ij,unit=std_out,pawprtvol=pawprtvol,pawspnorb=pawspnorb,&
&   mpi_comm_atom=mpi_comm_atom,mpi_atmtab=my_atmtab,natom=natom,&
&   mode_paral='PERS',enunit=enunit,ipert=ipert)
 else
   call print_paw_ij(paw_ij,unit=std_out,pawprtvol=pawprtvol,pawspnorb=pawspnorb,&
&   mode_paral='COLL',enunit=enunit,ipert=ipert)
 end if

!Free temporary storage
 if (pawxcdev==0)then
   ABI_DEALLOCATE(yylmr)
 end if
 do iatom=1,my_natom
   if (paw_ij(iatom)%has_dij0==-1) then
     ABI_DEALLOCATE(paw_ij(iatom)%dij0)
     paw_ij(iatom)%has_dij0=0
   end if
   if (paw_ij(iatom)%has_dijhartree==-1) then
     ABI_DEALLOCATE(paw_ij(iatom)%dijhartree)
     paw_ij(iatom)%has_dijhartree=0
   end if
   if (paw_ij(iatom)%has_dijxc==-1) then
     ABI_DEALLOCATE(paw_ij(iatom)%dijxc)
     paw_ij(iatom)%has_dijxc=0
   end if
   if (paw_ij(iatom)%has_dijxc_hat==-1) then
     ABI_DEALLOCATE(paw_ij(iatom)%dijxc_hat)
     paw_ij(iatom)%has_dijxc_hat=0
   end if
   if (paw_ij(iatom)%has_dijxc_val==-1) then
     ABI_DEALLOCATE(paw_ij(iatom)%dijxc_val)
     paw_ij(iatom)%has_dijxc_val=0
   end if
   if (paw_ij(iatom)%has_dijhat==-1) then
     ABI_DEALLOCATE(paw_ij(iatom)%dijhat)
     paw_ij(iatom)%has_dijhat=0
   end if
   if (paw_ij(iatom)%has_dijso==-1) then
     ABI_DEALLOCATE(paw_ij(iatom)%dijso)
     paw_ij(iatom)%has_dijso=0
   end if
   if (paw_ij(iatom)%has_dijU==-1) then
     ABI_DEALLOCATE(paw_ij(iatom)%dijU)
     paw_ij(iatom)%has_dijU=0
   end if
   if (paw_ij(iatom)%has_dijexxc==-1) then
     ABI_DEALLOCATE(paw_ij(iatom)%dijexxc)
     paw_ij(iatom)%has_dijexxc=0
   end if
 end do

!Destroy atom table used for parallelism
 call free_my_atmtab(my_atmtab,my_atmtab_allocated)

 call timab(561,2,tsec)

 DBG_EXIT("COLL")

end subroutine pawdij
!!***
