!{\src2tex{textfont=tt}}
!!****f* ABINIT/opernl4a
!! NAME
!! opernl4a
!!
!! FUNCTION
!! Operate with the non-local part of the hamiltonian,
!! from reciprocal space to projected quantities
!! (oprnl4b is from projected quantities to reciprocal space)
!!
!! COPYRIGHT
!! Copyright (C) 1998-2009 ABINIT group (DCA, XG, GMR, DRH)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt.
!!
!! INPUTS
!!  ffnl(npw,nffnl,lmnmax,ntypat)=nonlocal form factors on basis sphere.
!!  ia3=gives the number of the first atom in the subset presently treated
!!  idir=direction of the perturbation (needed if choice==2 or 5, and ndgxdt=1)
!!  indlmn(6,i,ntypat)= array giving l,m,n,lm,ln,s for i=ln
!!  ispinor=1 or 2, gives the spinorial component of ffnl to be used
!!  istwf_k=option parameter that describes the storage of wfs
!!  itypat = type of atom, needed for ffnl
!!  jproj(nlang)=number of projectors for each angular momentum
!!  kg_k(3,npw)=integer coords of planewaves in basis sphere
!!  kpg_k(npw,npkg)= (k+G) components and related data
!!  kpt(3)=real components of k point in terms of recip. translations
!!  lmnmax=max. number of (l,n) components over all type of psps
!!  matblk=dimension of the array ph3d
!!  mincat= maximum increment of atoms
!!  mlang1 = dimensions for dgxdis1
!!  mlang3 = one of the dimensions of the array gxa
!!  mlang4 = dimension for dgxds
!!  mlang5 = dimensions for dgxdis2
!!  mlang6 = dimension for d2gxds2
!!  mproj=maximum dimension for number of projection operators for each
!!    angular momentum for nonlocal pseudopotential
!!  ndgxdt=second dimension of dgxdt
!!  nffnl=third dimension of ffnl
!!  nincat = number of atoms in the subset here treated
!!  nkpg=second size of array kpg_k
!!  nlang = number of angular momenta to be treated = 1 + highest ang. mom.
!!  nloalg(5)=governs the choice of the algorithm for non-local operator.
!!  npw  = number of plane waves in reciprocal space
!!  ntypat = number of type of atoms, dimension needed for ffnl
!!  vect(2*npw)=starting vector in reciprocal space
!!  ph3d(2,npw,matblk)=three-dimensional phase factors
!!
!! OUTPUT
!!  gxa(2,mlang3,mincat,mproj)= projected scalars
!!  if(choice==2 .or choice==4 .or. choice==5 .or. choice==23)
!!   dgxdt(2,ndgxdt,mlang3,mincat,mproj)= gradients of projected scalars wrt coords
!!    or with respect to ddk
!!  if(choice==3 .or. choice==23)
!!   dgxds((2,mlang4,mincat,mproj) = gradients of projected scalars wrt strains
!!  if(choice==6)
!!   dgxdis((2,mlang1,mincat,mproj) = derivatives of projected scalars
!!    wrt coord. indexed for internal strain
!!   d2gxdis((2,mlang5,mincat,mproj) = 2nd derivatives of projected scalars
!!    wrt strain and coord
!!   d2gxds2((2,mlang6,mincat,mproj) = 2nd derivatives of projected scalars
!!    wrt strains
!!
!! NOTES
!! Operate with the non-local part of the hamiltonian for one type of
!! atom, and within this given type of atom, for a subset
!! of at most nincat atoms.
!! This routine basically replaces getgla (gxa here is the former gla),
!! except for the calculation of <G|dVnl/dk|C> or strain gradients.
!!
!! Present version decomposed according to iffkg
!!
!! PARENTS
!!      nonlop_pl
!!
!! CHILDREN
!!      mkffkg3
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine opernl4a(choice,dgxdis,dgxds,d2gxdis,d2gxds2,dgxdt,&
&  ffnl,gmet,gxa,ia3,idir,indlmn,ispinor,istwf_k,itypat,&
&  jproj,kg_k,kpg_k,kpt,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
&  mlang5,mlang6,mproj,ndgxdt,nffnl,nincat,nkpg,nlang,nloalg,npw,&
&  ntypat,ph3d,vect)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_15nonlocal, except_this_one => opernl4a
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: choice,ia3,idir,ispinor,istwf_k,itypat,lmnmax,matblk
 integer,intent(in) :: mincat,mlang1,mlang3,mlang4,mlang5,mlang6,mproj,ndgxdt
 integer,intent(in) :: nffnl,nincat,nkpg,nlang,npw,ntypat
!arrays
 integer,intent(in) :: indlmn(6,lmnmax,ntypat),jproj(nlang),kg_k(3,npw)
 integer,intent(in) :: nloalg(5)
 real(dp),intent(in) :: ffnl(npw,nffnl,lmnmax,ntypat),gmet(3,3),kpg_k(npw,nkpg)
 real(dp),intent(in) :: kpt(3),ph3d(2,npw,matblk),vect(2,npw)
 real(dp),intent(out) :: d2gxdis(2,mlang5,mincat,mproj)
 real(dp),intent(out) :: d2gxds2(2,mlang6,mincat,mproj)
 real(dp),intent(out) :: dgxdis(2,mlang1,mincat,mproj)
 real(dp),intent(out) :: dgxds(2,mlang4,mincat,mproj)
 real(dp),intent(out) :: dgxdt(2,ndgxdt,mlang3,mincat,mproj)
 real(dp),intent(out) :: gxa(2,mlang3,mincat,mproj)

!Local variables-------------------------------
!scalars
 integer :: chunk,ia,iaph3d,iffkg,iffkgk,iffkgs,iffkgs2,ig,ii,ilang,ilang2
 integer :: ilang3,ilang4,ilang5,ilang6,ilangx,iproj,ipw,ipw1,ipw2,jffkg,jj,jjs
 integer :: jump,mblkpw,mmproj,mu,nffkg,nffkgd,nffkge,nffkgk,nffkgs,nffkgs2
 integer :: nincpw,nproj,ntens,start
 real(dp) :: ai,ar,doti,dotr,gxai,gxar,sci1,sci2,sci3,sci4,sci5,sci6,sci7,sci8
 real(dp) :: scr1,scr2,scr3,scr4,scr5,scr6,scr7,scr8,two_pi2
 character(len=500) :: message
!arrays
 integer,allocatable :: parity(:)
 real(dp) :: tsec(2)
 real(dp),allocatable :: ffkg(:,:),kpgx(:,:),scalars(:,:),teffv(:,:)

! *************************************************************************

!DEBUG
!write(6,*)' opernl4a : enter'
!ENDDEBUG

!mblkpw sets the size of blocks of planewaves
 mblkpw=nloalg(3)

!jump governs, in fine, the use of registers in the most cpu
!time consuming part of the routine. Until now, jump=8 is the maximal value.
!The optimal value will be machine-dependent !
 jump=nloalg(2)

 two_pi2=two_pi*two_pi

!Get the actual maximum number of projectors
 mmproj=maxval(indlmn(3,:,itypat))

!Initialisation before blocking on the plane waves

!Put projected scalars to zero
 gxa(:,:,:,1:mmproj)=0.0d0
 if (choice==2 .or. choice==4 .or. choice==5 .or. choice==23) dgxdt(:,:,:,:,1:mmproj)=0.0d0
 if (choice==3 .or. choice==6 .or. choice==23) dgxds(:,:,:,1:mmproj)=0.0d0
 if (choice==6) then
  dgxdis(:,:,:,1:mmproj)=0.0d0
  d2gxdis(:,:,:,1:mmproj)=0.0d0
  d2gxds2(:,:,:,1:mmproj)=0.0d0
 end if

!Set up dimension of kpgx and allocate
!ntens sets the maximum number of independent tensor components
!over all allowed angular momenta; need 20 for spdf for tensors
!up to rank 3; to handle stress tensor, need up to rank 5
 ntens=1
 if(nlang>=2 .or. choice==2 .or. choice==4 .or. choice==5 .or. choice==23)ntens=4
 if(nlang>=3 .or. (choice==3.or.choice==23))ntens=10
 if(nlang>=4 .or. ((choice==3.or.choice==23) .and. nlang>=2) )ntens=20
 if(((choice==3.or.choice==23) .and. nlang>=3) .or. choice==6)ntens=35
 if(((choice==3.or.choice==23) .and. nlang==4) .or. (choice==6 .and. nlang>=2))ntens=56
 if(choice==6 .and. nlang>=3)ntens=84
 if(choice==6 .and. nlang==4)ntens=120

!Set up second dimension of ffkg array, and allocate
 nffkg=0 ; nffkge=0 ; nffkgd=0 ; nffkgk=0 ; nffkgs=0 ; nffkgs2=0
 do ilang=1,nlang
! Get the number of projectors for that angular momentum
  nproj=jproj(ilang)
! If there is a non-local part, accumulate the number of vectors needed
! The variables ilang below are the number of independent tensors of
! various ranks, the variable names being more historical than logical.
! ilang2=number of rank ilang-1
! ilang3=number of rank ilang+1
! ilang4=number of rank ilang
! ilang5=number of rank ilang+2
! ilang6=number of rank ilang+3
  if(nproj>0)then
   ilang2=(ilang*(ilang+1))/2
   nffkge=nffkge+nproj*ilang2
   if(choice==5)nffkgk=nffkgk+nproj*(2*ilang2-ilang)
   if(choice==2 .or. choice==4 .or. choice==23)nffkgd=nffkgd+ndgxdt*nproj*ilang2
   if(choice==3 .or. choice==6 .or. choice==23)then
    ilang3=((ilang+2)*(ilang+3))/2
    nffkgs=nffkgs+nproj*ilang3
   end if
   if(choice==6)then
    ilang4=((ilang+1)*(ilang+2))/2
    ilang5=((ilang+3)*(ilang+4))/2
    ilang6=((ilang+4)*(ilang+5))/2
    nffkgs2=nffkgs2+nproj*(ilang4+ilang5+ilang6)
   end if
  end if
 end do
 nffkg=nffkge+nffkgd+nffkgs+nffkgs2+nffkgk

!DEBUG
!write(6,*)' jproj(1:nlang)',jproj(1:nlang)
!write(6,*)' nffkg,nffkge,nffkgd,nffkgs,nffkgk',nffkg,nffkge,nffkgd,nffkgs,nffkgk
!ENDDEBUG

!Loop on subsets of plane waves (blocking)
!$OMP PARALLEL DEFAULT(PRIVATE) &
!$OMP&SHARED(choice,dgxds,dgxdt,ffnl,gmet,gxa,ia3,idir,indlmn,ispinor) &
!$OMP&SHARED(istwf_k,itypat,jproj,jump,kg_k,kpg_k,kpt,lmnmax,mblkpw,mproj) &
!$OMP&SHARED(ndgxdt,nffkg,nffkgd,nffkge,nffkgs,nincat,nkpg,nlang) &
!$OMP&SHARED(nloalg,ph3d,npw,ntens,ntypat,two_pi2,vect)

 allocate(ffkg(nffkg,mblkpw),parity(nffkg))
 allocate(kpgx(mblkpw,ntens))
 allocate(scalars(2,nffkg))
 allocate(teffv(2,mblkpw))
!$OMP DO
 do ipw1=1,npw,mblkpw

  ipw2=min(npw,ipw1+mblkpw-1)
  nincpw=ipw2-ipw1+1

! Initialize kpgx array related to tensors defined below
  call mkffkg3(choice,ffkg,ffnl,gmet,idir,indlmn,ipw1,ispinor,itypat,&
&  kg_k,kpg_k,kpgx,kpt,lmnmax,mblkpw,ndgxdt,nffkg,nffnl,nincpw,nkpg,nlang,nloalg,&
&  npw,ntens,ntypat,parity)

  do ia=1,nincat

!  Compute the shift eventually needed to get the phases in ph3d
   iaph3d=ia
   if(nloalg(1)>0)iaph3d=ia+ia3-1

   do iffkg=1,nffkg
    scalars(1,iffkg)=0.0d0 ; scalars(2,iffkg)=0.0d0
   end do

!  DEBUG
!  write(6,*)'opernl4, before first time-consuming'
!  write(6,*)'opernl4 : nffkg,nincpw=',nffkg,nincpw
!  write(6,*)'ig,ipw,ffkg(1:4),vect(1:2)'
!  ig=ipw1
!  do ipw=1,nincpw
!  write(6, '(2i4,13es11.3)' )ig,ipw,ffkg(1:min(9,nffkg),ipw),vect(1:2,ipw),ph3d(1:2,ipw,iaph3d)
!  ig=ig+1
!  end do
!  stop
!  ENDDEBUG

!  ******* Entering the first time-consuming part of the routine *******


!  First, treat small nffkg; send treat the initial phase of big
!  nffkg; finally treat the loop needed for big nffkg

!  In the loops, first multiply by the phase factor.
!  This allows to be left with only real operations afterwards.

!  For the time being, the maximal jump allowed is 8.

!  1) Here, treat small nffkg
   if(nffkg<=jump)then

    select case(nffkg)

     case(1)

      scr1=0.0d0 ; sci1=0.0d0
      ig=ipw1
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1

     case(2)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2

     case(3)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3

     case(4)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4

     case(5)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      scr5=0.0d0 ; sci5=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       scr5=scr5+ar*ffkg(5,ipw) ; sci5=sci5+ai*ffkg(5,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4
      scalars(1,5)=scr5 ; scalars(2,5)=sci5

     case(6)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      scr5=0.0d0 ; sci5=0.0d0
      scr6=0.0d0 ; sci6=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       scr5=scr5+ar*ffkg(5,ipw) ; sci5=sci5+ai*ffkg(5,ipw)
       scr6=scr6+ar*ffkg(6,ipw) ; sci6=sci6+ai*ffkg(6,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4
      scalars(1,5)=scr5 ; scalars(2,5)=sci5
      scalars(1,6)=scr6 ; scalars(2,6)=sci6

     case(7)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      scr5=0.0d0 ; sci5=0.0d0
      scr6=0.0d0 ; sci6=0.0d0
      scr7=0.0d0 ; sci7=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       scr5=scr5+ar*ffkg(5,ipw) ; sci5=sci5+ai*ffkg(5,ipw)
       scr6=scr6+ar*ffkg(6,ipw) ; sci6=sci6+ai*ffkg(6,ipw)
       scr7=scr7+ar*ffkg(7,ipw) ; sci7=sci7+ai*ffkg(7,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4
      scalars(1,5)=scr5 ; scalars(2,5)=sci5
      scalars(1,6)=scr6 ; scalars(2,6)=sci6
      scalars(1,7)=scr7 ; scalars(2,7)=sci7

     case(8)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      scr5=0.0d0 ; sci5=0.0d0
      scr6=0.0d0 ; sci6=0.0d0
      scr7=0.0d0 ; sci7=0.0d0
      scr8=0.0d0 ; sci8=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       scr5=scr5+ar*ffkg(5,ipw) ; sci5=sci5+ai*ffkg(5,ipw)
       scr6=scr6+ar*ffkg(6,ipw) ; sci6=sci6+ai*ffkg(6,ipw)
       scr7=scr7+ar*ffkg(7,ipw) ; sci7=sci7+ai*ffkg(7,ipw)
       scr8=scr8+ar*ffkg(8,ipw) ; sci8=sci8+ai*ffkg(8,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4
      scalars(1,5)=scr5 ; scalars(2,5)=sci5
      scalars(1,6)=scr6 ; scalars(2,6)=sci6
      scalars(1,7)=scr7 ; scalars(2,7)=sci7
      scalars(1,8)=scr8 ; scalars(2,8)=sci8

    end select

   else
!   Now treat big nffkg

!   2) Here, initialize big nffkg. The only difference with the
!   preceeding case is that the intermediate results are stored.

    select case(jump)

     case(1)

      scr1=0.0d0 ; sci1=0.0d0
      ig=ipw1
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       teffv(1,ipw)=ar          ; teffv(2,ipw)=ai
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1

     case(2)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       teffv(1,ipw)=ar          ; teffv(2,ipw)=ai
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2

     case(3)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       teffv(1,ipw)=ar          ; teffv(2,ipw)=ai
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3

     case(4)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       teffv(1,ipw)=ar          ; teffv(2,ipw)=ai
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4

     case(5)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      scr5=0.0d0 ; sci5=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       teffv(1,ipw)=ar          ; teffv(2,ipw)=ai
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       scr5=scr5+ar*ffkg(5,ipw) ; sci5=sci5+ai*ffkg(5,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4
      scalars(1,5)=scr5 ; scalars(2,5)=sci5

     case(6)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      scr5=0.0d0 ; sci5=0.0d0
      scr6=0.0d0 ; sci6=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       teffv(1,ipw)=ar          ; teffv(2,ipw)=ai
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       scr5=scr5+ar*ffkg(5,ipw) ; sci5=sci5+ai*ffkg(5,ipw)
       scr6=scr6+ar*ffkg(6,ipw) ; sci6=sci6+ai*ffkg(6,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4
      scalars(1,5)=scr5 ; scalars(2,5)=sci5
      scalars(1,6)=scr6 ; scalars(2,6)=sci6

     case(7)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      scr5=0.0d0 ; sci5=0.0d0
      scr6=0.0d0 ; sci6=0.0d0
      scr7=0.0d0 ; sci7=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       teffv(1,ipw)=ar          ; teffv(2,ipw)=ai
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       scr5=scr5+ar*ffkg(5,ipw) ; sci5=sci5+ai*ffkg(5,ipw)
       scr6=scr6+ar*ffkg(6,ipw) ; sci6=sci6+ai*ffkg(6,ipw)
       scr7=scr7+ar*ffkg(7,ipw) ; sci7=sci7+ai*ffkg(7,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4
      scalars(1,5)=scr5 ; scalars(2,5)=sci5
      scalars(1,6)=scr6 ; scalars(2,6)=sci6
      scalars(1,7)=scr7 ; scalars(2,7)=sci7

     case(8)

      ig=ipw1
      scr1=0.0d0 ; sci1=0.0d0
      scr2=0.0d0 ; sci2=0.0d0
      scr3=0.0d0 ; sci3=0.0d0
      scr4=0.0d0 ; sci4=0.0d0
      scr5=0.0d0 ; sci5=0.0d0
      scr6=0.0d0 ; sci6=0.0d0
      scr7=0.0d0 ; sci7=0.0d0
      scr8=0.0d0 ; sci8=0.0d0
      do ipw=1,nincpw
       ar=vect(1,ig)*ph3d(1,ig,iaph3d)-vect(2,ig)*ph3d(2,ig,iaph3d)
       ai=vect(2,ig)*ph3d(1,ig,iaph3d)+vect(1,ig)*ph3d(2,ig,iaph3d)
       teffv(1,ipw)=ar          ; teffv(2,ipw)=ai
       scr1=scr1+ar*ffkg(1,ipw) ; sci1=sci1+ai*ffkg(1,ipw)
       scr2=scr2+ar*ffkg(2,ipw) ; sci2=sci2+ai*ffkg(2,ipw)
       scr3=scr3+ar*ffkg(3,ipw) ; sci3=sci3+ai*ffkg(3,ipw)
       scr4=scr4+ar*ffkg(4,ipw) ; sci4=sci4+ai*ffkg(4,ipw)
       scr5=scr5+ar*ffkg(5,ipw) ; sci5=sci5+ai*ffkg(5,ipw)
       scr6=scr6+ar*ffkg(6,ipw) ; sci6=sci6+ai*ffkg(6,ipw)
       scr7=scr7+ar*ffkg(7,ipw) ; sci7=sci7+ai*ffkg(7,ipw)
       scr8=scr8+ar*ffkg(8,ipw) ; sci8=sci8+ai*ffkg(8,ipw)
       ig=ig+1
      end do
      scalars(1,1)=scr1 ; scalars(2,1)=sci1
      scalars(1,2)=scr2 ; scalars(2,2)=sci2
      scalars(1,3)=scr3 ; scalars(2,3)=sci3
      scalars(1,4)=scr4 ; scalars(2,4)=sci4
      scalars(1,5)=scr5 ; scalars(2,5)=sci5
      scalars(1,6)=scr6 ; scalars(2,6)=sci6
      scalars(1,7)=scr7 ; scalars(2,7)=sci7
      scalars(1,8)=scr8 ; scalars(2,8)=sci8

    end select

!   3) Here, do-loop for big nffkg.

    do start=1+jump,nffkg,jump
     chunk=min(jump,nffkg-start+1)

     select case(chunk)

      case(1)

       scr1=0.0d0 ; sci1=0.0d0
       do ipw=1,nincpw
        ar=teffv(1,ipw)              ; ai=teffv(2,ipw)
        scr1=scr1+ar*ffkg(start,ipw) ; sci1=sci1+ai*ffkg(start,ipw)
       end do
       scalars(1,start)=scr1 ; scalars(2,start)=sci1

      case(2)

       scr1=0.0d0 ; sci1=0.0d0
       scr2=0.0d0 ; sci2=0.0d0
       do ipw=1,nincpw
        ar=teffv(1,ipw)                ; ai=teffv(2,ipw)
        scr1=scr1+ar*ffkg(start  ,ipw) ; sci1=sci1+ai*ffkg(start  ,ipw)
        scr2=scr2+ar*ffkg(start+1,ipw) ; sci2=sci2+ai*ffkg(start+1,ipw)
       end do
       scalars(1,start  )=scr1 ; scalars(2,start  )=sci1
       scalars(1,start+1)=scr2 ; scalars(2,start+1)=sci2

      case(3)

       scr1=0.0d0 ; sci1=0.0d0
       scr2=0.0d0 ; sci2=0.0d0
       scr3=0.0d0 ; sci3=0.0d0
       do ipw=1,nincpw
        ar=teffv(1,ipw)                ; ai=teffv(2,ipw)
        scr1=scr1+ar*ffkg(start  ,ipw) ; sci1=sci1+ai*ffkg(start  ,ipw)
        scr2=scr2+ar*ffkg(start+1,ipw) ; sci2=sci2+ai*ffkg(start+1,ipw)
        scr3=scr3+ar*ffkg(start+2,ipw) ; sci3=sci3+ai*ffkg(start+2,ipw)
       end do
       scalars(1,start  )=scr1 ; scalars(2,start  )=sci1
       scalars(1,start+1)=scr2 ; scalars(2,start+1)=sci2
       scalars(1,start+2)=scr3 ; scalars(2,start+2)=sci3

      case(4)

       scr1=0.0d0 ; sci1=0.0d0
       scr2=0.0d0 ; sci2=0.0d0
       scr3=0.0d0 ; sci3=0.0d0
       scr4=0.0d0 ; sci4=0.0d0
       do ipw=1,nincpw
        ar=teffv(1,ipw)                ; ai=teffv(2,ipw)
        scr1=scr1+ar*ffkg(start  ,ipw) ; sci1=sci1+ai*ffkg(start  ,ipw)
        scr2=scr2+ar*ffkg(start+1,ipw) ; sci2=sci2+ai*ffkg(start+1,ipw)
        scr3=scr3+ar*ffkg(start+2,ipw) ; sci3=sci3+ai*ffkg(start+2,ipw)
        scr4=scr4+ar*ffkg(start+3,ipw) ; sci4=sci4+ai*ffkg(start+3,ipw)
       end do
       scalars(1,start  )=scr1 ; scalars(2,start  )=sci1
       scalars(1,start+1)=scr2 ; scalars(2,start+1)=sci2
       scalars(1,start+2)=scr3 ; scalars(2,start+2)=sci3
       scalars(1,start+3)=scr4 ; scalars(2,start+3)=sci4

      case(5)

       scr1=0.0d0 ; sci1=0.0d0
       scr2=0.0d0 ; sci2=0.0d0
       scr3=0.0d0 ; sci3=0.0d0
       scr4=0.0d0 ; sci4=0.0d0
       scr5=0.0d0 ; sci5=0.0d0
       do ipw=1,nincpw
        ar=teffv(1,ipw)                ; ai=teffv(2,ipw)
        scr1=scr1+ar*ffkg(start  ,ipw) ; sci1=sci1+ai*ffkg(start  ,ipw)
        scr2=scr2+ar*ffkg(start+1,ipw) ; sci2=sci2+ai*ffkg(start+1,ipw)
        scr3=scr3+ar*ffkg(start+2,ipw) ; sci3=sci3+ai*ffkg(start+2,ipw)
        scr4=scr4+ar*ffkg(start+3,ipw) ; sci4=sci4+ai*ffkg(start+3,ipw)
        scr5=scr5+ar*ffkg(start+4,ipw) ; sci5=sci5+ai*ffkg(start+4,ipw)
       end do
       scalars(1,start  )=scr1 ; scalars(2,start  )=sci1
       scalars(1,start+1)=scr2 ; scalars(2,start+1)=sci2
       scalars(1,start+2)=scr3 ; scalars(2,start+2)=sci3
       scalars(1,start+3)=scr4 ; scalars(2,start+3)=sci4
       scalars(1,start+4)=scr5 ; scalars(2,start+4)=sci5

      case(6)

       scr1=0.0d0 ; sci1=0.0d0
       scr2=0.0d0 ; sci2=0.0d0
       scr3=0.0d0 ; sci3=0.0d0
       scr4=0.0d0 ; sci4=0.0d0
       scr5=0.0d0 ; sci5=0.0d0
       scr6=0.0d0 ; sci6=0.0d0
       do ipw=1,nincpw
        ar=teffv(1,ipw)                ; ai=teffv(2,ipw)
        scr1=scr1+ar*ffkg(start  ,ipw) ; sci1=sci1+ai*ffkg(start  ,ipw)
        scr2=scr2+ar*ffkg(start+1,ipw) ; sci2=sci2+ai*ffkg(start+1,ipw)
        scr3=scr3+ar*ffkg(start+2,ipw) ; sci3=sci3+ai*ffkg(start+2,ipw)
        scr4=scr4+ar*ffkg(start+3,ipw) ; sci4=sci4+ai*ffkg(start+3,ipw)
        scr5=scr5+ar*ffkg(start+4,ipw) ; sci5=sci5+ai*ffkg(start+4,ipw)
        scr6=scr6+ar*ffkg(start+5,ipw) ; sci6=sci6+ai*ffkg(start+5,ipw)
       end do
       scalars(1,start  )=scr1 ; scalars(2,start  )=sci1
       scalars(1,start+1)=scr2 ; scalars(2,start+1)=sci2
       scalars(1,start+2)=scr3 ; scalars(2,start+2)=sci3
       scalars(1,start+3)=scr4 ; scalars(2,start+3)=sci4
       scalars(1,start+4)=scr5 ; scalars(2,start+4)=sci5
       scalars(1,start+5)=scr6 ; scalars(2,start+5)=sci6

      case(7)

       scr1=0.0d0 ; sci1=0.0d0
       scr2=0.0d0 ; sci2=0.0d0
       scr3=0.0d0 ; sci3=0.0d0
       scr4=0.0d0 ; sci4=0.0d0
       scr5=0.0d0 ; sci5=0.0d0
       scr6=0.0d0 ; sci6=0.0d0
       scr7=0.0d0 ; sci7=0.0d0
       do ipw=1,nincpw
        ar=teffv(1,ipw)                ; ai=teffv(2,ipw)
        scr1=scr1+ar*ffkg(start  ,ipw) ; sci1=sci1+ai*ffkg(start  ,ipw)
        scr2=scr2+ar*ffkg(start+1,ipw) ; sci2=sci2+ai*ffkg(start+1,ipw)
        scr3=scr3+ar*ffkg(start+2,ipw) ; sci3=sci3+ai*ffkg(start+2,ipw)
        scr4=scr4+ar*ffkg(start+3,ipw) ; sci4=sci4+ai*ffkg(start+3,ipw)
        scr5=scr5+ar*ffkg(start+4,ipw) ; sci5=sci5+ai*ffkg(start+4,ipw)
        scr6=scr6+ar*ffkg(start+5,ipw) ; sci6=sci6+ai*ffkg(start+5,ipw)
        scr7=scr7+ar*ffkg(start+6,ipw) ; sci7=sci7+ai*ffkg(start+6,ipw)
       end do
       scalars(1,start  )=scr1 ; scalars(2,start  )=sci1
       scalars(1,start+1)=scr2 ; scalars(2,start+1)=sci2
       scalars(1,start+2)=scr3 ; scalars(2,start+2)=sci3
       scalars(1,start+3)=scr4 ; scalars(2,start+3)=sci4
       scalars(1,start+4)=scr5 ; scalars(2,start+4)=sci5
       scalars(1,start+5)=scr6 ; scalars(2,start+5)=sci6
       scalars(1,start+6)=scr7 ; scalars(2,start+6)=sci7

      case(8)

       scr1=0.0d0 ; sci1=0.0d0
       scr2=0.0d0 ; sci2=0.0d0
       scr3=0.0d0 ; sci3=0.0d0
       scr4=0.0d0 ; sci4=0.0d0
       scr5=0.0d0 ; sci5=0.0d0
       scr6=0.0d0 ; sci6=0.0d0
       scr7=0.0d0 ; sci7=0.0d0
       scr8=0.0d0 ; sci8=0.0d0
       do ipw=1,nincpw
        ar=teffv(1,ipw)                ; ai=teffv(2,ipw)
        scr1=scr1+ar*ffkg(start  ,ipw) ; sci1=sci1+ai*ffkg(start  ,ipw)
        scr2=scr2+ar*ffkg(start+1,ipw) ; sci2=sci2+ai*ffkg(start+1,ipw)
        scr3=scr3+ar*ffkg(start+2,ipw) ; sci3=sci3+ai*ffkg(start+2,ipw)
        scr4=scr4+ar*ffkg(start+3,ipw) ; sci4=sci4+ai*ffkg(start+3,ipw)
        scr5=scr5+ar*ffkg(start+4,ipw) ; sci5=sci5+ai*ffkg(start+4,ipw)
        scr6=scr6+ar*ffkg(start+5,ipw) ; sci6=sci6+ai*ffkg(start+5,ipw)
        scr7=scr7+ar*ffkg(start+6,ipw) ; sci7=sci7+ai*ffkg(start+6,ipw)
        scr8=scr8+ar*ffkg(start+7,ipw) ; sci8=sci8+ai*ffkg(start+7,ipw)
       end do
       scalars(1,start  )=scr1 ; scalars(2,start  )=sci1
       scalars(1,start+1)=scr2 ; scalars(2,start+1)=sci2
       scalars(1,start+2)=scr3 ; scalars(2,start+2)=sci3
       scalars(1,start+3)=scr4 ; scalars(2,start+3)=sci4
       scalars(1,start+4)=scr5 ; scalars(2,start+4)=sci5
       scalars(1,start+5)=scr6 ; scalars(2,start+5)=sci6
       scalars(1,start+6)=scr7 ; scalars(2,start+6)=sci7
       scalars(1,start+7)=scr8 ; scalars(2,start+7)=sci8

     end select

!    End loop on start
    end do

!   End if statement for small or big nffkg
   end if

!  ******* Leaving the critical part *********************************

!  DEBUG
!  write(6,*)' opernl4a, write scalars '
!  do iffkg=1,nffkg
!  write(6,*)iffkg,scalars(1:2,iffkg)
!  end do
!  ENDDEBUG

   if(istwf_k>=2)then
!   Impose parity of resulting scalar (this operation could be
!   replaced by direct saving of CPU time in the preceeding section)
    do iffkg=1,nffkg
     scalars(parity(iffkg),iffkg)=0.0d0
    end do
   end if

   iffkg=0 ; iffkgs=nffkge+nffkgd ; iffkgk=nffkge*2
   iffkgs2=nffkge+nffkgs
   do ilang=1,nlang
    nproj=jproj(ilang)
    if(nproj>0)then
!    ilang2 is the number of independent tensor components
!    for symmetric tensor of rank ilang-1
     ilang2=(ilang*(ilang+1))/2

!    Loop over projectors
     do iproj=1,nproj
!     Multiply by the k+G factors (tensors of various rank)
      do ii=1,ilang2
!      Get the starting address for the relevant tensor
       jj=ii+((ilang-1)*ilang*(ilang+1))/6
       iffkg=iffkg+1
!      $OMP CRITICAL (OPERNL4a_1)
       gxa(1,jj,ia,iproj)=gxa(1,jj,ia,iproj)+scalars(1,iffkg)
       gxa(2,jj,ia,iproj)=gxa(2,jj,ia,iproj)+scalars(2,iffkg)
!      $OMP END CRITICAL (OPERNL4a_1)
!      Now, compute gradients, if needed.
       if ((choice==2.or.choice==23) .and. ndgxdt==3) then
        do mu=1,3
         jffkg=nffkge+(iffkg-1)*3+mu
!        Pay attention to the use of reals and imaginary parts here ...
!        $OMP CRITICAL (OPERNL4a_2)
         dgxdt(1,mu,jj,ia,iproj)=dgxdt(1,mu,jj,ia,iproj)-two_pi*scalars(2,jffkg)
         dgxdt(2,mu,jj,ia,iproj)=dgxdt(2,mu,jj,ia,iproj)+two_pi*scalars(1,jffkg)
!        $OMP END CRITICAL (OPERNL4a_2)
        end do
       end if
       if (choice==2 .and. ndgxdt==1) then
        jffkg=nffkge+iffkg
!       Pay attention to the use of reals and imaginary parts here ...
!       $OMP CRITICAL (OPERNL4a_3)
        dgxdt(1,1,jj,ia,iproj)=dgxdt(1,1,jj,ia,iproj)-two_pi*scalars(2,jffkg)
        dgxdt(2,1,jj,ia,iproj)=dgxdt(2,1,jj,ia,iproj)+two_pi*scalars(1,jffkg)
!       $OMP END CRITICAL (OPERNL4a_3)
       end if
       if (choice==4) then
        do mu=1,3
         jffkg=nffkge+(iffkg-1)*9+mu
!        Pay attention to the use of reals and imaginary parts here ...
!        $OMP CRITICAL (OPERNL4a_4)
         dgxdt(1,mu,jj,ia,iproj)=dgxdt(1,mu,jj,ia,iproj)-two_pi*scalars(2,jffkg)
         dgxdt(2,mu,jj,ia,iproj)=dgxdt(2,mu,jj,ia,iproj)+two_pi*scalars(1,jffkg)
!        $OMP END CRITICAL (OPERNL4a_4)
        end do
        do mu=4,9
         jffkg=nffkge+(iffkg-1)*9+mu
!        Pay attention to the use of reals and imaginary parts here ...
!        Also, note the multiplication by (2 pi)**2
!        $OMP CRITICAL (OPERNL4a_5)
         dgxdt(1,mu,jj,ia,iproj)=dgxdt(1,mu,jj,ia,iproj)-two_pi2*scalars(1,jffkg)
         dgxdt(2,mu,jj,ia,iproj)=dgxdt(2,mu,jj,ia,iproj)-two_pi2*scalars(2,jffkg)
!        $OMP END CRITICAL (OPERNL4a_5)
        end do
       end if
!      End loop on ii=1,ilang2
      end do

      if ((choice==3.or.choice==23) .or. choice==6) then
!      Compute additional tensors related to strain gradients
!      ilang3 is number of unique tensor components of rank ilang+1
       ilang3=((ilang+2)*(ilang+3))/2
       jjs=((ilang+1)*(ilang+2)*(ilang+3))/6
!      Compute strain gradient tensor components
       do ii=1,ilang3
!       Note that iffkgs is also used by ddk and 2nd derivative parts
        iffkgs=iffkgs+1
        jj=ii+jjs
!       $OMP CRITICAL (OPERNL4a_6)
        dgxds(1,jj-4,ia,iproj)=dgxds(1,jj-4,ia,iproj)+scalars(1,iffkgs)
        dgxds(2,jj-4,ia,iproj)=dgxds(2,jj-4,ia,iproj)+scalars(2,iffkgs)
!       $OMP END CRITICAL (OPERNL4a_6)
       end do
      end if

      if (choice==6) then
!      Compute additional tensors related to strain 2nd derivatives
!      and internal strain derivatives
!      ilang6 is number of unique tensor components of rank ilang+3
       ilang6=((ilang+4)*(ilang+5))/2
       jjs=((ilang+3)*(ilang+4)*(ilang+5))/6
!      Compute strain gradient tensor components
       do ii=1,ilang6
!       Note that iffkgs is also used by ddk part
        iffkgs2=iffkgs2+1
        jj=ii+jjs
!       $OMP CRITICAL (OPERNL4a_6)
        d2gxds2(1,jj-20,ia,iproj)=d2gxds2(1,jj-20,ia,iproj)+scalars(1,iffkgs2)
        d2gxds2(2,jj-20,ia,iproj)=d2gxds2(2,jj-20,ia,iproj)+scalars(2,iffkgs2)
!       $OMP END CRITICAL (OPERNL4a_6)
       end do

!      ilang4 is number of unique tensor components of rank ilang
       ilang4=((ilang+1)*(ilang+2))/2
       jjs=((ilang)*(ilang+1)*(ilang+2))/6
!      Compute internal strain gradient tensor components
       do ii=1,ilang4
        iffkgs2=iffkgs2+1
        jj=ii+jjs
!       $OMP CRITICAL (OPERNL2_6)
!       Pay attention to the use of reals and imaginary parts here ...
        dgxdis(1,jj-1,ia,iproj)=dgxdis(1,jj-1,ia,iproj)-two_pi*scalars(2,iffkgs2)
        dgxdis(2,jj-1,ia,iproj)=dgxdis(2,jj-1,ia,iproj)+two_pi*scalars(1,iffkgs2)
!       $OMP END CRITICAL (OPERNL2_6)
       end do

!      ilang5 is number of unique tensor components of rank ilang+2
       ilang5=((ilang+3)*(ilang+4))/2
       jjs=((ilang+2)*(ilang+3)*(ilang+4))/6
!      Compute internal strain gradient tensor components
       do ii=1,ilang5
        iffkgs2=iffkgs2+1
        jj=ii+jjs
!       $OMP CRITICAL (OPERNL2_6)
!       Pay attention to the use of reals and imaginary parts here ...
        d2gxdis(1,jj-10,ia,iproj)=d2gxdis(1,jj-10,ia,iproj)-two_pi*scalars(2,iffkgs2)
        d2gxdis(2,jj-10,ia,iproj)=d2gxdis(2,jj-10,ia,iproj)+two_pi*scalars(1,iffkgs2)
!       $OMP END CRITICAL (OPERNL2_6)
       end do
      end if ! choice==6

      if (choice==5) then
!      Compute additional tensors related to ddk with ffnl(:,2,...)
       ilangx=(ilang*(ilang+1))/2
       jjs=((ilang-1)*ilang*(ilang+1))/6
       do ii=1,ilangx
!       Note that iffkgs is also used by strain part
        iffkgs=iffkgs+1
        jj=ii+jjs
!       $OMP CRITICAL (OPERNL4a_7)
        dgxdt(1,1,jj,ia,iproj)=dgxdt(1,1,jj,ia,iproj)+scalars(1,iffkgs)
        dgxdt(2,1,jj,ia,iproj)=dgxdt(2,1,jj,ia,iproj)+scalars(2,iffkgs)
!       $OMP END CRITICAL (OPERNL4a_7)
       end do
!      Compute additional tensors related to ddk with ffnl(:,1,...)
       if(ilang>=2)then
        ilangx=((ilang-1)*ilang)/2
        jjs=((ilang-2)*(ilang-1)*ilang)/6
        do ii=1,ilangx
         iffkgk=iffkgk+1
         jj=ii+jjs
!        $OMP CRITICAL (OPERNL4a_8)
         dgxdt(1,2,jj,ia,iproj)=dgxdt(1,2,jj,ia,iproj)+scalars(1,iffkgk)
         dgxdt(2,2,jj,ia,iproj)=dgxdt(2,2,jj,ia,iproj)+scalars(2,iffkgk)
!        $OMP END CRITICAL (OPERNL4a_8)
        end do
       end if
      end if

!     End projector loop
     end do

!    End condition of non-zero projectors
    end if

!   End angular momentum loop
   end do

!  End loop on atoms
  end do

! End loop on blocks of planewaves
 end do
!$OMP END DO

 deallocate(ffkg,kpgx,parity,scalars,teffv)
!$OMP END PARALLEL

!DEBUG
!write(6,*)' opernl4a : exit'
!ENDDEBUG

end subroutine opernl4a
!!***
