!{\src2tex{textfont=tt}}
!!****f* ABINIT/m_cgtools
!! NAME
!!  m_cgtools
!!
!! FUNCTION
!! This module defines wrappers for BLAS routines. The arguments are stored
!! using the "cg" convention, namely real array of shape cg(2,...)
!!
!! COPYRIGHT
!! Copyright (C) 1992-2012 ABINIT group (MG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! NOTES
!! 1) The convention about names of interfaced routine is: cg_<name>, 
!!    where <name> is equal to the name of the standard BLAS routine
!!
!! 2) Blas routines are called without an explicit interface on purpose since 
!! 
!!    a) The compiler should pass the base address of the array to the F77 BLAS
!!
!!    b) Any compiler would complain about type mismatch (REAL,COMPLEX)
!!       if an explicit interface is given.
!!

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h" 

#if defined HAVE_LINALG_GEMM3M
#define ABI_ZGEMM ZGEMM3M
#else
#define ABI_ZGEMM ZGEMM
#endif

MODULE m_cgtools

 use defs_basis
 use m_profiling
 use m_errors
 use m_xmpi

 use m_fstrings,  only : toupper

 implicit none

 private

 real(dp),public,parameter :: cg_czero(2) = (/0._dp,0._dp/)
 real(dp),public,parameter :: cg_cone(2)  = (/1._dp,0._dp/)


 ! Helper functions.
 public :: cg_set_zero
 public :: cg_tocplx
 public :: cg_fromcplx
 public :: cg_filter

 ! Blas1
 public :: cg_zcopy
 public :: cg_zscal
 public :: cg_zdotc
 public :: cg_real_zdotc
 public :: cg_zdotu
 public :: cg_zaxpy, test_cg_zaxpy
 public :: cg_zaxpby

 ! Blas2
 public :: cg_zgemv


 !Blas3
 public :: cg_zgemm

 !Helper functions for DFT calculations.
 public :: cg_box2gsph
 public :: cg_addtorho
 public :: cg_vlocpsi
 public :: cgnc_cholesky
 public :: cgpaw_cholesky
 !public :: cgnc_normalize
!***

 integer,parameter,private :: MIN_SIZE = 5000

 !complex(spc),private,parameter :: czero_spc =(0._sp,0._sp)
 !complex(spc),private,parameter :: cone_spc  =(1._sp,0._sp)
 !complex(dpc),private,parameter :: czero_dpc =(0._dp,0._dp)
 !complex(dpc),private,parameter :: cone_dpc  =(1._dp,0._dp)

CONTAINS  !========================================================================================
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_set_zero
!! NAME
!!  cg_set_zero
!!
!! FUNCTION
!!  Set cg=alpha.
!!
!! INPUTS
!!
!! OUTPUT
!!
!! PARENTS
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_set_zero(n,cg,alpha)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_set_zero'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
!arrays
 real(dp),optional,intent(in) :: alpha(2)
 real(dp),intent(inout) :: cg(2,n)

!Local variables-------------------------------

! *************************************************************************

 if (PRESENT(alpha)) then
!$OMP PARALLEL
!$OMP WORKSHARE
   cg(1,:)=alpha(1)
   cg(2,:)=alpha(2)
!$OMP END WORKSHARE
!$OMP END PARALLEL
 else 
!$OMP PARALLEL
!$OMP WORKSHARE
   cg(:,:)=zero
!$OMP END WORKSHARE
!$OMP END PARALLEL
 end if

end subroutine cg_set_zero
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_tocplx
!! NAME
!!  cg_tocplx
!!
!! FUNCTION
!!  Convert a real array with (real,imag) part to complex.
!!
!! INPUTS
!!  n = Specifies the number of elements in cg and ocplx
!!  cg(2*n)=Input array with real and imaginary part.
!!
!! OUTPUT 
!!  ocplx(n)=Output complex array.
!!
!! PARENTS
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_tocplx(n, cg, ocplx)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_tocplx'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
!arrays
 real(dp),intent(in) :: cg(2*n)
 complex(dpc),intent(out) :: ocplx(n)

!Local variables ------------------------------
!scalars
 integer :: ii,idx

! *************************************************************************

!$OMP PARALLEL DO PRIVATE(ii,idx)
 do ii=1,n
   idx = 2*ii-1
   ocplx(ii) = DCMPLX(cg(idx),cg(idx+1))
 end do

end subroutine cg_tocplx
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_fromcplx
!! NAME
!!  cg_fromcplx
!!
!! FUNCTION
!!  Convert a complex array to a real array with (real,imag) part 
!!
!! INPUTS
!!  n = Specifies the number of elements in icplx and ocg.
!!  icplx(n)=Input complex array.
!!
!! OUTPUT 
!!  ocg(2*n)=Output array with real and imaginary part.
!!
!! PARENTS
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_fromcplx(n,icplx,ocg)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_fromcplx'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
!arrays
 real(dp),intent(out) :: ocg(2*n)
 complex(dpc),intent(in) :: icplx(n)

!Local variables ------------------------------
!scalars
 integer :: ii,idx

! *************************************************************************

!$OMP PARALLEL DO PRIVATE(ii,idx)
 do ii=1,n
   idx = 2*ii-1
   ocg(idx  ) = DBLE (icplx(ii))
   ocg(idx+1) = AIMAG(icplx(ii))
 end do

end subroutine cg_fromcplx
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_filter
!! NAME
!!  cg_filter
!!
!! FUNCTION
!!  Set all the elements of x to zero where mask is .TRUE.
!!
!! INPUTS
!!  n=Specifies the number of elements in vectors x and y.
!!  mask(n)=Logical array.
!!
!! SIDE EFFECTS
!!  x(n)=See description.
!!
!! PARENTS
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_filter(n, x, mask)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_filter'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
!arrays
 real(dp),intent(inout) :: x(2,n)
 logical,intent(in) :: mask(n)

! *************************************************************************

 where (mask)
   x(1,:) = zero
   x(2,:) = zero
 end where

end subroutine cg_filter
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_zcopy
!! NAME
!!  cg_zcopy
!!
!! FUNCTION
!!  Perform y = x, where x and y are vectors.
!!
!! INPUTS
!!  n = Specifies the number of elements in vectors x and y.
!!  x = Input Array
!!
!! OUTPUT
!!  y = In output, y contains a copy of the values of x.
!!
!! PARENTS
!!      cgwf,lapackprof,subdiago
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_zcopy(n, x, y)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_zcopy'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
!arrays
 real(dp),intent(in) :: x(2*n)
 real(dp),intent(out) :: y(2*n)

! *************************************************************************

 call zcopy(n,x,1,y,1)

end subroutine cg_zcopy
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_zscal
!! NAME
!!  cg_zscal
!!
!! FUNCTION
!!  Perform x = a*x
!!
!! INPUTS
!!  n = Specifies the number of elements in vector x.
!!  a(2)= The scalar a. If a(2) is zero, x = a*x is computed via zdscal
!!
!! OUTPUT
!!  x = Updated vector.
!!
!! OUTPUT
!!
!! PARENTS
!!      cgwf,m_cgtools
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_zscal(n, a, x)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_zscal'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
 real(dp),intent(in) :: a(2)
!arrays
 real(dp),intent(inout) :: x(2*n)

! *************************************************************************

 if (a(2) == zero) then
   call zdscal(n, a, x, 1)
 else 
   call zscal(n, a, x, 1)
 end if

end subroutine cg_zscal
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_zdotc
!! NAME
!!  cg_zdotc
!!
!! FUNCTION
!!   Perform a vector-vector operation defined as res = \Sigma (conjg(x)*y) where x and y are n-element vectors.
!!
!! INPUTS
!!  n = Specifies the number of elements in vector x and y
!!  x,y = Input arrays.
!!
!! OUTPUT
!!  res(2)=Real and Imaginary part of the scalar product.
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

function cg_zdotc(n,x,y) result(res)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_zdotc'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
!arrays
 real(dp),intent(in) :: x(2,n)
 real(dp),intent(in) :: y(2,n)
 real(dp) :: res(2)

!Local variables-------------------------------
#ifdef HAVE_LINALG_ZDOTC_BUG
 integer :: ii
#endif
 complex(dpc) :: cres
 complex(dpc),external :: zdotc

! *************************************************************************

#ifdef HAVE_LINALG_ZDOTC_BUG
 ! Workaround for veclib on MacOSx
 res = zero
!$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:res)
 do ii=1,n
   res(1) = res(1) + x(1,ii)*y(1,ii) + x(2,ii)*y(2,ii)
   res(2) = res(2) + x(1,ii)*y(2,ii) - x(2,ii)*y(1,ii)
 end do

#else
 cres = zdotc(n, x, 1, y, 1)
 res(1) = REAL(cres)
 res(2) = AIMAG(cres)
#endif

end function cg_zdotc
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_real_zdotc
!! NAME
!!  cg_real_zdotc
!!
!! FUNCTION
!!   Perform a vector-vector operation defined as res = REAL (\Sigma (conjg(x)*y)) where x and y are n-element vectors.
!!
!! INPUTS
!!  n = Specifies the number of elements in vector x and y
!!  x,y = Input arrays.
!!
!! OUTPUT
!!  res=Real part of the scalar product.
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

function cg_real_zdotc(n,x,y) result(res)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_real_zdotc'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
!arrays
 real(dp),intent(in) :: x(2,n)
 real(dp),intent(in) :: y(2,n)
 real(dp) :: res

!Local variables-------------------------------
 real(dp),external :: ddot

! *************************************************************************

 res = ddot(2*n,x,1,y,1)

end function cg_real_zdotc
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_zdotu
!! NAME
!!  cg_zdotu
!!
!! FUNCTION
!!   Perform a vector-vector operation defined as res = \Sigma (x*y) where x and y are n-element vectors.
!!   Note that x is unconjugated.
!!
!! INPUTS
!!  n = Specifies the number of elements in vector x and y
!!  x,y = Input arrays.
!!
!! OUTPUT
!!  res(2)=Real and Imaginary part of the scalar product.
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

function cg_zdotu(n, x, y) result(res)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_zdotu'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
!arrays
 real(dp),intent(in) :: x(2,n)
 real(dp),intent(in) :: y(2,n)
 real(dp) :: res(2)

!Local variables-------------------------------
#ifdef HAVE_LINALG_ZDOTU_BUG
 integer :: ii
#endif
 complex(dpc) :: cres
 complex(dpc),external :: zdotu

! *************************************************************************

#ifdef HAVE_LINALG_ZDOTU_BUG
 ! Workaround for veclib on MacOSx
 res = zero
!$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:res)
 do ii=1,n
   res(1) = res(1) + x(1,ii)*y(1,ii) - x(2,ii)*y(2,ii)
   res(2) = res(2) + x(1,ii)*y(2,ii) + x(2,ii)*y(1,ii)
 end do
#else
 cres = zdotu(n, x, 1, y, 1)
 res(1) = REAL(cres)
 res(2) = AIMAG(cres)
#endif

end function cg_zdotu
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_zaxpy
!! NAME
!!  cg_zaxpy
!!
!! FUNCTION
!!  Computes y = alpha*x + y
!!
!! INPUTS
!!  n = Specifies the number of elements in vectors x and y.
!!  alpha = Specifies the scalar alpha.
!!  x = Array
!!
!! SIDE EFFECTS
!!  y = Array. In output, y contains the updated vector.
!!
!! PARENTS
!!      cgwf,lapackprof,m_cgtools
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_zaxpy(n,alpha,x,y)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_zaxpy'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
 real(dp),intent(in) :: alpha(2)
!arrays
 real(dp),intent(in) :: x(2*n)
 real(dp),intent(inout) :: y(2*n)

!local variables 
! integer :: ii

! *************************************************************************

 call zaxpy(n,alpha,x,1,y,1)

!!$omp parallel do
! do ii=1,n
!   y(1,ii) = alpha(1)*x(1,ii) - alpha(2)*x(2,ii) + y(1,ii)
!   y(2,ii) = alpha(1)*x(2,ii) + alpha(2)*x(1,ii) + y(2,ii) 
! end do

end subroutine cg_zaxpy
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_zaxpby
!! NAME
!!  cg_zaxpby
!!
!! FUNCTION
!!  Scales two vectors, adds them to one another and stores result in the vector.
!!  y := a*x + b*y
!!
!! INPUTS
!! n = the number of elements in vectors x and y.
!! a = Specifies the scalar a.
!! x = Array.
!! b = Specifies the scalar b.
!! y = Array
!!
!! OUTPUT
!! y Contains the updated vector y.
!!
!! PARENTS
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_zaxpby(n,a,x,b,y)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_zaxpby'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
 real(dp),intent(in) :: a(2),b(2)
!arrays
 real(dp),intent(in) :: x(2*n)
 real(dp),intent(inout) :: y(2*n)

! *************************************************************************

#ifdef HAVE_LINALG_AXPBY
 call zaxpby(n, a, x, 1, b, y, 1)
#else
 call zscal(n, b, y, 1)
 call zaxpy(n, a, x, 1, y,1)
#endif

end subroutine cg_zaxpby
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_zgemv
!! NAME
!!  cg_zgemv
!!
!! FUNCTION
!! The ?gemv routines perform a matrix-vector operation defined as
!!
!! y := alpha*A*x + beta*y,
!! or
!! y := alpha*A'*x + beta*y,
!! or
!! y := alpha*conjg(A')*x + beta*y,
!!
!! where: alpha and beta are scalars, x and y are vectors, A is an m-by-n matrix.
!!
!! INPUTS
!!
!! OUTPUT
!!
!! PARENTS
!!      lapackprof,projbd
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_zgemv(trans,nrows,ncols,cgmat,vec,matvec,alpha,beta)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_zgemv'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nrows,ncols
 real(dp),optional,intent(in) :: alpha(2),beta(2)
 character(len=1),intent(in) :: trans
!arrays
 real(dp),intent(in) :: cgmat(2,nrows*ncols)
 real(dp),intent(in) :: vec(2,*)
 real(dp),intent(inout) :: matvec(2,*)

!Local variables-------------------------------
!scalars
 integer :: mm,nn,kk,lda,ldb,ldc
 real(dp) :: my_alpha(2),my_beta(2)

! *************************************************************************
 
 lda = nrows
 mm  = nrows
 nn  = 1
 kk  = ncols

 if (toupper(trans) /= 'N') then 
   mm = ncols
   kk = nrows
 end if

 ldb = kk
 ldc = mm

 my_alpha = cg_cone;  if (PRESENT(alpha)) my_alpha = alpha
 my_beta  = cg_czero; if (PRESENT(beta))  my_beta  = beta

 call ZGEMM(trans,"N",mm,nn,kk,my_alpha,cgmat,lda,vec,ldb,my_beta,matvec,ldc)
 ! ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)

 !call ZGEMV(trans,mm,nn,my_alpha,cgmat,lda,vec,1,my_beta,matvec,1)

end subroutine cg_zgemv
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_zgemm
!! NAME
!!  cg_zgemm
!!
!! FUNCTION
!!  The ?gemm routines perform a matrix-matrix operation with general matrices. 
!!  The operation is defined as C := alpha*op(A)*op(B) + beta*C,
!!  where:
!!
!!  op(x) is one of op(x) = x, or op(x) = x', or op(x) = conjg(x'),
!!
!!  alpha and beta are scalars,
!!  A, B and C are matrices:
!!  op(A) is an m-by-k matrix,
!!  op(B) is a k-by-n matrix,
!!  C is an m-by-n matrix.
!!
!! INPUTS
!!
!! OUTPUT
!!
!! PARENTS
!!      lapackprof,m_cgtools
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_zgemm(transa,transb,npws,ncola,ncolb,cg_a,cg_b,cg_c,alpha,beta)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_zgemm'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: npws,ncola,ncolb
 real(dp),optional,intent(in) :: alpha(2),beta(2)
 character(len=1),intent(in) :: transa,transb
!arrays
 real(dp),intent(in) :: cg_a(2,npws*ncola)
 real(dp),intent(in) :: cg_b(2,npws*ncolb)
 real(dp),intent(inout) :: cg_c(2,*)

!Local variables-------------------------------
!scalars
 integer :: mm,nn,kk,lda,ldb,ldc
 real(dp) :: my_alpha(2),my_beta(2)

! *************************************************************************
 
 lda = npws
 ldb = npws

 mm  = npws
 nn  = ncolb
 kk  = ncola

 if (toupper(transa) /= 'N') then 
   mm = ncola
   kk = npws
 end if
 if (toupper(transb) /= 'N') nn = npws

 ldc = mm

 my_alpha = cg_cone;  if (PRESENT(alpha)) my_alpha = alpha
 my_beta  = cg_czero; if (PRESENT(beta))  my_beta  = beta

 call ZGEMM(transa,transb,mm,nn,kk,my_alpha,cg_a,lda,cg_b,ldb,my_beta,cg_c,ldc)

end subroutine cg_zgemm
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_box2gsph
!! NAME
!!  cg_box2gsph
!!
!! FUNCTION
!!
!! INPUTS
!!  n1,n2,n3=physical dimension of the FFT box.
!!  n4,n5,n6=Logical dimensions of the arrays.
!!  ndat=number of data in iarrbox 
!!  npw=Number of planewaves in the G-sphere.
!!  gvec(3,npw)=Reduced coordinates of the G-vectoes.
!!  iarrbox(2,n4,n5,n6*ndat)=Input arrays on the FFT box.
!!  [rscal] = Scaling factor
!!
!! OUTPUT
!!  oarrsph(2,npw*ndat)=Data defined on the G-sphere.
!!
!! PARENTS
!!      dfti_fourwf,fftw3_fourwf,fourwf
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_box2gsph(n1,n2,n3,n4,n5,n6,ndat,npw,gvec,iarrbox,oarrsph,rscal)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_box2gsph'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: npw,n1,n2,n3,n4,n5,n6,ndat
 real(dp),optional,intent(in) :: rscal
!arrays
 integer,intent(in) :: gvec(3,npw)
 real(dp),intent(in) :: iarrbox(2,n4,n5,n6*ndat)
 real(dp),intent(out) :: oarrsph(2,npw*ndat)

!Local variables-------------------------------
!scalars
 integer :: ig,i1,i2,i3,idat,sph_pad,box_pad

! *************************************************************************

 if (.not. PRESENT(rscal)) then
   !
   if (ndat==1) then
!$OMP PARALLEL DO PRIVATE(i1,i2,i3)
     do ig=1,npw
       i1=gvec(1,ig); if (i1<0) i1=i1+n1; i1=i1+1
       i2=gvec(2,ig); if (i2<0) i2=i2+n2; i2=i2+1
       i3=gvec(3,ig); if (i3<0) i3=i3+n3; i3=i3+1
       oarrsph(1,ig) = iarrbox(1,i1,i2,i3) 
       oarrsph(2,ig) = iarrbox(2,i1,i2,i3) 
     end do
   else 
!$OMP PARALLEL DO PRIVATE(sph_pad,box_pad,i1,i2,i3)
     do idat=1,ndat
       sph_pad = (idat-1)*npw
       box_pad = (idat-1)*n6
       do ig=1,npw
         i1=gvec(1,ig); if (i1<0) i1=i1+n1; i1=i1+1
         i2=gvec(2,ig); if (i2<0) i2=i2+n2; i2=i2+1
         i3=gvec(3,ig); if (i3<0) i3=i3+n3; i3=i3+1
         oarrsph(1,ig+sph_pad) = iarrbox(1,i1,i2,i3+box_pad) 
         oarrsph(2,ig+sph_pad) = iarrbox(2,i1,i2,i3+box_pad) 
       end do
     end do
   end if
   !
 else
   if (ndat==1) then
!$OMP PARALLEL DO PRIVATE(i1,i2,i3)
     do ig=1,npw
       i1=gvec(1,ig); if (i1<0) i1=i1+n1; i1=i1+1
       i2=gvec(2,ig); if (i2<0) i2=i2+n2; i2=i2+1
       i3=gvec(3,ig); if (i3<0) i3=i3+n3; i3=i3+1
       oarrsph(1,ig) = iarrbox(1,i1,i2,i3) * rscal
       oarrsph(2,ig) = iarrbox(2,i1,i2,i3) * rscal
     end do
   else 
!$OMP PARALLEL DO PRIVATE(sph_pad,box_pad,i1,i2,i3)
     do idat=1,ndat
       sph_pad = (idat-1)*npw
       box_pad = (idat-1)*n6
       do ig=1,npw
         i1=gvec(1,ig); if (i1<0) i1=i1+n1; i1=i1+1
         i2=gvec(2,ig); if (i2<0) i2=i2+n2; i2=i2+1
         i3=gvec(3,ig); if (i3<0) i3=i3+n3; i3=i3+1
         oarrsph(1,ig+sph_pad) = iarrbox(1,i1,i2,i3+box_pad) * rscal
         oarrsph(2,ig+sph_pad) = iarrbox(2,i1,i2,i3+box_pad) * rscal
       end do
     end do
   end if
 end if

end subroutine cg_box2gsph
!!***


!----------------------------------------------------------------------

!!****f* m_cgtools/cg_addtorho
!! NAME
!!  cg_addtorho
!!
!! FUNCTION
!!  Add |ur|**2 to the ground-states density rho.
!!    rho = rho + weight_r * |ur|**2
!!
!! INPUTS
!!  n1,n2,n3=physical dimension of the FFT box.
!!  n4,n5,n6=leading dimensions of the arrays.
!!  ndat=number of contributions to accumulate.
!!  weight_r=weight used for the accumulation of the density in real space
!!  ur(2,n4,n5,n6*ndat)=wavefunctions in real space
!!
!! SIDE EFFECTS 
!!  rho(n4,n5,n6) = contains the input density at input,
!!                  modified in input with the contribution gived by ur.
!!
!! PARENTS
!!      dfti_fourwf,fftw3_fourwf,fourwf
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_addtorho(n1,n2,n3,n4,n5,n6,ndat,weight_r,ur,rho)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_addtorho'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n1,n2,n3,n4,n5,n6,ndat 
 real(dp),intent(in) :: weight_r
!arrays
 real(dp),intent(in) :: ur(2,n4,n5,n6*ndat)
 real(dp),intent(inout) :: rho(n4,n5,n6)

!Local variables-------------------------------
!scalars
 integer :: i1,i2,i3,idat,i3dat

! *************************************************************************

 if (ndat==1) then
!$OMP PARALLEL DO 
   do i3=1,n3
     do i2=1,n2
       do i1=1,n1
         rho(i1,i2,i3) = rho(i1,i2,i3) + weight_r * (ur(1,i1,i2,i3)**2 + ur(2,i1,i2,i3)**2)
       end do
     end do
   end do

 else 
! It would be nice to use $OMP PARALLEL DO PRIVATE(i3dat) REDUCTION(+:rho)
! but it's risky as the private rho is allocated on the stack of the thread.
!$OMP PARALLEL PRIVATE(i3dat) 
   do idat=1,ndat
!$OMP DO
     do i3=1,n3
       i3dat = i3 + (idat-1)*n6
       do i2=1,n2
         do i1=1,n1
           rho(i1,i2,i3) = rho(i1,i2,i3) + weight_r * (ur(1,i1,i2,i3dat)**2 + ur(2,i1,i2,i3dat)**2)
         end do
       end do
     end do
!$OMP END DO NOWAIT
   end do
!$OMP END PARALLEL
 end if

end subroutine cg_addtorho
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cg_vlocpsi
!! NAME
!!  cg_vlocpsi
!!
!! FUNCTION
!!
!! INPUTS
!!  n1,n2,n3=physical dimension of the FFT box.
!!  n4,n5,n6=leading dimensions of the arrays.
!!  ndat=number of wavefunctions.
!!  cplex=  1 if vloc is real, 2 for complex
!!  vloc(cplex*n4,n5,n6)=Local potential on the FFT box.
!!
!! SIDE EFFECTS
!!  ur(2,n4,n5,n6*ndat)= 
!!    Input = wavefunctions in real space.
!!    Output= vloc |ur>
!!
!! PARENTS
!!      dfti_fourwf,fftw3_fourwf
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cg_vlocpsi(n1,n2,n3,n4,n5,n6,ndat,cplex,vloc,ur)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cg_vlocpsi'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n1,n2,n3,n4,n5,n6,ndat,cplex
!arrays
 real(dp),intent(in) :: vloc(cplex*n4,n5,n6)
 real(dp),intent(inout) :: ur(2,n4,n5,n6*ndat)

!Local variables-------------------------------
!scalars
 integer :: idat,i1,i2,i3,padat
 real(dp) :: fim,fre

! *************************************************************************

 if (cplex==1) then
   !

   if (ndat==1) then
!$OMP PARALLEL DO 
     do i3=1,n3
       do i2=1,n2
         do i1=1,n1
           ur(1,i1,i2,i3) = vloc(i1,i2,i3) * ur(1,i1,i2,i3)
           ur(2,i1,i2,i3) = vloc(i1,i2,i3) * ur(2,i1,i2,i3)
         end do
       end do
     end do
     !
   else
     !
!$OMP PARALLEL DO PRIVATE(padat)
     do idat=1,ndat
       padat = n6*(idat-1)
       do i3=1,n3
         do i2=1,n2
           do i1=1,n1
             ur(1,i1,i2,i3+padat) = vloc(i1,i2,i3) * ur(1,i1,i2,i3+padat)
             ur(2,i1,i2,i3+padat) = vloc(i1,i2,i3) * ur(2,i1,i2,i3+padat)
           end do
         end do
       end do
     end do
     !
   end if
   !
 else if (cplex==2)then
   !
   if (ndat==1) then
!$OMP PARALLEL DO PRIVATE(fre,fim)
     do i3=1,n3
       do i2=1,n2
         do i1=1,n1
           fre = ur(1,i1,i2,i3)
           fim = ur(2,i1,i2,i3)
           ur(1,i1,i2,i3) = vloc(2*i1-1,i2,i3)*fre - vloc(2*i1,i2,i3)*fim
           ur(2,i1,i2,i3) = vloc(2*i1-1,i2,i3)*fim + vloc(2*i1,i2,i3)*fre
         end do
       end do
     end do
   else 
!$OMP PARALLEL DO PRIVATE(padat,fre,fim)
     do idat=1,ndat
       padat = n6*(idat-1)
       do i3=1,n3
         do i2=1,n2
           do i1=1,n1
             fre = ur(1,i1,i2,i3+padat)
             fim = ur(2,i1,i2,i3+padat)
             ur(1,i1,i2,i3+padat) = vloc(2*i1-1,i2,i3)*fre - vloc(2*i1,i2,i3)*fim
             ur(2,i1,i2,i3+padat) = vloc(2*i1-1,i2,i3)*fim + vloc(2*i1,i2,i3)*fre
           end do
         end do
       end do
     end do
   end if
   !
 else 
   MSG_BUG("Wrong cplex")
 end if

end subroutine cg_vlocpsi
!!***

!----------------------------------------------------------------------

!!****f* m_blas/cgnc_cholesky
!! NAME
!!  cgnc_cholesky
!!
!! FUNCTION
!!  Cholesky orthonormalization of the vectors stored in cgblock.
!!
!! INPUTS
!!  npws=Size of each vector (usually npw*nspinor)
!!  nband=Number of band in cgblock
!!  istwfk=Storage mode for the wavefunctions. 1 for standard full mode
!!  me_g0=1 if this node has G=0.
!!  comm_pw=MPI communicator for the planewave group. Set to xmpi_self for sequential mode.
!!
!! SIDE EFFECTS
!!  cgblock(2*npws*nband)
!!    input: Input set of vectors.
!!    output: Orthonormalized set.
!!
!! PARENTS
!!      pw_orthon
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cgnc_cholesky(npws,nband,cgblock,istwfk,me_g0,comm_pw,use_gemm)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cgnc_cholesky'
 use interfaces_14_hidewrite
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: npws,nband,istwfk
 integer,intent(in) :: comm_pw,me_g0
 logical,optional,intent(in) :: use_gemm
!arrays
 real(dp),intent(inout) :: cgblock(2*npws*nband)

!Local variables ------------------------------
!scalars
 integer :: ierr,b1,b2
#ifdef DEBUG_MODE
 integer :: ptr
#endif
 logical :: my_usegemm
 character(len=500) :: msg
!arrays
 real(dp) :: rcg0(nband)
 real(dp),allocatable :: rovlp(:,:)
 complex(dpc),allocatable :: cf_ovlp(:,:) 

! *************************************************************************

#ifdef DEBUG_MODE
 if (istwfk==2) then
   ierr = 0
   do b1=1,nband
     ptr = 2 + 2*(b1-1)*npws
     if (ABS(cgblock(ptr)) > zero) then
       ierr = ierr + 1
       write(msg,'(a,i0,es13.6)')" Input b1, Im u(g=0) should be zero ",b1,cgblock(ptr)
       call wrtout(std_out,msg,"COLL")
       !cgblock(ptr) = zero
     end if
   end do
   ABI_CHECK(ierr==0,"Non zero imag part")
 end if
#endif

 my_usegemm=.FALSE.; if (PRESENT(use_gemm)) my_usegemm = use_gemm

 ABI_MALLOC(cf_ovlp,(nband,nband))
 !
 ! 1) Calculate O_ij = <phi_i|phi_j>
 if (my_usegemm) then
   call ABI_ZGEMM("Conjugate","Normal",nband,nband,npws,cone,cgblock,npws,cgblock,npws,czero,cf_ovlp,nband)
 else 
   call ZHERK("U","C",nband,npws,one,cgblock,npws,zero,cf_ovlp,nband)
 end if

 if (istwfk==1) then
   !
   ! Sum the overlap if PW are distributed.
   if (comm_pw /= xmpi_self) call xsum_mpi(cf_ovlp,comm_pw,ierr)
   !
   ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix.
   call ZPOTRF('U',nband,cf_ovlp,nband,ierr)

   if (ierr/=0)  then
     write(msg,'(a,i0)')' ZPOTRF returned info = ',ierr
     MSG_ERROR(msg)
   end if 
   
 else 
   ! overlap is real. Note that nspinor is always 1 in this case.
   ABI_MALLOC(rovlp,(nband,nband))
   rovlp = two * REAL(cf_ovlp)

   if (istwfk==2 .and. me_g0==1) then
     ! Extract the real part at G=0 and subtract its contribution to the overlap.
     call dcopy(nband,cgblock,2*npws,rcg0,1)
     do b2=1,nband
       do b1=1,b2
         rovlp(b1,b2) = rovlp(b1,b2) - rcg0(b1)*rcg0(b2) 
       end do
     end do
   end if

   ! Sum the overlap if PW are distributed.
   if (comm_pw /= xmpi_self) call xsum_mpi(rovlp,comm_pw,ierr)
   !
   ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix.
   call DPOTRF('U',nband,rovlp,nband,ierr)

   if (ierr/=0)  then
     write(msg,'(a,i0)')' DPOTRF returned info = ',ierr
     MSG_ERROR(msg)
   end if 

   cf_ovlp = CMPLX(rovlp)
   ABI_FREE(rovlp)
 end if
 !
 ! 3) Solve X U = cgblock. On exit cgblock is orthonormalized.
 call ZTRSM('Right','Upper','Normal','Normal',npws,nband,cone,cf_ovlp,nband,cgblock,npws)

#ifdef DEBUG_MODE
 if (istwfk==2) then
   ierr = 0
   do b1=1,nband
     ptr = 2 + 2*(b1-1)*npws
     if (ABS(cgblock(ptr)) > zero) then
       ierr = ierr + 1
       write(msg,'(a,i0,es13.6)')" Output b1, Im u(g=0) should be zero ",b1,cgblock(ptr)
     end if
   end do
   ABI_CHECK(ierr==0,"Non zero imag part")
 end if
#endif

 ABI_FREE(cf_ovlp)

end subroutine cgnc_cholesky
!!***

!----------------------------------------------------------------------

!!****f* m_blas/cgpaw_cholesky
!! NAME
!!  cgpaw_cholesky
!!
!! FUNCTION
!!  Cholesky orthonormalization of the vectors stored in cgblock.
!!
!! INPUTS
!!  npws=Size of each vector (usually npw*nspinor)
!!  nband=Number of band in cgblock and gsc
!!  istwfk=Storage mode for the wavefunctions. 1 for standard full mode
!!  me_g0=1 if this node has G=0.
!!  comm_pw=MPI communicator for the planewave group. Set to xmpi_self for sequential mode.
!!
!! SIDE EFFECTS
!!  cgblock(2*npws*nband)
!!  gsc(2*npws*nband)
!!    input: Input set of vectors |C>, S|C>
!!    output: Orthonormalized set such as  <C|S|C> = 1
!!
!! PARENTS
!!      pw_orthon
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cgpaw_cholesky(npws,nband,cgblock,gsc,istwfk,me_g0,comm_pw)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cgpaw_cholesky'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: npws,nband,istwfk
 integer,intent(in) :: me_g0,comm_pw
!arrays
 real(dp),intent(inout) :: cgblock(2*npws*nband)
 real(dp),intent(inout) :: gsc(2*npws*nband)

!Local variables ------------------------------
!scalars
 integer :: ierr,b1,b2
 character(len=500) :: msg
!arrays
 real(dp),allocatable :: rovlp(:,:)
 real(dp) :: rcg0(nband),rg0sc(nband)
 complex(dpc),allocatable :: cf_ovlp(:,:)

! *************************************************************************

 ! 1) Calculate O_ij =  <phi_i|S|phi_j>
 ABI_MALLOC(cf_ovlp,(nband,nband))

 call ABI_ZGEMM("C","N",nband,nband,npws,cone,cgblock,npws,gsc,npws,czero,cf_ovlp,nband)

 if (istwfk==1) then
   !
   ! Sum the overlap if PW are distributed.
   if (comm_pw /= xmpi_self) call xsum_mpi(cf_ovlp,comm_pw,ierr)
   !
   ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix.
   call ZPOTRF('U',nband,cf_ovlp,nband,ierr)

   if (ierr/=0)  then
     write(msg,'(a,i0)')' ZPOTRF returned info= ',ierr
     MSG_ERROR(msg)
   end if 

 else
   ! overlap is real. Note that nspinor is always 1 in this case.
   ABI_MALLOC(rovlp,(nband,nband))
   rovlp = two * REAL(cf_ovlp)

   if (istwfk==2 .and. me_g0==1) then
     ! Extract the real part at G=0 and subtract its contribution to the overlap.
     call dcopy(nband,cgblock,2*npws,rcg0,1)
     call dcopy(nband,gsc,2*npws,rg0sc,1)
     do b2=1,nband
       do b1=1,b2
        rovlp(b1,b2) = rovlp(b1,b2) - rcg0(b1)*rg0sc(b2) 
       end do
     end do
   end if
   !
   ! Sum the overlap if PW are distributed.
   if (comm_pw /= xmpi_self) call xsum_mpi(rovlp,comm_pw,ierr)
   !
   ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix.
   call DPOTRF('U',nband,rovlp,nband,ierr)
                                                                        
   if (ierr/=0)  then
     write(msg,'(a,i0)')' DPOTRF returned info= ',ierr
     MSG_ERROR(msg)
   end if 
                                                                        
   cf_ovlp = CMPLX(rovlp,zero)
   ABI_FREE(rovlp)
 end if
 !
 ! 3) Solve X U = cgblock. 
 call ZTRSM('Right','Upper','Normal','Normal',npws,nband,cone,cf_ovlp,nband,cgblock,npws)

 ! 4) Solve Y U = gsc. On exit <cgblock|gsc> = 1 
 call ZTRSM('Right','Upper','Normal','Normal',npws,nband,cone,cf_ovlp,nband,gsc,npws)

 ABI_FREE(cf_ovlp)

end subroutine cgpaw_cholesky
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cgnc_normalize
!! NAME
!!  cgnc_normalize
!!
!! FUNCTION
!!
!! INPUTS
!!  npws=Size of each vector (usually npw*nspinor)
!!  nband=Number of vectors in icg1
!!
!! SIDE EFFECTS
!!
!! PARENTS
!!      m_cgtools
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cgnc_normalize(npws,nband,cg,istwfk,me_g0,comm_pw)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cgnc_normalize'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: npws,nband,istwfk
 integer,intent(in) :: me_g0,comm_pw
!arrays
 real(dp),intent(inout) :: cg(2*npws*nband)

!Local variables ------------------------------
!scalars
 integer :: ptr,ierr,band
 character(len=500) :: msg
 complex(dpc),external :: zdotc
 complex(dpc) :: cnorm2
!arrays
 real(dp) :: norm(nband),alpha(2)

! *************************************************************************
 
!$OMP PARALLEL DO PRIVATE(ptr,cnorm2)
 do band=1,nband
   ptr = 1 + 2*npws*(band-1)
   cnorm2 = zdotc(npws,cg(ptr),1,cg(ptr),1)
   norm(band) = REAL(cnorm2)
 end do

 if (istwfk>1) then
   norm = two * norm
   if (istwfk==2 .and. me_g0==1) then
!$OMP PARALLEL DO PRIVATE(ptr)
     do band=1,nband
       ptr = 1 + 2*npws*(band-1)
       norm(band) = norm(band) - cg(ptr)**2
     end do
   end if
 end if

 if (comm_pw /= xmpi_self) call xsum_mpi(norm,comm_pw,ierr)

 ierr = 0
 do band=1,nband
   if (norm(band) > zero) then
     norm(band) = SQRT(norm(band))
   else
     ierr = ierr + 1
   end if
 end do

 if (ierr/=0) then
   write(msg,'(a,i0,a)')" Found ",ierr," vectors with norm <= zero!"
   MSG_ERROR(msg)
 end if

 do band=1,nband
   ptr = 1 + 2*npws*(band-1)
   alpha = (/one/norm(band), zero/)
   call cg_zscal(npws,alpha,cg(ptr))
 end do

end subroutine cgnc_normalize
!!***

!----------------------------------------------------------------------

!!****f* m_cgtools/cgnc_gramschmidt
!! NAME
!!  cgnc_gramschmidt
!!
!! FUNCTION
!!
!! INPUTS
!!  npws=Size of each vector (usually npw*nspinor)
!!  nband1=Number of vectors in icg1
!!  nband1=Number of vectors in cg2
!!  comm_pw=MPI communicator.
!!
!! SIDE EFFECTS
!!  cg2(2*npws*nband2)
!!  icg1(2*npws*nband1)
!!    input: Input set of vectors.
!!    output: Orthonormalized set.
!!
!! PARENTS
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine cgnc_gramschmidt(npws,nband1,icg1,nband2,iocg2,istwfk,normalize,me_g0,comm_pw)


!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'cgnc_gramschmidt'
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: npws,nband1,nband2,istwfk,me_g0
 integer,optional,intent(in) :: comm_pw
 logical,intent(in) :: normalize
!arrays
 real(dp),intent(in) :: icg1(2*npws*nband1)
 real(dp),intent(inout) :: iocg2(2*npws*nband2)

!Local variables ------------------------------
!scalars
 integer :: ierr,b1,b2
!arrays
 real(dp) :: r_icg1(nband1),r_iocg2(nband2)
 real(dp),allocatable :: proj(:,:,:)

! *************************************************************************

 ABI_MALLOC(proj,(2,nband1,nband2))

 ! 1) Calculate <cg1|cg2>
 call cg_zgemm("C","N",npws,nband1,nband2,icg1,iocg2,proj)

 if (istwfk>1) then
   proj = two * proj
   !
   if (istwfk==2 .and. me_g0==1) then 
     ! nspinor is always 1 in this case.
     ! Extract the real part at G=0 and subtract its contribution.
     call dcopy(nband1,icg1, 2*npws,r_icg1, 1)
     call dcopy(nband2,iocg2,2*npws,r_iocg2,1)
     do b2=1,nband2
       do b1=1,nband1
         proj(1,b1,b2) = proj(1,b1,b2) - r_icg1(b1) * r_iocg2(b2)
       end do
     end do
   end if
   !
 end if
 !
 ! This is for the MPI version
 if (comm_pw /= xmpi_self) call xsum_mpi(proj,comm_pw,ierr)

 ! 2) cg2 = - cg1 <cg1|cg2> + cg2 
 call cg_zgemm("N","N",npws,nband1,nband2,icg1,proj,iocg2,alpha=-cg_cone,beta=cg_cone)

 ABI_FREE(proj)

 ! 3) Normalize iocg2 if required.
 if (normalize) call cgnc_normalize(npws,nband2,iocg2,istwfk,me_g0,comm_pw)

end subroutine cgnc_gramschmidt
!!***

!----------------------------------------------------------------------
!                         Unitary tests
!----------------------------------------------------------------------

!!****f* m_cgtools/test_cg_zaxpy
!! NAME
!!  test_cg_zaxpy
!!
!! FUNCTION
!!  Unitary test for cg_zaxpy.
!!
!! INPUTS
!!  n = Specifies the number of elements in vectors x and y.
!!  a = Specifies the scalar a.
!!  x = Array
!!
!! SIDE EFFECTS
!!  y = Array. In output, y contains the updated vector.
!!
!! PARENTS
!!
!! CHILDREN
!!      cg_zaxpy
!!
!! SOURCE

subroutine test_cg_zaxpy(n,cpu_time,wall_time)

 use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'test_cg_zaxpy'
 use interfaces_18_timing
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: n
 real(dp),intent(out) :: cpu_time(2),wall_time(2)

!Local variables-------------------------------
 integer :: ii,step,meas
 integer :: NMEAS=500
!arrays
 real(dp) :: a(2),mabs_err(2)
 real(dp),allocatable :: x(:,:),y(:,:),ysave(:,:)

! *************************************************************************

 ABI_MALLOC(x,(2,n))
 ABI_MALLOC(y,(2,n))
 ABI_MALLOC(ysave,(2,n))

 a  = (/one,two/)

 do step=1,2
   x  = one
   y  = one

   cpu_time(step)  = abi_cpu_time()
   wall_time(step) = abi_wtime()

   if (step==1) then
     do meas=1,NMEAS
       do ii=1,n
         y(1,ii) = a(1)*x(1,ii) - a(2)*x(2,ii) + y(1,ii)
         y(2,ii) = a(1)*x(2,ii) + a(2)*x(1,ii) + y(2,ii)
       end do
       if (meas == 1) ysave = y
       !a = zero
       !do ii=1,n
       !  a(1) = a(1) + x(1,ii)*y(1,ii) + x(2,ii)*y(2,ii)
       !  a(2) = a(2) + x(1,ii)*y(2,ii) - x(2,ii)*y(1,ii)
       !end do
       !if (meas == 1) ysave(:,1) = a
     end do
   else
     y = one
     do meas=1,NMEAS
       call cg_zaxpy(n,a,x,y)
       if (meas == 1) mabs_err = MAXVAL( ABS(ysave - y), DIM=2)
       !a = cg_zdotc(n, x, y) 
       !if (meas == 1) mabs_err = MAXVAL( ABS(a - ysave(:,1)))
     end do
   end if

   cpu_time(step)  = abi_cpu_time() - cpu_time(step)
   wall_time(step) = abi_wtime() - wall_time(step)
 end do

 ABI_FREE(x)
 ABI_FREE(y)
 ABI_FREE(ysave)

 write(std_out,'(a,2f8.4)')"max error",mabs_err

end subroutine test_cg_zaxpy
!!***

!----------------------------------------------------------------------

END MODULE m_cgtools
!!***
