
!{src2tex{texfont=tt}}
!!****f* ABINIT/wffwritecg
!! NAME
!! wffwritecg
!!
!! COPYRIGHT
!! Copyright (C) 1998-2009 ABINIT group (DCA, XG, GMR, MVer,MB,MD)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!!
!! FUNCTION
!! This procedure write cg in the file .o_WFK using MPI_IO 
!! in case of GS calculation
!!  for one kpt cg are dispatch amoung commcart communicator
!!
!! cg is written like that :
!!  BeginMarker cg ( iband = 1 )  EndMarker  
!!  BeginMarker cg ( iband = 2 ) EndMarker  

!! ....
!! ...
!!  BeginMarker cg( iband = nband_disk ) EndMarker
!!  BeginMarker and EndMarker are given the value of the total length 
!! of cg for one band 
!! 
!! For the library MPI2 (under #define MPI_IO_MORE) , For writting cg, for improving performance
!!  we use a view of the file being written  each for one proc. 
!!
!! INPUTS
!!  wff=struct info for wavefunction
!!  nband_disk =number of bands on disk files to be write
!!  cg(2,npw*nspinor*mband)=planewave coefficients of wavefunctions,
!!  icg=shift to be given to the location of the cg array
!!  mcg=dimention of cg
!!  mpi_enreg  = information about parallelisation 
!!  depl_mpi_to_seq = in case of parallelisation , for each processor, indice of kg 
!!  in sequential mode
!! npwso =npw*nspinor 
!! with npw =number of plane waves
!! and nspinor =number of spinotial components of wavefunctions 
!! spaceComm = Communication space where are dispatched the cg ( commcart)
!!
!! OUTPUT
!! ierr1 = Error status
!!
!! PARENTS
!!      rwwf
!!
!! CHILDREN
!!      mpi_allgather,mpi_comm_rank,mpi_comm_size,mpi_file_close,mpi_file_open
!!      mpi_file_set_view,mpi_file_write,mpi_file_write_all,mpi_type_commit
!!      mpi_type_create_indexed_block,mpi_type_free,mpi_type_indexed
!!      xderivewrecend_cs,xderivewrecinit_cs,xderivewrite
!!
!! SOURCE
subroutine wffwritecg(wff,cg,mcg, icg,nband_disk,npwso, mpi_enreg, depl_mpi_to_seq, ierr1)
 use defs_basis
 use defs_datatypes
#if defined MPI && defined MPI2 && defined MPI_IO
 use mpi
#endif

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_lib01hidempi
!End of the abilint section

 implicit none
#if defined MPI && defined MPI1 && defined MPI_IO
 include 'mpif.h'
#endif

 type(wffile_type),intent(inout) :: wff
 integer,intent(in) :: nband_disk,npwso
 integer,intent(in) :: mcg 
 integer,intent(in) :: icg 
 integer,intent(out) :: ierr1
 real(dp),intent(in):: cg(2,mcg)
 type(MPI_type) , intent(in) :: mpi_enreg
 integer, intent(in) :: depl_mpi_to_seq(npwso)
!integer :: status1(MPI_STATUS_SIZE)
 

integer :: length(2), depl(2), typ(2)
integer :: length1(3), depl1(3), typ1(3)
integer :: myrank
integer :: ierr , tottaillebande, tottaille1bande, tottaille1bandecg
integer :: filetype, filetype1
integer :: delim_record
integer :: nbproc, iproc
 integer, allocatable:: local_offset(:)
integer :: total_taille, total_offset
integer :: i,j,j1,k, ii

 integer, allocatable :: liste_taille(:)
integer , allocatable :: buffdelim(:)
 integer(abinit_offset) :: offset
 integer(abinit_offset) :: offset_zero = 0
integer(abinit_offset) :: disp
integer(abinit_offset) :: depl_bande
integer(abinit_offset) :: size
integer :: fh,fh1
integer :: iband
integer :: n3 , np1
integer ::  tu, ipw 
integer :: spaceComm
integer :: i1 , idepl 
real(dp), allocatable :: tempo_buf(:), buff(:) 
integer, allocatable :: map_buf(:), map(:)
integer :: fhwff

integer :: ipos
integer :: wfftempo
integer :: nb, iloc, loc_depl_bande

integer :: delim1


integer(abinit_offset) :: isav
 ierr =0
#if defined MPI_IO

isav = wff%offwff

#if defined MPI_IO_MORE

 spaceComm = mpi_enreg%commcart

 wff%off_recs = wff%offwff
call MPI_COMM_SIZE(spaceComm, nbproc, ierr)
 call MPI_COMM_RANK(spaceComm, myrank, ierr)


call MPI_FILE_OPEN(spaceComm ,wff%fname,MPI_MODE_RDWR,MPI_INFO_NULL, wfftempo, ierr)
 call MPI_COMM_SIZE(spaceComm, nbproc, ierr)
 call MPI_COMM_RANK(spaceComm, myrank, ierr)



 allocate(liste_taille(nbproc))
 n3=2*npwso
call MPI_ALLGATHER(n3,1,MPI_INTEGER,liste_taille,1,MPI_INTEGER,spaceComm,ierr)



!total taille 
tottaille1bandecg =0
do iproc=1,nbproc
tottaille1bandecg =liste_taille(iproc ) +  tottaille1bandecg
enddo 

tottaille1bande=tottaille1bandecg+2
tottaillebande=nband_disk*tottaille1bande


 allocate (tempo_buf(tottaillebande))
 allocate(buff((2*npwso+2)*nband_disk))
 allocate(map_buf(tottaillebande))
allocate(map((2*npwso+2)*nband_disk)) 

 map_buf(1:tottaillebande) = -1


!First we put cg in a buffer
!to use the type MPI_TYPE_CREATE_INDEXED_BLOCK, the location describe by array map must be in increasing 
!order so we sort the map_buf ( and the tempo_buf associated)
         depl_bande = 0
         loc_depl_bande = 0
        do iband=1,nband_disk
       
           ipw=(iband-1)*npwso+icg
      
              ii = 0
        
              do tu=ipw+1, ipw+npwso
             
                ii =  ii +1 
             
               
               idepl = depl_bande + 1 + 2*(depl_mpi_to_seq(ii) -1)
               iloc = loc_depl_bande + wff%nbOct_int + &
  &                    2*(depl_mpi_to_seq(ii)-1)*wff%nbOct_dp


         
!map_buf(kk) Location where to write cg in the file  : wwf%offwf + map_buf(idepl) 
            
               tempo_buf(idepl) =  cg(1,tu)
               map_buf(idepl) = iloc
           

               idepl = idepl + 1              
               tempo_buf(idepl) =  cg(2,tu)
               map_buf(idepl) = iloc + wff%nbOct_dp

          enddo
!location in the array containing all the blocks of bande including markers      
          depl_bande = depl_bande + tottaille1bande
!location in octet 
           loc_depl_bande = loc_depl_bande + tottaille1bandecg*wff%nbOct_dp + 2*wff%nbOct_int
     
         enddo
 

!to use the type  MPI_TYPE_CREATE_INDEXED_BLOCK , the location describe by array map must be in incresing 
! order
! we elimate now hole 
i1 =1
do i = 1, tottaillebande
 
   if ( map_buf(i) /= -1 ) then
!     if ( ( map_buf(i) < 0 ).or. (map_buf(i) >  tottaillebande*wff%nbOct_dp) ) then
!             PRINT *,'OULLLA myrank ' , myrank, ' map_buf (i ) < 0 i : ', i , map_buf(i), 'tempo_buf ', tempo_buf(i)
!       endif
   
      map(i1)=map_buf(i)
      buff(i1) = tempo_buf(i)
     i1 = i1 +1 
    endif
enddo
nb=i1-1


!Writing cg in one collective order
! using a view ( better performance)
!!  nb= (2*npwso+2)*nband_disk
call MPI_TYPE_CREATE_INDEXED_BLOCK(nb, wff%nbOct_dp,map, MPI_BYTE, FILETYPE, ierr)
call MPI_TYPE_COMMIT(FILETYPE, ierr)

 disp = wff%offwff


  call MPI_FILE_SET_VIEW(wfftempo, disp, MPI_BYTE,   filetype, "native", MPI_INFO_NULL, ierr)


!writing cg

   call MPI_FILE_WRITE_ALL(wfftempo, buff, nb, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)

   call MPI_TYPE_FREE(FILETYPE, ierr)


!calcul total taille en octet d'une bande 
 allocate(local_offset(nbproc))
local_offset(1)=wff%nbOct_int !begin marker
do iproc=2,nbproc
   local_offset(iproc)=local_offset(iproc-1)+liste_taille(iproc-1)*wff%nbOct_dp
enddo

total_taille=local_offset(nbproc) + liste_taille(nbproc)*wff%nbOct_dp
!end mark
total_taille=total_taille +wff%nbOct_int


!writing marks in only one order for performance reason
!first we put all marks in a buffer
    allocate(buffdelim(2*nband_disk))
    j=1
   do iband = 1, nband_disk
     delim_record = total_taille -2*wff%nbOct_int
     buffdelim(j) =  delim_record
     j=j+1
     buffdelim(j) =  delim_record
     j=j+1
   enddo

   depl1(1)=0
   length1(1)= wff%nbOct_int
   depl1(2)= total_taille -wff%nbOct_int 
   length1(2)= wff%nbOct_int
   depl1(3)=total_taille
   length1(3)= 0

    call MPI_TYPE_INDEXED(3,length1, depl1, MPI_BYTE, FILETYPE1, ierr)

    call MPI_TYPE_COMMIT(FILETYPE1, ierr)

!Defining the view corresponding to markers 
   call MPI_FILE_SET_VIEW(wfftempo, wff%offwff, MPI_BYTE, filetype1, &
      & "native", MPI_INFO_NULL, ierr)

!Only one process write
   if ( myrank == 0 ) then
   call MPI_FILE_WRITE(wfftempo, buffdelim, 2*nband_disk, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
   endif
   call MPI_TYPE_FREE(FILETYPE1, ierr)

!Reinit View by the default view
   call MPI_FILE_SET_VIEW(wfftempo, offset_zero, MPI_BYTE, MPI_BYTE, &
 &  "native", MPI_INFO_NULL, ierr)

  call MPI_FILE_CLOSE(wfftempo, ierr)
  
   deallocate(buffdelim)
    deallocate(local_offset)
    deallocate(liste_taille)
    deallocate(tempo_buf)
    deallocate(buff)
   deallocate(map)   
  deallocate(map_buf)   

  wff%offwff=wff%offwff+total_taille*nband_disk

#elif  defined MPI_IO_TEST 


  do iband = 1, nband_disk
     ipw=(iband-1)*npwso+icg
   call xderiveWRecInit_cs(wff,ierr,mpi_enreg%me_cart_2d)
   call xderiveWrite(wff,cg(1:2,ipw+1:ipw+npwso),2,npwso,ierr,mpi_enreg%commcart,depl_mpi_to_seq)
   call xderiveWRecEnd_cs(wff,ierr,mpi_enreg%me_cart_2d)
  enddo
#else
!writing wave function in an order linked to processor

  do iband = 1, nband_disk
     ipw=(iband-1)*npwso+icg
   call xderiveWRecInit_cs(wff,ierr,mpi_enreg%me_cart_2d)
   call xderiveWrite(wff,cg(1:2,ipw+1:ipw+npwso),2,npwso,ierr,mpi_enreg%commcart)
   call xderiveWRecEnd_cs(wff,ierr,mpi_enreg%me_cart_2d)
  enddo




  
#endif

#endif
    ierr1=ierr 
   end subroutine wffwritecg
!!***
