HPhiplusplus/doxygen/_calc_by_l_o_b_p_c_g_8cpp_source.html

 /* HPhi  -  Quantum Lattice Model Simulator */
 /* Copyright (C) 2015 The University of Tokyo */

 /* This program is free software: you can redistribute it and/or modify */
 /* it under the terms of the GNU General Public License as published by */
 /* the Free Software Foundation, either version 3 of the License, or */
 /* (at your option) any later version. */

 /* This program is distributed in the hope that it will be useful, */
 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the */
 /* GNU General Public License for more details. */

 /* You should have received a copy of the GNU General Public License */
 /* along with this program.  If not, see <http://www.gnu.org/licenses/>. */
 #include "Common.hpp"
 #include "xsetmem.hpp"
 #include "mltply.hpp"
 #include "FileIO.hpp"
 #include "wrapperMPI.hpp"
 #include "expec_cisajs.hpp"
 #include "expec_cisajscktaltdc.hpp"
 #include "expec_totalspin.hpp"
 #include "expec_energy_flct.hpp"
 #include "phys.hpp"
 #include <cmath>
 #include "mltplyCommon.hpp"
 #include "./common/setmemory.hpp"

 /**
  * @brief Debugging aid: print the leading entries of a complex array.
  *
  * Writes one line per element to stdout in the form
  * "debug <index> <real part> <imaginary part>".
  *
  * @param[in] num  Number of elements to print (nothing is printed if <= 0).
  * @param[in] var  Array holding at least @p num elements.
  */
 void debug_print(int num, std::complex<double> *var){
   for (int idx = 0; idx < num; ++idx) {
     const std::complex<double> &entry = var[idx];
     printf("debug %d %f %f\n", idx, entry.real(), entry.imag());
   }
 }

 /*
  Prototypes of the Fortran LAPACK/BLAS routines called from this file.
  They are declared with C linkage and take every argument by pointer.
 */
 extern "C" {
   /* LAPACK zheevd: eigenvalues (w) and, for jobz = 'V', eigenvectors (returned in a) of a complex Hermitian matrix. */
   extern void zheevd_(char *jobz, char *uplo, int *n, std::complex<double> *a, int *lda, double *w, std::complex<double> *work, int *lwork, double *rwork, int * lrwork, int *iwork, int *liwork, int *info);
   /* BLAS zgemm: c = alpha * op(a) * op(b) + beta * c for complex matrices, op selected by transa / transb. */
   extern void zgemm_(char *transa, char *transb, int *m, int *n, int *k, std::complex<double> *alpha, std::complex<double> *a, int *lda, std::complex<double> *b, int *ldb, std::complex<double> *beta, std::complex<double> *c, int *ldc);
 }
 /**
  * @brief Solve the generalized eigenvalue problem of the subspace
  * (hsub, ovlp) with Lowdin's orthogonalization.
  *
  * Steps:
  *  -# Diagonalize the overlap matrix with zheevd_.
  *  -# Keep the eigenvectors whose eigenvalue exceeds 1.0e-14, scale each by
  *     1/sqrt(eigenvalue) and pack them into the leading columns of @p ovlp;
  *     the remaining columns are set to zero.
  *  -# Transform @p hsub into that basis (ovlp^H * hsub * ovlp) and
  *     diagonalize its leading block.
  *  -# Transform the eigenvectors back (ovlp * eigenvectors) and store them
  *     in @p hsub.
  *
  * @param[in]     nsub  Dimension of the subspace matrices (nsub x nsub, column-major).
  * @param[in,out] hsub  Subspace Hamiltonian on entry; back-transformed eigenvectors on exit.
  * @param[in,out] ovlp  Overlap matrix on entry; scaled overlap eigenvectors on exit.
  * @param[out]    eig   Eigenvalues written by the zheevd_ calls.
  * @return Number of overlap eigenvectors that were kept.
  */
 static int diag_ovrp(
   int nsub,
   std::complex<double> *hsub,
   std::complex<double> *ovlp,
   double *eig
 )
 {
   char want_vectors = 'V', triangle = 'U', op_left = 'N', op_right = 'N';
   std::complex<double> alpha = 1.0, beta = 0.0;
   int status;

   /* Workspace sizes handed to zheevd_ */
   int len_iwork = 5 * nsub + 3;
   int len_work = nsub*nsub + 2 * nsub;
   int len_rwork = 3 * nsub*nsub + (4 + (int)log2(nsub) + 1) * nsub + 1;

   int *iwork = (int*)malloc(len_iwork * sizeof(int));
   double *rwork = (double*)malloc(len_rwork * sizeof(double));
   std::complex<double> *work = (std::complex<double>*)malloc(len_work * sizeof(std::complex<double>));
   std::complex<double> *scratch = (std::complex<double>*)malloc(nsub*nsub * sizeof(std::complex<double>));

   int idx;
   for (idx = 0; idx < nsub*nsub; idx++) scratch[idx] = 0.0;

   /* (1) Eigen-decomposition of the overlap matrix */
   zheevd_(&want_vectors, &triangle, &nsub, ovlp, &nsub, eig, work, &len_work, rwork, &len_rwork, iwork, &len_iwork, &status);

   /* (2) Pack the retained, rescaled eigenvectors into the leading columns */
   int nkept = 0;
   for (int col = 0; col < nsub; col++) {
     if (!(eig[col] > 1.0e-14)) continue;
     const double root = sqrt(eig[col]);
     std::complex<double> *dst = ovlp + nsub*nkept;
     const std::complex<double> *src = ovlp + nsub*col;
     for (int row = 0; row < nsub; row++) dst[row] = src[row] / root;
     nkept += 1;
   }
   /* Columns that were dropped are cleared */
   for (idx = nsub*nkept; idx < nsub*nsub; idx++) ovlp[idx] = 0.0;

   /* (3) hsub <- ovlp^H * (hsub * ovlp), then diagonalize the nkept x nkept block */
   op_left = 'N';
   zgemm_(&op_left, &op_right, &nsub, &nsub, &nsub, &alpha, hsub, &nsub, ovlp, &nsub, &beta, scratch, &nsub);
   op_left = 'C';
   zgemm_(&op_left, &op_right, &nsub, &nsub, &nsub, &alpha, ovlp, &nsub, scratch, &nsub, &beta, hsub, &nsub);
   zheevd_(&want_vectors, &triangle, &nkept, hsub, &nsub, eig, work, &len_work, rwork, &len_rwork, iwork, &len_iwork, &status);

   /* (4) Back-transformation: hsub <- ovlp * hsub */
   op_left = 'N';
   zgemm_(&op_left, &op_right, &nsub, &nsub, &nsub, &alpha, ovlp, &nsub, hsub, &nsub, &beta, scratch, &nsub);
   for (idx = 0; idx < nsub*nsub; idx++) hsub[idx] = scratch[idx];

   free(scratch);
   free(work);
   free(rwork);
   free(iwork);

   return(nkept);
 }/*int diag_ovrp*/
 /**
  * @brief Compute the shift of the adaptively shifted preconditioner
  * (S. Yamada, et al., Transactions of JSCES).
  *
  * The shift is @p eig truncated (toward zero) to a multiple of 10^(k+i),
  * where k is the truncated decimal order of |eig| (fixed to 1 unless
  * |eig| > 10) and i is the decimal order, rounded up, of whichever of
  * @p eps_LOBPCG and @p res is larger.
  *
  * @param[in] eig         Current eigenvalue estimate.
  * @param[in] res         Residual norm for this eigenvalue.
  * @param[in] eps_LOBPCG  Convergence threshold.
  * @return The shift, or 0 when @p res is not smaller than 1.
  */
 static double calc_preshift(
   double eig,
   double res,
   double eps_LOBPCG
 )
 {
   /* No shift while the residual is not yet below 1 */
   if (!(res < 1.0)) return(0.0);

   const double eig_order = (fabs(eig) > 10.0) ? trunc(log10(fabs(eig))) : 1.0;
   const double tol_order = (eps_LOBPCG > res) ? ceil(log10(eps_LOBPCG))
                                               : ceil(log10(res));
   const double unit = pow(10.0, eig_order + tol_order);

   return(trunc(eig / unit) * unit);
 }/*double calc_preshift*/
 /*
 @brief Compute initial guess for LOBPCG.
 If this is a restarting run (iReStart is RESTART_INOUT or RESTART_IN),
 the vectors are read from the files "tmpvec_set<state>_rank_<rank>.dat".
 If a restart file cannot be opened, or for a non-restart run, the guess is
 generated according to the global initial_mode:
   0 : one component of each vector is set, all others are zero,
   1 : random components, each vector normalized afterwards.
 @param X     Parameter struct (Def.k_exct vectors of local length Check.idim_max)
 @param wave  [out] Initial vectors, indexed as wave[1..idim_max][0..k_exct-1]
 */
 static void Initialize_wave(
   struct BindStruct *X,
   std::complex<double> **wave
 )
 {
   FILE *fp;
   char sdt[D_FileNameMax];
   size_t byte_size;
   std::complex<double> *vin;
   int ie;
   int iproc, ierr;
   long int iv;
   long int i_max_tmp, sum_i_max, idim, i_max;
   int mythread;
   double *dnorm;
   /*
   For DSFMT
   */
   long int u_long_i;
   dsfmt_t dsfmt;
   if (X->Def.iReStart == RESTART_INOUT || X->Def.iReStart == RESTART_IN) {
     //StartTimer(3600);
     //TimeKeeperWithRandAndStep(&(X->Bind), "%s_Time_TPQ_Step.dat", "  set %d step %d:output vector starts: %s\n", "a", rand_i, step_i);
     fprintf(stdoutMPI, "%s", "  Start:  Input vector.\n");

     /* ierr becomes 1 when a restart file is missing */
     ierr = 0;
     /* Buffer for one vector as stored in the file (idim_max + 1 entries) */
     vin = cd_1d_allocate(X->Check.idim_max + 1);
     for (ie = 0; ie < X->Def.k_exct; ie++) {

       sprintf(sdt, "tmpvec_set%d_rank_%d.dat", ie, myrank);
       childfopenALL(sdt, "rb", &fp);
       if (fp == NULL) {
         fprintf(stdout, "Restart file is not found.\n");
         fprintf(stdout, "Start from scratch.\n");
         ierr = 1;
         break;
       }
       else {
         /* File layout: one int (read into iproc and not examined here),
            the vector length, then idim_max + 1 complex numbers */
         byte_size = fread(&iproc, sizeof(int), 1, fp);
         byte_size = fread(&i_max, sizeof(long int), 1, fp);
         //fprintf(stdoutMPI, "Debug: i_max=%ld, step_i=%d\n", i_max, step_i);
         /* The stored length must match the dimension on this process */
         if (i_max != X->Check.idim_max) {
           fprintf(stderr, "Error: Invalid restart file.\n");
           exitMPI(-1);
         }
         byte_size = fread(vin, sizeof(std::complex<double>), X->Check.idim_max + 1, fp);
         /* Entry 0 of the buffer is not copied; vectors are 1-based */
         for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = vin[idim];
         fclose(fp);
       }
     }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
     free_cd_1d_allocate(vin);

     if (ierr == 0) {
       //TimeKeeperWithRandAndStep(X, "%s_Time_TPQ_Step.dat", "  set %d step %d:output vector finishes: %s\n", "a", rand_i, step_i);
       fprintf(stdoutMPI, "%s", "  End  :  Input vector.\n");
       //StopTimer(3600);
       /* byte_size only holds the result of the last fread */
       if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size);
       return;
     }/*if (ierr == 0)*/

     /* A restart file was missing: fall through and build the guess below */
   }/*X->Def.iReStart == RESTART_INOUT || X->Def.iReStart == RESTART_IN*/

   i_max = X->Check.idim_max;

   if (initial_mode == 0) {
     /* One non-zero component per vector */

     for (ie = 0; ie < X->Def.k_exct; ie++) {

       /* Position of the non-zero component, counted over all processes;
          shifted by one for each successive state */
       sum_i_max = SumMPI_li(X->Check.idim_max);
       X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv + ie) % sum_i_max + 1;
       iv = X->Large.iv;
       fprintf(stdoutMPI, "  initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d\n\n",
         initial_mode, iv, i_max, X->Def.k_exct);
 #pragma omp parallel for default(none) private(idim) shared(wave,i_max,ie)
       for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = 0.0;

       /* Walk over the processes, accumulating their local dimensions, to
          find the one whose index range contains iv.
          NOTE(review): iv lies in 1..(total dimension), but the test below
          accepts sum_i_max <= iv < sum_i_max + i_max_tmp, so iv equal to
          the total dimension appears to match no process - confirm. */
       sum_i_max = 0;
       for (iproc = 0; iproc < nproc; iproc++) {

         i_max_tmp = BcastMPI_li(iproc, i_max);
         if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {

           if (myrank == iproc) {
             wave[iv - sum_i_max + 1][ie] = 1.0;
             /* iInitialVecType == 0: use (1 + i)/sqrt(2) instead of 1 */
             if (X->Def.iInitialVecType == 0) {
               wave[iv - sum_i_max + 1][ie] += 1.0*I;
               wave[iv - sum_i_max + 1][ie] /= sqrt(2.0);
             }
           }/*if (myrank == iproc)*/
         }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/

         sum_i_max += i_max_tmp;

       }/*for (iproc = 0; iproc < nproc; iproc++)*/
     }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
   }/*if(initial_mode == 0)*/
   else if (initial_mode == 1) {
     /* Random vectors */
     iv = X->Def.initial_iv;
     fprintf(stdoutMPI, "  initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d\n\n",
       initial_mode, iv, i_max, X->Def.k_exct);
 #pragma omp parallel default(none) private(idim, u_long_i, mythread, dsfmt, ie) \
               shared(wave, iv, X, nthreads, myrank, i_max,I)
     {
       /*
        Initialise MT
        */
 #ifdef _OPENMP
       mythread = omp_get_thread_num();
 #else
       mythread = 0;
 #endif
       /* Each thread owns a generator; its seed depends on the thread and the process */
       u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank;
       dsfmt_init_gen_rand(&dsfmt, u_long_i);

       for (ie = 0; ie < X->Def.k_exct; ie++) {
         if (X->Def.iInitialVecType == 0) {
           /* Complex type: real and imaginary parts are drawn independently */
 #pragma omp for
           for (idim = 1; idim <= i_max; idim++)
             wave[idim][ie] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I;
         }
         else {
           /* Real type: only the real part is drawn */
 #pragma omp for
           for (idim = 1; idim <= i_max; idim++)
             wave[idim][ie] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5);
         }
       }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/

     }/*#pragma omp parallel*/

     /* Divide every vector by the norm returned by NormMPI_dv */
     dnorm = d_1d_allocate(X->Def.k_exct);
     NormMPI_dv(i_max, X->Def.k_exct, wave, dnorm);
 #pragma omp parallel for default(none) shared(i_max,wave,dnorm,X) private(idim,ie)
     for (idim = 1; idim <= i_max; idim++)
       for (ie = 0; ie < X->Def.k_exct; ie++) wave[idim][ie] /= dnorm[ie];
     free_d_1d_allocate(dnorm);
   }/*else if(initial_mode==1)*/
 }/*static void Initialize_wave*/
 static void Output_restart(
   struct BindStruct *X,
   std::complex<double> **wave
 )
 {
   FILE *fp;
   size_t byte_size;
   char sdt[D_FileNameMax];
   int ie;
   long int idim;
   std::complex<double> *vout;

   //TimeKeeperWithRandAndStep(&(X->Bind), "%s_Time_TPQ_Step.dat", "  set %d step %d:output vector starts: %s\n", "a", rand_i, step_i);
   fprintf(stdoutMPI, "%s", "  Start:  Output vector.\n");

   vout = cd_1d_allocate(X->Check.idim_max + 1);
   for (ie = 0; ie < X->Def.k_exct; ie++) {
     sprintf(sdt, "tmpvec_set%d_rank_%d.dat", ie, myrank);
     if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1);
     byte_size = fwrite(&X->Large.itr, sizeof(X->Large.itr), 1, fp);
     byte_size = fwrite(&X->Check.idim_max, sizeof(X->Check.idim_max), 1, fp);
     for (idim = 1; idim <= X->Check.idim_max; idim++) vout[idim] = wave[idim][ie];
     byte_size = fwrite(vout, sizeof(std::complex<double>), X->Check.idim_max + 1, fp);
     fclose(fp);
   }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
   free_cd_1d_allocate(vout);

   //TimeKeeperWithRandAndStep(&(X->Bind), "%s_Time_TPQ_Step.dat", "  set %d step %d:output vector finishes: %s\n", "a", rand_i, step_i);
   fprintf(stdoutMPI, "%s", "  End  :  Output vector.\n");
   if(byte_size == 0) printf("byte_size : %d\n", (int)byte_size);
 }/*static void Output_restart*/
 /**
  * @brief Core routine for the LOBPCG method
  * (S. Yamada, et al., Transactions of JSCES; "Ref.1" in the comments below).
  *
  * The global arrays v0 and v1 are released on entry to make room for the
  * work arrays wxp = {w, x, p} and hwxp = {H*w, H*x, H*p}. Each iteration
  *  -# builds the residual w = H*x - eig*x and its norm,
  *  -# optionally preconditions w (do_precon, experimental) and normalizes it,
  *  -# stops if the largest residual norm is below the threshold,
  *  -# builds the subspace Hamiltonian and overlap from {w, x, p} and solves
  *     the subspace problem with diag_ovrp(),
  *  -# updates x, p (and H*x, H*p) from the subspace eigenvectors.
  *
  * On every return path v0 holds the final vectors x and v1 is reallocated,
  * so the globals released on entry are always valid again.
  *
  * @param[in,out] X  Parameter struct; X->Large.itr receives the step count.
  * @return  0 if converged;
  *          1 if not converged and the restart vectors were written
  *            (iReStart is RESTART_OUT or RESTART_INOUT);
  *         -1 if not converged otherwise.
  */
 int LOBPCG_Main(
   struct BindStruct *X
 )
 {
   char sdt_2[D_FileNameMax];
   FILE *fp;
   int iconv = -1, i4_max;
   long int idim, i_max;
   int ie, stp;
   int ii, jj, nsub, nsub_cut, nstate;
   int write_restart;
   std::complex<double> ***wxp/*[0] w, [1] x, [2] p of Ref.1*/,
     ***hwxp/*[0] h*w, [1] h*x, [2] h*p of Ref.1*/,
     ****hsub, ****ovlp; /*Subspace Hamiltonian and Overlap*/
   double *eig, *dnorm, eps_LOBPCG, eigabs_max, *preshift, precon, dnormmax, *eigsub;
   int do_precon = 0;//If = 1, use preconditioning (experimental)
   char tN = 'N', tC = 'C';
   std::complex<double> one = 1.0, zero = 0.0;

   nsub = 3 * X->Def.k_exct;
   nstate = X->Def.k_exct;

   eig = d_1d_allocate(X->Def.k_exct);
   dnorm = d_1d_allocate(X->Def.k_exct);
   eigsub = d_1d_allocate(nsub);
   /* One preconditioner shift per state */
   preshift = d_1d_allocate(X->Def.k_exct);
   hsub = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct);
   ovlp = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct);

   i_max = X->Check.idim_max;
   i4_max = (int)i_max;

   /* Release the global vectors while the larger work arrays are alive */
   free_cd_2d_allocate(v0);
   free_cd_2d_allocate(v1);
   wxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct);
   hwxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct);
   Initialize_wave(X, wxp[1]);

   TimeKeeper(X, "%s_TimeKeeper.dat", "Lanczos Eigen Value start:    %s", "a");

   /* H*x for the initial guess */
   zclear(i_max*X->Def.k_exct, &hwxp[1][1][0]);
   mltply(X, X->Def.k_exct, hwxp[1], wxp[1]);
   TimeKeeperWithStep(X, "%s_TimeKeeper.dat", "%3d th Lanczos step: %s", "a", 0);

   /* p = 0, H*p = 0 and the initial eigenvalue estimates <x|H|x> */
   zclear(i_max*X->Def.k_exct, &wxp[2][1][0]);
   zclear(i_max*X->Def.k_exct, &hwxp[2][1][0]);
   for (ie = 0; ie < X->Def.k_exct; ie++) eig[ie] = 0.0;
   for (idim = 1; idim <= i_max; idim++) {
     for (ie = 0; ie < X->Def.k_exct; ie++) {
       eig[ie] += real(conj(wxp[1][idim][ie]) * hwxp[1][idim][ie]);
     }
   }
   SumMPI_dv(X->Def.k_exct, eig);

   sprintf(sdt_2, "%s_Lanczos_Step.dat", X->Def.CDataFileHead);
   childfopenMPI(sdt_2, "w", &fp);
   fprintf(stdoutMPI, "    Step   Residual-2-norm     Threshold      Energy\n");
   fprintf(fp, "    Step   Residual-2-norm     Threshold      Energy\n");
   fclose(fp);

   nsub_cut = nsub;
   for (stp = 1; stp <= X->Def.Lanczos_max; stp++) {
     /* Convergence threshold, scaled by the largest |eigenvalue| if that exceeds 1 */
     eigabs_max = 0.0;
     for (ie = 0; ie < X->Def.k_exct; ie++)
       if (fabs(eig[ie]) > eigabs_max) eigabs_max = fabs(eig[ie]);
     eps_LOBPCG = pow(10, -0.5 *X->Def.LanczosEps);
     if (eigabs_max > 1.0) eps_LOBPCG *= eigabs_max;

     /* Residual vectors w = H*x - eig*x */
 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig,X) private(idim,ie)
     for (idim = 1; idim <= i_max; idim++) {
       for (ie = 0; ie < X->Def.k_exct; ie++) {
         wxp[0][idim][ie] = hwxp[1][idim][ie] - eig[ie] * wxp[1][idim][ie];
       }
     }
     NormMPI_dv(i_max, X->Def.k_exct, wxp[0], dnorm);

     /* Largest residual norm (taken before preconditioning/normalization) */
     dnormmax = 0.0;
     for (ie = 0; ie < X->Def.k_exct; ie++)
       if (dnorm[ie] > dnormmax) dnormmax = dnorm[ie];

     /*
      Preconditioning (optional) and normalization of w.
      This section used to be guarded by "if (stp /= 1)", which is a
      divide-assignment that leaves stp unchanged and is always true for
      stp >= 1; it therefore ran at every step and is now unconditional.
     */
     if (do_precon == 1) {
       /* Each state gets the shift computed from its own eigenvalue and residual */
       for (ie = 0; ie < X->Def.k_exct; ie++)
         preshift[ie] = calc_preshift(eig[ie], dnorm[ie], eps_LOBPCG);
 #pragma omp parallel for default(none) shared(wxp,list_Diagonal,preshift,i_max,eps_LOBPCG,X) \
 private(idim,precon,ie)
       for (idim = 1; idim <= i_max; idim++) {
         for (ie = 0; ie < X->Def.k_exct; ie++){
           precon = list_Diagonal[idim] - preshift[ie];
           /* Skip (near-)singular denominators */
           if (fabs(precon) > eps_LOBPCG) wxp[0][idim][ie] /= precon;
         }
       }
     }/*if(do_precon == 1)*/
     NormMPI_dv(i_max, X->Def.k_exct, wxp[0], dnorm);
 #pragma omp parallel for default(none) shared(i_max,wxp,dnorm,X) private(idim,ie)
     for (idim = 1; idim <= i_max; idim++)
       for (ie = 0; ie < X->Def.k_exct; ie++)
         wxp[0][idim][ie] /= dnorm[ie];

     /* Log this step */
     childfopenMPI(sdt_2, "a", &fp);
     fprintf(stdoutMPI, "%9d %15.5e %15.5e      ", stp, dnormmax, eps_LOBPCG);
     fprintf(fp, "%9d %15.5e %15.5e      ", stp, dnormmax, eps_LOBPCG);
     for (ie = 0; ie < X->Def.k_exct; ie++) {
       fprintf(stdoutMPI, " %15.5e", eig[ie]);
       fprintf(fp, " %15.5e", eig[ie]);
     }
     if(nsub_cut == 0) printf("nsub_cut : %d", nsub_cut);
     fprintf(stdoutMPI, "\n");
     fprintf(fp, "\n");
     fclose(fp);

     if (dnormmax < eps_LOBPCG) {
       iconv = 0;
       break;
     }

     /* H*w */
     zclear(i_max*X->Def.k_exct, &hwxp[0][1][0]);
     mltply(X, X->Def.k_exct, hwxp[0], wxp[0]);

     TimeKeeperWithStep(X, "%s_TimeKeeper.dat", "%3d th Lanczos step: %s", "a", stp);

     /* Subspace overlap and Hamiltonian from the blocks {w, x, p} */
     for (ii = 0; ii < 3; ii++) {
       for (jj = 0; jj < 3; jj++) {
         zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one,
           &wxp[ii][1][0], &nstate, &wxp[jj][1][0], &nstate, &zero, &ovlp[jj][0][ii][0], &nsub);
         zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one,
           &wxp[ii][1][0], &nstate, &hwxp[jj][1][0], &nstate, &zero, &hsub[jj][0][ii][0], &nsub);
       }
     }
     SumMPI_cv(nsub*nsub, &ovlp[0][0][0][0]);
     SumMPI_cv(nsub*nsub, &hsub[0][0][0][0]);

     /* New eigenvalue estimate: average of <x|H|x> and the subspace eigenvalue */
     for (ie = 0; ie < X->Def.k_exct; ie++)
       eig[ie] = real(hsub[1][ie][1][ie]);
     nsub_cut = diag_ovrp(nsub, &hsub[0][0][0][0], &ovlp[0][0][0][0], eigsub);
     for (ie = 0; ie < X->Def.k_exct; ie++)
       eig[ie] = 0.5 * (eig[ie] + eigsub[ie]);

     /* x <- combination of w, x, p given by the subspace eigenvectors */
     zclear(i_max*X->Def.k_exct, &v1buf[1][0]);
     for (ii = 0; ii < 3; ii++) {
       zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
         &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate);
     }
     for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)
       wxp[1][idim][ie] = v1buf[idim][ie];

     /* H*x <- same combination of H*w, H*x, H*p */
     zclear(i_max*X->Def.k_exct, &v1buf[1][0]);
     for (ii = 0; ii < 3; ii++) {
       zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
         &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate);
     }
     for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)
       hwxp[1][idim][ie] = v1buf[idim][ie];

     /* p <- contributions of w and p only (ii = 0 and 2) */
     zclear(i_max*X->Def.k_exct, &v1buf[1][0]);
     for (ii = 0; ii < 3; ii += 2) {
       zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
         &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate);
     }
     for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)
       wxp[2][idim][ie] = v1buf[idim][ie];

     /* H*p <- contributions of H*w and H*p only */
     zclear(i_max*X->Def.k_exct, &v1buf[1][0]);
     for (ii = 0; ii < 3; ii += 2) {
       zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
         &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate);
     }
     for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)
       hwxp[2][idim][ie] = v1buf[idim][ie];

     /* Normalize x and p; H*x and H*p are scaled by the same factors */
     for (ii = 1; ii < 3; ii++) {
       NormMPI_dv(i_max, X->Def.k_exct, wxp[ii], dnorm);
 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,dnorm,ii,X) private(idim,ie)
       for (idim = 1; idim <= i_max; idim++) {
         for (ie = 0; ie < X->Def.k_exct; ie++) {
           wxp[ii][idim][ie] /= dnorm[ie];
           hwxp[ii][idim][ie] /= dnorm[ie];
         }/* for (ie = 0; ie < X->Def.k_exct; ie++)*/
       }
     }/*for (ii = 1; ii < 3; ii++)*/

   }/*for (stp = 1; stp <= X->Def.Lanczos_max; stp++)*/
   //fclose(fp);

   X->Large.itr = stp;

   TimeKeeper(X, "%s_TimeKeeper.dat", "Lanczos Eigenvalue finishes:  %s", "a");
   fprintf(stdoutMPI, "%s", "\n######  End  : Calculate Lanczos EigenValue.  ######\n\n");

   free_d_1d_allocate(eig);
   free_d_1d_allocate(dnorm);
   free_d_1d_allocate(eigsub);
   free_d_1d_allocate(preshift);
   free_cd_4d_allocate(hsub);
   free_cd_4d_allocate(ovlp);
   free_cd_3d_allocate(hwxp);

   write_restart = (X->Def.iReStart == RESTART_OUT || X->Def.iReStart == RESTART_INOUT);
   if (write_restart) Output_restart(X, wxp[1]);

   /*
    Hand the vectors back through the globals on EVERY return path.
    Previously the non-converged restart path returned before this point,
    leaking wxp and leaving v0/v1 pointing at memory freed on entry.
   */
   v0 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct);
 #pragma omp parallel for default(none) shared(i_max,wxp,v0,X) private(idim,ie)
   for (idim = 1; idim <= i_max; idim++)
     for (ie = 0; ie < X->Def.k_exct; ie++)
       v0[idim][ie] = wxp[1][idim][ie];
   free_cd_3d_allocate(wxp);
   v1 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct);

   if (iconv == 0) return 0;
   /* Not converged: 1 tells the caller that restart data is available */
   return write_restart ? 1 : -1;
 }/*int LOBPCG_Main*/
 /**
  * @brief Driver routine for the LOB(P)CG method.
  *
  * Either runs LOBPCG_Main() (iInputEigenVec == FALSE) or reads previously
  * stored eigenvectors from "<head>_eigenvec_<state>_rank_<rank>.dat"
  * (iInputEigenVec == TRUE). It then calls phys(), writes the energy,
  * doublon and Sz of every state to "<head>_energy.dat" and, if
  * iOutputEigenVec == TRUE, writes the eigenvectors to files.
  *
  * @param[in,out] X  Main calculation struct.
  * @return TRUE on success, and also when LOBPCG did not converge but the
  *         restart vectors were written (LOBPCG_Main returned 1);
  *         FALSE when LOBPCG did not converge otherwise.
  */
 int CalcByLOBPCG(
   struct EDMainCalStruct *X
 )
 {
   char sdt[D_FileNameMax];
   size_t byte_size;
   long int ie;
   long int i_max = 0;
   long int idim;
   FILE *fp;
   std::complex<double> *vin;

   fprintf(stdoutMPI, "######  Eigenvalue with LOBPCG  #######\n\n");

   if (X->Bind.Def.iInputEigenVec == FALSE) {

     // this part will be modified
     /* Choose how the initial vectors are generated */
     switch (X->Bind.Def.iCalcModel) {
     case HubbardGC:
     case SpinGC:
     case KondoGC:
     case SpinlessFermionGC:
       initial_mode = 1; // 1 -> random initial vector
       break;
     case Hubbard:
     case Kondo:
     case Spin:
     case SpinlessFermion:

       if (X->Bind.Def.iFlgGeneralSpin == TRUE) {
         initial_mode = 1;
       }
       else {
         if (X->Bind.Def.initial_iv>0) {
           initial_mode = 0; // 0 -> only v[iv] = 1
         }
         else {
           initial_mode = 1; // 1 -> random initial vector
         }
       }
       break;
     default:
       //fclose(fp);
       exitMPI(-1);
     }

     int iret = LOBPCG_Main(&(X->Bind));
     if (iret != 0) {
       /* 1: not converged, but restart vectors were written */
       if(iret ==1) return (TRUE);
       else{
           fprintf(stdoutMPI, "  LOBPCG is not converged in this process.\n");
           return(FALSE);
       }
     }
   }/*if (X->Bind.Def.iInputEigenVec == FALSE)*/
   else {// X->Bind.Def.iInputEigenVec=true :input v1:
     fprintf(stdoutMPI, "An Eigenvector is inputted.\n");
     vin = cd_1d_allocate(X->Bind.Check.idim_max + 1);
     for (ie = 0; ie < X->Bind.Def.k_exct; ie++) {
       TimeKeeper(&(X->Bind), "%s_TimeKeeper.dat", "Read Eigenvector starts:          %s", "a");
       sprintf(sdt, "%s_eigenvec_%ld_rank_%d.dat", X->Bind.Def.CDataFileHead, ie, myrank);
       childfopenALL(sdt, "rb", &fp);
       if (fp == NULL) {
         fprintf(stderr, "Error: Inputvector file is not found.\n");
         exitMPI(-1);
       }
       /* File layout: step count, vector length, idim_max + 1 complex numbers */
       byte_size = fread(&step_i, sizeof(int), 1, fp);
       byte_size = fread(&i_max, sizeof(long int), 1, fp);
       if (i_max != X->Bind.Check.idim_max) {
         fprintf(stderr, "Error: Invalid Inputvector file.\n");
         exitMPI(-1);
       }
       byte_size = fread(vin, sizeof(std::complex<double>), X->Bind.Check.idim_max + 1, fp);
       /*
        Vectors are stored as [component][state], as everywhere else in this
        file; the former v1[ie][idim] indexed the array transposed and wrote
        out of bounds.
        NOTE(review): LOBPCG_Main() leaves its result in v0 while this branch
        fills v1 - confirm which array phys() expects.
       */
 #pragma omp parallel for default(none) shared(v1,vin, i_max, ie), private(idim)
       for (idim = 1; idim <= i_max; idim++) {
         v1[idim][ie] = vin[idim];
       }
       /* Close every file that was opened, not only the last one */
       fclose(fp);
     }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
     free_cd_1d_allocate(vin);
     TimeKeeper(&(X->Bind), "%s_TimeKeeper.dat", "Read Eigenvector finishes:        %s", "a");

     if(byte_size == 0) printf("byte_size : %d\n", (int)byte_size);
   }/*X->Bind.Def.iInputEigenVec == TRUE*/

   fprintf(stdoutMPI, "%s", "\n######  End  : Calculate Lanczos EigenVec.  ######\n\n");
   phys(&(X->Bind), X->Bind.Def.k_exct);

   /* St is forced to 1; the output file name is the same for St = 0 and 1 */
   X->Bind.Def.St=1;
   sprintf(sdt, "%s_energy.dat", X->Bind.Def.CDataFileHead);

   if (childfopenMPI(sdt, "w", &fp) != 0) {
     exitMPI(-1);
   }
   for (ie = 0; ie < X->Bind.Def.k_exct; ie++) {
     //phys(&(X->Bind), ie);
     fprintf(fp, "State %ld\n", ie);
     fprintf(fp, "  Energy  %.16lf \n", X->Bind.Phys.energy[ie]);
     fprintf(fp, "  Doublon  %.16lf \n", X->Bind.Phys.doublon[ie]);
     fprintf(fp, "  Sz  %.16lf \n", X->Bind.Phys.Sz[ie]);
     //fprintf(fp, "  S^2  %.16lf \n", X->Bind.Phys.s2[ie]);
     //fprintf(fp, "  N_up  %.16lf \n", X->Bind.Phys.num_up[ie]);
     //fprintf(fp, "  N_down  %.16lf \n", X->Bind.Phys.num_down[ie]);
     fprintf(fp, "\n");
   }
   fclose(fp);
   /*
    Output Eigenvector to a file
   */
   if (X->Bind.Def.iOutputEigenVec == TRUE) {
     TimeKeeper(&(X->Bind), "%s_TimeKeeper.dat", "Output Eigenvector starts:          %s", "a");

     vin = cd_1d_allocate(X->Bind.Check.idim_max + 1);
     for (ie = 0; ie < X->Bind.Def.k_exct; ie++) {

       /* Gather the components of state ie into a contiguous buffer */
 #pragma omp parallel for default(none) shared(X,v1,ie,vin) private(idim)
       for (idim = 1; idim <= X->Bind.Check.idim_max; idim++)
         vin[idim] = v1[idim][ie];

       sprintf(sdt, "%s_eigenvec_%ld_rank_%d.dat", X->Bind.Def.CDataFileHead, ie, myrank);
       if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1);
       byte_size = fwrite(&X->Bind.Large.itr, sizeof(X->Bind.Large.itr), 1, fp);
       byte_size = fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max), 1, fp);
       byte_size = fwrite(vin, sizeof(std::complex<double>), X->Bind.Check.idim_max + 1, fp);
       fclose(fp);
     }/*for (ie = 0; ie < X->Bind.Def.k_exct; ie++)*/
     free_cd_1d_allocate(vin);

     /* This marks the end of the output (the message used to say "starts") */
     TimeKeeper(&(X->Bind), "%s_TimeKeeper.dat", "Output Eigenvector finishes:        %s", "a");
   }/*if (X->Bind.Def.iOutputEigenVec == TRUE)*/

   return TRUE;

 }/*int CalcByLOBPCG*/
PhysList::doublon
double * doublon
Expectation value of the Doublon.
Definition: struct.hpp:357

exitMPI
void exitMPI(int errorcode)
MPI abort wrapper.
Definition: wrapperMPI.cpp:86

DefineList::LanczosEps
int LanczosEps
log(10 base) of the convergence threshold. Read from Calcmod in readdef.h
Definition: struct.hpp:48

debug_print
void debug_print(int num, std::complex< double > *var)
Definition: CalcByLOBPCG.cpp:34

BindStruct::Def
struct DefineList Def
Definition of system (Hamiltonian) etc.
Definition: struct.hpp:395

nproc
int nproc
Number of processors, defined in InitializeMPI()
Definition: global.cpp:72

DefineList::St
int St
0 or 1, but it affects nothing.
Definition: struct.hpp:80

stdoutMPI
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI()
Definition: global.cpp:75

childfopenALL
int childfopenALL(const char *_cPathChild, const char *_cmode, FILE **_fp)
All processes open file in output/ directory.
Definition: FileIO.cpp:50

LargeList::itr
int itr
Iteration number.
Definition: struct.hpp:316

v1buf
std::complex< double > ** v1buf
Definition: global.cpp:22

DefineList::iReStart
int iReStart
Definition: struct.hpp:223

v0
std::complex< double > ** v0
Definition: global.cpp:20

SumMPI_li
long int SumMPI_li(long int idim)
MPI wrapper function to obtain sum of unsigned long integer across processes.
Definition: wrapperMPI.cpp:271

LOBPCG_Main
int LOBPCG_Main(struct BindStruct *X)
Core routine for the LOBPCG method. This method is introduced in S. Yamada, et al., Transactions of JSC...
Definition: CalcByLOBPCG.cpp:351

calc_preshift
static double calc_preshift(double eig, double res, double eps_LOBPCG)
Compute the adaptively shifted preconditioner described in S. Yamada, et al., Transactions of JSCES...
Definition: CalcByLOBPCG.cpp:139

I
std::complex< double > I(0.0, 1.0)

TimeKeeperWithStep
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log.
Definition: log.cpp:78

DefineList::iOutputEigenVec
int iOutputEigenVec
A switch for outputting an eigenvector. 0: no output, 1: output.
Definition: struct.hpp:204

mltply
int mltply(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. First, the calculation of diago...
Definition: mltply.cpp:56

BindStruct::Large
struct LargeList Large
Variables for Matrix-Vector product.
Definition: struct.hpp:397

CalcByLOBPCG
int CalcByLOBPCG(struct EDMainCalStruct *X)
Driver routine for LOB(P)CG method.
Definition: CalcByLOBPCG.cpp:643

BindStruct::Phys
struct PhysList Phys
Physical quantities.
Definition: struct.hpp:398

NormMPI_dv
void NormMPI_dv(long int ndim, int nstate, std::complex< double > **_v1, double *dnorm)
Compute norm of process-distributed vector.
Definition: wrapperMPI.cpp:344

v1
std::complex< double > ** v1
Definition: global.cpp:21

SumMPI_cv
void SumMPI_cv(int nnorm, std::complex< double > *norm)
MPI wrapper function to obtain sum of Double array across processes.
Definition: wrapperMPI.cpp:254

PhysList::Sz
double * Sz
Expectation value of the Total Sz.
Definition: struct.hpp:361

EDMainCalStruct
Bind.
Definition: struct.hpp:404

LargeList::iv
long int iv
Used for initializing vector.
Definition: struct.hpp:317

BindStruct
Bind.
Definition: struct.hpp:394

nthreads
int nthreads
Number of Threads, defined in InitializeMPI()
Definition: global.cpp:74

zclear
void zclear(long int n, std::complex< double > *x)
clear std::complex<double> array.
Definition: mltply.cpp:143

DefineList::Lanczos_max
int Lanczos_max
Maximum number of iterations.
Definition: struct.hpp:74

PhysList::energy
double * energy
Expectation value of the total energy.
Definition: struct.hpp:356

myrank
int myrank
Process ID, defined in InitializeMPI()
Definition: global.cpp:73

step_i
int step_i
Definition: global.cpp:44

initial_mode
int initial_mode
Definition: global.cpp:38

DefineList::iFlgGeneralSpin
int iFlgGeneralSpin
Flag for the general (Sz/=1/2) spin.
Definition: struct.hpp:86

SumMPI_dv
void SumMPI_dv(int nnorm, double *norm)
MPI wrapper function to obtain sum of Double array across processes.
Definition: wrapperMPI.cpp:238

list_Diagonal
double * list_Diagonal
Definition: global.cpp:24

BcastMPI_li
long int BcastMPI_li(int root, long int idim)
MPI wrapper function to broadcast long integer across processes.
Definition: wrapperMPI.cpp:305

Output_restart
static void Output_restart(struct BindStruct *X, std::complex< double > **wave)
Output eigenvectors for restart LOBPCG method.
Definition: CalcByLOBPCG.cpp:312

TimeKeeper
int TimeKeeper(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType)
Functions for writing a time log.
Definition: log.cpp:42

DefineList::iCalcModel
int iCalcModel
Switch for model. 0:Hubbard, 1:Spin, 2:Kondo, 3:HubbardGC, 4:SpinGC, 5:KondoGC, 6:HubbardNConserved.
Definition: struct.hpp:200

diag_ovrp
static int diag_ovrp(int nsub, std::complex< double > *hsub, std::complex< double > *ovlp, double *eig)
Solve the generalized eigenvalue problem $\hat{H} |\phi\rangle = \varepsilon \hat{O} |\phi\rangle$ with Lowdin's orthogonalization.
Definition: CalcByLOBPCG.cpp:53

Initialize_wave
static void Initialize_wave(struct BindStruct *X, std::complex< double > **wave)
Definition: CalcByLOBPCG.cpp:165

DefineList::initial_iv
long int initial_iv
Seed of random number for initial guess of wavefunctions.
Definition: struct.hpp:76

zgemm_
void zgemm_(char *transa, char *transb, int *m, int *n, int *k, std::complex< double > *alpha, std::complex< double > *a, int *lda, std::complex< double > *b, int *ldb, std::complex< double > *beta, std::complex< double > *c, int *ldc)

phys
void phys(struct BindStruct *X, long int neig)
A main function to calculate physical quantities by full diagonalization method.
Definition: phys.cpp:48

DefineList::k_exct
int k_exct
Read from Calcmod in readdef.h.
Definition: struct.hpp:47

zheevd_
void zheevd_(char *jobz, char *uplo, int *n, std::complex< double > *a, int *lda, double *w, std::complex< double > *work, int *lwork, double *rwork, int *lrwork, int *iwork, int *liwork, int *info)

DefineList::iInitialVecType
int iInitialVecType
Switch for type of initial vectors. 0: complex type, 1: real type. Default value is set as 0 in readdef...
Definition: struct.hpp:197

DefineList::iInputEigenVec
int iInputEigenVec
Switch for reading an eigenvector. 0: no input, 1:input.
Definition: struct.hpp:205

BindStruct::Check
struct CheckList Check
Size of the Hilbert space.
Definition: struct.hpp:396

DefineList::CDataFileHead
char * CDataFileHead
Read from Calcmod in readdef.h. Header of output file such as Green's function.
Definition: struct.hpp:42

CheckList::idim_max
long int idim_max
The dimension of the Hilbert space of this process.
Definition: struct.hpp:305

EDMainCalStruct::Bind
struct BindStruct Bind
Binded struct.
Definition: struct.hpp:405

childfopenMPI
int childfopenMPI(const char *_cPathChild, const char *_cmode, FILE **_fp)
Only the root process open file in output/ directory.
Definition: FileIO.cpp:27