21 #include "xsetmem.hpp" 24 #include "wrapperMPI.hpp" 25 #include "expec_cisajs.hpp" 26 #include "expec_cisajscktaltdc.hpp" 27 #include "expec_totalspin.hpp" 28 #include "expec_energy_flct.hpp" 31 #include "mltplyCommon.hpp" 32 #include "./common/setmemory.hpp" 37 printf(
"debug %d %f %f\n", i, real(var[i]), imag(var[i]));
42 extern void zheevd_(
char *jobz,
char *uplo,
int *n, std::complex<double> *a,
int *lda,
double *w, std::complex<double> *work,
int *lwork,
double *rwork,
int * lrwork,
int *iwork,
int *liwork,
int *info);
43 extern void zgemm_(
char *transa,
char *transb,
int *m,
int *n,
int *k, std::complex<double> *alpha, std::complex<double> *a,
int *lda, std::complex<double> *b,
int *ldb, std::complex<double> *beta, std::complex<double> *c,
int *ldc);
55 std::complex<double> *hsub,
56 std::complex<double> *ovlp,
60 int *iwork, info, isub, jsub, nsub2;
61 char jobz =
'V', uplo =
'U', transa =
'N', transb =
'N';
63 std::complex<double> *work, *mat;
64 int liwork, lwork, lrwork;
65 std::complex<double> one = 1.0, zero = 0.0;
67 liwork = 5 * nsub + 3;
68 lwork = nsub*nsub + 2 * nsub;
69 lrwork = 3 * nsub*nsub + (4 + (int)log2(nsub) + 1) * nsub + 1;
71 iwork = (
int*)malloc(liwork *
sizeof(
int));
72 rwork = (
double*)malloc(lrwork *
sizeof(
double));
73 work = (std::complex<double>*)malloc(lwork *
sizeof(std::complex<double>));
74 mat = (std::complex<double>*)malloc(nsub*nsub *
sizeof(std::complex<double>));
75 for (isub = 0; isub < nsub*nsub; isub++)mat[isub] = 0.0;
79 zheevd_(&jobz, &uplo, &nsub, ovlp, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
91 for (isub = 0; isub < nsub; isub++) {
92 if (eig[isub] > 1.0e-14) {
93 for (jsub = 0; jsub < nsub; jsub++)
94 ovlp[jsub + nsub*nsub2] = ovlp[jsub + nsub*isub] / sqrt(eig[isub]);
98 for (isub = nsub2; isub < nsub; isub++)
99 for (jsub = 0; jsub < nsub; jsub++)
100 ovlp[jsub + nsub*isub] = 0.0;
106 zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, hsub, &nsub, ovlp, &nsub, &zero, mat, &nsub);
108 zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, mat, &nsub, &zero, hsub, &nsub);
115 zheevd_(&jobz, &uplo, &nsub2, hsub, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
123 zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, hsub, &nsub, &zero, mat, &nsub);
125 for (isub = 0; isub < nsub*nsub; isub++)hsub[isub] = mat[isub];
148 if (fabs(eig) > 10.0) k = trunc(log10(fabs(eig)));
152 if (eps_LOBPCG > res) i = ceil(log10(eps_LOBPCG));
153 else i = ceil(log10(res));
155 preshift = trunc(eig / pow(10.0, k + i))*pow(10.0, k + i);
167 std::complex<double> **wave
171 char sdt[D_FileNameMax];
173 std::complex<double> *vin;
177 long int i_max_tmp, sum_i_max, idim, i_max;
191 fprintf(
stdoutMPI,
"%s",
" Start: Input vector.\n");
197 sprintf(sdt,
"tmpvec_set%d_rank_%d.dat", ie,
myrank);
200 fprintf(stdout,
"Restart file is not found.\n");
201 fprintf(stdout,
"Start from scratch.\n");
206 byte_size = fread(&iproc,
sizeof(
int), 1, fp);
207 byte_size = fread(&i_max,
sizeof(
long int), 1, fp);
210 fprintf(stderr,
"Error: Invalid restart file.\n");
213 byte_size = fread(vin,
sizeof(std::complex<double>), X->
Check.
idim_max + 1, fp);
214 for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = vin[idim];
218 free_cd_1d_allocate(vin);
222 fprintf(
stdoutMPI,
"%s",
" End : Input vector.\n");
224 if (byte_size == 0) printf(
"byte_size: %d \n", (
int)byte_size);
243 fprintf(
stdoutMPI,
" initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d\n\n",
245 #pragma omp parallel for default(none) private(idim) shared(wave,i_max,ie) 246 for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = 0.0;
249 for (iproc = 0; iproc <
nproc; iproc++) {
252 if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {
255 wave[iv - sum_i_max + 1][ie] = 1.0;
257 wave[iv - sum_i_max + 1][ie] += 1.0*
I;
258 wave[iv - sum_i_max + 1][ie] /= sqrt(2.0);
263 sum_i_max += i_max_tmp;
270 fprintf(
stdoutMPI,
" initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d\n\n",
272 #pragma omp parallel default(none) private(idim, u_long_i, mythread, dsfmt, ie) \ 273 shared(wave, iv, X, nthreads, myrank, i_max,I) 279 mythread = omp_get_thread_num();
284 dsfmt_init_gen_rand(&dsfmt, u_long_i);
289 for (idim = 1; idim <= i_max; idim++)
290 wave[idim][ie] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*
I;
294 for (idim = 1; idim <= i_max; idim++)
295 wave[idim][ie] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5);
303 #pragma omp parallel for default(none) shared(i_max,wave,dnorm,X) private(idim,ie) 304 for (idim = 1; idim <= i_max; idim++)
305 for (ie = 0; ie < X->
Def.
k_exct; ie++) wave[idim][ie] /= dnorm[ie];
306 free_d_1d_allocate(dnorm);
314 std::complex<double> **wave
319 char sdt[D_FileNameMax];
322 std::complex<double> *vout;
325 fprintf(
stdoutMPI,
"%s",
" Start: Output vector.\n");
329 sprintf(sdt,
"tmpvec_set%d_rank_%d.dat", ie,
myrank);
333 for (idim = 1; idim <= X->
Check.
idim_max; idim++) vout[idim] = wave[idim][ie];
334 byte_size = fwrite(vout,
sizeof(std::complex<double>), X->
Check.
idim_max + 1, fp);
337 free_cd_1d_allocate(vout);
340 fprintf(
stdoutMPI,
"%s",
" End : Output vector.\n");
341 if(byte_size == 0) printf(
"byte_size : %d\n", (
int)byte_size);
355 char sdt[D_FileNameMax], sdt_2[D_FileNameMax];
357 int iconv = -1, i4_max;
358 long int idim, i_max;
360 int ii, jj, nsub, nsub_cut, nstate;
361 std::complex<double> ***wxp,
364 double *eig, *dnorm, eps_LOBPCG, eigabs_max, preshift, precon, dnormmax, *eigsub;
366 char tN =
'N', tC =
'C';
367 std::complex<double> one = 1.0, zero = 0.0;
374 eigsub = d_1d_allocate(nsub);
381 free_cd_2d_allocate(
v0);
382 free_cd_2d_allocate(
v1);
392 TimeKeeper(X,
"%s_TimeKeeper.dat",
"Lanczos Eigen Value start: %s",
"a");
401 for (ie = 0; ie < X->
Def.
k_exct; ie++) eig[ie] = 0.0;
402 for (idim = 1; idim <= i_max; idim++) {
404 wxp[2][idim][ie] = 0.0;
405 hwxp[2][idim][ie] = 0.0;
406 eig[ie] += real(conj(wxp[1][idim][ie]) * hwxp[1][idim][ie]);
413 fprintf(
stdoutMPI,
" Step Residual-2-norm Threshold Energy\n");
414 fprintf(fp,
" Step Residual-2-norm Threshold Energy\n");
429 if (fabs(eig[ie]) > eigabs_max) eigabs_max = fabs(eig[ie]);
431 if (eigabs_max > 1.0) eps_LOBPCG *= eigabs_max;
439 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig,X) private(idim,ie) 440 for (idim = 1; idim <= i_max; idim++) {
442 wxp[0][idim][ie] = hwxp[1][idim][ie] - eig[ie] * wxp[1][idim][ie];
449 if (dnorm[ie] > dnormmax) dnormmax = dnorm[ie];
454 if (do_precon == 1) {
457 #pragma omp parallel for default(none) shared(wxp,list_Diagonal,preshift,i_max,eps_LOBPCG,X) \ 458 private(idim,precon,ie) 459 for (idim = 1; idim <= i_max; idim++) {
462 if (fabs(precon) > eps_LOBPCG) wxp[0][idim][ie] /= precon;
470 #pragma omp parallel for default(none) shared(i_max,wxp,dnorm,X) private(idim,ie) 471 for (idim = 1; idim <= i_max; idim++)
473 wxp[0][idim][ie] /= dnorm[ie];
481 fprintf(
stdoutMPI,
"%9d %15.5e %15.5e ", stp, dnormmax, eps_LOBPCG);
482 fprintf(fp,
"%9d %15.5e %15.5e ", stp, dnormmax, eps_LOBPCG);
485 fprintf(fp,
" %15.5e", eig[ie]);
487 if(nsub_cut == 0) printf(
"nsub_cut : %d", nsub_cut);
492 if (dnormmax < eps_LOBPCG) {
509 for (ii = 0; ii < 3; ii++) {
510 for (jj = 0; jj < 3; jj++) {
511 zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one,
512 &wxp[ii][1][0], &nstate, &wxp[jj][1][0], &nstate, &zero, &ovlp[jj][0][ii][0], &nsub);
513 zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one,
514 &wxp[ii][1][0], &nstate, &hwxp[jj][1][0], &nstate, &zero, &hsub[jj][0][ii][0], &nsub);
521 eig[ie] = real(hsub[1][ie][1][ie]);
527 nsub_cut =
diag_ovrp(nsub, &hsub[0][0][0][0], &ovlp[0][0][0][0], eigsub);
532 eig[ie] = 0.5 * (eig[ie] + eigsub[ie]);
538 for (ii = 0; ii < 3; ii++) {
539 zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
540 &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &nstate, &one, &
v1buf[1][0], &nstate);
542 for (idim = 1; idim <= i_max; idim++)
for (ie = 0; ie < X->
Def.
k_exct; ie++)
543 wxp[1][idim][ie] =
v1buf[idim][ie];
549 for (ii = 0; ii < 3; ii++) {
550 zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
551 &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &nstate, &one, &
v1buf[1][0], &nstate);
553 for (idim = 1; idim <= i_max; idim++)
for (ie = 0; ie < X->
Def.
k_exct; ie++)
554 hwxp[1][idim][ie] =
v1buf[idim][ie];
560 for (ii = 0; ii < 3; ii += 2) {
561 zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
562 &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &nstate, &one, &
v1buf[1][0], &nstate);
564 for (idim = 1; idim <= i_max; idim++)
for (ie = 0; ie < X->
Def.
k_exct; ie++)
565 wxp[2][idim][ie] =
v1buf[idim][ie];
571 for (ii = 0; ii < 3; ii += 2) {
572 zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
573 &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &nstate, &one, &
v1buf[1][0], &nstate);
575 for (idim = 1; idim <= i_max; idim++)
for (ie = 0; ie < X->
Def.
k_exct; ie++)
576 hwxp[2][idim][ie] =
v1buf[idim][ie];
580 for (ii = 1; ii < 3; ii++) {
582 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,dnorm,ii,X) private(idim,ie) 583 for (idim = 1; idim <= i_max; idim++) {
585 wxp[ii][idim][ie] /= dnorm[ie];
586 hwxp[ii][idim][ie] /= dnorm[ie];
601 TimeKeeper(X,
"%s_TimeKeeper.dat",
"Lanczos Eigenvalue finishes: %s",
"a");
602 fprintf(
stdoutMPI,
"%s",
"\n###### End : Calculate Lanczos EigenValue. ######\n\n");
604 free_d_1d_allocate(eig);
605 free_d_1d_allocate(dnorm);
606 free_d_1d_allocate(eigsub);
607 free_cd_4d_allocate(hsub);
608 free_cd_4d_allocate(ovlp);
609 free_cd_3d_allocate(hwxp);
616 sprintf(sdt,
"%s",
"Lanczos Eigenvalue is not converged in this process.");
625 #pragma omp parallel for default(none) shared(i_max,wxp,v0,X) private(idim,ie) 626 for (idim = 1; idim <= i_max; idim++)
628 v0[idim][ie] = wxp[1][idim][ie];
629 free_cd_3d_allocate(wxp);
633 sprintf(sdt,
"%s",
"Lanczos Eigenvalue is not converged in this process.");
647 char sdt[D_FileNameMax];
653 std::complex<double> *vin;
655 fprintf(
stdoutMPI,
"###### Eigenvalue with LOBPCG #######\n\n");
664 case SpinlessFermionGC:
670 case SpinlessFermion:
691 if(iret ==1)
return (TRUE);
693 fprintf(
stdoutMPI,
" LOBPCG is not converged in this process.\n");
703 fprintf(
stdoutMPI,
"An Eigenvector is inputted.\n");
706 TimeKeeper(&(X->
Bind),
"%s_TimeKeeper.dat",
"Read Eigenvector starts: %s",
"a");
710 fprintf(stderr,
"Error: Inputvector file is not found.\n");
713 byte_size = fread(&
step_i,
sizeof(
int), 1, fp);
714 byte_size = fread(&i_max,
sizeof(
long int), 1, fp);
716 fprintf(stderr,
"Error: Invalid Inputvector file.\n");
719 byte_size = fread(vin,
sizeof(std::complex<double>), X->
Bind.
Check.
idim_max + 1, fp);
720 #pragma omp parallel for default(none) shared(v1,vin, i_max, ie), private(idim) 721 for (idim = 1; idim <= i_max; idim++) {
722 v1[ie][idim] = vin[idim];
726 free_cd_1d_allocate(vin);
727 TimeKeeper(&(X->
Bind),
"%s_TimeKeeper.dat",
"Read Eigenvector finishes: %s",
"a");
729 if(byte_size == 0) printf(
"byte_size : %d\n", (
int)byte_size);
732 fprintf(
stdoutMPI,
"%s",
"\n###### End : Calculate Lanczos EigenVec. ######\n\n");
752 fprintf(fp,
"State %ld\n", ie);
755 fprintf(fp,
" Sz %.16lf \n", X->
Bind.
Phys.
Sz[ie]);
766 TimeKeeper(&(X->
Bind),
"%s_TimeKeeper.dat",
"Output Eigenvector starts: %s",
"a");
771 #pragma omp parallel for default(none) shared(X,v1,ie,vin) private(idim) 773 vin[idim] =
v1[idim][ie];
779 byte_size = fwrite(vin,
sizeof(std::complex<double>), X->
Bind.
Check.
idim_max + 1, fp);
782 free_cd_1d_allocate(vin);
784 TimeKeeper(&(X->
Bind),
"%s_TimeKeeper.dat",
"Output Eigenvector starts: %s",
"a");
double * doublon
Expectation value of the Doublon.
void exitMPI(int errorcode)
MPI Abortation wrapper.
int LanczosEps
log(10 base) of the convergence threshold. Read from Calcmod in readdef.h
void debug_print(int num, std::complex< double > *var)
struct DefineList Def
Definision of system (Hamiltonian) etc.
int nproc
Number of processors, defined in InitializeMPI()
int St
0 or 1, but it affects nothing.
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI()
int childfopenALL(const char *_cPathChild, const char *_cmode, FILE **_fp)
All processes open file in output/ directory.
std::complex< double > ** v1buf
std::complex< double > ** v0
long int SumMPI_li(long int idim)
MPI wrapper function to obtain sum of unsigned long integer across processes.
int LOBPCG_Main(struct BindStruct *X)
Core routine for the LOBPCG method This method is introduced inS. Yamada, et al., Transactions of JSC...
static double calc_preshift(double eig, double res, double eps_LOBPCG)
Compute adaptively shifted preconditionar written in S. Yamada, et al., Transactions of JSCES...
std::complex< double > I(0.0, 1.0)
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log.
int iOutputEigenVec
ASwitch for outputting an eigenvector. 0: no output, 1:output.
int mltply(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. . First, the calculation of diago...
struct LargeList Large
Variables for Matrix-Vector product.
int CalcByLOBPCG(struct EDMainCalStruct *X)
Driver routine for LOB(P)CG method.
struct PhysList Phys
Physical quantities.
void NormMPI_dv(long int ndim, int nstate, std::complex< double > **_v1, double *dnorm)
Compute norm of process-distributed vector .
std::complex< double > ** v1
void SumMPI_cv(int nnorm, std::complex< double > *norm)
MPI wrapper function to obtain sum of Double array across processes.
double * Sz
Expectation value of the Total Sz.
long int iv
Used for initializing vector.
int nthreads
Number of Threads, defined in InitializeMPI()
void zclear(long int n, std::complex< double > *x)
clear std::complex<double> array.
int Lanczos_max
Maximum number of iterations.
double * energy
Expectation value of the total energy.
int myrank
Process ID, defined in InitializeMPI()
int iFlgGeneralSpin
Flag for the general (Sz/=1/2) spin.
void SumMPI_dv(int nnorm, double *norm)
MPI wrapper function to obtain sum of Double array across processes.
long int BcastMPI_li(int root, long int idim)
MPI wrapper function to broadcast long integer across processes.
static void Output_restart(struct BindStruct *X, std::complex< double > **wave)
Output eigenvectors for restart LOBPCG method.
int TimeKeeper(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType)
Functions for writing a time log.
int iCalcModel
Switch for model. 0:Hubbard, 1:Spin, 2:Kondo, 3:HubbardGC, 4:SpinGC, 5:KondoGC, 6:HubbardNConserved.
static int diag_ovrp(int nsub, std::complex< double > *hsub, std::complex< double > *ovlp, double *eig)
Solve the generalized eigenvalue problem with the Lowdin's orthogonalization.
static void Initialize_wave(struct BindStruct *X, std::complex< double > **wave)
long int initial_iv
Seed of random number for initial guesss of wavefunctions.
void zgemm_(char *transa, char *transb, int *m, int *n, int *k, std::complex< double > *alpha, std::complex< double > *a, int *lda, std::complex< double > *b, int *ldb, std::complex< double > *beta, std::complex< double > *c, int *ldc)
void phys(struct BindStruct *X, long int neig)
A main function to calculate physical quantities by full diagonalization method.
int k_exct
Read from Calcmod in readdef.h.
void zheevd_(char *jobz, char *uplo, int *n, std::complex< double > *a, int *lda, double *w, std::complex< double > *work, int *lwork, double *rwork, int *lrwork, int *iwork, int *liwork, int *info)
int iInitialVecType
Switch for type of inital vectors. 0:complex type, 1: real type. default value is set as 0 in readdef...
int iInputEigenVec
Switch for reading an eigenvector. 0: no input, 1:input.
struct CheckList Check
Size of the Hilbert space.
char * CDataFileHead
Read from Calcmod in readdef.h. Header of output file such as Green's function.
long int idim_max
The dimension of the Hilbert space of this process.
struct BindStruct Bind
Binded struct.
int childfopenMPI(const char *_cPathChild, const char *_cmode, FILE **_fp)
Only the root process open file in output/ directory.