HPhi++  3.1.0
CalcByLOBPCG.cpp
Go to the documentation of this file.
1 /* HPhi - Quantum Lattice Model Simulator */
2 /* Copyright (C) 2015 The University of Tokyo */
3 
4 /* This program is free software: you can redistribute it and/or modify */
5 /* it under the terms of the GNU General Public License as published by */
6 /* the Free Software Foundation, either version 3 of the License, or */
7 /* (at your option) any later version. */
8 
9 /* This program is distributed in the hope that it will be useful, */
10 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
11 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
12 /* GNU General Public License for more details. */
13 
14 /* You should have received a copy of the GNU General Public License */
15 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include "Common.hpp"
21 #include "xsetmem.hpp"
22 #include "mltply.hpp"
23 #include "FileIO.hpp"
24 #include "wrapperMPI.hpp"
25 #include "expec_cisajs.hpp"
26 #include "expec_cisajscktaltdc.hpp"
27 #include "expec_totalspin.hpp"
28 #include "expec_energy_flct.hpp"
29 #include "phys.hpp"
30 #include <cmath>
31 #include "mltplyCommon.hpp"
32 #include "./common/setmemory.hpp"
33 
34 void debug_print(int num, std::complex<double> *var){
35  int i;
36  for (i=0;i<num;i++){
37  printf("debug %d %f %f\n", i, real(var[i]), imag(var[i]));
38  }
39 }
40 
41 extern "C" {
42  extern void zheevd_(char *jobz, char *uplo, int *n, std::complex<double> *a, int *lda, double *w, std::complex<double> *work, int *lwork, double *rwork, int * lrwork, int *iwork, int *liwork, int *info);
43  extern void zgemm_(char *transa, char *transb, int *m, int *n, int *k, std::complex<double> *alpha, std::complex<double> *a, int *lda, std::complex<double> *b, int *ldb, std::complex<double> *beta, std::complex<double> *c, int *ldc);
44 }
53 static int diag_ovrp(
54  int nsub,
55  std::complex<double> *hsub,
56  std::complex<double> *ovlp,
57  double *eig
58 )
59 {
60  int *iwork, info, isub, jsub, nsub2;
61  char jobz = 'V', uplo = 'U', transa = 'N', transb = 'N';
62  double *rwork;
63  std::complex<double> *work, *mat;
64  int liwork, lwork, lrwork;
65  std::complex<double> one = 1.0, zero = 0.0;
66 
67  liwork = 5 * nsub + 3;
68  lwork = nsub*nsub + 2 * nsub;
69  lrwork = 3 * nsub*nsub + (4 + (int)log2(nsub) + 1) * nsub + 1;
70 
71  iwork = (int*)malloc(liwork * sizeof(int));
72  rwork = (double*)malloc(lrwork * sizeof(double));
73  work = (std::complex<double>*)malloc(lwork * sizeof(std::complex<double>));
74  mat = (std::complex<double>*)malloc(nsub*nsub * sizeof(std::complex<double>));
75  for (isub = 0; isub < nsub*nsub; isub++)mat[isub] = 0.0;
79  zheevd_(&jobz, &uplo, &nsub, ovlp, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
90  nsub2 = 0;
91  for (isub = 0; isub < nsub; isub++) {
92  if (eig[isub] > 1.0e-14) {
93  for (jsub = 0; jsub < nsub; jsub++)
94  ovlp[jsub + nsub*nsub2] = ovlp[jsub + nsub*isub] / sqrt(eig[isub]);
95  nsub2 += 1;
96  }
97  }
98  for (isub = nsub2; isub < nsub; isub++)
99  for (jsub = 0; jsub < nsub; jsub++)
100  ovlp[jsub + nsub*isub] = 0.0;
105  transa = 'N';
106  zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, hsub, &nsub, ovlp, &nsub, &zero, mat, &nsub);
107  transa = 'C';
108  zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, mat, &nsub, &zero, hsub, &nsub);
115  zheevd_(&jobz, &uplo, &nsub2, hsub, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
122  transa = 'N';
123  zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, hsub, &nsub, &zero, mat, &nsub);
124  // printf("%d %d %15.5f %15.5f %15.5f\n", info, nsub2, eig[0], eig[1], eig[2]);
125  for (isub = 0; isub < nsub*nsub; isub++)hsub[isub] = mat[isub];
126 
127  free(mat);
128  free(work);
129  free(rwork);
130  free(iwork);
131 
132  return(nsub2);
133 }/*void diag_ovrp*/
139 static double calc_preshift(
140  double eig,
141  double res,
142  double eps_LOBPCG
143 )
144 {
145  double k, i;
146  double preshift;
147 
148  if (fabs(eig) > 10.0) k = trunc(log10(fabs(eig)));
149  else k = 1.0;
150 
151  if (res < 1.0) {
152  if (eps_LOBPCG > res) i = ceil(log10(eps_LOBPCG));
153  else i = ceil(log10(res));
154 
155  preshift = trunc(eig / pow(10.0, k + i))*pow(10.0, k + i);
156  }
157  else preshift = 0.0;
158 
159  return(preshift);
160 }/*void calc_preshift*/
161 /*
162 @brief Compute initial guess for LOBPCG.
163 If this is resuterting run, read from files.
164 */
165 static void Initialize_wave(
166  struct BindStruct *X,
167  std::complex<double> **wave
168 )
169 {
170  FILE *fp;
171  char sdt[D_FileNameMax];
172  size_t byte_size;
173  std::complex<double> *vin;
174  int ie;
175  int iproc, ierr;
176  long int iv;
177  long int i_max_tmp, sum_i_max, idim, i_max;
178  int mythread;
179  double *dnorm;
180  /*
181  For DSFMT
182  */
183  long int u_long_i;
184  dsfmt_t dsfmt;
188  if (X->Def.iReStart == RESTART_INOUT || X->Def.iReStart == RESTART_IN) {
189  //StartTimer(3600);
190  //TimeKeeperWithRandAndStep(&(X->Bind), "%s_Time_TPQ_Step.dat", " set %d step %d:output vector starts: %s\n", "a", rand_i, step_i);
191  fprintf(stdoutMPI, "%s", " Start: Input vector.\n");
192 
193  ierr = 0;
194  vin = cd_1d_allocate(X->Check.idim_max + 1);
195  for (ie = 0; ie < X->Def.k_exct; ie++) {
196 
197  sprintf(sdt, "tmpvec_set%d_rank_%d.dat", ie, myrank);
198  childfopenALL(sdt, "rb", &fp);
199  if (fp == NULL) {
200  fprintf(stdout, "Restart file is not found.\n");
201  fprintf(stdout, "Start from scratch.\n");
202  ierr = 1;
203  break;
204  }
205  else {
206  byte_size = fread(&iproc, sizeof(int), 1, fp);
207  byte_size = fread(&i_max, sizeof(long int), 1, fp);
208  //fprintf(stdoutMPI, "Debug: i_max=%ld, step_i=%d\n", i_max, step_i);
209  if (i_max != X->Check.idim_max) {
210  fprintf(stderr, "Error: Invalid restart file.\n");
211  exitMPI(-1);
212  }
213  byte_size = fread(vin, sizeof(std::complex<double>), X->Check.idim_max + 1, fp);
214  for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = vin[idim];
215  fclose(fp);
216  }
217  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
218  free_cd_1d_allocate(vin);
219 
220  if (ierr == 0) {
221  //TimeKeeperWithRandAndStep(X, "%s_Time_TPQ_Step.dat", " set %d step %d:output vector finishes: %s\n", "a", rand_i, step_i);
222  fprintf(stdoutMPI, "%s", " End : Input vector.\n");
223  //StopTimer(3600);
224  if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size);
225  return;
226  }/*if (ierr == 0)*/
227 
228  }/*X->Def.iReStart == RESTART_INOUT || X->Def.iReStart == RESTART_IN*/
229 
234  i_max = X->Check.idim_max;
235 
236  if (initial_mode == 0) {
237 
238  for (ie = 0; ie < X->Def.k_exct; ie++) {
239 
240  sum_i_max = SumMPI_li(X->Check.idim_max);
241  X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv + ie) % sum_i_max + 1;
242  iv = X->Large.iv;
243  fprintf(stdoutMPI, " initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d\n\n",
244  initial_mode, iv, i_max, X->Def.k_exct);
245 #pragma omp parallel for default(none) private(idim) shared(wave,i_max,ie)
246  for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = 0.0;
247 
248  sum_i_max = 0;
249  for (iproc = 0; iproc < nproc; iproc++) {
250 
251  i_max_tmp = BcastMPI_li(iproc, i_max);
252  if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {
253 
254  if (myrank == iproc) {
255  wave[iv - sum_i_max + 1][ie] = 1.0;
256  if (X->Def.iInitialVecType == 0) {
257  wave[iv - sum_i_max + 1][ie] += 1.0*I;
258  wave[iv - sum_i_max + 1][ie] /= sqrt(2.0);
259  }
260  }/*if (myrank == iproc)*/
261  }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/
262 
263  sum_i_max += i_max_tmp;
264 
265  }/*for (iproc = 0; iproc < nproc; iproc++)*/
266  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
267  }/*if(initial_mode == 0)*/
268  else if (initial_mode == 1) {
269  iv = X->Def.initial_iv;
270  fprintf(stdoutMPI, " initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d\n\n",
271  initial_mode, iv, i_max, X->Def.k_exct);
272 #pragma omp parallel default(none) private(idim, u_long_i, mythread, dsfmt, ie) \
273  shared(wave, iv, X, nthreads, myrank, i_max,I)
274  {
275  /*
276  Initialise MT
277  */
278 #ifdef _OPENMP
279  mythread = omp_get_thread_num();
280 #else
281  mythread = 0;
282 #endif
283  u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank;
284  dsfmt_init_gen_rand(&dsfmt, u_long_i);
285 
286  for (ie = 0; ie < X->Def.k_exct; ie++) {
287  if (X->Def.iInitialVecType == 0) {
288 #pragma omp for
289  for (idim = 1; idim <= i_max; idim++)
290  wave[idim][ie] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I;
291  }
292  else {
293 #pragma omp for
294  for (idim = 1; idim <= i_max; idim++)
295  wave[idim][ie] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5);
296  }
297  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
298 
299  }/*#pragma omp parallel*/
300 
301  dnorm = d_1d_allocate(X->Def.k_exct);
302  NormMPI_dv(i_max, X->Def.k_exct, wave, dnorm);
303 #pragma omp parallel for default(none) shared(i_max,wave,dnorm,X) private(idim,ie)
304  for (idim = 1; idim <= i_max; idim++)
305  for (ie = 0; ie < X->Def.k_exct; ie++) wave[idim][ie] /= dnorm[ie];
306  free_d_1d_allocate(dnorm);
307  }/*else if(initial_mode==1)*/
308 }/*static void Initialize_wave*/
312 static void Output_restart(
313  struct BindStruct *X,
314  std::complex<double> **wave
315 )
316 {
317  FILE *fp;
318  size_t byte_size;
319  char sdt[D_FileNameMax];
320  int ie;
321  long int idim;
322  std::complex<double> *vout;
323 
324  //TimeKeeperWithRandAndStep(&(X->Bind), "%s_Time_TPQ_Step.dat", " set %d step %d:output vector starts: %s\n", "a", rand_i, step_i);
325  fprintf(stdoutMPI, "%s", " Start: Output vector.\n");
326 
327  vout = cd_1d_allocate(X->Check.idim_max + 1);
328  for (ie = 0; ie < X->Def.k_exct; ie++) {
329  sprintf(sdt, "tmpvec_set%d_rank_%d.dat", ie, myrank);
330  if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1);
331  byte_size = fwrite(&X->Large.itr, sizeof(X->Large.itr), 1, fp);
332  byte_size = fwrite(&X->Check.idim_max, sizeof(X->Check.idim_max), 1, fp);
333  for (idim = 1; idim <= X->Check.idim_max; idim++) vout[idim] = wave[idim][ie];
334  byte_size = fwrite(vout, sizeof(std::complex<double>), X->Check.idim_max + 1, fp);
335  fclose(fp);
336  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
337  free_cd_1d_allocate(vout);
338 
339  //TimeKeeperWithRandAndStep(&(X->Bind), "%s_Time_TPQ_Step.dat", " set %d step %d:output vector finishes: %s\n", "a", rand_i, step_i);
340  fprintf(stdoutMPI, "%s", " End : Output vector.\n");
341  if(byte_size == 0) printf("byte_size : %d\n", (int)byte_size);
342 }/*static void Output_restart*/
352  struct BindStruct *X
353 )
354 {
355  char sdt[D_FileNameMax], sdt_2[D_FileNameMax];
356  FILE *fp;
357  int iconv = -1, i4_max;
358  long int idim, i_max;
359  int ie, stp;
360  int ii, jj, nsub, nsub_cut, nstate;
361  std::complex<double> ***wxp/*[0] w, [1] x, [2] p of Ref.1*/,
362  ***hwxp/*[0] h*w, [1] h*x, [2] h*p of Ref.1*/,
363  ****hsub, ****ovlp; /*Subspace Hamiltonian and Overlap*/
364  double *eig, *dnorm, eps_LOBPCG, eigabs_max, preshift, precon, dnormmax, *eigsub;
365  int do_precon = 0;//If = 1, use preconditioning (experimental)
366  char tN = 'N', tC = 'C';
367  std::complex<double> one = 1.0, zero = 0.0;
368 
369  nsub = 3 * X->Def.k_exct;
370  nstate = X->Def.k_exct;
371 
372  eig = d_1d_allocate(X->Def.k_exct);
373  dnorm = d_1d_allocate(X->Def.k_exct);
374  eigsub = d_1d_allocate(nsub);
375  hsub = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct);
376  ovlp = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct);
377 
378  i_max = X->Check.idim_max;
379  i4_max = (int)i_max;
380 
381  free_cd_2d_allocate(v0);
382  free_cd_2d_allocate(v1);
383  wxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct);
384  hwxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct);
390  Initialize_wave(X, wxp[1]);
391 
392  TimeKeeper(X, "%s_TimeKeeper.dat", "Lanczos Eigen Value start: %s", "a");
393 
394  zclear(i_max*X->Def.k_exct, &hwxp[1][1][0]);
395  mltply(X, X->Def.k_exct, hwxp[1], wxp[1]);
396  stp = 1;
397  TimeKeeperWithStep(X, "%s_TimeKeeper.dat", "%3d th Lanczos step: %s", "a", 0);
398 
399  zclear(i_max*X->Def.k_exct, &wxp[2][1][0]);
400  zclear(i_max*X->Def.k_exct, &hwxp[2][1][0]);
401  for (ie = 0; ie < X->Def.k_exct; ie++) eig[ie] = 0.0;
402  for (idim = 1; idim <= i_max; idim++) {
403  for (ie = 0; ie < X->Def.k_exct; ie++) {
404  wxp[2][idim][ie] = 0.0;
405  hwxp[2][idim][ie] = 0.0;
406  eig[ie] += real(conj(wxp[1][idim][ie]) * hwxp[1][idim][ie]);
407  }
408  }
409  SumMPI_dv(X->Def.k_exct, eig);
410 
411  sprintf(sdt_2, "%s_Lanczos_Step.dat", X->Def.CDataFileHead);
412  childfopenMPI(sdt_2, "w", &fp);
413  fprintf(stdoutMPI, " Step Residual-2-norm Threshold Energy\n");
414  fprintf(fp, " Step Residual-2-norm Threshold Energy\n");
415  fclose(fp);
416 
417  nsub_cut = nsub;
422  for (stp = 1; stp <= X->Def.Lanczos_max; stp++) {
427  eigabs_max = 0.0;
428  for (ie = 0; ie < X->Def.k_exct; ie++)
429  if (fabs(eig[ie]) > eigabs_max) eigabs_max = fabs(eig[ie]);
430  eps_LOBPCG = pow(10, -0.5 *X->Def.LanczosEps);
431  if (eigabs_max > 1.0) eps_LOBPCG *= eigabs_max;
439 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig,X) private(idim,ie)
440  for (idim = 1; idim <= i_max; idim++) {
441  for (ie = 0; ie < X->Def.k_exct; ie++) {
442  wxp[0][idim][ie] = hwxp[1][idim][ie] - eig[ie] * wxp[1][idim][ie];
443  }
444  }
445  NormMPI_dv(i_max, X->Def.k_exct, wxp[0], dnorm);
446 
447  dnormmax = 0.0;
448  for (ie = 0; ie < X->Def.k_exct; ie++)
449  if (dnorm[ie] > dnormmax) dnormmax = dnorm[ie];
453  if (stp /= 1) {
454  if (do_precon == 1) {
455  for (ie = 0; ie < X->Def.k_exct; ie++)
456  preshift = calc_preshift(eig[ie], dnorm[ie], eps_LOBPCG);
457 #pragma omp parallel for default(none) shared(wxp,list_Diagonal,preshift,i_max,eps_LOBPCG,X) \
458 private(idim,precon,ie)
459  for (idim = 1; idim <= i_max; idim++) {
460  for (ie = 0; ie < X->Def.k_exct; ie++){
461  precon = list_Diagonal[idim] - preshift;
462  if (fabs(precon) > eps_LOBPCG) wxp[0][idim][ie] /= precon;
463  }
464  }
465  }/*if(do_precon == 1)*/
469  NormMPI_dv(i_max, X->Def.k_exct, wxp[0], dnorm);
470 #pragma omp parallel for default(none) shared(i_max,wxp,dnorm,X) private(idim,ie)
471  for (idim = 1; idim <= i_max; idim++)
472  for (ie = 0; ie < X->Def.k_exct; ie++)
473  wxp[0][idim][ie] /= dnorm[ie];
474  }/*if (stp /= 1)*/
480  childfopenMPI(sdt_2, "a", &fp);
481  fprintf(stdoutMPI, "%9d %15.5e %15.5e ", stp, dnormmax, eps_LOBPCG);
482  fprintf(fp, "%9d %15.5e %15.5e ", stp, dnormmax, eps_LOBPCG);
483  for (ie = 0; ie < X->Def.k_exct; ie++) {
484  fprintf(stdoutMPI, " %15.5e", eig[ie]);
485  fprintf(fp, " %15.5e", eig[ie]);
486  }
487  if(nsub_cut == 0) printf("nsub_cut : %d", nsub_cut);
488  fprintf(stdoutMPI, "\n");
489  fprintf(fp, "\n");
490  fclose(fp);
491 
492  if (dnormmax < eps_LOBPCG) {
493  iconv = 0;
494  break;
495  }
499  zclear(i_max*X->Def.k_exct, &hwxp[0][1][0]);
500  mltply(X, X->Def.k_exct, hwxp[0], wxp[0]);
501 
502  TimeKeeperWithStep(X, "%s_TimeKeeper.dat", "%3d th Lanczos step: %s", "a", stp);
509  for (ii = 0; ii < 3; ii++) {
510  for (jj = 0; jj < 3; jj++) {
511  zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one,
512  &wxp[ii][1][0], &nstate, &wxp[jj][1][0], &nstate, &zero, &ovlp[jj][0][ii][0], &nsub);
513  zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one,
514  &wxp[ii][1][0], &nstate, &hwxp[jj][1][0], &nstate, &zero, &hsub[jj][0][ii][0], &nsub);
515  }
516  }
517  SumMPI_cv(nsub*nsub, &ovlp[0][0][0][0]);
518  SumMPI_cv(nsub*nsub, &hsub[0][0][0][0]);
519 
520  for (ie = 0; ie < X->Def.k_exct; ie++)
521  eig[ie] = real(hsub[1][ie][1][ie]);
527  nsub_cut = diag_ovrp(nsub, &hsub[0][0][0][0], &ovlp[0][0][0][0], eigsub);
531  for (ie = 0; ie < X->Def.k_exct; ie++)
532  eig[ie] = 0.5 * (eig[ie] + eigsub[ie]);
537  zclear(i_max*X->Def.k_exct, &v1buf[1][0]);
538  for (ii = 0; ii < 3; ii++) {
539  zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
540  &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate);
541  }
542  for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)
543  wxp[1][idim][ie] = v1buf[idim][ie];
548  zclear(i_max*X->Def.k_exct, &v1buf[1][0]);
549  for (ii = 0; ii < 3; ii++) {
550  zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
551  &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate);
552  }
553  for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)
554  hwxp[1][idim][ie] = v1buf[idim][ie];
559  zclear(i_max*X->Def.k_exct, &v1buf[1][0]);
560  for (ii = 0; ii < 3; ii += 2) {
561  zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
562  &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate);
563  }
564  for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)
565  wxp[2][idim][ie] = v1buf[idim][ie];
570  zclear(i_max*X->Def.k_exct, &v1buf[1][0]);
571  for (ii = 0; ii < 3; ii += 2) {
572  zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one,
573  &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate);
574  }
575  for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)
576  hwxp[2][idim][ie] = v1buf[idim][ie];
580  for (ii = 1; ii < 3; ii++) {
581  NormMPI_dv(i_max, X->Def.k_exct, wxp[ii], dnorm);
582 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,dnorm,ii,X) private(idim,ie)
583  for (idim = 1; idim <= i_max; idim++) {
584  for (ie = 0; ie < X->Def.k_exct; ie++) {
585  wxp[ii][idim][ie] /= dnorm[ie];
586  hwxp[ii][idim][ie] /= dnorm[ie];
587  }/* for (ie = 0; ie < X->Def.k_exct; ie++)*/
588  }
589  }/*for (ii = 1; ii < 3; ii++)*/
590 
591  }/*for (stp = 1; stp <= X->Def.Lanczos_max; stp++)*/
596  //fclose(fp);
597 
598  X->Large.itr = stp;
599  sprintf(sdt, "%s_TimeKeeper.dat", X->Def.CDataFileHead);
600 
601  TimeKeeper(X, "%s_TimeKeeper.dat", "Lanczos Eigenvalue finishes: %s", "a");
602  fprintf(stdoutMPI, "%s", "\n###### End : Calculate Lanczos EigenValue. ######\n\n");
603 
604  free_d_1d_allocate(eig);
605  free_d_1d_allocate(dnorm);
606  free_d_1d_allocate(eigsub);
607  free_cd_4d_allocate(hsub);
608  free_cd_4d_allocate(ovlp);
609  free_cd_3d_allocate(hwxp);
613  if (X->Def.iReStart == RESTART_OUT || X->Def.iReStart == RESTART_INOUT){
614  Output_restart(X, wxp[1]);
615  if(iconv != 0) {
616  sprintf(sdt, "%s", "Lanczos Eigenvalue is not converged in this process.");
617  return 1;
618  }
619  }
624  v0 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct);
625 #pragma omp parallel for default(none) shared(i_max,wxp,v0,X) private(idim,ie)
626  for (idim = 1; idim <= i_max; idim++)
627  for (ie = 0; ie < X->Def.k_exct; ie++)
628  v0[idim][ie] = wxp[1][idim][ie];
629  free_cd_3d_allocate(wxp);
630  v1 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct);
631 
632  if (iconv != 0) {
633  sprintf(sdt, "%s", "Lanczos Eigenvalue is not converged in this process.");
634  return -1;
635  }
636  else {
637  return 0;
638  }
639 }/*int LOBPCG_Main*/
644  struct EDMainCalStruct *X
645 )
646 {
647  char sdt[D_FileNameMax];
648  size_t byte_size;
649  long int ie;
650  long int i_max = 0;
651  long int idim;
652  FILE *fp;
653  std::complex<double> *vin;
654 
655  fprintf(stdoutMPI, "###### Eigenvalue with LOBPCG #######\n\n");
656 
657  if (X->Bind.Def.iInputEigenVec == FALSE) {
658 
659  // this part will be modified
660  switch (X->Bind.Def.iCalcModel) {
661  case HubbardGC:
662  case SpinGC:
663  case KondoGC:
664  case SpinlessFermionGC:
665  initial_mode = 1; // 1 -> random initial vector
666  break;
667  case Hubbard:
668  case Kondo:
669  case Spin:
670  case SpinlessFermion:
671 
672  if (X->Bind.Def.iFlgGeneralSpin == TRUE) {
673  initial_mode = 1;
674  }
675  else {
676  if (X->Bind.Def.initial_iv>0) {
677  initial_mode = 0; // 0 -> only v[iv] = 1
678  }
679  else {
680  initial_mode = 1; // 1 -> random initial vector
681  }
682  }
683  break;
684  default:
685  //fclose(fp);
686  exitMPI(-1);
687  }
688 
689  int iret = LOBPCG_Main(&(X->Bind));
690  if (iret != 0) {
691  if(iret ==1) return (TRUE);
692  else{
693  fprintf(stdoutMPI, " LOBPCG is not converged in this process.\n");
694  return(FALSE);
695  }
696  }
697  }/*if (X->Bind.Def.iInputEigenVec == FALSE)*/
698  else {// X->Bind.Def.iInputEigenVec=true :input v1:
703  fprintf(stdoutMPI, "An Eigenvector is inputted.\n");
704  vin = cd_1d_allocate(X->Bind.Check.idim_max + 1);
705  for (ie = 0; ie < X->Bind.Def.k_exct; ie++) {
706  TimeKeeper(&(X->Bind), "%s_TimeKeeper.dat", "Read Eigenvector starts: %s", "a");
707  sprintf(sdt, "%s_eigenvec_%ld_rank_%d.dat", X->Bind.Def.CDataFileHead, ie, myrank);
708  childfopenALL(sdt, "rb", &fp);
709  if (fp == NULL) {
710  fprintf(stderr, "Error: Inputvector file is not found.\n");
711  exitMPI(-1);
712  }
713  byte_size = fread(&step_i, sizeof(int), 1, fp);
714  byte_size = fread(&i_max, sizeof(long int), 1, fp);
715  if (i_max != X->Bind.Check.idim_max) {
716  fprintf(stderr, "Error: Invalid Inputvector file.\n");
717  exitMPI(-1);
718  }
719  byte_size = fread(vin, sizeof(std::complex<double>), X->Bind.Check.idim_max + 1, fp);
720 #pragma omp parallel for default(none) shared(v1,vin, i_max, ie), private(idim)
721  for (idim = 1; idim <= i_max; idim++) {
722  v1[ie][idim] = vin[idim];
723  }
724  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
725  fclose(fp);
726  free_cd_1d_allocate(vin);
727  TimeKeeper(&(X->Bind), "%s_TimeKeeper.dat", "Read Eigenvector finishes: %s", "a");
728 
729  if(byte_size == 0) printf("byte_size : %d\n", (int)byte_size);
730  }/*X->Bind.Def.iInputEigenVec == TRUE*/
731 
732  fprintf(stdoutMPI, "%s", "\n###### End : Calculate Lanczos EigenVec. ######\n\n");
737  phys(&(X->Bind), X->Bind.Def.k_exct);
738 
739  X->Bind.Def.St=1;
740  if (X->Bind.Def.St == 0) {
741  sprintf(sdt, "%s_energy.dat", X->Bind.Def.CDataFileHead);
742  }
743  else if (X->Bind.Def.St == 1) {
744  sprintf(sdt, "%s_energy.dat", X->Bind.Def.CDataFileHead);
745  }
746 
747  if (childfopenMPI(sdt, "w", &fp) != 0) {
748  exitMPI(-1);
749  }
750  for (ie = 0; ie < X->Bind.Def.k_exct; ie++) {
751  //phys(&(X->Bind), ie);
752  fprintf(fp, "State %ld\n", ie);
753  fprintf(fp, " Energy %.16lf \n", X->Bind.Phys.energy[ie]);
754  fprintf(fp, " Doublon %.16lf \n", X->Bind.Phys.doublon[ie]);
755  fprintf(fp, " Sz %.16lf \n", X->Bind.Phys.Sz[ie]);
756  //fprintf(fp, " S^2 %.16lf \n", X->Bind.Phys.s2[ie]);
757  //fprintf(fp, " N_up %.16lf \n", X->Bind.Phys.num_up[ie]);
758  //fprintf(fp, " N_down %.16lf \n", X->Bind.Phys.num_down[ie]);
759  fprintf(fp, "\n");
760  }
761  fclose(fp);
762  /*
763  Output Eigenvector to a file
764  */
765  if (X->Bind.Def.iOutputEigenVec == TRUE) {
766  TimeKeeper(&(X->Bind), "%s_TimeKeeper.dat", "Output Eigenvector starts: %s", "a");
767 
768  vin = cd_1d_allocate(X->Bind.Check.idim_max + 1);
769  for (ie = 0; ie < X->Bind.Def.k_exct; ie++) {
770 
771 #pragma omp parallel for default(none) shared(X,v1,ie,vin) private(idim)
772  for (idim = 1; idim <= X->Bind.Check.idim_max; idim++)
773  vin[idim] = v1[idim][ie];
774 
775  sprintf(sdt, "%s_eigenvec_%ld_rank_%d.dat", X->Bind.Def.CDataFileHead, ie, myrank);
776  if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1);
777  byte_size = fwrite(&X->Bind.Large.itr, sizeof(X->Bind.Large.itr), 1, fp);
778  byte_size = fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max), 1, fp);
779  byte_size = fwrite(vin, sizeof(std::complex<double>), X->Bind.Check.idim_max + 1, fp);
780  fclose(fp);
781  }/*for (ie = 0; ie < X->Bind.Def.k_exct; ie++)*/
782  free_cd_1d_allocate(vin);
783 
784  TimeKeeper(&(X->Bind), "%s_TimeKeeper.dat", "Output Eigenvector starts: %s", "a");
785  }/*if (X->Bind.Def.iOutputEigenVec == TRUE)*/
786 
787  return TRUE;
788 
789 }/*int CalcByLOBPCG*/
double * doublon
Expectation value of the Doublon.
Definition: struct.hpp:357
void exitMPI(int errorcode)
MPI Abortation wrapper.
Definition: wrapperMPI.cpp:86
int LanczosEps
log(10 base) of the convergence threshold. Read from Calcmod in readdef.h
Definition: struct.hpp:48
void debug_print(int num, std::complex< double > *var)
struct DefineList Def
Definision of system (Hamiltonian) etc.
Definition: struct.hpp:395
int nproc
Number of processors, defined in InitializeMPI()
Definition: global.cpp:72
int St
0 or 1, but it affects nothing.
Definition: struct.hpp:80
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI()
Definition: global.cpp:75
int childfopenALL(const char *_cPathChild, const char *_cmode, FILE **_fp)
All processes open file in output/ directory.
Definition: FileIO.cpp:50
int itr
Iteration number.
Definition: struct.hpp:316
std::complex< double > ** v1buf
Definition: global.cpp:22
int iReStart
Definition: struct.hpp:223
std::complex< double > ** v0
Definition: global.cpp:20
long int SumMPI_li(long int idim)
MPI wrapper function to obtain sum of unsigned long integer across processes.
Definition: wrapperMPI.cpp:271
int LOBPCG_Main(struct BindStruct *X)
Core routine for the LOBPCG method This method is introduced inS. Yamada, et al., Transactions of JSC...
static double calc_preshift(double eig, double res, double eps_LOBPCG)
Compute adaptively shifted preconditionar written in S. Yamada, et al., Transactions of JSCES...
std::complex< double > I(0.0, 1.0)
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log.
Definition: log.cpp:78
int iOutputEigenVec
ASwitch for outputting an eigenvector. 0: no output, 1:output.
Definition: struct.hpp:204
int mltply(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. . First, the calculation of diago...
Definition: mltply.cpp:56
struct LargeList Large
Variables for Matrix-Vector product.
Definition: struct.hpp:397
int CalcByLOBPCG(struct EDMainCalStruct *X)
Driver routine for LOB(P)CG method.
struct PhysList Phys
Physical quantities.
Definition: struct.hpp:398
void NormMPI_dv(long int ndim, int nstate, std::complex< double > **_v1, double *dnorm)
Compute norm of process-distributed vector .
Definition: wrapperMPI.cpp:344
std::complex< double > ** v1
Definition: global.cpp:21
void SumMPI_cv(int nnorm, std::complex< double > *norm)
MPI wrapper function to obtain sum of Double array across processes.
Definition: wrapperMPI.cpp:254
double * Sz
Expectation value of the Total Sz.
Definition: struct.hpp:361
long int iv
Used for initializing vector.
Definition: struct.hpp:317
Bind.
Definition: struct.hpp:394
int nthreads
Number of Threads, defined in InitializeMPI()
Definition: global.cpp:74
void zclear(long int n, std::complex< double > *x)
clear std::complex<double> array.
Definition: mltply.cpp:143
int Lanczos_max
Maximum number of iterations.
Definition: struct.hpp:74
double * energy
Expectation value of the total energy.
Definition: struct.hpp:356
int myrank
Process ID, defined in InitializeMPI()
Definition: global.cpp:73
int step_i
Definition: global.cpp:44
int initial_mode
Definition: global.cpp:38
int iFlgGeneralSpin
Flag for the general (Sz/=1/2) spin.
Definition: struct.hpp:86
void SumMPI_dv(int nnorm, double *norm)
MPI wrapper function to obtain sum of Double array across processes.
Definition: wrapperMPI.cpp:238
double * list_Diagonal
Definition: global.cpp:24
long int BcastMPI_li(int root, long int idim)
MPI wrapper function to broadcast long integer across processes.
Definition: wrapperMPI.cpp:305
static void Output_restart(struct BindStruct *X, std::complex< double > **wave)
Output eigenvectors for restart LOBPCG method.
int TimeKeeper(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType)
Functions for writing a time log.
Definition: log.cpp:42
int iCalcModel
Switch for model. 0:Hubbard, 1:Spin, 2:Kondo, 3:HubbardGC, 4:SpinGC, 5:KondoGC, 6:HubbardNConserved.
Definition: struct.hpp:200
static int diag_ovrp(int nsub, std::complex< double > *hsub, std::complex< double > *ovlp, double *eig)
Solve the generalized eigenvalue problem with the Lowdin&#39;s orthogonalization.
static void Initialize_wave(struct BindStruct *X, std::complex< double > **wave)
long int initial_iv
Seed of random number for initial guesss of wavefunctions.
Definition: struct.hpp:76
void zgemm_(char *transa, char *transb, int *m, int *n, int *k, std::complex< double > *alpha, std::complex< double > *a, int *lda, std::complex< double > *b, int *ldb, std::complex< double > *beta, std::complex< double > *c, int *ldc)
void phys(struct BindStruct *X, long int neig)
A main function to calculate physical quantities by full diagonalization method.
Definition: phys.cpp:48
int k_exct
Read from Calcmod in readdef.h.
Definition: struct.hpp:47
void zheevd_(char *jobz, char *uplo, int *n, std::complex< double > *a, int *lda, double *w, std::complex< double > *work, int *lwork, double *rwork, int *lrwork, int *iwork, int *liwork, int *info)
int iInitialVecType
Switch for type of inital vectors. 0:complex type, 1: real type. default value is set as 0 in readdef...
Definition: struct.hpp:197
int iInputEigenVec
Switch for reading an eigenvector. 0: no input, 1:input.
Definition: struct.hpp:205
struct CheckList Check
Size of the Hilbert space.
Definition: struct.hpp:396
char * CDataFileHead
Read from Calcmod in readdef.h. Header of output file such as Green&#39;s function.
Definition: struct.hpp:42
long int idim_max
The dimension of the Hilbert space of this process.
Definition: struct.hpp:305
struct BindStruct Bind
Binded struct.
Definition: struct.hpp:405
int childfopenMPI(const char *_cPathChild, const char *_cmode, FILE **_fp)
Only the root process open file in output/ directory.
Definition: FileIO.cpp:27