HPhi++  3.1.0
expec_energy_flct.cpp
1 /* HPhi - Quantum Lattice Model Simulator */
2 /* Copyright (C) 2015 The University of Tokyo */
3 
4 /* This program is free software: you can redistribute it and/or modify */
5 /* it under the terms of the GNU General Public License as published by */
6 /* the Free Software Foundation, either version 3 of the License, or */
7 /* (at your option) any later version. */
8 
9 /* This program is distributed in the hope that it will be useful, */
10 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
11 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
12 /* GNU General Public License for more details. */
13 
14 /* You should have received a copy of the GNU General Public License */
15 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
16 
17 #include "bitcalc.hpp"
18 #include "mltplyCommon.hpp"
19 #include "mltply.hpp"
20 #include "expec_energy_flct.hpp"
21 #include "wrapperMPI.hpp"
22 #include "CalcTime.hpp"
23 #include "common/setmemory.hpp"
29 int expec_energy_flct_HubbardGC(
30  struct BindStruct *X,
31  int nstate,
32  std::complex<double> **tmp_v0
33 ) {
34  long int j;
35  long int isite1;
36  long int is1_up_a, is1_up_b;
37  long int is1_down_a, is1_down_b;
38  int bit_up, bit_down, bit_D, istate, mythread;
39  long int ibit_up, ibit_down, ibit_D;
40  double D, N, Sz;
41  double *tmp_v02;
42  long int i_max;
43  int l_ibit1, u_ibit1, i_32;
44  double **doublon_t, **doublon2_t, **num_t, **num2_t, **Sz_t, **Sz2_t;
45 
46  i_max = X->Check.idim_max;
47  doublon_t = d_2d_allocate(nthreads, nstate);
48  doublon2_t = d_2d_allocate(nthreads, nstate);
49  num_t = d_2d_allocate(nthreads, nstate);
50  num2_t = d_2d_allocate(nthreads, nstate);
51  Sz_t = d_2d_allocate(nthreads, nstate);
52  Sz2_t = d_2d_allocate(nthreads, nstate);
53  i_32 = 0xFFFFFFFF; //2^32 - 1
54 
55  //[s] for bit count
56  is1_up_a = 0;
57  is1_up_b = 0;
58  is1_down_a = 0;
59  is1_down_b = 0;
60  for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) {
61  if (isite1 > X->Def.Nsite) {
62  is1_up_a += X->Def.Tpow[2 * isite1 - 2];
63  is1_down_a += X->Def.Tpow[2 * isite1 - 1];
64  }
65  else {
66  is1_up_b += X->Def.Tpow[2 * isite1 - 2];
67  is1_down_b += X->Def.Tpow[2 * isite1 - 1];
68  }
69  }
70  //[e]
71 #pragma omp parallel default(none) \
72 shared(tmp_v0,list_1,doublon_t,doublon2_t,num_t,num2_t,Sz_t,Sz2_t,nstate) \
73 firstprivate(i_max,X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \
74 private(j,tmp_v02,D,N,Sz,isite1,bit_up,bit_down,bit_D,u_ibit1,l_ibit1, \
75  ibit_up,ibit_down,ibit_D,mythread,istate)
76  {
77  tmp_v02 = d_1d_allocate(nstate);
78 #ifdef _OPENMP
79  mythread = omp_get_thread_num();
80 #else
81  mythread = 0;
82 #endif
83 #pragma omp for
84  for (j = 1; j <= i_max; j++) {
85  for (istate = 0; istate < nstate; istate++)
86  tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
87  bit_up = 0;
88  bit_down = 0;
89  bit_D = 0;
90  // isite1 > X->Def.Nsite
91  ibit_up = (long int) myrank & is1_up_a;
92  u_ibit1 = ibit_up >> 32;
93  l_ibit1 = ibit_up & i_32;
94  bit_up += pop(u_ibit1);
95  bit_up += pop(l_ibit1);
96 
97  ibit_down = (long int) myrank & is1_down_a;
98  u_ibit1 = ibit_down >> 32;
99  l_ibit1 = ibit_down & i_32;
100  bit_down += pop(u_ibit1);
101  bit_down += pop(l_ibit1);
102 
103  ibit_D = (ibit_up) & (ibit_down >> 1);
104  u_ibit1 = ibit_D >> 32;
105  l_ibit1 = ibit_D & i_32;
106  bit_D += pop(u_ibit1);
107  bit_D += pop(l_ibit1);
108 
109  // isite1 <= X->Def.Nsite
110  ibit_up = (long int) (j - 1) & is1_up_b;
111  u_ibit1 = ibit_up >> 32;
112  l_ibit1 = ibit_up & i_32;
113  bit_up += pop(u_ibit1);
114  bit_up += pop(l_ibit1);
115 
116  ibit_down = (long int) (j - 1) & is1_down_b;
117  u_ibit1 = ibit_down >> 32;
118  l_ibit1 = ibit_down & i_32;
119  bit_down += pop(u_ibit1);
120  bit_down += pop(l_ibit1);
121 
122  ibit_D = (ibit_up) & (ibit_down >> 1);
123  u_ibit1 = ibit_D >> 32;
124  l_ibit1 = ibit_D & i_32;
125  bit_D += pop(u_ibit1);
126  bit_D += pop(l_ibit1);
127 
128  D = bit_D;
129  N = bit_up + bit_down;
130  Sz = bit_up - bit_down;
131 
132  for (istate = 0; istate < nstate; istate++) {
133  doublon_t[mythread][istate] += tmp_v02[istate] * D;
134  doublon2_t[mythread][istate] += tmp_v02[istate] * D * D;
135  num_t[mythread][istate] += tmp_v02[istate] * N;
136  num2_t[mythread][istate] += tmp_v02[istate] * N * N;
137  Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
138  Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
139  }
140  }/*for (j = 1; j <= i_max; j++)*/
141  free_d_1d_allocate(tmp_v02);
142  }/*end of parallel region*/
143 
144  for (istate = 0; istate < nstate; istate++) {
145  X->Phys.doublon[istate] = 0.0;
146  X->Phys.doublon2[istate] = 0.0;
147  X->Phys.num[istate] = 0.0;
148  X->Phys.num2[istate] = 0.0;
149  X->Phys.Sz[istate] = 0.0;
150  X->Phys.Sz2[istate] = 0.0;
151  for (mythread = 0; mythread < nthreads; mythread++) {
152  X->Phys.doublon[istate] += doublon_t[mythread][istate];
153  X->Phys.doublon2[istate] += doublon2_t[mythread][istate];
154  X->Phys.num[istate] += num_t[mythread][istate];
155  X->Phys.num2[istate] += num2_t[mythread][istate];
156  X->Phys.Sz[istate] += Sz_t[mythread][istate];
157  X->Phys.Sz2[istate] += Sz2_t[mythread][istate];
158  }
159  }
160  SumMPI_dv(nstate, X->Phys.doublon);
161  SumMPI_dv(nstate, X->Phys.doublon2);
162  SumMPI_dv(nstate, X->Phys.num);
163  SumMPI_dv(nstate, X->Phys.num2);
164  SumMPI_dv(nstate, X->Phys.Sz);
165  SumMPI_dv(nstate, X->Phys.Sz2);
166 
167  for (istate = 0; istate < nstate; istate++) {
168  X->Phys.Sz[istate] *= 0.5;
169  X->Phys.Sz2[istate] *= 0.25;
170  X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]);
171  X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]);
172  }
173 
174  free_d_2d_allocate(doublon_t);
175  free_d_2d_allocate(doublon2_t);
176  free_d_2d_allocate(num_t);
177  free_d_2d_allocate(num2_t);
178  free_d_2d_allocate(Sz_t);
179  free_d_2d_allocate(Sz2_t);
180  return 0;
181 }
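To make the bit arithmetic above concrete: site i stores its up-spin bit at position 2i-2 and its down-spin bit at 2i-1, so shifting the down-bit word right by one aligns it with the up bits, and the AND marks doubly occupied sites. A minimal standalone check (illustrative only; plain variables stand in for the BindStruct fields and Tpow masks):

// Illustrative check of the occupation/doublon bit trick used above (not HPhi code).
#include <bitset>
#include <cstdio>

int main() {
  long int state   = 0b0111; // site 1: up+down (doublon), site 2: up only
  long int is_up   = 0b0101; // up-spin mask for sites 1 and 2
  long int is_down = 0b1010; // down-spin mask for sites 1 and 2

  long int ibit_up   = state & is_up;              // 0b0101 -> 2 up electrons
  long int ibit_down = state & is_down;            // 0b0010 -> 1 down electron
  long int ibit_D    = ibit_up & (ibit_down >> 1); // 0b0001 -> 1 doublon

  std::printf("N_up=%zu N_down=%zu D=%zu\n",
              std::bitset<64>(ibit_up).count(),
              std::bitset<64>(ibit_down).count(),
              std::bitset<64>(ibit_D).count()); // prints N_up=2 N_down=1 D=1
  return 0;
}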
187 int expec_energy_flct_Hubbard(
188  struct BindStruct *X,
189  int nstate,
190  std::complex<double> **tmp_v0
191 ) {
192  long int j;
193  long int isite1;
194  long int is1_up_a, is1_up_b;
195  long int is1_down_a, is1_down_b;
196  int bit_up, bit_down, bit_D, istate, mythread;
197  long int ibit_up, ibit_down, ibit_D;
198  double **doublon_t, **doublon2_t, **num_t, **num2_t, **Sz_t, **Sz2_t;
199  double D, N, Sz;
200  double *tmp_v02;
201  long int i_max, tmp_list_1;
202  int l_ibit1, u_ibit1, i_32;
203 
204  i_max = X->Check.idim_max;
205 
206  doublon_t = d_2d_allocate(nthreads, nstate);
207  doublon2_t = d_2d_allocate(nthreads, nstate);
208  num_t = d_2d_allocate(nthreads, nstate);
209  num2_t = d_2d_allocate(nthreads, nstate);
210  Sz_t = d_2d_allocate(nthreads, nstate);
211  Sz2_t = d_2d_allocate(nthreads, nstate);
212  i_32 = 0xFFFFFFFF;
213 
214  //[s] for bit count
215  is1_up_a = 0;
216  is1_up_b = 0;
217  is1_down_a = 0;
218  is1_down_b = 0;
219  for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) {
220  if (isite1 > X->Def.Nsite) {
221  is1_up_a += X->Def.Tpow[2 * isite1 - 2];
222  is1_down_a += X->Def.Tpow[2 * isite1 - 1];
223  }
224  else {
225  is1_up_b += X->Def.Tpow[2 * isite1 - 2];
226  is1_down_b += X->Def.Tpow[2 * isite1 - 1];
227  }
228  }
229  //[e]
230 #pragma omp parallel default(none) \
231 shared(tmp_v0,list_1,doublon_t,doublon2_t,num_t,num2_t,Sz_t,Sz2_t,nstate) \
232 firstprivate(i_max, X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \
233 private(j,tmp_v02,D,N,Sz,isite1,tmp_list_1,bit_up,bit_down,bit_D,u_ibit1, \
234  l_ibit1,ibit_up,ibit_down,ibit_D,mythread,istate)
235  {
236  tmp_v02 = d_1d_allocate(nstate);
237 #ifdef _OPENMP
238  mythread = omp_get_thread_num();
239 #else
240  mythread = 0;
241 #endif
242 #pragma omp for
243  for (j = 1; j <= i_max; j++) {
244  for (istate = 0; istate < nstate; istate++)
245  tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
246  bit_up = 0;
247  bit_down = 0;
248  bit_D = 0;
249  tmp_list_1 = list_1[j];
250  // isite1 > X->Def.Nsite
251  ibit_up = (long int) myrank & is1_up_a;
252  u_ibit1 = ibit_up >> 32;
253  l_ibit1 = ibit_up & i_32;
254  bit_up += pop(u_ibit1);
255  bit_up += pop(l_ibit1);
256 
257  ibit_down = (long int) myrank & is1_down_a;
258  u_ibit1 = ibit_down >> 32;
259  l_ibit1 = ibit_down & i_32;
260  bit_down += pop(u_ibit1);
261  bit_down += pop(l_ibit1);
262 
263  ibit_D = (ibit_up) & (ibit_down >> 1);
264  u_ibit1 = ibit_D >> 32;
265  l_ibit1 = ibit_D & i_32;
266  bit_D += pop(u_ibit1);
267  bit_D += pop(l_ibit1);
268 
269  // isite1 <= X->Def.Nsite
270  ibit_up = (long int) tmp_list_1 & is1_up_b;
271  u_ibit1 = ibit_up >> 32;
272  l_ibit1 = ibit_up & i_32;
273  bit_up += pop(u_ibit1);
274  bit_up += pop(l_ibit1);
275 
276  ibit_down = (long int) tmp_list_1 & is1_down_b;
277  u_ibit1 = ibit_down >> 32;
278  l_ibit1 = ibit_down & i_32;
279  bit_down += pop(u_ibit1);
280  bit_down += pop(l_ibit1);
281 
282  ibit_D = (ibit_up) & (ibit_down >> 1);
283  u_ibit1 = ibit_D >> 32;
284  l_ibit1 = ibit_D & i_32;
285  bit_D += pop(u_ibit1);
286  bit_D += pop(l_ibit1);
287 
288  D = bit_D;
289  N = bit_up + bit_down;
290  Sz = bit_up - bit_down;
291 
292  for (istate = 0; istate < nstate; istate++) {
293  doublon_t[mythread][istate] += tmp_v02[istate] * D;
294  doublon2_t[mythread][istate] += tmp_v02[istate] * D * D;
295  num_t[mythread][istate] += tmp_v02[istate] * N;
296  num2_t[mythread][istate] += tmp_v02[istate] * N * N;
297  Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
298  Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
299  }
300  }/*for (j = 1; j <= i_max; j++)*/
301  free_d_1d_allocate(tmp_v02);
302  }/*end of parallel region*/
303 
304  for (istate = 0; istate < nstate; istate++) {
305  X->Phys.doublon[istate] = 0.0;
306  X->Phys.doublon2[istate] = 0.0;
307  X->Phys.num[istate] = 0.0;
308  X->Phys.num2[istate] = 0.0;
309  X->Phys.Sz[istate] = 0.0;
310  X->Phys.Sz2[istate] = 0.0;
311  for (mythread = 0; mythread < nthreads; mythread++) {
312  X->Phys.doublon[istate] += doublon_t[mythread][istate];
313  X->Phys.doublon2[istate] += doublon2_t[mythread][istate];
314  X->Phys.num[istate] += num_t[mythread][istate];
315  X->Phys.num2[istate] += num2_t[mythread][istate];
316  X->Phys.Sz[istate] += Sz_t[mythread][istate];
317  X->Phys.Sz2[istate] += Sz2_t[mythread][istate];
318  }
319  }
320  SumMPI_dv(nstate, X->Phys.doublon);
321  SumMPI_dv(nstate, X->Phys.doublon2);
322  SumMPI_dv(nstate, X->Phys.num);
323  SumMPI_dv(nstate, X->Phys.num2);
324  SumMPI_dv(nstate, X->Phys.Sz);
325  SumMPI_dv(nstate, X->Phys.Sz2);
326 
327  for (istate = 0; istate < nstate; istate++) {
328  X->Phys.Sz[istate] *= 0.5;
329  X->Phys.Sz2[istate] *= 0.25;
330  X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]);
331  X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]);
332  }
333 
334  free_d_2d_allocate(doublon_t);
335  free_d_2d_allocate(doublon2_t);
336  free_d_2d_allocate(num_t);
337  free_d_2d_allocate(num2_t);
338  free_d_2d_allocate(Sz_t);
339  free_d_2d_allocate(Sz2_t);
340  return 0;
341 }
347 int expec_energy_flct_HalfSpinGC(
348  struct BindStruct *X,
349  int nstate,
350  std::complex<double> **tmp_v0
351 ) {
352  long int j;
353  long int isite1;
354  long int is1_up_a, is1_up_b;
355 
356  long int ibit1;
357  double Sz;
358  double *tmp_v02;
359  long int i_max;
360  int l_ibit1, u_ibit1, i_32;
361  int istate, mythread;
362  double **Sz_t, **Sz2_t;
363 
364  i_max = X->Check.idim_max;
365 
366  Sz_t = d_2d_allocate(nthreads, nstate);
367  Sz2_t = d_2d_allocate(nthreads, nstate);
368  i_32 = 0xFFFFFFFF; //2^32 - 1
369 
370  //[s] for bit count
371  is1_up_a = 0;
372  is1_up_b = 0;
373  for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) {
374  if (isite1 > X->Def.Nsite) {
375  is1_up_a += X->Def.Tpow[isite1 - 1];
376  }
377  else {
378  is1_up_b += X->Def.Tpow[isite1 - 1];
379  }
380  }
381  //[e]
382 #pragma omp parallel default(none) shared(tmp_v0,Sz_t,Sz2_t,nstate) \
383 firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) \
384 private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1,mythread,istate)
385  {
386  tmp_v02 = d_1d_allocate(nstate);
387 #ifdef _OPENMP
388  mythread = omp_get_thread_num();
389 #else
390  mythread = 0;
391 #endif
392 #pragma omp for
393  for (j = 1; j <= i_max; j++) {
394  for (istate = 0; istate < nstate; istate++)
395  tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
396  Sz = 0.0;
397 
398  // isite1 > X->Def.Nsite
399  ibit1 = (long int) myrank & is1_up_a;
400  u_ibit1 = ibit1 >> 32;
401  l_ibit1 = ibit1 & i_32;
402  Sz += pop(u_ibit1);
403  Sz += pop(l_ibit1);
404  // isite1 <= X->Def.Nsite
405  ibit1 = (long int) (j - 1)&is1_up_b;
406  u_ibit1 = ibit1 >> 32;
407  l_ibit1 = ibit1 & i_32;
408  Sz += pop(u_ibit1);
409  Sz += pop(l_ibit1);
410  Sz = 2 * Sz - X->Def.NsiteMPI;
411 
412  for (istate = 0; istate < nstate; istate++) {
413  Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
414  Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
415  }
416  }/*for (j = 1; j <= i_max; j++)*/
417  free_d_1d_allocate(tmp_v02);
418  }/*End of parallel region*/
419  for (istate = 0; istate < nstate; istate++) {
420  X->Phys.Sz[istate] = 0.0;
421  X->Phys.Sz2[istate] = 0.0;
422  for (mythread = 0; mythread < nthreads; mythread++) {
423  X->Phys.Sz[istate] += Sz_t[mythread][istate];
424  X->Phys.Sz2[istate] += Sz2_t[mythread][istate];
425  }
426  }
427  SumMPI_dv(nstate, X->Phys.Sz);
428  SumMPI_dv(nstate, X->Phys.Sz2);
429 
430  for (istate = 0; istate < nstate; istate++) {
431  X->Phys.doublon[istate] = 0.0;
432  X->Phys.doublon2[istate] = 0.0;
433  X->Phys.num[istate] = X->Def.NsiteMPI;
434  X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI;
435  X->Phys.Sz[istate] *= 0.5;
436  X->Phys.Sz2[istate] *= 0.25;
437  X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]);
438  X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]);
439  }
440 
441  free_d_2d_allocate(Sz_t);
442  free_d_2d_allocate(Sz2_t);
443  return 0;
444 }
450 int expec_energy_flct_GeneralSpinGC(
451  struct BindStruct *X,
452  int nstate,
453  std::complex<double> **tmp_v0
454 ) {
455  long int j;
456  long int isite1;
457  int istate, mythread;
458  double Sz;
459  double *tmp_v02;
460  long int i_max;
461  double **Sz_t, **Sz2_t;
462 
463  Sz_t = d_2d_allocate(nthreads, nstate);
464  Sz2_t = d_2d_allocate(nthreads, nstate);
465  i_max = X->Check.idim_max;
466 
467 #pragma omp parallel default(none) \
468 shared(i_max,nstate,X,myrank,Sz_t,Sz2_t,tmp_v0) \
469 private(j,istate,tmp_v02,Sz,isite1,mythread)
470  {
471  tmp_v02 = d_1d_allocate(nstate);
472 #ifdef _OPENMP
473  mythread = omp_get_thread_num();
474 #else
475  mythread = 0;
476 #endif
477 #pragma omp for
478  for (j = 1; j <= i_max; j++) {
479  for (istate = 0; istate < nstate; istate++) \
480  tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
481  Sz = 0.0;
482  for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) {
483  //prefactor 0.5 is added later.
484  if (isite1 > X->Def.Nsite) {
485  Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow);
486  }
487  else {
488  Sz += GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow);
489  }
490  }
491  for (istate = 0; istate < nstate; istate++) {
492  Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
493  Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
494  }
495  }/*for (j = 1; j <= i_max; j++)*/
496  free_d_1d_allocate(tmp_v02);
497  }/*End of parallel region*/
498  for (istate = 0; istate < nstate; istate++) {
499  X->Phys.Sz[istate] = 0.0;
500  X->Phys.Sz2[istate] = 0.0;
501  for (mythread = 0; mythread < nthreads; mythread++) {
502  X->Phys.Sz[istate] += Sz_t[mythread][istate];
503  X->Phys.Sz2[istate] += Sz2_t[mythread][istate];
504  }
505  }
506  SumMPI_dv(nstate, X->Phys.Sz);
507  SumMPI_dv(nstate, X->Phys.Sz2);
508 
509  for (istate = 0; istate < nstate; istate++) {
510  X->Phys.doublon[istate] = 0.0;
511  X->Phys.doublon2[istate] = 0.0;
512  X->Phys.num[istate] = X->Def.NsiteMPI;
513  X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI;
514  X->Phys.Sz[istate] *= 0.5;
515  X->Phys.Sz2[istate] *= 0.25;
516  X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]);
517  X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]);
518  }
519 
520  free_d_2d_allocate(Sz_t);
521  free_d_2d_allocate(Sz2_t);
522  return 0;
523 }
529 int expec_energy_flct_HalfSpin(
530  struct BindStruct *X,
531  int nstate,
532  std::complex<double> **tmp_v0
533 ) {
534  long int j;
535  long int isite1;
536  long int is1_up_a, is1_up_b;
537 
538  long int ibit1;
539  double Sz;
540  double *tmp_v02;
541  long int i_max, tmp_list_1;
542  int l_ibit1, u_ibit1, i_32;
543  int istate, mythread;
544  double **Sz_t, **Sz2_t;
545 
546  i_max = X->Check.idim_max;
547  Sz_t = d_2d_allocate(nthreads, nstate);
548  Sz2_t = d_2d_allocate(nthreads, nstate);
549  i_32 = 0xFFFFFFFF; //2^32 - 1
550 
551  //[s] for bit count
552  is1_up_a = 0;
553  is1_up_b = 0;
554  for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) {
555  if (isite1 > X->Def.Nsite) {
556  is1_up_a += X->Def.Tpow[isite1 - 1];
557  }
558  else {
559  is1_up_b += X->Def.Tpow[isite1 - 1];
560  }
561  }
562  //[e]
563 #pragma omp parallel default(none) shared(tmp_v0, list_1,Sz_t,Sz2_t,nstate) \
564 firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) \
565 private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1, tmp_list_1,mythread,istate)
566  {
567  tmp_v02 = d_1d_allocate(nstate);
568 #ifdef _OPENMP
569  mythread = omp_get_thread_num();
570 #else
571  mythread = 0;
572 #endif
573 #pragma omp for
574  for (j = 1; j <= i_max; j++) {
575  for (istate = 0; istate < nstate; istate++) \
576  tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
577  Sz = 0.0;
578  tmp_list_1 = list_1[j];
579 
580  // isite1 > X->Def.Nsite
581  ibit1 = (long int) myrank & is1_up_a;
582  u_ibit1 = ibit1 >> 32;
583  l_ibit1 = ibit1 & i_32;
584  Sz += pop(u_ibit1);
585  Sz += pop(l_ibit1);
586  // isite1 <= X->Def.Nsite
587  ibit1 = (long int) tmp_list_1 &is1_up_b;
588  u_ibit1 = ibit1 >> 32;
589  l_ibit1 = ibit1 & i_32;
590  Sz += pop(u_ibit1);
591  Sz += pop(l_ibit1);
592  Sz = 2 * Sz - X->Def.NsiteMPI;
593 
594  for (istate = 0; istate < nstate; istate++) {
595  Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
596  Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
597  }
598  }/*for (j = 1; j <= i_max; j++)*/
599  free_d_1d_allocate(tmp_v02);
600  }/*End of parallel region*/
601  for (istate = 0; istate < nstate; istate++) {
602  X->Phys.Sz[istate] = 0.0;
603  X->Phys.Sz2[istate] = 0.0;
604  for (mythread = 0; mythread < nthreads; mythread++) {
605  X->Phys.Sz[istate] += Sz_t[mythread][istate];
606  X->Phys.Sz2[istate] += Sz2_t[mythread][istate];
607  }
608  }
609  SumMPI_dv(nstate, X->Phys.Sz);
610  SumMPI_dv(nstate, X->Phys.Sz2);
611 
612  for (istate = 0; istate < nstate; istate++) {
613  X->Phys.doublon[istate] = 0.0;
614  X->Phys.doublon2[istate] = 0.0;
615  X->Phys.num[istate] = X->Def.NsiteMPI;
616  X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI;
617  X->Phys.Sz[istate] *= 0.5;
618  X->Phys.Sz2[istate] *= 0.25;
619  X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]);
620  X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]);
621  }
622 
623  free_d_2d_allocate(Sz_t);
624  free_d_2d_allocate(Sz2_t);
625  return 0;
626 }
632 int expec_energy_flct_GeneralSpin(
633  struct BindStruct *X,
634  int nstate,
635  std::complex<double> **tmp_v0
636 ) {
637  long int j;
638  long int isite1;
639  int istate, mythread;
640  double Sz;
641  double *tmp_v02;
642  long int i_max, tmp_list1;
643  double **Sz_t, **Sz2_t;
644 
645  Sz_t = d_2d_allocate(nthreads, nstate);
646  Sz2_t = d_2d_allocate(nthreads, nstate);
647  i_max = X->Check.idim_max;
648 
649 #pragma omp parallel default(none) shared(tmp_v0, list_1,Sz_t,Sz2_t,nstate) \
650 firstprivate(i_max,X,myrank) private(j,Sz,isite1,tmp_v02, tmp_list1,istate,mythread)
651  {
652  tmp_v02 = d_1d_allocate(nstate);
653 #ifdef _OPENMP
654  mythread = omp_get_thread_num();
655 #else
656  mythread = 0;
657 #endif
658 #pragma omp for
659  for (j = 1; j <= i_max; j++) {
660  for (istate = 0; istate < nstate; istate++)
661  tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
662  Sz = 0.0;
663  tmp_list1 = list_1[j];
664  for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) {
665  //prefactor 0.5 is added later.
666  if (isite1 > X->Def.Nsite) {
667  Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow);
668  }
669  else {
670  Sz += GetLocal2Sz(isite1, tmp_list1, X->Def.SiteToBit, X->Def.Tpow);
671  }
672  }
673  for (istate = 0; istate < nstate; istate++) {
674  Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
675  Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
676  }
677  }/*for (j = 1; j <= i_max; j++)*/
678  free_d_1d_allocate(tmp_v02);
679  }/*End of parallel region*/
680  for (istate = 0; istate < nstate; istate++) {
681  X->Phys.Sz[istate] = 0.0;
682  X->Phys.Sz2[istate] = 0.0;
683  for (mythread = 0; mythread < nthreads; mythread++) {
684  X->Phys.Sz[istate] += Sz_t[mythread][istate];
685  X->Phys.Sz2[istate] += Sz2_t[mythread][istate];
686  }
687  }
688  SumMPI_dv(nstate, X->Phys.Sz);
689  SumMPI_dv(nstate, X->Phys.Sz2);
690 
691  for (istate = 0; istate < nstate; istate++) {
692  X->Phys.doublon[istate] = 0.0;
693  X->Phys.doublon2[istate] = 0.0;
694  X->Phys.num[istate] = X->Def.NsiteMPI;
695  X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI;
696  X->Phys.Sz[istate] *= 0.5;
697  X->Phys.Sz2[istate] *= 0.25;
698  X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]);
699  X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]);
700  }
701 
702  free_d_2d_allocate(Sz_t);
703  free_d_2d_allocate(Sz2_t);
704  return 0;
705 }
716 int expec_energy_flct(
717  struct BindStruct *X,
718  int nstate,
719  std::complex<double> **tmp_v0,
720  std::complex<double> **tmp_v1
721 ) {
722 
723  long int i, j;
724  long int irght, ilft, ihfbit;
725  long int i_max;
726  int istate;
727 
728  switch (X->Def.iCalcType) {
729  case TPQCalc:
730  case TimeEvolution:
731 #ifdef _DEBUG
732  fprintf(stdoutMPI, "%s", " Start: Calculate Energy.\n");
733  TimeKeeperWithStep(X, "%s_TimeKeeper.dat", "step %d: Calculate energy begins: %s", "a", step_i);
734 #endif
735  break;
736  case FullDiag:
737  case CG:
738  break;
739  default:
740  return -1;
741  }
742 
743  i_max = X->Check.idim_max;
744  if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) {
745  return -1;
746  }
747 
748  X->Large.i_max = i_max;
749  X->Large.irght = irght;
750  X->Large.ilft = ilft;
751  X->Large.ihfbit = ihfbit;
752  X->Large.mode = M_ENERGY;
753  for (istate = 0; istate < nstate; istate++) X->Phys.energy[istate] = 0.0;
754 
755  int nCalcFlct;
756  if (X->Def.iCalcType == TPQCalc) {
757  nCalcFlct = 3201;
758  }
759  else {//For FullDiag
760  nCalcFlct = 5301;
761  }
762  StartTimer(nCalcFlct);
763 
764  switch (X->Def.iCalcModel) {
765  case HubbardGC:
766  expec_energy_flct_HubbardGC(X, nstate, tmp_v0);
767  break;
768  case KondoGC:
769  case Hubbard:
770  case Kondo:
771  expec_energy_flct_Hubbard(X, nstate, tmp_v0);
772  break;
773 
774  case SpinGC:
775  if (X->Def.iFlgGeneralSpin == FALSE) {
776  expec_energy_flct_HalfSpinGC(X, nstate, tmp_v0);
777  }
778  else {//for generalspin
779  expec_energy_flct_GeneralSpinGC(X, nstate, tmp_v0);
780  }
781  break;/*case SpinGC*/
782  /* SpinGCBoost */
783  case Spin:
784  /*
785  if(X->Def.iFlgGeneralSpin == FALSE){
786  expec_energy_flct_HalfSpin(X);
787  }
788  else{
789  expec_energy_flct_GeneralSpin(X);
790  }
791  */
792  for (istate = 0; istate < nstate; istate++) {
793  X->Phys.doublon[istate] = 0.0;
794  X->Phys.doublon2[istate] = 0.0;
795  X->Phys.num[istate] = X->Def.NsiteMPI;
796  X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI;
797  X->Phys.Sz[istate] = 0.5 * (double)X->Def.Total2SzMPI;
798  X->Phys.Sz2[istate] = X->Phys.Sz[istate] * X->Phys.Sz[istate];
799  }
800  break;
801  default:
802  return -1;
803  }
804 
805  StopTimer(nCalcFlct);
806 
807 #pragma omp parallel for default(none) private(i,istate) \
808 shared(tmp_v1,tmp_v0,nstate) firstprivate(i_max)
809  for (i = 1; i <= i_max; i++) {
810  for (istate = 0; istate < nstate; istate++) {
811  tmp_v1[i][istate] = tmp_v0[i][istate];
812  tmp_v0[i][istate] = 0.0;
813  }
814  }
815 
816  int nCalcExpec;
817  if (X->Def.iCalcType == TPQCalc) {
818  nCalcExpec = 3202;
819  }
820  else {//For FullDiag
821  nCalcExpec = 5302;
822  }
823  StartTimer(nCalcExpec);
824  mltply(X, nstate, tmp_v0, tmp_v1); // v0+=H*v1
825  StopTimer(nCalcExpec);
826  /* switch -> SpinGCBoost */
827 
828  for (istate = 0; istate < nstate; istate++) {
829  X->Phys.energy[istate] = 0.0;
830  X->Phys.var[istate] = 0.0;
831  }
832  for (j = 1; j <= i_max; j++) {
833  for (istate = 0; istate < nstate; istate++) {
834  X->Phys.energy[istate] += real(conj(tmp_v1[j][istate])*tmp_v0[j][istate]); // E = <v1|H|v1>=<v1|v0>
835  X->Phys.var[istate] += real(conj(tmp_v0[j][istate])*tmp_v0[j][istate]); // E^2 = <v1|H*H|v1>=<v0|v0>
836  }
837  }
838  SumMPI_dv(nstate, X->Phys.energy);
839  SumMPI_dv(nstate, X->Phys.var);
840 
841  switch (X->Def.iCalcType) {
842  case TPQCalc:
843  case TimeEvolution:
844 #ifdef _DEBUG
845  fprintf(stdoutMPI, "%s", " End : Calculate Energy.\n");
846  TimeKeeperWithStep(X, "%s_TimeKeeper.dat", "step %d: Calculate energy finishes: %s", "a", step_i);
847 #endif
848  break;
849  default:
850  break;
851  }
852  return 0;
853 }
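The routines above store only the first and second moments in X->Phys; the corresponding fluctuations follow as <A^2> - <A>^2. A minimal sketch (hypothetical helper, not part of HPhi) of that last step:

// Sketch only: how variance-style fluctuations follow from the stored moments.
// For A = doublon, num, Sz the second moments are doublon2, num2, Sz2;
// for the energy, X->Phys.var holds <H^2> and X->Phys.energy holds <H> after this routine.
inline double fluctuation(double second_moment, double first_moment) {
  return second_moment - first_moment * first_moment; // <A^2> - <A>^2
}
// e.g. double dN = fluctuation(X->Phys.num2[istate], X->Phys.num[istate]);
//      double dE = fluctuation(X->Phys.var[istate],  X->Phys.energy[istate]);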
double * doublon
Expectation value of the Doublon.
Definition: struct.hpp:357
struct DefineList Def
Definition of the system (Hamiltonian) etc.
Definition: struct.hpp:395
double * num_down
Expectation value of the number of down-spin electrons.
Definition: struct.hpp:364
int pop(int x)
Calculate the number of 1-bits in x (32 bit). This method is introduced in H. S. Warren, Jr., Hacker's Delight, second ed., Addison-Wesley, ISBN: 0321842685, 2012.
Definition: bitcalc.cpp:491
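For reference, a minimal 32-bit population count in the Hacker's Delight style cited above (a sketch; HPhi's own pop() is the routine defined in bitcalc.cpp):

#include <cstdint>
// Sketch of a Hacker's Delight style popcount (not the bitcalc.cpp implementation).
int popcount32(std::uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                 // sums of adjacent bit pairs
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); // sums within each nibble
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;                 // sums within each byte
  return static_cast<int>((x * 0x01010101u) >> 24); // add the four byte sums
}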
double * var
Expectation value of the Energy variance.
Definition: struct.hpp:367
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI()
Definition: global.cpp:75
int GetSplitBitByModel(const int Nsite, const int iCalcModel, long int *irght, long int *ilft, long int *ihfbit)
Function for splitting the original bit into right and left spaces.
Definition: bitcalc.cpp:78
int expec_energy_flct(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Parent function to calculate expected values of energy and physical quantities.
int Nsite
Number of sites in the INTRA process region.
Definition: struct.hpp:56
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log.
Definition: log.cpp:78
int mltply(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. First, the calculation of diago...
Definition: mltply.cpp:56
struct LargeList Large
Variables for Matrix-Vector product.
Definition: struct.hpp:397
double * num2
Expectation value of the square of the number of electrons.
Definition: struct.hpp:360
int expec_energy_flct_GeneralSpin(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for General-Spin model.
struct PhysList Phys
Physical quantities.
Definition: struct.hpp:398
double * Sz2
Expectation value of the Square of total Sz.
Definition: struct.hpp:362
int mode
multiply or expectation value.
Definition: struct.hpp:331
long int irght
Used for Ogata-Lin ???
Definition: struct.hpp:344
double * Sz
Expectation value of the Total Sz.
Definition: struct.hpp:361
int expec_energy_flct_HalfSpin(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for Half-Spin model.
int expec_energy_flct_Hubbard(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for Hubbard model.
int NsiteMPI
Total number of sites; differs from DefineList::Nsite.
Definition: struct.hpp:57
long int ilft
Used for Ogata-Lin ???
Definition: struct.hpp:345
double * num
Expectation value of the Number of electrons.
Definition: struct.hpp:359
Bind.
Definition: struct.hpp:394
int nthreads
Number of Threads, defined in InitializeMPI()
Definition: global.cpp:74
long int ihfbit
Used for Ogata-Lin ???
Definition: struct.hpp:346
long int i_max
Length of eigenvector.
Definition: struct.hpp:318
double * energy
Expectation value of the total energy.
Definition: struct.hpp:356
int myrank
Process ID, defined in InitializeMPI()
Definition: global.cpp:73
int step_i
Definition: global.cpp:44
int expec_energy_flct_GeneralSpinGC(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for General-SpinGC model.
int expec_energy_flct_HubbardGC(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for Hubbard GC model.
int GetLocal2Sz(const int isite, const long int org_bit, const long int *SiteToBit, const long int *Tpow)
Get 2Sz at a site for general spin.
Definition: bitcalc.cpp:448
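A rough sketch of such a lookup, assuming (for illustration only) that the local level n_i of site isite is stored base SiteToBit[isite-1] in the state index with place values Tpow; the actual encoding is the one implemented in bitcalc.cpp:

// Sketch only: assumes org_bit = sum_i n_i * Tpow[i-1] with 0 <= n_i < SiteToBit[i-1];
// the local 2Sz at site isite is then 2*n_i - (SiteToBit[isite-1] - 1).
long int local_2sz_sketch(int isite, long int org_bit,
                          const long int *SiteToBit, const long int *Tpow) {
  long int level = (org_bit / Tpow[isite - 1]) % SiteToBit[isite - 1];
  return 2 * level - (SiteToBit[isite - 1] - 1);
}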
int iFlgGeneralSpin
Flag for the general (Sz/=1/2) spin.
Definition: struct.hpp:86
void StopTimer(int n)
Function for calculating elapsed time [elapsed time measured from StartTimer to StopTimer].
Definition: time.cpp:83
void SumMPI_dv(int nnorm, double *norm)
MPI wrapper function to obtain the sum of a double array across processes.
Definition: wrapperMPI.cpp:238
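Such a wrapper typically performs an in-place all-reduce; a minimal sketch assuming MPI has been initialized (not the exact wrapperMPI.cpp code):

#include <mpi.h>
// Sketch of an in-place sum of a double array over all MPI processes.
void SumMPI_dv_sketch(int nnorm, double *norm) {
  MPI_Allreduce(MPI_IN_PLACE, norm, nnorm, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
}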
long int * SiteToBit
[DefineList::NsiteMPI] Similar to DefineList::Tpow. For general spin.
Definition: struct.hpp:94
double * doublon2
Expectation value of the Square of doublon.
Definition: struct.hpp:358
long int * Tpow
[2 * DefineList::NsiteMPI] Allocated (malloc) in setmem_def().
Definition: struct.hpp:90
int expec_energy_flct_HalfSpinGC(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for Half-SpinGC model.
double * num_up
Expectation value of the number of up-spin electrons.
Definition: struct.hpp:363
int iCalcModel
Switch for model. 0:Hubbard, 1:Spin, 2:Kondo, 3:HubbardGC, 4:SpinGC, 5:KondoGC, 6:HubbardNConserved.
Definition: struct.hpp:200
struct CheckList Check
Size of the Hilbert space.
Definition: struct.hpp:396
long int * list_1
Definition: global.cpp:25
long int idim_max
The dimension of the Hilbert space of this process.
Definition: struct.hpp:305
int Total2SzMPI
Total 2Sz across processes.
Definition: struct.hpp:70
int iCalcType
Switch for calculation type. 0:Lanczos, 1:TPQCalc, 2:FullDiag.
Definition: struct.hpp:194
void StartTimer(int n)
Function for initializing the elapsed-time measurement [start].
Definition: time.cpp:71