17 #include "bitcalc.hpp" 18 #include "mltplyCommon.hpp" 20 #include "expec_energy_flct.hpp" 21 #include "wrapperMPI.hpp" 22 #include "CalcTime.hpp" 23 #include "common/setmemory.hpp" 32 std::complex<double> **tmp_v0
36 long int is1_up_a, is1_up_b;
37 long int is1_down_a, is1_down_b;
38 int bit_up, bit_down, bit_D, istate, mythread;
39 long int ibit_up, ibit_down, ibit_D;
43 int l_ibit1, u_ibit1, i_32;
44 double **doublon_t, **doublon2_t, **num_t, **num2_t, **Sz_t, **Sz2_t;
47 doublon_t = d_2d_allocate(
nthreads, nstate);
48 doublon2_t = d_2d_allocate(
nthreads, nstate);
49 num_t = d_2d_allocate(
nthreads, nstate);
50 num2_t = d_2d_allocate(
nthreads, nstate);
51 Sz_t = d_2d_allocate(
nthreads, nstate);
52 Sz2_t = d_2d_allocate(
nthreads, nstate);
60 for (isite1 = 1; isite1 <= X->
Def.
NsiteMPI; isite1++) {
62 is1_up_a += X->
Def.
Tpow[2 * isite1 - 2];
63 is1_down_a += X->
Def.
Tpow[2 * isite1 - 1];
66 is1_up_b += X->
Def.
Tpow[2 * isite1 - 2];
67 is1_down_b += X->
Def.
Tpow[2 * isite1 - 1];
71 #pragma omp parallel default(none) \ 72 shared(tmp_v0,list_1,doublon_t,doublon2_t,num_t,num2_t,Sz_t,Sz2_t,nstate) \ 73 firstprivate(i_max,X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \ 74 private(j,tmp_v02,D,N,Sz,isite1,bit_up,bit_down,bit_D,u_ibit1,l_ibit1, \ 75 ibit_up,ibit_down,ibit_D,mythread,istate) 77 tmp_v02 = d_1d_allocate(nstate);
79 mythread = omp_get_thread_num();
84 for (j = 1; j <= i_max; j++) {
85 for (istate = 0; istate < nstate; istate++)
86 tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
91 ibit_up = (
long int)
myrank & is1_up_a;
92 u_ibit1 = ibit_up >> 32;
93 l_ibit1 = ibit_up & i_32;
94 bit_up +=
pop(u_ibit1);
95 bit_up +=
pop(l_ibit1);
97 ibit_down = (
long int)
myrank & is1_down_a;
98 u_ibit1 = ibit_down >> 32;
99 l_ibit1 = ibit_down & i_32;
100 bit_down +=
pop(u_ibit1);
101 bit_down +=
pop(l_ibit1);
103 ibit_D = (ibit_up) & (ibit_down >> 1);
104 u_ibit1 = ibit_D >> 32;
105 l_ibit1 = ibit_D & i_32;
106 bit_D +=
pop(u_ibit1);
107 bit_D +=
pop(l_ibit1);
110 ibit_up = (
long int) (j - 1) & is1_up_b;
111 u_ibit1 = ibit_up >> 32;
112 l_ibit1 = ibit_up & i_32;
113 bit_up +=
pop(u_ibit1);
114 bit_up +=
pop(l_ibit1);
116 ibit_down = (
long int) (j - 1) & is1_down_b;
117 u_ibit1 = ibit_down >> 32;
118 l_ibit1 = ibit_down & i_32;
119 bit_down +=
pop(u_ibit1);
120 bit_down +=
pop(l_ibit1);
122 ibit_D = (ibit_up) & (ibit_down >> 1);
123 u_ibit1 = ibit_D >> 32;
124 l_ibit1 = ibit_D & i_32;
125 bit_D +=
pop(u_ibit1);
126 bit_D +=
pop(l_ibit1);
129 N = bit_up + bit_down;
130 Sz = bit_up - bit_down;
132 for (istate = 0; istate < nstate; istate++) {
133 doublon_t[mythread][istate] += tmp_v02[istate] * D;
134 doublon2_t[mythread][istate] += tmp_v02[istate] * D * D;
135 num_t[mythread][istate] += tmp_v02[istate] * N;
136 num2_t[mythread][istate] += tmp_v02[istate] * N * N;
137 Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
138 Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
141 free_d_1d_allocate(tmp_v02);
144 for (istate = 0; istate < nstate; istate++) {
151 for (mythread = 0; mythread <
nthreads; mythread++) {
152 X->
Phys.
doublon[istate] += doublon_t[mythread][istate];
153 X->
Phys.
doublon2[istate] += doublon2_t[mythread][istate];
154 X->
Phys.
num[istate] += num_t[mythread][istate];
155 X->
Phys.
num2[istate] += num2_t[mythread][istate];
156 X->
Phys.
Sz[istate] += Sz_t[mythread][istate];
157 X->
Phys.
Sz2[istate] += Sz2_t[mythread][istate];
167 for (istate = 0; istate < nstate; istate++) {
168 X->
Phys.
Sz[istate] *= 0.5;
174 free_d_2d_allocate(doublon_t);
175 free_d_2d_allocate(doublon2_t);
176 free_d_2d_allocate(num_t);
177 free_d_2d_allocate(num2_t);
178 free_d_2d_allocate(Sz_t);
179 free_d_2d_allocate(Sz2_t);
190 std::complex<double> **tmp_v0
194 long int is1_up_a, is1_up_b;
195 long int is1_down_a, is1_down_b;
196 int bit_up, bit_down, bit_D, istate, mythread;
197 long int ibit_up, ibit_down, ibit_D;
198 double **doublon_t, **doublon2_t, **num_t, **num2_t, **Sz_t, **Sz2_t;
201 long int i_max, tmp_list_1;
202 int l_ibit1, u_ibit1, i_32;
206 doublon_t = d_2d_allocate(
nthreads, nstate);
207 doublon2_t = d_2d_allocate(
nthreads, nstate);
208 num_t = d_2d_allocate(
nthreads, nstate);
209 num2_t = d_2d_allocate(
nthreads, nstate);
210 Sz_t = d_2d_allocate(
nthreads, nstate);
211 Sz2_t = d_2d_allocate(
nthreads, nstate);
219 for (isite1 = 1; isite1 <= X->
Def.
NsiteMPI; isite1++) {
221 is1_up_a += X->
Def.
Tpow[2 * isite1 - 2];
222 is1_down_a += X->
Def.
Tpow[2 * isite1 - 1];
225 is1_up_b += X->
Def.
Tpow[2 * isite1 - 2];
226 is1_down_b += X->
Def.
Tpow[2 * isite1 - 1];
230 #pragma omp parallel default(none) \ 231 shared(tmp_v0,list_1,doublon_t,doublon2_t,num_t,num2_t,Sz_t,Sz2_t,nstate) \ 232 firstprivate(i_max, X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \ 233 private(j,tmp_v02,D,N,Sz,isite1,tmp_list_1,bit_up,bit_down,bit_D,u_ibit1, \ 234 l_ibit1,ibit_up,ibit_down,ibit_D,mythread,istate) 236 tmp_v02 = d_1d_allocate(nstate);
238 mythread = omp_get_thread_num();
243 for (j = 1; j <= i_max; j++) {
244 for (istate = 0; istate < nstate; istate++)
245 tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
251 ibit_up = (
long int)
myrank & is1_up_a;
252 u_ibit1 = ibit_up >> 32;
253 l_ibit1 = ibit_up & i_32;
254 bit_up +=
pop(u_ibit1);
255 bit_up +=
pop(l_ibit1);
257 ibit_down = (
long int)
myrank & is1_down_a;
258 u_ibit1 = ibit_down >> 32;
259 l_ibit1 = ibit_down & i_32;
260 bit_down +=
pop(u_ibit1);
261 bit_down +=
pop(l_ibit1);
263 ibit_D = (ibit_up) & (ibit_down >> 1);
264 u_ibit1 = ibit_D >> 32;
265 l_ibit1 = ibit_D & i_32;
266 bit_D +=
pop(u_ibit1);
267 bit_D +=
pop(l_ibit1);
270 ibit_up = (
long int) tmp_list_1 & is1_up_b;
271 u_ibit1 = ibit_up >> 32;
272 l_ibit1 = ibit_up & i_32;
273 bit_up +=
pop(u_ibit1);
274 bit_up +=
pop(l_ibit1);
276 ibit_down = (
long int) tmp_list_1 & is1_down_b;
277 u_ibit1 = ibit_down >> 32;
278 l_ibit1 = ibit_down & i_32;
279 bit_down +=
pop(u_ibit1);
280 bit_down +=
pop(l_ibit1);
282 ibit_D = (ibit_up) & (ibit_down >> 1);
283 u_ibit1 = ibit_D >> 32;
284 l_ibit1 = ibit_D & i_32;
285 bit_D +=
pop(u_ibit1);
286 bit_D +=
pop(l_ibit1);
289 N = bit_up + bit_down;
290 Sz = bit_up - bit_down;
292 for (istate = 0; istate < nstate; istate++) {
293 doublon_t[mythread][istate] += tmp_v02[istate] * D;
294 doublon2_t[mythread][istate] += tmp_v02[istate] * D * D;
295 num_t[mythread][istate] += tmp_v02[istate] * N;
296 num2_t[mythread][istate] += tmp_v02[istate] * N * N;
297 Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
298 Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
301 free_d_1d_allocate(tmp_v02);
304 for (istate = 0; istate < nstate; istate++) {
311 for (mythread = 0; mythread <
nthreads; mythread++) {
312 X->
Phys.
doublon[istate] += doublon_t[mythread][istate];
313 X->
Phys.
doublon2[istate] += doublon2_t[mythread][istate];
314 X->
Phys.
num[istate] += num_t[mythread][istate];
315 X->
Phys.
num2[istate] += num2_t[mythread][istate];
316 X->
Phys.
Sz[istate] += Sz_t[mythread][istate];
317 X->
Phys.
Sz2[istate] += Sz2_t[mythread][istate];
327 for (istate = 0; istate < nstate; istate++) {
328 X->
Phys.
Sz[istate] *= 0.5;
334 free_d_2d_allocate(doublon_t);
335 free_d_2d_allocate(doublon2_t);
336 free_d_2d_allocate(num_t);
337 free_d_2d_allocate(num2_t);
338 free_d_2d_allocate(Sz_t);
339 free_d_2d_allocate(Sz2_t);
350 std::complex<double> **tmp_v0
354 long int is1_up_a, is1_up_b;
360 int l_ibit1, u_ibit1, i_32;
361 int istate, mythread;
362 double **Sz_t, **Sz2_t;
366 Sz_t = d_2d_allocate(
nthreads, nstate);
367 Sz2_t = d_2d_allocate(
nthreads, nstate);
373 for (isite1 = 1; isite1 <= X->
Def.
NsiteMPI; isite1++) {
375 is1_up_a += X->
Def.
Tpow[isite1 - 1];
378 is1_up_b += X->
Def.
Tpow[isite1 - 1];
382 #pragma omp parallel default(none) shared(tmp_v0,Sz_t,Sz2_t,nstate) \ 383 firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) \ 384 private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1,mythread,istate) 386 tmp_v02 = d_1d_allocate(nstate);
388 mythread = omp_get_thread_num();
393 for (j = 1; j <= i_max; j++) {
394 for (istate = 0; istate < nstate; istate++)
395 tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
399 ibit1 = (
long int)
myrank & is1_up_a;
400 u_ibit1 = ibit1 >> 32;
401 l_ibit1 = ibit1 & i_32;
405 ibit1 = (
long int) (j - 1)&is1_up_b;
406 u_ibit1 = ibit1 >> 32;
407 l_ibit1 = ibit1 & i_32;
412 for (istate = 0; istate < nstate; istate++) {
413 Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
414 Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
417 free_d_1d_allocate(tmp_v02);
419 for (istate = 0; istate < nstate; istate++) {
422 for (mythread = 0; mythread <
nthreads; mythread++) {
423 X->
Phys.
Sz[istate] += Sz_t[mythread][istate];
424 X->
Phys.
Sz2[istate] += Sz2_t[mythread][istate];
430 for (istate = 0; istate < nstate; istate++) {
435 X->
Phys.
Sz[istate] *= 0.5;
441 free_d_2d_allocate(Sz_t);
442 free_d_2d_allocate(Sz2_t);
453 std::complex<double> **tmp_v0
457 int istate, mythread;
461 double **Sz_t, **Sz2_t;
463 Sz_t = d_2d_allocate(
nthreads, nstate);
464 Sz2_t = d_2d_allocate(
nthreads, nstate);
467 #pragma omp parallel default(none) \ 468 shared(i_max,nstate,X,myrank,Sz_t,Sz2_t,tmp_v0) \ 469 private(j,istate,tmp_v02,Sz,isite1,mythread) 471 tmp_v02 = d_1d_allocate(nstate);
473 mythread = omp_get_thread_num();
478 for (j = 1; j <= i_max; j++) {
479 for (istate = 0; istate < nstate; istate++) \
480 tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
482 for (isite1 = 1; isite1 <= X->
Def.
NsiteMPI; isite1++) {
491 for (istate = 0; istate < nstate; istate++) {
492 Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
493 Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
496 free_d_1d_allocate(tmp_v02);
498 for (istate = 0; istate < nstate; istate++) {
501 for (mythread = 0; mythread <
nthreads; mythread++) {
502 X->
Phys.
Sz[istate] += Sz_t[mythread][istate];
503 X->
Phys.
Sz2[istate] += Sz2_t[mythread][istate];
509 for (istate = 0; istate < nstate; istate++) {
514 X->
Phys.
Sz[istate] *= 0.5;
520 free_d_2d_allocate(Sz_t);
521 free_d_2d_allocate(Sz2_t);
532 std::complex<double> **tmp_v0
536 long int is1_up_a, is1_up_b;
541 long int i_max, tmp_list_1;
542 int l_ibit1, u_ibit1, i_32;
543 int istate, mythread;
544 double **Sz_t, **Sz2_t;
547 Sz_t = d_2d_allocate(
nthreads, nstate);
548 Sz2_t = d_2d_allocate(
nthreads, nstate);
554 for (isite1 = 1; isite1 <= X->
Def.
NsiteMPI; isite1++) {
556 is1_up_a += X->
Def.
Tpow[isite1 - 1];
559 is1_up_b += X->
Def.
Tpow[isite1 - 1];
563 #pragma omp parallel default(none) shared(tmp_v0, list_1,Sz_t,Sz2_t,nstate) \ 564 firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) \ 565 private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1, tmp_list_1,mythread,istate) 567 tmp_v02 = d_1d_allocate(nstate);
569 mythread = omp_get_thread_num();
574 for (j = 1; j <= i_max; j++) {
575 for (istate = 0; istate < nstate; istate++) \
576 tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
581 ibit1 = (
long int)
myrank & is1_up_a;
582 u_ibit1 = ibit1 >> 32;
583 l_ibit1 = ibit1 & i_32;
587 ibit1 = (
long int) tmp_list_1 &is1_up_b;
588 u_ibit1 = ibit1 >> 32;
589 l_ibit1 = ibit1 & i_32;
594 for (istate = 0; istate < nstate; istate++) {
595 Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
596 Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
599 free_d_1d_allocate(tmp_v02);
601 for (istate = 0; istate < nstate; istate++) {
604 for (mythread = 0; mythread <
nthreads; mythread++) {
605 X->
Phys.
Sz[istate] += Sz_t[mythread][istate];
606 X->
Phys.
Sz2[istate] += Sz2_t[mythread][istate];
612 for (istate = 0; istate < nstate; istate++) {
617 X->
Phys.
Sz[istate] *= 0.5;
623 free_d_2d_allocate(Sz_t);
624 free_d_2d_allocate(Sz2_t);
635 std::complex<double> **tmp_v0
639 int istate, mythread;
642 long int i_max, tmp_list1;
643 double **Sz_t, **Sz2_t;
645 Sz_t = d_2d_allocate(
nthreads, nstate);
646 Sz2_t = d_2d_allocate(
nthreads, nstate);
649 #pragma omp parallel default(none) shared(tmp_v0, list_1,Sz_t,Sz2_t,nstate) \ 650 firstprivate(i_max,X,myrank) private(j,Sz,isite1,tmp_v02, tmp_list1,istate,mythread) 652 tmp_v02 = d_1d_allocate(nstate);
654 mythread = omp_get_thread_num();
659 for (j = 1; j <= i_max; j++) {
660 for (istate = 0; istate < nstate; istate++)
661 tmp_v02[istate] = real(conj(tmp_v0[j][istate]) * tmp_v0[j][istate]);
664 for (isite1 = 1; isite1 <= X->
Def.
NsiteMPI; isite1++) {
673 for (istate = 0; istate < nstate; istate++) {
674 Sz_t[mythread][istate] += tmp_v02[istate] * Sz;
675 Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz;
678 free_d_1d_allocate(tmp_v02);
680 for (istate = 0; istate < nstate; istate++) {
683 for (mythread = 0; mythread <
nthreads; mythread++) {
684 X->
Phys.
Sz[istate] += Sz_t[mythread][istate];
685 X->
Phys.
Sz2[istate] += Sz2_t[mythread][istate];
691 for (istate = 0; istate < nstate; istate++) {
696 X->
Phys.
Sz[istate] *= 0.5;
702 free_d_2d_allocate(Sz_t);
703 free_d_2d_allocate(Sz2_t);
719 std::complex<double> **tmp_v0,
720 std::complex<double> **tmp_v1
724 long int irght, ilft, ihfbit;
732 fprintf(
stdoutMPI,
"%s",
" Start: Calculate Energy.\n");
753 for (istate = 0; istate < nstate; istate++) X->
Phys.
energy[istate] = 0.0;
792 for (istate = 0; istate < nstate; istate++) {
807 #pragma omp parallel for default(none) private(i,istate) \ 808 shared(tmp_v1,tmp_v0,nstate) firstprivate(i_max) 809 for (i = 1; i <= i_max; i++) {
810 for (istate = 0; istate < nstate; istate++) {
811 tmp_v1[i][istate] = tmp_v0[i][istate];
812 tmp_v0[i][istate] = 0.0;
824 mltply(X, nstate, tmp_v0, tmp_v1);
828 for (istate = 0; istate < nstate; istate++) {
832 for (j = 1; j <= i_max; j++) {
833 for (istate = 0; istate < nstate; istate++) {
834 X->
Phys.
energy[istate] += real(conj(tmp_v1[j][istate])*tmp_v0[j][istate]);
835 X->
Phys.
var[istate] += real(conj(tmp_v0[j][istate])*tmp_v0[j][istate]);
845 fprintf(
stdoutMPI,
"%s",
" End : Calculate Energy.\n");
double * doublon
Expectation value of the Doublon.
struct DefineList Def
Definision of system (Hamiltonian) etc.
double * num_down
Expectation value of the number of down-spin electtrons.
int pop(int x)
calculating number of 1-bits in x (32 bit) This method is introduced in S.H. Warren, Hacker’s Delight, second ed., Addison-Wesley, ISBN: 0321842685, 2012.
double * var
Expectation value of the Energy variance.
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI()
int GetSplitBitByModel(const int Nsite, const int iCalcModel, long int *irght, long int *ilft, long int *ihfbit)
function of splitting original bit into right and left spaces.
int expec_energy_flct(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Parent function to calculate expected values of energy and physical quantities.
int Nsite
Number of sites in the INTRA process region.
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log.
int mltply(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. . First, the calculation of diago...
struct LargeList Large
Variables for Matrix-Vector product.
double * num2
Expectation value of the quare of the number of electrons.
int expec_energy_flct_GeneralSpin(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for General-Spin model.
struct PhysList Phys
Physical quantities.
double * Sz2
Expectation value of the Square of total Sz.
int mode
multiply or expectation value.
long int irght
Used for Ogata-Lin ???
double * Sz
Expectation value of the Total Sz.
int expec_energy_flct_HalfSpin(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for Half-Spin model.
int expec_energy_flct_Hubbard(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for Hubbard model.
int NsiteMPI
Total number of sites, differ from DefineList::Nsite.
long int ilft
Used for Ogata-Lin ???
double * num
Expectation value of the Number of electrons.
int nthreads
Number of Threads, defined in InitializeMPI()
long int ihfbit
Used for Ogata-Lin ???
long int i_max
Length of eigenvector.
double * energy
Expectation value of the total energy.
int myrank
Process ID, defined in InitializeMPI()
int expec_energy_flct_GeneralSpinGC(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for General-SpinGC model.
int expec_energy_flct_HubbardGC(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for Hubbard GC model.
int GetLocal2Sz(const int isite, const long int org_bit, const long int *SiteToBit, const long int *Tpow)
get 2sz at a site for general spin
int iFlgGeneralSpin
Flag for the general (Sz/=1/2) spin.
void StopTimer(int n)
function for calculating elapse time [elapse time=StartTimer-StopTimer]
void SumMPI_dv(int nnorm, double *norm)
MPI wrapper function to obtain sum of Double array across processes.
long int * SiteToBit
[DefineList::NsiteMPI] Similar to DefineList::Tpow. For general spin.
double * doublon2
Expectation value of the Square of doublon.
long int * Tpow
[2 * DefineList::NsiteMPI] malloc in setmem_def().
int expec_energy_flct_HalfSpinGC(struct BindStruct *X, int nstate, std::complex< double > **tmp_v0)
Calculate expected values of energies and physical quantities for Half-SpinGC model.
double * num_up
Expectation value of the number of up-spin electtrons.
int iCalcModel
Switch for model. 0:Hubbard, 1:Spin, 2:Kondo, 3:HubbardGC, 4:SpinGC, 5:KondoGC, 6:HubbardNConserved.
struct CheckList Check
Size of the Hilbert space.
long int idim_max
The dimension of the Hilbert space of this process.
int Total2SzMPI
Total across processes.
int iCalcType
Switch for calculation type. 0:Lanczos, 1:TPQCalc, 2:FullDiag.
void StartTimer(int n)
function for initializing elapse time [start]