20 #include "mltplyCommon.hpp" 21 #include "mltplyHubbardCore.hpp" 22 #include "mltplyMPIHubbard.hpp" 23 #include "mltplyMPIHubbardCore.hpp" 24 #include "bitcalc.hpp" 25 #include "wrapperMPI.hpp" 53 ibit_tmp = orgbit & is1_spin;
55 *offbit = orgbit + is1_spin;
73 ibit_tmp = orgbit & is1_spin;
75 *offbit = orgbit - is1_spin;
101 long int tmp_org, tmp_off;
102 int iflgBitExist = TRUE;
106 if (
CheckPE(org_isite1, X) == TRUE) {
107 tmp_ispin = X->
Def.
Tpow[2 * org_isite1 + org_isigma1];
108 if (
CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
109 iflgBitExist = FALSE;
114 if (
CheckPE(org_isite2, X) == TRUE ) {
115 tmp_ispin = X->
Def.
Tpow[2 * org_isite2 + org_isigma2];
116 if (
CheckBit_Cis(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
117 iflgBitExist = FALSE;
122 if (
CheckPE(org_isite3, X) == TRUE) {
123 tmp_ispin = X->
Def.
Tpow[2 * org_isite3 + org_isigma3];
124 if (
CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
125 iflgBitExist = FALSE;
130 if (
CheckPE(org_isite4, X) == TRUE) {
131 tmp_ispin = X->
Def.
Tpow[2 * org_isite4 + org_isigma4];
132 if (
CheckBit_Cis(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
133 iflgBitExist = FALSE;
138 if(iflgBitExist != TRUE){
159 long int tmp_org, tmp_off;
160 int iflgBitExist = TRUE;
163 if(
CheckPE(org_isite1, X)==TRUE){
164 tmp_ispin = X->
Def.
Tpow[2 * org_isite1 + org_isigma1];
165 if (
CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
170 if (
CheckPE(org_isite3, X) == TRUE) {
171 tmp_ispin = X->
Def.
Tpow[2 * org_isite3 + org_isigma3];
172 if (
CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
173 iflgBitExist = FALSE;
177 if(iflgBitExist != TRUE){
201 long int tmp_ispin1, tmp_ispin2;
207 if (tmp_ispin1 == tmp_ispin2) {
208 if ((orgbit & tmp_ispin1) == 0) {
217 if (tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1 * 2;
218 else diffA = tmp_ispin1-tmp_ispin2*2;
220 tmp_sgn=
X_GC_CisAjt(orgbit, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, &tmp_off);
231 if(tmp_ispin1 == tmp_ispin2){
232 if( (tmp_off & tmp_ispin1) == 0){
240 if(tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1*2;
241 else diffA = tmp_ispin1-tmp_ispin2*2;
242 tmp_sgn *=
X_GC_CisAjt(tmp_off, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, offbit);
264 std::complex<double> tmp_V,
266 int nstate, std::complex<double> **tmp_v0,
267 std::complex<double> **tmp_v1
281 zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]);
284 if (org_isite1 > org_isite3) tmp_ispin1 = X->
Def.
Tpow[2 * org_isite3 + org_ispin3];
285 else tmp_ispin1 = X->
Def.
Tpow[2 * org_isite1 + org_ispin1];
287 #pragma omp parallel default(none) \ 288 shared(org_isite1,org_ispin1,org_isite3,org_ispin3,nstate,one,tmp_v0,tmp_v1,tmp_ispin1) \ 289 firstprivate(i_max,tmp_V,X) private(j,tmp_off) 291 for (j = 1; j <= i_max; j++) {
292 if (
CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) {
293 zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one);
309 std::complex<double> tmp_V,
311 int nstate, std::complex<double> **tmp_v0,
312 std::complex<double> **tmp_v1
315 long int idim_max_buf;
317 long int isite1, isite2, isite3;
318 long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
319 long int j, Asum, Adiff;
320 std::complex<double> dmv;
321 long int origin, tmp_off;
322 long int org_rankbit;
325 iCheck =
CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (
long int)
myrank, &origin);
326 isite1 = X->
Def.
Tpow[2 * org_isite1 + org_ispin1];
327 isite2 = X->
Def.
Tpow[2 * org_isite2 + org_ispin2];
328 isite3 = X->
Def.
Tpow[2 * org_isite3 + org_ispin3];
330 if (iCheck == TRUE) {
331 tmp_isite1 = X->
Def.
OrgTpow[2 * org_isite1 + org_ispin1];
332 tmp_isite2 = X->
Def.
OrgTpow[2 * org_isite2 + org_ispin2];
333 tmp_isite3 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
334 tmp_isite4 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
335 Asum = tmp_isite1 + tmp_isite2;
336 if (tmp_isite2 > tmp_isite1) Adiff = tmp_isite2 - tmp_isite1 * 2;
337 else Adiff = tmp_isite1 - tmp_isite2 * 2;
340 iCheck =
CheckBit_InterAllPE(org_isite3, org_ispin3, org_isite3, org_ispin3, org_isite2, org_ispin2, org_isite1, org_ispin1, X, (
long int) myrank, &origin);
341 if (iCheck == TRUE) {
343 tmp_isite4 = X->
Def.
OrgTpow[2 * org_isite1 + org_ispin1];
344 tmp_isite3 = X->
Def.
OrgTpow[2 * org_isite2 + org_ispin2];
345 tmp_isite2 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
346 tmp_isite1 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
347 Asum = tmp_isite3 + tmp_isite4;
348 if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
349 else Adiff = tmp_isite3 - tmp_isite4 * 2;
359 if (myrank == origin) {
361 if (
CheckBit_Ajt(isite3, myrank, &tmp_off) == FALSE)
return;
363 #pragma omp parallel default(none) \ 364 firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) \ 365 private(j,tmp_off) shared(tmp_v0, tmp_v1,nstate) 368 for (j = 1; j <= i_max; j++)
369 GC_CisAjt(j, nstate, tmp_v0, tmp_v1, isite2, isite1, Asum, Adiff, tmp_V, &tmp_off);
373 for (j = 1; j <= i_max; j++)
374 GC_CisAjt(j, nstate, tmp_v0, tmp_v1, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off);
383 #pragma omp parallel default(none) private(j,dmv,tmp_off,Fsgn,org_rankbit,Adiff) \ 384 shared(v1buf,tmp_v1,nstate,one,tmp_v0,myrank,origin,isite3,org_isite3,isite1,isite2,org_isite2,org_isite1) \ 385 firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4) 388 if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
389 else Adiff = isite1 - isite2 * 2;
390 SgnBit(((
long int) myrank & Adiff), &Fsgn);
393 if (org_isite3 + 1 > X->
Def.
Nsite) {
395 for (j = 1; j <= idim_max_buf; j++) {
396 zaxpy_(&nstate, &tmp_V, &
v1buf[j][0], &one, &tmp_v0[j][0], &one);
401 for (j = 1; j <= idim_max_buf; j++) {
403 zaxpy_(&nstate, &tmp_V, &
v1buf[j][0], &one, &tmp_v0[j][0], &one);
411 for (j = 1; j <= idim_max_buf; j++) {
412 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) {
413 dmv = tmp_V * (std::complex<double>)Fsgn;
414 zaxpy_(&nstate, &dmv, &
v1buf[j][0], &one, &tmp_v0[tmp_off + 1][0], &one);
432 std::complex<double> tmp_V,
434 int nstate, std::complex<double> **tmp_v0,
435 std::complex<double> **tmp_v1
438 org_isite4, org_ispin4, org_isite3, org_ispin3,
439 org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1);
454 std::complex<double> tmp_V,
456 int nstate, std::complex<double> **tmp_v0,
457 std::complex<double> **tmp_v1
460 long int idim_max_buf;
462 long int isite1, isite2, isite3, isite4;
463 long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
464 long int j, Adiff, Bdiff;
465 std::complex<double> dmv;
466 long int origin, tmp_off, tmp_off2;
467 long int org_rankbit;
468 int iFlgHermite = FALSE;
472 org_isite3, org_ispin3, org_isite4, org_ispin4,
473 X, (
long int)
myrank, &origin);
474 isite1 = X->
Def.
Tpow[2 * org_isite1 + org_ispin1];
475 isite2 = X->
Def.
Tpow[2 * org_isite2 + org_ispin2];
476 isite3 = X->
Def.
Tpow[2 * org_isite3 + org_ispin3];
477 isite4 = X->
Def.
Tpow[2 * org_isite4 + org_ispin4];
479 if (iCheck == TRUE) {
480 tmp_isite1 = X->
Def.
OrgTpow[2 * org_isite1 + org_ispin1];
481 tmp_isite2 = X->
Def.
OrgTpow[2 * org_isite2 + org_ispin2];
482 tmp_isite3 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
483 tmp_isite4 = X->
Def.
OrgTpow[2 * org_isite4 + org_ispin4];
487 org_isite2, org_ispin2, org_isite1, org_ispin1,
488 X, (
long int) myrank, &origin);
489 if (iCheck == TRUE) {
491 tmp_isite4 = X->
Def.
OrgTpow[2 * org_isite1 + org_ispin1];
492 tmp_isite3 = X->
Def.
OrgTpow[2 * org_isite2 + org_ispin2];
493 tmp_isite2 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
494 tmp_isite1 = X->
Def.
OrgTpow[2 * org_isite4 + org_ispin4];
505 if (myrank == origin) {
506 if (isite1 == isite4 && isite2 == isite3) {
512 else if (isite2 == isite3) {
514 if (isite4 > isite1) Adiff = isite4 - isite1 * 2;
515 else Adiff = isite1 - isite4 * 2;
517 #pragma omp parallel for default(none) private(j, tmp_off) \ 518 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate) 519 for (j = 1; j <= i_max; j++)
520 GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off);
524 org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1);
526 #pragma omp parallel for default(none) private(j, tmp_off) \ 527 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate) 528 for (j = 1; j <= i_max; j++)
529 GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off);
533 org_isite1, org_ispin1, -tmp_V, X, nstate, tmp_v0, tmp_v1);
538 org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1);
541 org_isite3, org_ispin3, -tmp_V, X, nstate, tmp_v0, tmp_v1);
553 if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
554 else Adiff = isite1 - isite2 * 2;
555 if (isite4 > isite3) Bdiff = isite4 - isite3 * 2;
556 else Bdiff = isite3 - isite4 * 2;
558 if (iFlgHermite == FALSE) {
559 Fsgn =
X_GC_CisAjt((
long int) myrank, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2);
560 Fsgn *=
X_GC_CisAjt(tmp_off2, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off);
564 Fsgn =
X_GC_CisAjt((
long int) myrank, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2);
565 Fsgn *=
X_GC_CisAjt(tmp_off2, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off);
573 #pragma omp parallel for default(none) private(j,dmv,tmp_off,Fsgn) \ 574 firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4,org_rankbit) \ 575 shared(v1buf,tmp_v1,tmp_v0,nstate,one) 576 for (j = 1; j <= idim_max_buf; j++) {
577 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) {
578 dmv = tmp_V * (std::complex<double>)Fsgn;
579 zaxpy_(&nstate, &dmv, &
v1buf[j][0], &one, &tmp_v0[tmp_off + 1][0], &one);
592 std::complex<double> tmp_V,
594 int nstate, std::complex<double> **tmp_v0,
595 std::complex<double> **tmp_v1
598 long int j, isite1, tmp_off;
601 isite1 = X->
Def.
Tpow[2 * org_isite1 + org_ispin1];
602 if (org_isite1 + 1 > X->
Def.
Nsite) {
605 zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]);
608 #pragma omp parallel default(none) shared(tmp_v0, tmp_v1,nstate,one) \ 609 firstprivate(i_max, tmp_V, X, isite1) private(j, tmp_off) 612 for (j = 1; j <= i_max; j++) {
614 zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one);
629 std::complex<double> tmp_trans,
631 int nstate, std::complex<double> **tmp_v0,
632 std::complex<double> **tmp_v1
655 std::complex<double> tmp_V,
657 int nstate, std::complex<double> **tmp_v0,
658 std::complex<double> **tmp_v1
667 if (iCheck != TRUE)
return;
670 zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]);
673 if (org_isite1 > org_isite3) tmp_ispin1 = X->
Def.
Tpow[2 * org_isite3 + org_ispin3];
674 else tmp_ispin1 = X->
Def.
Tpow[2 * org_isite1 + org_ispin1];
676 #pragma omp parallel for default(none) \ 677 shared(tmp_v0,tmp_v1,list_1,org_isite1,org_ispin1,org_isite3,org_ispin3,nstate,one) \ 678 firstprivate(i_max,tmp_V,X,tmp_ispin1) private(j,tmp_off) 679 for (j = 1; j <= i_max; j++) {
681 zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one);
699 std::complex<double> tmp_V,
701 int nstate, std::complex<double> **tmp_v0,
702 std::complex<double> **tmp_v1
705 long int idim_max_buf;
707 long int isite1, isite2, isite3, isite4;
708 long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
709 long int j, Adiff, Bdiff;
710 std::complex<double> dmv;
711 long int origin, tmp_off, tmp_off2;
712 long int org_rankbit, ioff;
713 int iFlgHermite = FALSE;
717 org_isite3, org_ispin3, org_isite4, org_ispin4,
718 X, (
long int)
myrank, &origin);
720 isite1 = X->
Def.
Tpow[2 * org_isite1 + org_ispin1];
721 isite2 = X->
Def.
Tpow[2 * org_isite2 + org_ispin2];
722 isite3 = X->
Def.
Tpow[2 * org_isite3 + org_ispin3];
723 isite4 = X->
Def.
Tpow[2 * org_isite4 + org_ispin4];
725 if (iCheck == TRUE) {
726 tmp_isite1 = X->
Def.
OrgTpow[2 * org_isite1 + org_ispin1];
727 tmp_isite2 = X->
Def.
OrgTpow[2 * org_isite2 + org_ispin2];
728 tmp_isite3 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
729 tmp_isite4 = X->
Def.
OrgTpow[2 * org_isite4 + org_ispin4];
733 org_isite2, org_ispin2, org_isite1, org_ispin1,
734 X, (
long int) myrank, &origin);
735 if (iCheck == TRUE) {
737 tmp_isite4 = X->
Def.
OrgTpow[2 * org_isite1 + org_ispin1];
738 tmp_isite3 = X->
Def.
OrgTpow[2 * org_isite2 + org_ispin2];
739 tmp_isite2 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
740 tmp_isite1 = X->
Def.
OrgTpow[2 * org_isite4 + org_ispin4];
747 if (myrank == origin) {
748 if (isite1 == isite4 && isite2 == isite3) {
754 else if (isite2 == isite3) {
755 if (isite4 > isite1) Adiff = isite4 - isite1 * 2;
756 else Adiff = isite1 - isite4 * 2;
759 #pragma omp parallel for default(none) private(j, tmp_off) \ 760 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, nstate, tmp_v0, list_1) 761 for (j = 1; j <= i_max; j++)
762 CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V);
766 org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1);
769 #pragma omp parallel for default(none) private(j, tmp_off) \ 770 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate) 771 for (j = 1; j <= i_max; j++)
772 CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V);
776 org_isite1, org_ispin1, -tmp_V, X, nstate, tmp_v0, tmp_v1);
781 org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1);
785 org_isite3, org_ispin3, -tmp_V, X, nstate, tmp_v0, tmp_v1);
797 if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
798 else Adiff = isite1 - isite2 * 2;
799 if (isite4 > isite3) Bdiff = isite4 - isite3 * 2;
800 else Bdiff = isite3 - isite4 * 2;
802 if (iFlgHermite == FALSE) {
803 Fsgn =
X_GC_CisAjt((
long int) myrank, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2);
804 Fsgn *=
X_GC_CisAjt(tmp_off2, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off);
808 Fsgn =
X_GC_CisAjt((
long int) myrank, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2);
809 Fsgn *=
X_GC_CisAjt(tmp_off2, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off);
812 #pragma omp parallel default(none) private(j,ioff) \ 813 firstprivate(idim_max_buf,tmp_V,X) \ 814 shared(v1buf,tmp_v1,nstate,tmp_v0,list_2_1,list_2_2,list_1buf,one) 817 for (j = 1; j <= idim_max_buf; j++) {
821 zaxpy_(&nstate, &tmp_V, &
v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
830 #pragma omp parallel default(none) private(j, dmv, tmp_off, Fsgn, ioff) \ 831 firstprivate(myrank, idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, \ 832 org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite4, org_ispin4) \ 833 shared(v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf, list_2_1, list_2_2) 836 for (j = 1; j <= idim_max_buf; j++) {
837 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X,
838 list_1buf[j] + org_rankbit, &tmp_off) == TRUE)
842 dmv = tmp_V * (std::complex<double>)Fsgn;
843 zaxpy_(&nstate, &dmv, &
v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
862 std::complex<double> tmp_V,
864 int nstate, std::complex<double> **tmp_v0,
865 std::complex<double> **tmp_v1
868 long int idim_max_buf, ioff;
870 long int isite1, isite2, isite3;
871 long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
872 long int j, Asum, Adiff;
873 std::complex<double> dmv;
874 long int origin, tmp_off;
875 long int org_rankbit;
878 iCheck =
CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (
long int)
myrank, &origin);
881 isite1 = X->
Def.
Tpow[2 * org_isite1 + org_ispin1];
882 isite2 = X->
Def.
Tpow[2 * org_isite2 + org_ispin2];
883 isite3 = X->
Def.
Tpow[2 * org_isite3 + org_ispin3];
885 if (iCheck == TRUE) {
886 tmp_isite1 = X->
Def.
OrgTpow[2 * org_isite1 + org_ispin1];
887 tmp_isite2 = X->
Def.
OrgTpow[2 * org_isite2 + org_ispin2];
888 tmp_isite3 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
889 tmp_isite4 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
890 Asum = tmp_isite1 + tmp_isite2;
891 if (tmp_isite2 > tmp_isite1) Adiff = tmp_isite2 - tmp_isite1 * 2;
892 else Adiff = tmp_isite1 - tmp_isite2 * 2;
895 iCheck =
CheckBit_InterAllPE(org_isite3, org_ispin3, org_isite3, org_ispin3, org_isite2, org_ispin2, org_isite1, org_ispin1, X, (
long int) myrank, &origin);
896 if (iCheck == TRUE) {
898 tmp_isite4 = X->
Def.
OrgTpow[2 * org_isite1 + org_ispin1];
899 tmp_isite3 = X->
Def.
OrgTpow[2 * org_isite2 + org_ispin2];
900 tmp_isite2 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
901 tmp_isite1 = X->
Def.
OrgTpow[2 * org_isite3 + org_ispin3];
902 Asum = tmp_isite3 + tmp_isite4;
903 if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
904 else Adiff = tmp_isite3 - tmp_isite4 * 2;
911 if (myrank == origin) {
913 #pragma omp parallel default(none) \ 914 firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) \ 915 private(j) shared(tmp_v0, tmp_v1,nstate) 918 for (j = 1; j <= i_max; j++)
919 CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V);
923 for (j = 1; j <= i_max; j++)
924 CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V);
934 #pragma omp parallel default(none) private(j,dmv,ioff,tmp_off,Fsgn,Adiff,org_rankbit) \ 935 firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4,isite3) \ 936 shared(v1buf,tmp_v1,nstate,one,tmp_v0,list_1buf,list_2_1,list_2_2,origin,org_isite3,myrank,isite1,isite2,org_isite1,org_isite2) 940 if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
941 else Adiff = isite1 - isite2 * 2;
942 SgnBit(((
long int) myrank & Adiff), &Fsgn);
945 if (org_isite3 + 1 > X->
Def.
Nsite) {
947 for (j = 1; j <= idim_max_buf; j++) {
950 zaxpy_(&nstate, &tmp_V, &
v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
955 for (j = 1; j <= idim_max_buf; j++) {
959 zaxpy_(&nstate, &tmp_V, &
v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
967 for (j = 1; j <= idim_max_buf; j++) {
968 if (
GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X,
969 list_1buf[j] + org_rankbit, &tmp_off) == TRUE) {
970 dmv = tmp_V * (std::complex<double>)Fsgn;
973 zaxpy_(&nstate, &dmv, &
v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
991 std::complex<double> tmp_V,
993 int nstate, std::complex<double> **tmp_v0,
994 std::complex<double> **tmp_v1
997 org_isite4, org_ispin4, org_isite3, org_ispin3,
998 org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1);
1004 std::complex<double> tmp_V,
1007 std::complex<double> **tmp_v0,
1008 std::complex<double> **tmp_v1
1011 long int j, isite1, tmp_off;
1014 isite1 = X->
Def.
Tpow[2 * org_isite1 + org_ispin1];
1015 if (org_isite1 + 1 > X->
Def.
Nsite) {
1019 zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]);
1022 #pragma omp parallel default(none) shared(tmp_v0, tmp_v1, list_1,nstate,one) \ 1023 firstprivate(i_max, tmp_V, X, isite1) private(j, tmp_off) 1026 for (j = 1; j <= i_max; j++) {
1028 zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one);
1044 std::complex<double> tmp_trans,
1046 std::complex<double> **tmp_v0,
1047 std::complex<double> **tmp_v1,
1051 int mask2, state2, origin, bit2diff, Fsgn;
1052 long int idim_max_buf;
1053 std::complex<double> trans;
1056 mask2 = (int)Tpow[2 * org_isite + org_ispin];
1059 state2 = origin & mask2;
1067 SgnBit((
long int) (bit2diff), &Fsgn);
1070 SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &
v1buf[1][0]);
1072 if (state2 == mask2) {
1075 else if (state2 == 0) {
1076 trans = (double)Fsgn * tmp_trans;
1092 std::complex<double> tmp_trans,
1094 std::complex<double> **tmp_v0,
1095 std::complex<double> **tmp_v1,
1099 int mask2, state2, origin, bit2diff, Fsgn;
1100 long int idim_max_buf;
1101 std::complex<double> trans;
1104 mask2 = (int)Tpow[2 * org_isite + org_ispin];
1107 state2 = origin & mask2;
1115 SgnBit((
long int) (bit2diff), &Fsgn);
1118 SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &
v1buf[1][0]);
1120 if ( state2 == 0 ) trans = 0;
1121 else if (state2 == mask2) trans = (double)Fsgn * tmp_trans;
1133 std::complex<double> tmp_trans,
1135 std::complex<double> **tmp_v0,
1136 std::complex<double> **tmp_v1,
1143 int mask2, state2, origin, bit2diff, Fsgn;
1144 long int idim_max_buf, j, ioff;
1145 std::complex<double> trans;
1149 mask2 = (int)Tpow[2 * org_isite + org_ispin];
1152 state2 = origin & mask2;
1159 SgnBit((
long int) (bit2diff), &Fsgn);
1163 SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &
v1buf[1][0]);
1165 if (state2 == mask2) {
1168 else if (state2 == 0) {
1169 trans = (double)Fsgn * tmp_trans;
1173 #pragma omp parallel for default(none) private(j) \ 1174 firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1, list_2_2) \ 1175 shared(v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf_org) 1176 for (j = 1; j <= idim_max_buf; j++) {
1178 _irght, _ilft, _ihfbit, &ioff);
1179 zaxpy_(&nstate, &trans, &
v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
1189 std::complex<double> tmp_trans,
1190 int nstate, std::complex<double> **tmp_v0,
1191 std::complex<double> **tmp_v1,
1198 int mask2, state2, origin, bit2diff, Fsgn;
1199 long int idim_max_buf, j, ioff;
1200 std::complex<double> trans;
1204 mask2 = (int)Tpow[2 * org_isite + org_ispin];
1207 state2 = origin & mask2;
1214 SgnBit((
long int) (bit2diff), &Fsgn);
1217 SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &
v1buf[1][0]);
1222 else if (state2 == mask2) {
1223 trans = (double)Fsgn * tmp_trans;
1227 #pragma omp parallel for default(none) private(j) \ 1228 firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1, list_2_2) \ 1229 shared(v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf_org) 1230 for (j = 1; j <= idim_max_buf; j++) {
1232 _irght, _ilft, _ihfbit, &ioff);
1233 zaxpy_(&nstate, &trans, &
v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
void exitMPI(int errorcode)
MPI abort wrapper.
int CheckBit_PairPE(int org_isite1, int org_isigma1, int org_isite3, int org_isigma3, struct BindStruct *X, long int orgbit)
Check the occupation of both site 1 and site 3.
struct DefineList Def
Definition of the system (Hamiltonian), etc.
int CheckBit_Cis(long int is1_spin, long int orgbit, long int *offbit)
Check the occupation of the $(i,s)$ state, and compute the index of the final wavefunction associated with $c_{is}^\dagger$.
long int * OrgTpow
[2 * DefineList::NsiteMPI] malloc in setmem_def().
void X_Cis_MPI(int org_isite, int org_ispin, std::complex< double > tmp_trans, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int idim_max, long int *Tpow, long int _irght, long int _ilft, long int _ihfbit)
Compute the $c_{is}^\dagger$ term of the canonical Hubbard system.
void GC_CisAjt(long int j, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int is1_spin, long int is2_spin, long int sum_spin, long int diff_spin, std::complex< double > tmp_V, long int *tmp_off)
$c_{is}^\dagger c_{jt}$ term for the grandcanonical Hubbard system.
std::complex< double > ** v1buf
void X_GC_Ajt_MPI(int org_isite, int org_ispin, std::complex< double > tmp_trans, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int idim_max, long int *Tpow)
Single creation/annihilation operator in the inter process region for HubbardGC.
void X_child_CisAisCjtAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}$ term of the canonical Hubbard system.
int Nsite
Number of sites in the INTRA process region.
void zaxpy_long(long int n, std::complex< double > a, std::complex< double > *x, std::complex< double > *y)
Wrapper of zaxpy.
struct LargeList Large
Variables for Matrix-Vector product.
void X_child_CisAjtCkuAlv_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}$ term of the canonical Hubbard system.
int mode
multiply or expectation value.
long int irght
Used for Ogata-Lin ???
void X_GC_child_CisAjtCkuAlv_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}$ term of the grandcanonical Hubbard system.
int X_GC_CisAjt(long int list_1_j, long int is1_spin, long int is2_spin, long int sum_spin, long int diff_spin, long int *tmp_off)
Compute index of wavefunction of final state.
long int SendRecv_i(int origin, long int isend)
Wrapper of MPI_Sendrecv for long integer number.
void X_child_CisAjtCkuAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}$ term of the canonical Hubbard system.
void X_child_CisAis_Hubbard_MPI(int org_isite1, int org_ispin1, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
long int ilft
Used for Ogata-Lin ???
void X_GC_child_CisAjtCkuAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}$ term of the grandcanonical Hubbard system.
void X_GC_Cis_MPI(int org_isite, int org_ispin, std::complex< double > tmp_trans, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int idim_max, long int *Tpow)
Single creation/annihilation operator in the inter process region for HubbardGC.
int GetOffComp(long int *_list_2_1, long int *_list_2_2, long int _ibit, const long int _irght, const long int _ilft, const long int _ihfbit, long int *_ioffComp)
function of getting off-diagonal component
void X_GC_child_CisAis_Hubbard_MPI(int org_isite1, int org_ispin1, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{is}$ term of the grandcanonical Hubbard system.
long int ihfbit
Used for Ogata-Lin ???
int GetSgnInterAll(long int isite1, long int isite2, long int isite3, long int isite4, int *Fsgn, struct BindStruct *X, long int orgbit, long int *offbit)
Compute the index of the final wavefunction associated with the four-operator (two-body) term built from sites 1-4, together with the Fermion sign.
int X_CisAis(long int list_1_j, long int is1_spin)
$c_{is}^\dagger c_{is}$ term in Hubbard (canonical)
void X_GC_child_CisAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, std::complex< double > tmp_trans, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{jt}$ term of the grandcanonical Hubbard system.
int myrank
Process ID, defined in InitializeMPI()
void SendRecv_cv(int origin, long int nMsgS, long int nMsgR, std::complex< double > *vecs, std::complex< double > *vecr)
Wrapper of MPI_Sendrecv for std::complex<double> number. When we pass a message longer than 2^31-1 (m...
long int * Tpow
[2 * DefineList::NsiteMPI] malloc in setmem_def().
void X_Ajt_MPI(int org_isite, int org_ispin, std::complex< double > tmp_trans, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int idim_max, long int *Tpow, long int _irght, long int _ilft, long int _ihfbit)
Compute the $c_{jt}$ term of the canonical Hubbard system.
int CheckBit_Ajt(long int is1_spin, long int orgbit, long int *offbit)
Check the occupation of the $(j,t)$ state, and compute the index of the final wavefunction associated with $c_{jt}$.
void X_GC_child_CisAisCjtAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}$ term of the grandcanonical Hubbard system.
void SendRecv_iv(int origin, long int nMsgS, long int nMsgR, long int *vecs, long int *vecr)
Wrapper of MPI_Sendrecv for long integer number. When we pass a message longer than 2^31-1 (max of in...
void X_child_CisAisCjtAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{is} c_{jt}^\dagger c_{jt}$ term of the canonical Hubbard system.
int CheckPE(int org_isite, struct BindStruct *X)
Check whether this site is in the inter process region or not.
void CisAjt(long int j, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, struct BindStruct *X, long int is1_spin, long int is2_spin, long int sum_spin, long int diff_spin, std::complex< double > tmp_V)
term for canonical Hubbard
struct CheckList Check
Size of the Hilbert space.
void X_GC_child_general_hopp_MPIdouble(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, std::complex< double > tmp_trans, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Hopping term in Hubbard + GC when both site1 and site2 are in the inter-process region.
void X_GC_child_general_hopp_MPIsingle(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, std::complex< double > tmp_trans, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Hopping term in Hubbard + GC when only site2 is in the inter-process region.
long int idim_max
The dimension of the Hilbert space of this process.
void SgnBit(const long int org_bit, int *sgn)
function of getting fermion sign (64 bit)
int CheckBit_InterAllPE(int org_isite1, int org_isigma1, int org_isite2, int org_isigma2, int org_isite3, int org_isigma3, int org_isite4, int org_isigma4, struct BindStruct *X, long int orgbit, long int *offbit)
Compute the index of the final wavefunction associated with the four-operator (two-body) term, and check whether this operator is relevant or not.
void X_GC_child_CisAisCjtAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute the $c_{is}^\dagger c_{is} c_{jt}^\dagger c_{jt}$ term of the grandcanonical Hubbard system.