HPhi++  3.1.0
mltplyMPIHubbardCore.cpp
Go to the documentation of this file.
1 /* HPhi - Quantum Lattice Model Simulator */
2 /* Copyright (C) 2015 The University of Tokyo */
3 
4 /* This program is free software: you can redistribute it and/or modify */
5 /* it under the terms of the GNU General Public License as published by */
6 /* the Free Software Foundation, either version 3 of the License, or */
7 /* (at your option) any later version. */
8 
9 /* This program is distributed in the hope that it will be useful, */
10 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
11 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
12 /* GNU General Public License for more details. */
13 
14 /* You should have received a copy of the GNU General Public License */
15 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "Common.hpp"
20 #include "mltplyCommon.hpp"
21 #include "mltplyHubbardCore.hpp"
22 #include "mltplyMPIHubbard.hpp"
23 #include "mltplyMPIHubbardCore.hpp"
24 #include "bitcalc.hpp"
25 #include "wrapperMPI.hpp"
30 int CheckPE(
31  int org_isite,
32  struct BindStruct *X
33 ){
34  if (org_isite + 1 > X->Def.Nsite) {
35  return TRUE;
36  }
37  else {
38  return FALSE;
39  }
40 }/*int CheckPE*/
48  long int is1_spin,
49  long int orgbit,
50  long int *offbit
51 ) {
52  long int ibit_tmp;
53  ibit_tmp = orgbit & is1_spin;
54  if (ibit_tmp == 0) {
55  *offbit = orgbit + is1_spin;
56  return TRUE;
57  }
58  *offbit = 0;
59  return FALSE;
60 }/*int CheckBit_Cis*/
68  long int is1_spin,
69  long int orgbit,
70  long int *offbit
71 ) {
72  long int ibit_tmp;
73  ibit_tmp = orgbit & is1_spin;
74  if (ibit_tmp != 0) {
75  *offbit = orgbit - is1_spin;
76  return TRUE;
77  }
78  *offbit = 0;
79  return FALSE;
80 }/*int CheckBit_Ajt*/
88  int org_isite1,
89  int org_isigma1,
90  int org_isite2,
91  int org_isigma2,
92  int org_isite3,
93  int org_isigma3,
94  int org_isite4,
95  int org_isigma4,
96  struct BindStruct *X,
97  long int orgbit,
98  long int *offbit
99 ){
100  long int tmp_ispin;
101  long int tmp_org, tmp_off;
102  int iflgBitExist = TRUE;
103  tmp_org=orgbit;
104  tmp_off=0;
105 
106  if (CheckPE(org_isite1, X) == TRUE) {
107  tmp_ispin = X->Def.Tpow[2 * org_isite1 + org_isigma1];
108  if (CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
109  iflgBitExist = FALSE;
110  }
111  tmp_org = tmp_off;
112  }
113 
114  if (CheckPE(org_isite2, X) == TRUE ) {
115  tmp_ispin = X->Def.Tpow[2 * org_isite2 + org_isigma2];
116  if (CheckBit_Cis(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
117  iflgBitExist = FALSE;
118  }
119  tmp_org = tmp_off;
120  }
121 
122  if (CheckPE(org_isite3, X) == TRUE) {
123  tmp_ispin = X->Def.Tpow[2 * org_isite3 + org_isigma3];
124  if (CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
125  iflgBitExist = FALSE;
126  }
127  tmp_org = tmp_off;
128  }
129 
130  if (CheckPE(org_isite4, X) == TRUE) {
131  tmp_ispin = X->Def.Tpow[2 * org_isite4 + org_isigma4];
132  if (CheckBit_Cis(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
133  iflgBitExist = FALSE;
134  }
135  tmp_org = tmp_off;
136  }
137 
138  if(iflgBitExist != TRUE){
139  *offbit=0;
140  return FALSE;
141  }
142 
143  *offbit=tmp_org;
144  return TRUE;
145 }/*int CheckBit_InterAllPE*/
151  int org_isite1,
152  int org_isigma1,
153  int org_isite3,
154  int org_isigma3,
155  struct BindStruct *X,
156  long int orgbit
157 ){
158  long int tmp_ispin;
159  long int tmp_org, tmp_off;
160  int iflgBitExist = TRUE;
161  tmp_org=orgbit;
162 
163  if(CheckPE(org_isite1, X)==TRUE){
164  tmp_ispin = X->Def.Tpow[2 * org_isite1 + org_isigma1];
165  if (CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
166  iflgBitExist=FALSE;
167  }
168  }
169 
170  if (CheckPE(org_isite3, X) == TRUE) {
171  tmp_ispin = X->Def.Tpow[2 * org_isite3 + org_isigma3];
172  if (CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
173  iflgBitExist = FALSE;
174  }
175  }
176 
177  if(iflgBitExist != TRUE){
178  return FALSE;
179  }
180 
181  return TRUE;
182 }/*int CheckBit_PairPE*/
190  long int isite1,
191  long int isite2,
192  long int isite3,
193  long int isite4,
194  int *Fsgn,
195  struct BindStruct *X,
196  long int orgbit,
197  long int *offbit
198 ){
199  long int diffA;
200  long int tmp_off;
201  long int tmp_ispin1, tmp_ispin2;
202  int tmp_sgn=0;
203 
204  tmp_ispin1=isite2;
205  tmp_ispin2=isite1;
206 
207  if (tmp_ispin1 == tmp_ispin2) {
208  if ((orgbit & tmp_ispin1) == 0) {
209  *offbit = 0;
210  *Fsgn = tmp_sgn;
211  return FALSE;
212  }
213  tmp_sgn=1;
214  tmp_off = orgbit;
215  }
216  else {
217  if (tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1 * 2;
218  else diffA = tmp_ispin1-tmp_ispin2*2;
219 
220  tmp_sgn=X_GC_CisAjt(orgbit, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, &tmp_off);
221  if(tmp_sgn ==0){
222  *offbit =0;
223  *Fsgn = 0;
224  return FALSE;
225  }
226  }
227 
228  tmp_ispin1 = isite4;
229  tmp_ispin2 = isite3;
230 
231  if(tmp_ispin1 == tmp_ispin2){
232  if( (tmp_off & tmp_ispin1) == 0){
233  *offbit =0;
234  *Fsgn = 0;
235  return FALSE;
236  }
237  *offbit=tmp_off;
238  }
239  else{
240  if(tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1*2;
241  else diffA = tmp_ispin1-tmp_ispin2*2;
242  tmp_sgn *=X_GC_CisAjt(tmp_off, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, offbit);
243 
244  if(tmp_sgn ==0){
245  *offbit =0;
246  *Fsgn = 0;
247  return FALSE;
248  }
249  }
250 
251  *Fsgn =tmp_sgn;
252  *offbit = *offbit%X->Def.OrgTpow[2*X->Def.Nsite];
253  return TRUE;
254 }/*int GetSgnInterAll*/
260  int org_isite1,
261  int org_ispin1,
262  int org_isite3,
263  int org_ispin3,
264  std::complex<double> tmp_V,
265  struct BindStruct *X,
266  int nstate, std::complex<double> **tmp_v0,
267  std::complex<double> **tmp_v1
268 ) {
269  int iCheck;
270  long int tmp_ispin1;
271  long int i_max = X->Check.idim_max;
272  long int tmp_off, j;
273  int one = 1;
274 
275  iCheck=CheckBit_PairPE(org_isite1, org_ispin1, org_isite3, org_ispin3, X, (long int) myrank);
276  if(iCheck != TRUE){
277  return;
278  }
279 
280  if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) {
281  zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]);
282  }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/
283  else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) {
284  if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
285  else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
286 
287 #pragma omp parallel default(none) \
288  shared(org_isite1,org_ispin1,org_isite3,org_ispin3,nstate,one,tmp_v0,tmp_v1,tmp_ispin1) \
289  firstprivate(i_max,tmp_V,X) private(j,tmp_off)
290 #pragma omp for
291  for (j = 1; j <= i_max; j++) {
292  if (CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) {
293  zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one);
294  }
295  }/*for (j = 1; j <= i_max; j++)*/
296  }
297 }/*std::complex<double> X_GC_child_CisAisCjtAjt_Hubbard_MPI*/
303  int org_isite1,
304  int org_ispin1,
305  int org_isite2,
306  int org_ispin2,
307  int org_isite3,
308  int org_ispin3,
309  std::complex<double> tmp_V,
310  struct BindStruct *X,
311  int nstate, std::complex<double> **tmp_v0,
312  std::complex<double> **tmp_v1
313 ) {
314  long int i_max = X->Check.idim_max;
315  long int idim_max_buf;
316  int iCheck, Fsgn;
317  long int isite1, isite2, isite3;
318  long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
319  long int j, Asum, Adiff;
320  std::complex<double> dmv;
321  long int origin, tmp_off;
322  long int org_rankbit;
323  int one = 1;
324 
325  iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (long int) myrank, &origin);
326  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
327  isite2 = X->Def.Tpow[2 * org_isite2 + org_ispin2];
328  isite3 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
329 
330  if (iCheck == TRUE) {
331  tmp_isite1 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
332  tmp_isite2 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
333  tmp_isite3 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
334  tmp_isite4 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
335  Asum = tmp_isite1 + tmp_isite2;
336  if (tmp_isite2 > tmp_isite1) Adiff = tmp_isite2 - tmp_isite1 * 2;
337  else Adiff = tmp_isite1 - tmp_isite2 * 2;
338  }
339  else {
340  iCheck = CheckBit_InterAllPE(org_isite3, org_ispin3, org_isite3, org_ispin3, org_isite2, org_ispin2, org_isite1, org_ispin1, X, (long int) myrank, &origin);
341  if (iCheck == TRUE) {
342  tmp_V = conj(tmp_V);
343  tmp_isite4 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
344  tmp_isite3 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
345  tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
346  tmp_isite1 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
347  Asum = tmp_isite3 + tmp_isite4;
348  if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
349  else Adiff = tmp_isite3 - tmp_isite4 * 2;
350  if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
351  tmp_V = 0;
352  }
353  }
354  else {
355  return;
356  }
357  }
358 
359  if (myrank == origin) {// only k is in PE
360 
361  if (CheckBit_Ajt(isite3, myrank, &tmp_off) == FALSE) return;
362 
363 #pragma omp parallel default(none) \
364 firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) \
365  private(j,tmp_off) shared(tmp_v0, tmp_v1,nstate)
366  {
367 #pragma omp for
368  for (j = 1; j <= i_max; j++)
369  GC_CisAjt(j, nstate, tmp_v0, tmp_v1, isite2, isite1, Asum, Adiff, tmp_V, &tmp_off);
370 
371  if (X->Large.mode != M_CORR) {
372 #pragma omp for
373  for (j = 1; j <= i_max; j++)
374  GC_CisAjt(j, nstate, tmp_v0, tmp_v1, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off);
375  }/*if (X->Large.mode != M_CORR)*/
376  }/*End of paralle region*/
377  return;
378  }//myrank =origin
379  else {
380  idim_max_buf = SendRecv_i(origin, X->Check.idim_max);
381  SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]);
382 
383 #pragma omp parallel default(none) private(j,dmv,tmp_off,Fsgn,org_rankbit,Adiff) \
384  shared(v1buf,tmp_v1,nstate,one,tmp_v0,myrank,origin,isite3,org_isite3,isite1,isite2,org_isite2,org_isite1) \
385 firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4)
386  {
387  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) {
388  if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
389  else Adiff = isite1 - isite2 * 2;
390  SgnBit(((long int) myrank & Adiff), &Fsgn);
391  tmp_V *= Fsgn;
392 
393  if (org_isite3 + 1 > X->Def.Nsite) {
394 #pragma omp for
395  for (j = 1; j <= idim_max_buf; j++) {
396  zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[j][0], &one);
397  }/*for (j = 1; j <= idim_max_buf; j++)*/
398  }
399  else { //org_isite3 <= X->Def.Nsite
400 #pragma omp for
401  for (j = 1; j <= idim_max_buf; j++) {
402  if (CheckBit_Ajt(isite3, j - 1, &tmp_off) == TRUE) {
403  zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[j][0], &one);
404  }
405  }/*for (j = 1; j <= idim_max_buf; j++)*/
406  }
407  }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite)*/
408  else {
409  org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin;
410 #pragma omp for
411  for (j = 1; j <= idim_max_buf; j++) {
412  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) {
413  dmv = tmp_V * (std::complex<double>)Fsgn;
414  zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[tmp_off + 1][0], &one);
415  }
416  }/*for (j = 1; j <= idim_max_buf; j++)*/
417  }
418  }/*End of parallel region*/
419  }/*myrank != origin*/
420 }/*std::complex<double> X_GC_child_CisAjtCkuAku_Hubbard_MPI*/
426  int org_isite1,
427  int org_ispin1,
428  int org_isite3,
429  int org_ispin3,
430  int org_isite4,
431  int org_ispin4,
432  std::complex<double> tmp_V,
433  struct BindStruct *X,
434  int nstate, std::complex<double> **tmp_v0,
435  std::complex<double> **tmp_v1
436 ) {
438  org_isite4, org_ispin4, org_isite3, org_ispin3,
439  org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1);
440 }/*std::complex<double> X_GC_child_CisAisCjtAku_Hubbard_MPI*/
446  int org_isite1,
447  int org_ispin1,
448  int org_isite2,
449  int org_ispin2,
450  int org_isite3,
451  int org_ispin3,
452  int org_isite4,
453  int org_ispin4,
454  std::complex<double> tmp_V,
455  struct BindStruct *X,
456  int nstate, std::complex<double> **tmp_v0,
457  std::complex<double> **tmp_v1
458 ) {
459  long int i_max = X->Check.idim_max;
460  long int idim_max_buf;
461  int iCheck, Fsgn;
462  long int isite1, isite2, isite3, isite4;
463  long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
464  long int j, Adiff, Bdiff;
465  std::complex<double> dmv;
466  long int origin, tmp_off, tmp_off2;
467  long int org_rankbit;
468  int iFlgHermite = FALSE;
469  int one = 1;
470 
471  iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2,
472  org_isite3, org_ispin3, org_isite4, org_ispin4,
473  X, (long int) myrank, &origin);
474  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
475  isite2 = X->Def.Tpow[2 * org_isite2 + org_ispin2];
476  isite3 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
477  isite4 = X->Def.Tpow[2 * org_isite4 + org_ispin4];
478 
479  if (iCheck == TRUE) {
480  tmp_isite1 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
481  tmp_isite2 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
482  tmp_isite3 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
483  tmp_isite4 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
484  }
485  else {
486  iCheck = CheckBit_InterAllPE(org_isite4, org_ispin4, org_isite3, org_ispin3,
487  org_isite2, org_ispin2, org_isite1, org_ispin1,
488  X, (long int) myrank, &origin);
489  if (iCheck == TRUE) {
490  tmp_V = conj(tmp_V);
491  tmp_isite4 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
492  tmp_isite3 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
493  tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
494  tmp_isite1 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
495  iFlgHermite = TRUE;
496  if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
497  tmp_V = 0;
498  }
499  }
500  else {
501  return;
502  }
503  }
504 
505  if (myrank == origin) {
506  if (isite1 == isite4 && isite2 == isite3) { // CisAjvCjvAis =Cis(1-njv)Ais=nis-nisnjv
507  //calc nis
508  X_GC_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1);
509  //calc -nisniv
510  X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, -tmp_V, X, nstate, tmp_v0, tmp_v1);
511  }/*if (isite1 == isite4 && isite2 == isite3)*/
512  else if (isite2 == isite3) { // CisAjvCjvAku= Cis(1-njv)Aku=-CisAkunjv+CisAku: j is in PE
513  //calc CisAku
514  if (isite4 > isite1) Adiff = isite4 - isite1 * 2;
515  else Adiff = isite1 - isite4 * 2;
516 
517 #pragma omp parallel for default(none) private(j, tmp_off) \
518  firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate)
519  for (j = 1; j <= i_max; j++)
520  GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off);
521 
522  //calc -CisAku njv
523  X_GC_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4,
524  org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1);
525  if (X->Large.mode != M_CORR) { //for hermite
526 #pragma omp parallel for default(none) private(j, tmp_off) \
527  firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate)
528  for (j = 1; j <= i_max; j++)
529  GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off);
530 
531  //calc -njvCkuAis
532  X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4,
533  org_isite1, org_ispin1, -tmp_V, X, nstate, tmp_v0, tmp_v1);
534  }/*if (X->Large.mode != M_CORR)*/
535  }/*if (isite2 == isite3)*/
536  else {// CisAjtCkuAis = -CisAisCkuAjt: i is in PE
537  X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3,
538  org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1);
539  if (X->Large.mode != M_CORR) { //for hermite
540  X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2,
541  org_isite3, org_ispin3, -tmp_V, X, nstate, tmp_v0, tmp_v1);
542  }/*if (X->Large.mode != M_CORR)*/
543  }/*if (isite2 != isite3)*/
544  return;
545  }//myrank =origin
546  else {
547  idim_max_buf = SendRecv_i(origin, X->Check.idim_max);
548  SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]);
549 
550  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite
551  && org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite) {
552 
553  if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
554  else Adiff = isite1 - isite2 * 2;
555  if (isite4 > isite3) Bdiff = isite4 - isite3 * 2;
556  else Bdiff = isite3 - isite4 * 2;
557 
558  if (iFlgHermite == FALSE) {
559  Fsgn = X_GC_CisAjt((long int) myrank, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2);
560  Fsgn *= X_GC_CisAjt(tmp_off2, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off);
561  tmp_V *= Fsgn;
562  }/*if (iFlgHermite == FALSE)*/
563  else {
564  Fsgn = X_GC_CisAjt((long int) myrank, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2);
565  Fsgn *= X_GC_CisAjt(tmp_off2, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off);
566  tmp_V *= Fsgn;
567  }/*if (iFlgHermite == TRUE)*/
568 
569  zaxpy_long(i_max*nstate, tmp_V, &v1buf[1][0], &tmp_v0[1][0]);
570  }
571  else {
572  org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin;
573 #pragma omp parallel for default(none) private(j,dmv,tmp_off,Fsgn) \
574  firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4,org_rankbit) \
575  shared(v1buf,tmp_v1,tmp_v0,nstate,one)
576  for (j = 1; j <= idim_max_buf; j++) {
577  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) {
578  dmv = tmp_V * (std::complex<double>)Fsgn;
579  zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[tmp_off + 1][0], &one);
580  }
581  }/*for (j = 1; j <= idim_max_buf; j++)*/
582  }
583  }/*myrank != origin*/
584 }/*std::complex<double> X_GC_child_CisAjtCkuAlv_Hubbard_MPI*/
590  int org_isite1,
591  int org_ispin1,
592  std::complex<double> tmp_V,
593  struct BindStruct *X,
594  int nstate, std::complex<double> **tmp_v0,
595  std::complex<double> **tmp_v1
596 ) {
597  long int i_max = X->Check.idim_max;
598  long int j, isite1, tmp_off;
599  int one = 1;
600 
601  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
602  if (org_isite1 + 1 > X->Def.Nsite) {
603  if (CheckBit_Ajt(isite1, (long int) myrank, &tmp_off) == FALSE) return;
604 
605  zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]);
606  }/*if (org_isite1 + 1 > X->Def.Nsite)*/
607  else {
608 #pragma omp parallel default(none) shared(tmp_v0, tmp_v1,nstate,one) \
609  firstprivate(i_max, tmp_V, X, isite1) private(j, tmp_off)
610  {
611 #pragma omp for
612  for (j = 1; j <= i_max; j++) {
613  if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE) {
614  zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one);
615  }/*if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE)*/
616  }/*for (j = 1; j <= i_max; j++)*/
617  }/*End of parallel region*/
618  }/*if (org_isite1 + 1 <= X->Def.Nsite)*/
619 }/*std::complex<double> X_GC_child_CisAis_Hubbard_MPI*/
625  int org_isite1,
626  int org_ispin1,
627  int org_isite2,
628  int org_ispin2,
629  std::complex<double> tmp_trans,
630  struct BindStruct *X,
631  int nstate, std::complex<double> **tmp_v0,
632  std::complex<double> **tmp_v1
633 ) {
634 
635  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) {
636  X_GC_child_general_hopp_MPIdouble(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, nstate, tmp_v0, tmp_v1);
637  }
638  else if (org_isite1 + 1 > X->Def.Nsite || org_isite2 + 1 > X->Def.Nsite) {
639  X_GC_child_general_hopp_MPIsingle(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, nstate, tmp_v0, tmp_v1);
640  }
641  else {
642  //error message will be added.
643  exitMPI(-1);
644  }
645 }/*std::complex<double> X_GC_child_CisAjt_Hubbard_MPI*/
651  int org_isite1,
652  int org_ispin1,
653  int org_isite3,
654  int org_ispin3,
655  std::complex<double> tmp_V,
656  struct BindStruct *X,
657  int nstate, std::complex<double> **tmp_v0,
658  std::complex<double> **tmp_v1
659 ) {
660  int iCheck;
661  long int tmp_ispin1;
662  long int i_max = X->Check.idim_max;
663  long int tmp_off, j;
664  int one = 1;
665 
666  iCheck = CheckBit_PairPE(org_isite1, org_ispin1, org_isite3, org_ispin3, X, (long int) myrank);
667  if (iCheck != TRUE) return;
668 
669  if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) {
670  zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]);
671  }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/
672  else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) {
673  if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
674  else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
675 
676 #pragma omp parallel for default(none) \
677 shared(tmp_v0,tmp_v1,list_1,org_isite1,org_ispin1,org_isite3,org_ispin3,nstate,one) \
678  firstprivate(i_max,tmp_V,X,tmp_ispin1) private(j,tmp_off)
679  for (j = 1; j <= i_max; j++) {
680  if (CheckBit_Ajt(tmp_ispin1, list_1[j], &tmp_off) == TRUE) {
681  zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one);
682  }
683  }/*for (j = 1; j <= i_max; j++)*/
684  }/*if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite)*/
685 }/*std::complex<double> X_child_CisAisCjtAjt_Hubbard_MPI*/
691  int org_isite1,
692  int org_ispin1,
693  int org_isite2,
694  int org_ispin2,
695  int org_isite3,
696  int org_ispin3,
697  int org_isite4,
698  int org_ispin4,
699  std::complex<double> tmp_V,
700  struct BindStruct *X,
701  int nstate, std::complex<double> **tmp_v0,
702  std::complex<double> **tmp_v1
703 ) {
704  long int i_max = X->Check.idim_max;
705  long int idim_max_buf;
706  int iCheck, Fsgn;
707  long int isite1, isite2, isite3, isite4;
708  long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
709  long int j, Adiff, Bdiff;
710  std::complex<double> dmv;
711  long int origin, tmp_off, tmp_off2;
712  long int org_rankbit, ioff;
713  int iFlgHermite = FALSE;
714  int one = 1;
715 
716  iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2,
717  org_isite3, org_ispin3, org_isite4, org_ispin4,
718  X, (long int) myrank, &origin);
719  //printf("iCheck=%d, myrank=%d, origin=%d\n", iCheck, myrank, origin);
720  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
721  isite2 = X->Def.Tpow[2 * org_isite2 + org_ispin2];
722  isite3 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
723  isite4 = X->Def.Tpow[2 * org_isite4 + org_ispin4];
724 
725  if (iCheck == TRUE) {
726  tmp_isite1 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
727  tmp_isite2 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
728  tmp_isite3 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
729  tmp_isite4 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
730  }/*if (iCheck == TRUE)*/
731  else {
732  iCheck = CheckBit_InterAllPE(org_isite4, org_ispin4, org_isite3, org_ispin3,
733  org_isite2, org_ispin2, org_isite1, org_ispin1,
734  X, (long int) myrank, &origin);
735  if (iCheck == TRUE) {
736  tmp_V = conj(tmp_V);
737  tmp_isite4 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
738  tmp_isite3 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
739  tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
740  tmp_isite1 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
741  iFlgHermite = TRUE;
742  if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;
743  }/*if (iCheck == TRUE)*/
744  else return;
745  }/*if (iCheck == FALSE)*/
746 
747  if (myrank == origin) {
748  if (isite1 == isite4 && isite2 == isite3) { // CisAjvCjvAis =Cis(1-njv)Ais=nis-nisnjv
749  //calc nis
750  X_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1);
751  //calc -nisniv
752  X_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, -tmp_V, X, nstate, tmp_v0, tmp_v1);
753  }/* if (isite1 == isite4 && isite2 == isite3)*/
754  else if (isite2 == isite3) { // CisAjvCjvAku= Cis(1-njv)Aku=-CisAkunjv+CisAku: j is in PE
755  if (isite4 > isite1) Adiff = isite4 - isite1 * 2;
756  else Adiff = isite1 - isite4 * 2;
757 
758  //calc CisAku
759 #pragma omp parallel for default(none) private(j, tmp_off) \
760 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, nstate, tmp_v0, list_1)
761  for (j = 1; j <= i_max; j++)
762  CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V);
763 
764  //calc -CisAku njv
765  X_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4,
766  org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1);
767 
768  if (X->Large.mode != M_CORR) { //for hermite
769 #pragma omp parallel for default(none) private(j, tmp_off) \
770  firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate)
771  for (j = 1; j <= i_max; j++)
772  CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V);
773 
774  //calc -njvCkuAis
775  X_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4,
776  org_isite1, org_ispin1, -tmp_V, X, nstate, tmp_v0, tmp_v1);
777  }/*if (X->Large.mode != M_CORR)*/
778  }/*if (isite2 == isite3)*/
779  else {// CisAjtCkuAis = -CisAisCkuAjt: i is in PE
780  X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3,
781  org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1);
782 
783  if (X->Large.mode != M_CORR) //for hermite: CisAkuCjtAis=-CisAisCjtAku
784  X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2,
785  org_isite3, org_ispin3, -tmp_V, X, nstate, tmp_v0, tmp_v1);
786  }/*if (isite2 != isite3)*/
787  return;
788  }//myrank =origin
789  else {
790  idim_max_buf = SendRecv_i(origin, X->Check.idim_max);
791  SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf);
792 
793  SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]);
794  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite
795  && org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite)
796  {
797  if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
798  else Adiff = isite1 - isite2 * 2;
799  if (isite4 > isite3) Bdiff = isite4 - isite3 * 2;
800  else Bdiff = isite3 - isite4 * 2;
801 
802  if (iFlgHermite == FALSE) {
803  Fsgn = X_GC_CisAjt((long int) myrank, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2);
804  Fsgn *= X_GC_CisAjt(tmp_off2, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off);
805  tmp_V *= Fsgn;
806  }/*if (iFlgHermite == FALSE)*/
807  else {
808  Fsgn = X_GC_CisAjt((long int) myrank, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2);
809  Fsgn *= X_GC_CisAjt(tmp_off2, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off);
810  tmp_V *= Fsgn;
811  }/*if (iFlgHermite == TRUE)*/
812 #pragma omp parallel default(none) private(j,ioff) \
813 firstprivate(idim_max_buf,tmp_V,X) \
814  shared(v1buf,tmp_v1,nstate,tmp_v0,list_2_1,list_2_2,list_1buf,one)
815  {
816 #pragma omp for
817  for (j = 1; j <= idim_max_buf; j++) {
819  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE)
820  {
821  zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
822  }
823  }/*for (j = 1; j <= idim_max_buf; j++)*/
824  }/*End of parallel region*/
825  }//org_isite1+1 > X->Def.Nsite && org_isite2+1 > X->Def.Nsite
826  // && org_isite3+1 > X->Def.Nsite && org_isite4+1 > X->Def.Nsite
827  else {
828  org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin;
829 
830 #pragma omp parallel default(none) private(j, dmv, tmp_off, Fsgn, ioff) \
831 firstprivate(myrank, idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, \
832 org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite4, org_ispin4) \
833  shared(v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf, list_2_1, list_2_2)
834  {
835 #pragma omp for
836  for (j = 1; j <= idim_max_buf; j++) {
837  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X,
838  list_1buf[j] + org_rankbit, &tmp_off) == TRUE)
839  {
840  if (GetOffComp(list_2_1, list_2_2, tmp_off, X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE)
841  {
842  dmv = tmp_V * (std::complex<double>)Fsgn;
843  zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
844  }
845  }
846  }/*for (j = 1; j <= idim_max_buf; j++)*/
847  }/*End of parallel region*/
848  }
849  }/*if (myrank != origin)*/
850 }/*std::complex<double> X_child_CisAjtCkuAlv_Hubbard_MPI*/
856  int org_isite1,
857  int org_ispin1,
858  int org_isite2,
859  int org_ispin2,
860  int org_isite3,
861  int org_ispin3,
862  std::complex<double> tmp_V,
863  struct BindStruct *X,
864  int nstate, std::complex<double> **tmp_v0,
865  std::complex<double> **tmp_v1
866 ) {
867  long int i_max = X->Check.idim_max;
868  long int idim_max_buf, ioff;
869  int iCheck, Fsgn;
870  long int isite1, isite2, isite3;
871  long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
872  long int j, Asum, Adiff;
873  std::complex<double> dmv;
874  long int origin, tmp_off;
875  long int org_rankbit;
876  int one = 1;
877  //printf("Deubg0-0: org_isite1=%d, org_ispin1=%d, org_isite2=%d, org_ispin2=%d, org_isite3=%d, org_ispin3=%d\n", org_isite1, org_ispin1,org_isite2, org_ispin2,org_isite3, org_ispin3);
878  iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (long int) myrank, &origin);
879  //printf("iCheck=%d, myrank=%d, origin=%d\n", iCheck, myrank, origin);
880 
881  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
882  isite2 = X->Def.Tpow[2 * org_isite2 + org_ispin2];
883  isite3 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
884 
885  if (iCheck == TRUE) {
886  tmp_isite1 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
887  tmp_isite2 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
888  tmp_isite3 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
889  tmp_isite4 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
890  Asum = tmp_isite1 + tmp_isite2;
891  if (tmp_isite2 > tmp_isite1) Adiff = tmp_isite2 - tmp_isite1 * 2;
892  else Adiff = tmp_isite1 - tmp_isite2 * 2;
893  }/*if (iCheck == TRUE)*/
894  else {
895  iCheck = CheckBit_InterAllPE(org_isite3, org_ispin3, org_isite3, org_ispin3, org_isite2, org_ispin2, org_isite1, org_ispin1, X, (long int) myrank, &origin);
896  if (iCheck == TRUE) {
897  tmp_V = conj(tmp_V);
898  tmp_isite4 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
899  tmp_isite3 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
900  tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
901  tmp_isite1 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
902  Asum = tmp_isite3 + tmp_isite4;
903  if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
904  else Adiff = tmp_isite3 - tmp_isite4 * 2;
905  if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;
906  //printf("tmp_isite1=%ld, tmp_isite2=%ld, Adiff=%ld\n", tmp_isite1, tmp_isite2, Adiff);
907  }/*if (iCheck == TRUE)*/
908  else return;
909  }/*if (iCheck == FALSE)*/
910 
911  if (myrank == origin) {// only k is in PE
912  //for hermite
913 #pragma omp parallel default(none) \
914 firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) \
915  private(j) shared(tmp_v0, tmp_v1,nstate)
916  {
917 #pragma omp for
918  for (j = 1; j <= i_max; j++)
919  CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V);
920 
921  if (X->Large.mode != M_CORR) {
922 #pragma omp for
923  for (j = 1; j <= i_max; j++)
924  CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V);
925  }/*if (X->Large.mode != M_CORR)*/
926  }/*End of parallel region*/
927  return;
928  }//myrank =origin
929  else {
930  idim_max_buf = SendRecv_i(origin, X->Check.idim_max);
931  SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf);
932  SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]);
933 
934 #pragma omp parallel default(none) private(j,dmv,ioff,tmp_off,Fsgn,Adiff,org_rankbit) \
935 firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4,isite3) \
936  shared(v1buf,tmp_v1,nstate,one,tmp_v0,list_1buf,list_2_1,list_2_2,origin,org_isite3,myrank,isite1,isite2,org_isite1,org_isite2)
937  {
938 
939  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) {
940  if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
941  else Adiff = isite1 - isite2 * 2;
942  SgnBit(((long int) myrank & Adiff), &Fsgn);
943  tmp_V *= Fsgn;
944 
945  if (org_isite3 + 1 > X->Def.Nsite) {
946 #pragma omp for
947  for (j = 1; j <= idim_max_buf; j++) {
949  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff);
950  zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
951  }/*for (j = 1; j <= idim_max_buf; j++)*/
952  }/*if (org_isite3 + 1 > X->Def.Nsite)*/
953  else { //org_isite3 <= X->Def.Nsite
954 #pragma omp for
955  for (j = 1; j <= idim_max_buf; j++) {
956  if (CheckBit_Ajt(isite3, list_1buf[j], &tmp_off) == TRUE) {
958  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff);
959  zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
960  }
961  }/*for (j = 1; j <= idim_max_buf; j++)*/
962  }/*if (org_isite3 + 1 <= X->Def.Nsite)*/
963  }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite)*/
964  else {
965  org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin;
966 #pragma omp for
967  for (j = 1; j <= idim_max_buf; j++) {
968  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X,
969  list_1buf[j] + org_rankbit, &tmp_off) == TRUE) {
970  dmv = tmp_V * (std::complex<double>)Fsgn;
971  GetOffComp(list_2_1, list_2_2, tmp_off,
972  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff);
973  zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
974  }
975  }/*for (j = 1; j <= idim_max_buf; j++)*/
976  }
977  }/*End of parallel region*/
978  }/*if (myrank != origin)*/
979 }/*std::complex<double> X_child_CisAjtCkuAku_Hubbard_MPI*/
985  int org_isite1,
986  int org_ispin1,
987  int org_isite3,
988  int org_ispin3,
989  int org_isite4,
990  int org_ispin4,
991  std::complex<double> tmp_V,
992  struct BindStruct *X,
993  int nstate, std::complex<double> **tmp_v0,
994  std::complex<double> **tmp_v1
995 ) {
997  org_isite4, org_ispin4, org_isite3, org_ispin3,
998  org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1);
999 }/*std::complex<double> X_child_CisAisCjtAku_Hubbard_MPI*/
1000 
1002  int org_isite1,
1003  int org_ispin1,
1004  std::complex<double> tmp_V,
1005  struct BindStruct *X,
1006  int nstate,
1007  std::complex<double> **tmp_v0,
1008  std::complex<double> **tmp_v1
1009 ) {
1010  long int i_max = X->Check.idim_max;
1011  long int j, isite1, tmp_off;
1012  int one = 1;
1013 
1014  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
1015  if (org_isite1 + 1 > X->Def.Nsite) {
1016  if (CheckBit_Ajt(isite1, (long int) myrank, &tmp_off) == FALSE)
1017  return;
1018 
1019  zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]);
1020  }/*if (org_isite1 + 1 > X->Def.Nsite)*/
1021  else {
1022 #pragma omp parallel default(none) shared(tmp_v0, tmp_v1, list_1,nstate,one) \
1023  firstprivate(i_max, tmp_V, X, isite1) private(j, tmp_off)
1024  {
1025 #pragma omp for
1026  for (j = 1; j <= i_max; j++) {
1027  if (X_CisAis(list_1[j], isite1) != 0) {
1028  zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one);
1029  }/*if (X_CisAis(list_1[j], X, isite1) != 0)*/
1030  }/*for (j = 1; j <= i_max; j++)*/
1031  }/*End of parallel region*/
1032  }/*if (org_isite1 + 1 <= X->Def.Nsite)*/
1033 }/*std::complex<double> X_child_CisAis_Hubbard_MPI*/
1042  int org_isite,
1043  int org_ispin,
1044  std::complex<double> tmp_trans,
1045  int nstate,
1046  std::complex<double> **tmp_v0,
1047  std::complex<double> **tmp_v1,
1048  long int idim_max,
1049  long int *Tpow
1050 ) {
1051  int mask2, state2, origin, bit2diff, Fsgn;
1052  long int idim_max_buf;
1053  std::complex<double> trans;
1054 
1055  // org_isite >= Nsite
1056  mask2 = (int)Tpow[2 * org_isite + org_ispin];
1057 
1058  origin = myrank ^ mask2; // XOR
1059  state2 = origin & mask2;
1060 
1061  //if state2 = mask2, the state (org_isite, org_ispin) is not occupied in myrank
1062  //origin: if the state (org_isite, org_ispin) is occupied in myrank, the state is not occupied in origin.
1063 
1064  bit2diff = myrank - ((2 * mask2 - 1) & myrank);
1065 
1066  //SgnBit((long int) (origin & bit2diff), &Fsgn); // Fermion sign
1067  SgnBit((long int) (bit2diff), &Fsgn); // Fermion sign
1068 
1069  idim_max_buf = SendRecv_i(origin, idim_max);
1070  SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]);
1071 
1072  if (state2 == mask2) {
1073  trans = 0;
1074  }
1075  else if (state2 == 0) {
1076  trans = (double)Fsgn * tmp_trans;
1077  }
1078  else return;
1079 
1080  zaxpy_long(idim_max_buf*nstate, trans, &v1buf[1][0], &tmp_v0[1][0]);
1081 }/*std::complex<double> X_GC_Cis_MPI*/
1090  int org_isite,
1091  int org_ispin,
1092  std::complex<double> tmp_trans,
1093  int nstate,
1094  std::complex<double> **tmp_v0,
1095  std::complex<double> **tmp_v1,
1096  long int idim_max,
1097  long int *Tpow
1098 ) {
1099  int mask2, state2, origin, bit2diff, Fsgn;
1100  long int idim_max_buf;
1101  std::complex<double> trans;
1102 
1103  // org_isite >= Nsite
1104  mask2 = (int)Tpow[2 * org_isite + org_ispin];
1105 
1106  origin = myrank ^ mask2; // XOR
1107  state2 = origin & mask2;
1108 
1109  //if state2 = mask2, the state (org_isite, org_ispin) is not occupied in myrank
1110  //origin: if the state (org_isite, org_ispin) is occupied in myrank, the state is not occupied in origin.
1111 
1112  bit2diff = myrank - ((2 * mask2 - 1) & myrank);
1113 
1114  //SgnBit((long int) (origin & bit2diff), &Fsgn); // Fermion sign
1115  SgnBit((long int) (bit2diff), &Fsgn); // Fermion sign
1116 
1117  idim_max_buf = SendRecv_i(origin, idim_max);
1118  SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]);
1119 
1120  if ( state2 == 0 ) trans = 0;
1121  else if (state2 == mask2) trans = (double)Fsgn * tmp_trans;
1122  else return;
1123 
1124  zaxpy_long(idim_max_buf*nstate, trans, &v1buf[1][0], &tmp_v0[1][0]);
1125 }/*std::complex<double> X_GC_Ajt_MPI*/
1131  int org_isite,
1132  int org_ispin,
1133  std::complex<double> tmp_trans,
1134  int nstate,
1135  std::complex<double> **tmp_v0,
1136  std::complex<double> **tmp_v1,
1137  long int idim_max,
1138  long int *Tpow,
1139  long int _irght,
1140  long int _ilft,
1141  long int _ihfbit
1142 ) {
1143  int mask2, state2, origin, bit2diff, Fsgn;
1144  long int idim_max_buf, j, ioff;
1145  std::complex<double> trans;
1146  int one = 1;
1147 
1148  // org_isite >= Nsite
1149  mask2 = (int)Tpow[2 * org_isite + org_ispin];
1150 
1151  origin = myrank ^ mask2; // XOR
1152  state2 = origin & mask2;
1153 
1154  //if state2 = mask2, the state (org_isite, org_ispin) is not occupied in myrank
1155  //origin: if the state (org_isite, org_ispin) is occupied in myrank, the state is not occupied in origin.
1156 
1157  bit2diff = myrank - ((2 * mask2 - 1) & myrank);
1158 
1159  SgnBit((long int) (bit2diff), &Fsgn); // Fermion sign
1160 
1161  idim_max_buf = SendRecv_i(origin, idim_max);
1162  SendRecv_iv(origin, idim_max + 1, idim_max_buf + 1, list_1_org, list_1buf_org);
1163  SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]);
1164 
1165  if (state2 == mask2) {
1166  trans = 0;
1167  }
1168  else if (state2 == 0) {
1169  trans = (double)Fsgn * tmp_trans;
1170  }
1171  else return;
1172 
1173 #pragma omp parallel for default(none) private(j) \
1174 firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1, list_2_2) \
1175  shared(v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf_org)
1176  for (j = 1; j <= idim_max_buf; j++) {//idim_max_buf -> original
1178  _irght, _ilft, _ihfbit, &ioff);
1179  zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
1180  }/*for (j = 1; j <= idim_max_buf; j++)*/
1181 }/*std::complex<double> X_GC_Cis_MPI*/
1187  int org_isite,
1188  int org_ispin,
1189  std::complex<double> tmp_trans,
1190  int nstate, std::complex<double> **tmp_v0,
1191  std::complex<double> **tmp_v1,
1192  long int idim_max,
1193  long int *Tpow,
1194  long int _irght,
1195  long int _ilft,
1196  long int _ihfbit
1197 ){
1198  int mask2, state2, origin, bit2diff, Fsgn;
1199  long int idim_max_buf, j, ioff;
1200  std::complex<double> trans;
1201  int one = 1;
1202 
1203  // org_isite >= Nsite
1204  mask2 = (int)Tpow[2 * org_isite + org_ispin];
1205 
1206  origin = myrank ^ mask2; // XOR
1207  state2 = origin & mask2;
1208 
1209  //if state2 = mask2, the state (org_isite, org_ispin) is not occupied in myrank
1210  //origin: if the state (org_isite, org_ispin) is occupied in myrank, the state is not occupied in origin.
1211 
1212  bit2diff = myrank - ((2 * mask2 - 1) & myrank);
1213 
1214  SgnBit((long int) (bit2diff), &Fsgn); // Fermion sign
1215  idim_max_buf = SendRecv_i(origin, idim_max);
1216  SendRecv_iv(origin, idim_max + 1, idim_max_buf + 1, list_1_org, list_1buf_org);
1217  SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]);
1218 
1219  if (state2 == 0) {
1220  trans = 0;
1221  }
1222  else if (state2 == mask2) {
1223  trans = (double)Fsgn * tmp_trans;
1224  }
1225  else return;
1226 
1227 #pragma omp parallel for default(none) private(j) \
1228 firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1, list_2_2) \
1229  shared(v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf_org)
1230  for (j = 1; j <= idim_max_buf; j++) {
1232  _irght, _ilft, _ihfbit, &ioff);
1233  zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one);
1234  }
1235 }/*std::complex<double> X_Ajt_MPI*/
void exitMPI(int errorcode)
MPI Abortation wrapper.
Definition: wrapperMPI.cpp:86
int CheckBit_PairPE(int org_isite1, int org_isigma1, int org_isite3, int org_isigma3, struct BindStruct *X, long int orgbit)
Check the occupation of both site 1 and site 3.
struct DefineList Def
Definision of system (Hamiltonian) etc.
Definition: struct.hpp:395
long int * list_2_1
Definition: global.cpp:27
long int * list_2_2
Definition: global.cpp:28
int CheckBit_Cis(long int is1_spin, long int orgbit, long int *offbit)
Check the occupation of state, and compute the index of final wavefunction associated to ...
long int * OrgTpow
[2 * DefineList::NsiteMPI] malloc in setmem_def().
Definition: struct.hpp:92
void X_Cis_MPI(int org_isite, int org_ispin, std::complex< double > tmp_trans, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int idim_max, long int *Tpow, long int _irght, long int _ilft, long int _ihfbit)
Compute term of canonical Hubbard system.
void GC_CisAjt(long int j, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int is1_spin, long int is2_spin, long int sum_spin, long int diff_spin, std::complex< double > tmp_V, long int *tmp_off)
term for grandcanonical Hubbard
std::complex< double > ** v1buf
Definition: global.cpp:22
void X_GC_Ajt_MPI(int org_isite, int org_ispin, std::complex< double > tmp_trans, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int idim_max, long int *Tpow)
Single creation/annihilation operator in the inter process region for HubbardGC.
void X_child_CisAisCjtAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of canonical Hubbard system.
int Nsite
Number of sites in the INTRA process region.
Definition: struct.hpp:56
void zaxpy_long(long int n, std::complex< double > a, std::complex< double > *x, std::complex< double > *y)
Wrapper of zaxpy.
Definition: mltply.cpp:128
struct LargeList Large
Variables for Matrix-Vector product.
Definition: struct.hpp:397
long int * list_1_org
Definition: global.cpp:31
void X_child_CisAjtCkuAlv_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of canonical Hubbard system.
int mode
multiply or expectation value.
Definition: struct.hpp:331
long int irght
Used for Ogata-Lin ???
Definition: struct.hpp:344
void X_GC_child_CisAjtCkuAlv_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of grandcanonical Hubbard system.
int X_GC_CisAjt(long int list_1_j, long int is1_spin, long int is2_spin, long int sum_spin, long int diff_spin, long int *tmp_off)
Compute index of wavefunction of final state.
long int SendRecv_i(int origin, long int isend)
Wrapper of MPI_Sendrecv for long integer number.
Definition: wrapperMPI.cpp:504
void X_child_CisAjtCkuAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of canonical Hubbard system.
long int * list_1buf
Definition: global.cpp:26
void X_child_CisAis_Hubbard_MPI(int org_isite1, int org_ispin1, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
long int ilft
Used for Ogata-Lin ???
Definition: struct.hpp:345
Bind.
Definition: struct.hpp:394
void X_GC_child_CisAjtCkuAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of grandcanonical Hubbard system.
void X_GC_Cis_MPI(int org_isite, int org_ispin, std::complex< double > tmp_trans, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int idim_max, long int *Tpow)
Single creation/annihilation operator in the inter process region for HubbardGC.
int GetOffComp(long int *_list_2_1, long int *_list_2_2, long int _ibit, const long int _irght, const long int _ilft, const long int _ihfbit, long int *_ioffComp)
function of getting off-diagonal component
Definition: bitcalc.cpp:195
void X_GC_child_CisAis_Hubbard_MPI(int org_isite1, int org_ispin1, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of grandcanonical Hubbard system.
long int ihfbit
Used for Ogata-Lin ???
Definition: struct.hpp:346
int GetSgnInterAll(long int isite1, long int isite2, long int isite3, long int isite4, int *Fsgn, struct BindStruct *X, long int orgbit, long int *offbit)
Compute the index of final wavefunction associated to , and Fermion sign.
int X_CisAis(long int list_1_j, long int is1_spin)
term in Hubbard (canonical)
void X_GC_child_CisAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, std::complex< double > tmp_trans, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of grandcanonical Hubbard system.
int myrank
Process ID, defined in InitializeMPI()
Definition: global.cpp:73
void SendRecv_cv(int origin, long int nMsgS, long int nMsgR, std::complex< double > *vecs, std::complex< double > *vecr)
Wrapper of MPI_Sendrecv for std::complex<double> number. When we pass a message longer than 2^31-1 (m...
Definition: wrapperMPI.cpp:424
long int * Tpow
[2 * DefineList::NsiteMPI] malloc in setmem_def().
Definition: struct.hpp:90
void X_Ajt_MPI(int org_isite, int org_ispin, std::complex< double > tmp_trans, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, long int idim_max, long int *Tpow, long int _irght, long int _ilft, long int _ihfbit)
Compute term of canonical Hubbard system.
int CheckBit_Ajt(long int is1_spin, long int orgbit, long int *offbit)
Check the occupation of state, and compute the index of final wavefunction associated to ...
void X_GC_child_CisAisCjtAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of grandcanonical Hubbard system.
void SendRecv_iv(int origin, long int nMsgS, long int nMsgR, long int *vecs, long int *vecr)
Wrapper of MPI_Sendrecv for long integer number. When we pass a message longer than 2^31-1 (max of in...
Definition: wrapperMPI.cpp:465
long int * list_1buf_org
Definition: global.cpp:32
void X_child_CisAisCjtAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of canonical Hubbard system.
int CheckPE(int org_isite, struct BindStruct *X)
Check whether this site is in the inter process region or not.
void CisAjt(long int j, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1, struct BindStruct *X, long int is1_spin, long int is2_spin, long int sum_spin, long int diff_spin, std::complex< double > tmp_V)
term for canonical Hubbard
struct CheckList Check
Size of the Hilbert space.
Definition: struct.hpp:396
void X_GC_child_general_hopp_MPIdouble(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, std::complex< double > tmp_trans, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Hopping term in Hubbard + GC When both site1 and site2 are in the inter process region.
void X_GC_child_general_hopp_MPIsingle(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, std::complex< double > tmp_trans, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Hopping term in Hubbard + GC When only site2 is in the inter process region.
long int * list_1
Definition: global.cpp:25
long int idim_max
The dimension of the Hilbert space of this process.
Definition: struct.hpp:305
void SgnBit(const long int org_bit, int *sgn)
function of getting fermion sign (64 bit)
Definition: bitcalc.cpp:338
int CheckBit_InterAllPE(int org_isite1, int org_isigma1, int org_isite2, int org_isigma2, int org_isite3, int org_isigma3, int org_isite4, int org_isigma4, struct BindStruct *X, long int orgbit, long int *offbit)
Compute the index of final wavefunction associated to , and check whether this operator is relevant o...
void X_GC_child_CisAisCjtAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, std::complex< double > tmp_V, struct BindStruct *X, int nstate, std::complex< double > **tmp_v0, std::complex< double > **tmp_v1)
Compute term of grandcanonical Hubbard system.