HPhi++  3.1.0
dSFMT.cpp
Go to the documentation of this file.
1 
14 #include <cstdio>
15 #include <cstring>
16 #include <cstdlib>
17 #include "dSFMT-params.hpp"
18 #include "wrapperMPI.hpp"
19 
23 static const int dsfmt_mexp = DSFMT_MEXP;
24 
25 /*----------------
26  STATIC FUNCTIONS
27  ----------------*/
28 inline static uint32_t ini_func1(uint32_t x);
29 inline static uint32_t ini_func2(uint32_t x);
30 inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
31  int size);
32 inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
33  int size);
34 inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
35  int size);
36 inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
37  int size);
38 inline static int idxof(int i);
39 static void initial_mask(dsfmt_t *dsfmt);
40 static void period_certification(dsfmt_t *dsfmt);
41 
42 #if defined(HAVE_SSE2)
43 # include <emmintrin.hpp>
45 static __m128i sse2_param_mask;
47 static __m128i sse2_int_one;
49 static __m128d sse2_double_two;
51 static __m128d sse2_double_m_one;
52 
53 static void setup_const(void);
54 #endif
55 
60 #if defined(DSFMT_BIG_ENDIAN)
61 inline static int idxof(int i) {
62  return i ^ 1;
63 }
64 #else
65 inline static int idxof(int i) {
66  return i;
67 }
68 #endif
69 
77 #if defined(HAVE_ALTIVEC)
78 inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
79  w128_t *lung) {
80  const vector unsigned char sl1 = ALTI_SL1;
81  const vector unsigned char sl1_perm = ALTI_SL1_PERM;
82  const vector unsigned int sl1_msk = ALTI_SL1_MSK;
83  const vector unsigned char sr1 = ALTI_SR;
84  const vector unsigned char sr1_perm = ALTI_SR_PERM;
85  const vector unsigned int sr1_msk = ALTI_SR_MSK;
86  const vector unsigned char perm = ALTI_PERM;
87  const vector unsigned int msk1 = ALTI_MSK;
88  vector unsigned int w, x, y, z;
89 
90  z = a->s;
91  w = lung->s;
92  x = vec_perm(w, (vector unsigned int)perm, perm);
93  y = vec_perm(z, sl1_perm, sl1_perm);
94  y = vec_sll(y, sl1);
95  y = vec_and(y, sl1_msk);
96  w = vec_xor(x, b->s);
97  w = vec_xor(w, y);
98  x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm);
99  x = vec_srl(x, sr1);
100  x = vec_and(x, sr1_msk);
101  y = vec_and(w, msk1);
102  z = vec_xor(z, y);
103  r->s = vec_xor(z, x);
104  lung->s = w;
105 }
106 #elif defined(HAVE_SSE2)
107 
110 static void setup_const(void) {
111  static int first = 1;
112  if (!first) {
113  return;
114  }
115  sse2_param_mask = _mm_set_epi32(DSFMT_MSK32_3, DSFMT_MSK32_4,
116  DSFMT_MSK32_1, DSFMT_MSK32_2);
117  sse2_int_one = _mm_set_epi32(0, 1, 0, 1);
118  sse2_double_two = _mm_set_pd(2.0, 2.0);
119  sse2_double_m_one = _mm_set_pd(-1.0, -1.0);
120  first = 0;
121 }
122 
130 inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) {
131  __m128i v, w, x, y, z;
132 
133  x = a->si;
134  z = _mm_slli_epi64(x, DSFMT_SL1);
135  y = _mm_shuffle_epi32(u->si, SSE2_SHUFF);
136  z = _mm_xor_si128(z, b->si);
137  y = _mm_xor_si128(y, z);
138 
139  v = _mm_srli_epi64(y, DSFMT_SR);
140  w = _mm_and_si128(y, sse2_param_mask);
141  v = _mm_xor_si128(v, x);
142  v = _mm_xor_si128(v, w);
143  r->si = v;
144  u->si = y;
145 }
146 #else /* standard C */
147 
154 inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
155  w128_t *lung) {
156  uint64_t t0, t1, L0, L1;
157 
158  t0 = a->u[0];
159  t1 = a->u[1];
160  L0 = lung->u[0];
161  L1 = lung->u[1];
162  lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0];
163  lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1];
164  r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0;
165  r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1;
166 }
167 #endif
168 
169 #if defined(HAVE_SSE2)
170 
176 inline static void convert_c0o1(w128_t *w) {
177  w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
178 }
179 
186 inline static void convert_o0c1(w128_t *w) {
187  w->sd = _mm_sub_pd(sse2_double_two, w->sd);
188 }
189 
196 inline static void convert_o0o1(w128_t *w) {
197  w->si = _mm_or_si128(w->si, sse2_int_one);
198  w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
199 }
200 #else /* standard C and altivec */
201 
207 inline static void convert_c0o1(w128_t *w) {
208  w->d[0] -= 1.0;
209  w->d[1] -= 1.0;
210 }
211 
218 inline static void convert_o0c1(w128_t *w) {
219  w->d[0] = 2.0 - w->d[0];
220  w->d[1] = 2.0 - w->d[1];
221 }
222 
229 inline static void convert_o0o1(w128_t *w) {
230  w->u[0] |= 1;
231  w->u[1] |= 1;
232  w->d[0] -= 1.0;
233  w->d[1] -= 1.0;
234 }
235 #endif
236 
244 inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
245  int size) {
246  int i, j;
247  w128_t lung;
248 
249  lung = dsfmt->status[DSFMT_N];
250  do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
251  &lung);
252  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
253  do_recursion(&array[i], &dsfmt->status[i],
254  &dsfmt->status[i + DSFMT_POS1], &lung);
255  }
256  for (; i < DSFMT_N; i++) {
257  do_recursion(&array[i], &dsfmt->status[i],
258  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
259  }
260  for (; i < size - DSFMT_N; i++) {
261  do_recursion(&array[i], &array[i - DSFMT_N],
262  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
263  }
264  for (j = 0; j < 2 * DSFMT_N - size; j++) {
265  dsfmt->status[j] = array[j + size - DSFMT_N];
266  }
267  for (; i < size; i++, j++) {
268  do_recursion(&array[i], &array[i - DSFMT_N],
269  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
270  dsfmt->status[j] = array[i];
271  }
272  dsfmt->status[DSFMT_N] = lung;
273 }
274 
282 inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
283  int size) {
284  int i, j;
285  w128_t lung;
286 
287  lung = dsfmt->status[DSFMT_N];
288  do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
289  &lung);
290  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
291  do_recursion(&array[i], &dsfmt->status[i],
292  &dsfmt->status[i + DSFMT_POS1], &lung);
293  }
294  for (; i < DSFMT_N; i++) {
295  do_recursion(&array[i], &dsfmt->status[i],
296  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
297  }
298  for (; i < size - DSFMT_N; i++) {
299  do_recursion(&array[i], &array[i - DSFMT_N],
300  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
301  convert_c0o1(&array[i - DSFMT_N]);
302  }
303  for (j = 0; j < 2 * DSFMT_N - size; j++) {
304  dsfmt->status[j] = array[j + size - DSFMT_N];
305  }
306  for (; i < size; i++, j++) {
307  do_recursion(&array[i], &array[i - DSFMT_N],
308  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
309  dsfmt->status[j] = array[i];
310  convert_c0o1(&array[i - DSFMT_N]);
311  }
312  for (i = size - DSFMT_N; i < size; i++) {
313  convert_c0o1(&array[i]);
314  }
315  dsfmt->status[DSFMT_N] = lung;
316 }
317 
325 inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
326  int size) {
327  int i, j;
328  w128_t lung;
329 
330  lung = dsfmt->status[DSFMT_N];
331  do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
332  &lung);
333  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
334  do_recursion(&array[i], &dsfmt->status[i],
335  &dsfmt->status[i + DSFMT_POS1], &lung);
336  }
337  for (; i < DSFMT_N; i++) {
338  do_recursion(&array[i], &dsfmt->status[i],
339  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
340  }
341  for (; i < size - DSFMT_N; i++) {
342  do_recursion(&array[i], &array[i - DSFMT_N],
343  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
344  convert_o0o1(&array[i - DSFMT_N]);
345  }
346  for (j = 0; j < 2 * DSFMT_N - size; j++) {
347  dsfmt->status[j] = array[j + size - DSFMT_N];
348  }
349  for (; i < size; i++, j++) {
350  do_recursion(&array[i], &array[i - DSFMT_N],
351  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
352  dsfmt->status[j] = array[i];
353  convert_o0o1(&array[i - DSFMT_N]);
354  }
355  for (i = size - DSFMT_N; i < size; i++) {
356  convert_o0o1(&array[i]);
357  }
358  dsfmt->status[DSFMT_N] = lung;
359 }
360 
368 inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
369  int size) {
370  int i, j;
371  w128_t lung;
372 
373  lung = dsfmt->status[DSFMT_N];
374  do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
375  &lung);
376  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
377  do_recursion(&array[i], &dsfmt->status[i],
378  &dsfmt->status[i + DSFMT_POS1], &lung);
379  }
380  for (; i < DSFMT_N; i++) {
381  do_recursion(&array[i], &dsfmt->status[i],
382  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
383  }
384  for (; i < size - DSFMT_N; i++) {
385  do_recursion(&array[i], &array[i - DSFMT_N],
386  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
387  convert_o0c1(&array[i - DSFMT_N]);
388  }
389  for (j = 0; j < 2 * DSFMT_N - size; j++) {
390  dsfmt->status[j] = array[j + size - DSFMT_N];
391  }
392  for (; i < size; i++, j++) {
393  do_recursion(&array[i], &array[i - DSFMT_N],
394  &array[i + DSFMT_POS1 - DSFMT_N], &lung);
395  dsfmt->status[j] = array[i];
396  convert_o0c1(&array[i - DSFMT_N]);
397  }
398  for (i = size - DSFMT_N; i < size; i++) {
399  convert_o0c1(&array[i]);
400  }
401  dsfmt->status[DSFMT_N] = lung;
402 }
403 
410 static uint32_t ini_func1(uint32_t x) {
411  return (x ^ (x >> 27)) * (uint32_t)1664525UL;
412 }
413 
420 static uint32_t ini_func2(uint32_t x) {
421  return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
422 }
423 
429 static void initial_mask(dsfmt_t *dsfmt) {
430  int i;
431  uint64_t *psfmt;
432 
433  psfmt = &dsfmt->status[0].u[0];
434  for (i = 0; i < DSFMT_N * 2; i++) {
435  psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST;
436  }
437 }
438 
443 static void period_certification(dsfmt_t *dsfmt) {
444  uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2};
445  uint64_t tmp[2];
446  uint64_t inner;
447  int i;
448 #if (DSFMT_PCV2 & 1) != 1
449  int j;
450  uint64_t work;
451 #endif
452 
453  tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1);
454  tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2);
455 
456  inner = tmp[0] & pcv[0];
457  inner ^= tmp[1] & pcv[1];
458  for (i = 32; i > 0; i >>= 1) {
459  inner ^= inner >> i;
460  }
461  inner &= 1;
462  /* check OK */
463  if (inner == 1) {
464  return;
465  }
466  /* check NG, and modification */
467 #if (DSFMT_PCV2 & 1) == 1
468  dsfmt->status[DSFMT_N].u[1] ^= 1;
469 #else
470  for (i = 1; i >= 0; i--) {
471  work = 1;
472  for (j = 0; j < 64; j++) {
473  if ((work & pcv[i]) != 0) {
474  dsfmt->status[DSFMT_N].u[i] ^= work;
475  return;
476  }
477  work = work << 1;
478  }
479  }
480 #endif
481  return;
482 }
483 
484 /*----------------
485  PUBLIC FUNCTIONS
486  ----------------*/
492 const char *dsfmt_get_idstring(void) {
493  return DSFMT_IDSTR;
494 }
495 
502  return DSFMT_N64;
503 }
504 
510 void dsfmt_gen_rand_all(dsfmt_t *dsfmt) {
511  int i;
512  w128_t lung;
513 
514  lung = dsfmt->status[DSFMT_N];
515  do_recursion(&dsfmt->status[0], &dsfmt->status[0],
516  &dsfmt->status[DSFMT_POS1], &lung);
517  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
518  do_recursion(&dsfmt->status[i], &dsfmt->status[i],
519  &dsfmt->status[i + DSFMT_POS1], &lung);
520  }
521  for (; i < DSFMT_N; i++) {
522  do_recursion(&dsfmt->status[i], &dsfmt->status[i],
523  &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung);
524  }
525  dsfmt->status[DSFMT_N] = lung;
526 }
527 
556 void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) {
557  assert(size % 2 == 0);
558  assert(size >= DSFMT_N64);
559  gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2);
560 }
561 
574 void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) {
575  assert(size % 2 == 0);
576  assert(size >= DSFMT_N64);
577  gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2);
578 }
579 
592 void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) {
593  assert(size % 2 == 0);
594  assert(size >= DSFMT_N64);
595  gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2);
596 }
597 
610 void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) {
611  assert(size % 2 == 0);
612  assert(size >= DSFMT_N64);
613  gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2);
614 }
615 
616 #if defined(__INTEL_COMPILER)
617 # pragma warning(disable:981)
618 #endif
619 
626 void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) {
627  int i;
628  uint32_t *psfmt;
629 
630  /* make sure caller program is compiled with the same MEXP */
631  if (mexp != dsfmt_mexp) {
632  fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.cpp\n");
633  exitMPI(1);
634  }
635  psfmt = &dsfmt->status[0].u32[0];
636  psfmt[idxof(0)] = seed;
637  for (i = 1; i < (DSFMT_N + 1) * 4; i++) {
638  psfmt[idxof(i)] = 1812433253UL
639  * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i;
640  }
641  initial_mask(dsfmt);
642  period_certification(dsfmt);
643  dsfmt->idx = DSFMT_N64;
644 #if defined(HAVE_SSE2)
645  setup_const();
646 #endif
647 }
648 
657 void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[],
658  int key_length, int mexp) {
659  int i, j, count;
660  uint32_t r;
661  uint32_t *psfmt32;
662  int lag;
663  int mid;
664  int size = (DSFMT_N + 1) * 4; /* pulmonary */
665 
666  /* make sure caller program is compiled with the same MEXP */
667  if (mexp != dsfmt_mexp) {
668  fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.cpp\n");
669  exitMPI(1);
670  }
671  if (size >= 623) {
672  lag = 11;
673  } else if (size >= 68) {
674  lag = 7;
675  } else if (size >= 39) {
676  lag = 5;
677  } else {
678  lag = 3;
679  }
680  mid = (size - lag) / 2;
681 
682  psfmt32 = &dsfmt->status[0].u32[0];
683  memset(dsfmt->status, 0x8b, sizeof(dsfmt->status));
684  if (key_length + 1 > size) {
685  count = key_length + 1;
686  } else {
687  count = size;
688  }
689  r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)]
690  ^ psfmt32[idxof((size - 1) % size)]);
691  psfmt32[idxof(mid % size)] += r;
692  r += key_length;
693  psfmt32[idxof((mid + lag) % size)] += r;
694  psfmt32[idxof(0)] = r;
695  count--;
696  for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
697  r = ini_func1(psfmt32[idxof(i)]
698  ^ psfmt32[idxof((i + mid) % size)]
699  ^ psfmt32[idxof((i + size - 1) % size)]);
700  psfmt32[idxof((i + mid) % size)] += r;
701  r += init_key[j] + i;
702  psfmt32[idxof((i + mid + lag) % size)] += r;
703  psfmt32[idxof(i)] = r;
704  i = (i + 1) % size;
705  }
706  for (; j < count; j++) {
707  r = ini_func1(psfmt32[idxof(i)]
708  ^ psfmt32[idxof((i + mid) % size)]
709  ^ psfmt32[idxof((i + size - 1) % size)]);
710  psfmt32[idxof((i + mid) % size)] += r;
711  r += i;
712  psfmt32[idxof((i + mid + lag) % size)] += r;
713  psfmt32[idxof(i)] = r;
714  i = (i + 1) % size;
715  }
716  for (j = 0; j < size; j++) {
717  r = ini_func2(psfmt32[idxof(i)]
718  + psfmt32[idxof((i + mid) % size)]
719  + psfmt32[idxof((i + size - 1) % size)]);
720  psfmt32[idxof((i + mid) % size)] ^= r;
721  r -= i;
722  psfmt32[idxof((i + mid + lag) % size)] ^= r;
723  psfmt32[idxof(i)] = r;
724  i = (i + 1) % size;
725  }
726  initial_mask(dsfmt);
727  period_certification(dsfmt);
728  dsfmt->idx = DSFMT_N64;
729 #if defined(HAVE_SSE2)
730  setup_const();
731 #endif
732 }
733 #if defined(__INTEL_COMPILER)
734 # pragma warning(default:981)
735 #endif
static void period_certification(dsfmt_t *dsfmt)
Definition: dSFMT.cpp:443
void exitMPI(int errorcode)
MPI Abortation wrapper.
Definition: wrapperMPI.cpp:86
static void convert_o0c1(w128_t *w)
Definition: dSFMT.cpp:218
static const int dsfmt_mexp
Definition: dSFMT.cpp:23
void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], int key_length, int mexp)
Definition: dSFMT.cpp:657
dsfmt_t dsfmt_global_data
Definition: dSFMT.cpp:21
static void convert_c0o1(w128_t *w)
Definition: dSFMT.cpp:207
void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size)
Definition: dSFMT.cpp:556
void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp)
Definition: dSFMT.cpp:626
static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, int size)
Definition: dSFMT.cpp:368
void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size)
Definition: dSFMT.cpp:610
static uint32_t ini_func2(uint32_t x)
Definition: dSFMT.cpp:420
static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, int size)
Definition: dSFMT.cpp:325
void dsfmt_gen_rand_all(dsfmt_t *dsfmt)
Definition: dSFMT.cpp:510
static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, int size)
Definition: dSFMT.cpp:244
static uint32_t ini_func1(uint32_t x)
Definition: dSFMT.cpp:410
static void convert_o0o1(w128_t *w)
Definition: dSFMT.cpp:229
const char * dsfmt_get_idstring(void)
Definition: dSFMT.cpp:492
static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *lung)
Definition: dSFMT.cpp:154
static int idxof(int i)
Definition: dSFMT.cpp:65
int dsfmt_get_min_array_size(void)
Definition: dSFMT.cpp:501
void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size)
Definition: dSFMT.cpp:574
static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, int size)
Definition: dSFMT.cpp:282
static void initial_mask(dsfmt_t *dsfmt)
Definition: dSFMT.cpp:429
void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size)
Definition: dSFMT.cpp:592