17 #include "dSFMT-params.hpp" 18 #include "wrapperMPI.hpp" 28 inline static uint32_t
ini_func1(uint32_t x);
29 inline static uint32_t
ini_func2(uint32_t x);
38 inline static int idxof(
int i);
42 #if defined(HAVE_SSE2) 43 # include <emmintrin.hpp> 45 static __m128i sse2_param_mask;
47 static __m128i sse2_int_one;
49 static __m128d sse2_double_two;
51 static __m128d sse2_double_m_one;
53 static void setup_const(
void);
60 #if defined(DSFMT_BIG_ENDIAN) 61 inline static int idxof(
int i) {
65 inline static int idxof(
int i) {
77 #if defined(HAVE_ALTIVEC) 78 inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
80 const vector
unsigned char sl1 = ALTI_SL1;
81 const vector
unsigned char sl1_perm = ALTI_SL1_PERM;
82 const vector
unsigned int sl1_msk = ALTI_SL1_MSK;
83 const vector
unsigned char sr1 = ALTI_SR;
84 const vector
unsigned char sr1_perm = ALTI_SR_PERM;
85 const vector
unsigned int sr1_msk = ALTI_SR_MSK;
86 const vector
unsigned char perm = ALTI_PERM;
87 const vector
unsigned int msk1 = ALTI_MSK;
88 vector
unsigned int w, x, y, z;
92 x = vec_perm(w, (vector
unsigned int)perm, perm);
93 y = vec_perm(z, sl1_perm, sl1_perm);
95 y = vec_and(y, sl1_msk);
98 x = vec_perm(w, (vector
unsigned int)sr1_perm, sr1_perm);
100 x = vec_and(x, sr1_msk);
101 y = vec_and(w, msk1);
103 r->s = vec_xor(z, x);
106 #elif defined(HAVE_SSE2) 110 static void setup_const(
void) {
111 static int first = 1;
115 sse2_param_mask = _mm_set_epi32(DSFMT_MSK32_3, DSFMT_MSK32_4,
116 DSFMT_MSK32_1, DSFMT_MSK32_2);
117 sse2_int_one = _mm_set_epi32(0, 1, 0, 1);
118 sse2_double_two = _mm_set_pd(2.0, 2.0);
119 sse2_double_m_one = _mm_set_pd(-1.0, -1.0);
130 inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) {
131 __m128i v, w, x, y, z;
134 z = _mm_slli_epi64(x, DSFMT_SL1);
135 y = _mm_shuffle_epi32(u->si, SSE2_SHUFF);
136 z = _mm_xor_si128(z, b->si);
137 y = _mm_xor_si128(y, z);
139 v = _mm_srli_epi64(y, DSFMT_SR);
140 w = _mm_and_si128(y, sse2_param_mask);
141 v = _mm_xor_si128(v, x);
142 v = _mm_xor_si128(v, w);
156 uint64_t t0, t1, L0, L1;
162 lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0];
163 lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1];
164 r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0;
165 r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1;
169 #if defined(HAVE_SSE2) 177 w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
187 w->sd = _mm_sub_pd(sse2_double_two, w->sd);
197 w->si = _mm_or_si128(w->si, sse2_int_one);
198 w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
219 w->d[0] = 2.0 - w->d[0];
220 w->d[1] = 2.0 - w->d[1];
249 lung = dsfmt->status[DSFMT_N];
250 do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
252 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
254 &dsfmt->status[i + DSFMT_POS1], &lung);
256 for (; i < DSFMT_N; i++) {
258 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
260 for (; i < size - DSFMT_N; i++) {
262 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
264 for (j = 0; j < 2 * DSFMT_N - size; j++) {
265 dsfmt->status[j] = array[j + size - DSFMT_N];
267 for (; i < size; i++, j++) {
269 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
270 dsfmt->status[j] = array[i];
272 dsfmt->status[DSFMT_N] = lung;
287 lung = dsfmt->status[DSFMT_N];
288 do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
290 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
292 &dsfmt->status[i + DSFMT_POS1], &lung);
294 for (; i < DSFMT_N; i++) {
296 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
298 for (; i < size - DSFMT_N; i++) {
300 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
303 for (j = 0; j < 2 * DSFMT_N - size; j++) {
304 dsfmt->status[j] = array[j + size - DSFMT_N];
306 for (; i < size; i++, j++) {
308 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
309 dsfmt->status[j] = array[i];
312 for (i = size - DSFMT_N; i < size; i++) {
315 dsfmt->status[DSFMT_N] = lung;
330 lung = dsfmt->status[DSFMT_N];
331 do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
333 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
335 &dsfmt->status[i + DSFMT_POS1], &lung);
337 for (; i < DSFMT_N; i++) {
339 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
341 for (; i < size - DSFMT_N; i++) {
343 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
346 for (j = 0; j < 2 * DSFMT_N - size; j++) {
347 dsfmt->status[j] = array[j + size - DSFMT_N];
349 for (; i < size; i++, j++) {
351 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
352 dsfmt->status[j] = array[i];
355 for (i = size - DSFMT_N; i < size; i++) {
358 dsfmt->status[DSFMT_N] = lung;
373 lung = dsfmt->status[DSFMT_N];
374 do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
376 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
378 &dsfmt->status[i + DSFMT_POS1], &lung);
380 for (; i < DSFMT_N; i++) {
382 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
384 for (; i < size - DSFMT_N; i++) {
386 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
389 for (j = 0; j < 2 * DSFMT_N - size; j++) {
390 dsfmt->status[j] = array[j + size - DSFMT_N];
392 for (; i < size; i++, j++) {
394 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
395 dsfmt->status[j] = array[i];
398 for (i = size - DSFMT_N; i < size; i++) {
401 dsfmt->status[DSFMT_N] = lung;
411 return (x ^ (x >> 27)) * (uint32_t)1664525UL;
421 return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
433 psfmt = &dsfmt->status[0].u[0];
434 for (i = 0; i < DSFMT_N * 2; i++) {
435 psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST;
444 uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2};
448 #if (DSFMT_PCV2 & 1) != 1 453 tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1);
454 tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2);
456 inner = tmp[0] & pcv[0];
457 inner ^= tmp[1] & pcv[1];
458 for (i = 32; i > 0; i >>= 1) {
467 #if (DSFMT_PCV2 & 1) == 1 468 dsfmt->status[DSFMT_N].u[1] ^= 1;
470 for (i = 1; i >= 0; i--) {
472 for (j = 0; j < 64; j++) {
473 if ((work & pcv[i]) != 0) {
474 dsfmt->status[DSFMT_N].u[i] ^= work;
514 lung = dsfmt->status[DSFMT_N];
516 &dsfmt->status[DSFMT_POS1], &lung);
517 for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
519 &dsfmt->status[i + DSFMT_POS1], &lung);
521 for (; i < DSFMT_N; i++) {
523 &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung);
525 dsfmt->status[DSFMT_N] = lung;
557 assert(size % 2 == 0);
558 assert(size >= DSFMT_N64);
575 assert(size % 2 == 0);
576 assert(size >= DSFMT_N64);
593 assert(size % 2 == 0);
594 assert(size >= DSFMT_N64);
611 assert(size % 2 == 0);
612 assert(size >= DSFMT_N64);
616 #if defined(__INTEL_COMPILER) 617 # pragma warning(disable:981) 632 fprintf(stderr,
"DSFMT_MEXP doesn't match with dSFMT.cpp\n");
635 psfmt = &dsfmt->status[0].u32[0];
636 psfmt[
idxof(0)] = seed;
637 for (i = 1; i < (DSFMT_N + 1) * 4; i++) {
638 psfmt[
idxof(i)] = 1812433253UL
639 * (psfmt[
idxof(i - 1)] ^ (psfmt[
idxof(i - 1)] >> 30)) + i;
643 dsfmt->idx = DSFMT_N64;
644 #if defined(HAVE_SSE2) 658 int key_length,
int mexp) {
664 int size = (DSFMT_N + 1) * 4;
668 fprintf(stderr,
"DSFMT_MEXP doesn't match with dSFMT.cpp\n");
673 }
else if (size >= 68) {
675 }
else if (size >= 39) {
680 mid = (size - lag) / 2;
682 psfmt32 = &dsfmt->status[0].u32[0];
683 memset(dsfmt->status, 0x8b,
sizeof(dsfmt->status));
684 if (key_length + 1 > size) {
685 count = key_length + 1;
690 ^ psfmt32[
idxof((size - 1) % size)]);
691 psfmt32[
idxof(mid % size)] += r;
693 psfmt32[
idxof((mid + lag) % size)] += r;
694 psfmt32[
idxof(0)] = r;
696 for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
698 ^ psfmt32[
idxof((i + mid) % size)]
699 ^ psfmt32[
idxof((i + size - 1) % size)]);
700 psfmt32[
idxof((i + mid) % size)] += r;
701 r += init_key[j] + i;
702 psfmt32[
idxof((i + mid + lag) % size)] += r;
703 psfmt32[
idxof(i)] = r;
706 for (; j < count; j++) {
708 ^ psfmt32[
idxof((i + mid) % size)]
709 ^ psfmt32[
idxof((i + size - 1) % size)]);
710 psfmt32[
idxof((i + mid) % size)] += r;
712 psfmt32[
idxof((i + mid + lag) % size)] += r;
713 psfmt32[
idxof(i)] = r;
716 for (j = 0; j < size; j++) {
718 + psfmt32[
idxof((i + mid) % size)]
719 + psfmt32[
idxof((i + size - 1) % size)]);
720 psfmt32[
idxof((i + mid) % size)] ^= r;
722 psfmt32[
idxof((i + mid + lag) % size)] ^= r;
723 psfmt32[
idxof(i)] = r;
728 dsfmt->idx = DSFMT_N64;
729 #if defined(HAVE_SSE2) 733 #if defined(__INTEL_COMPILER) 734 # pragma warning(default:981) static void period_certification(dsfmt_t *dsfmt)
void exitMPI(int errorcode)
MPI Abortation wrapper.
static void convert_o0c1(w128_t *w)
static const int dsfmt_mexp
void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], int key_length, int mexp)
dsfmt_t dsfmt_global_data
static void convert_c0o1(w128_t *w)
void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size)
void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp)
static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, int size)
void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size)
static uint32_t ini_func2(uint32_t x)
static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, int size)
void dsfmt_gen_rand_all(dsfmt_t *dsfmt)
static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, int size)
static uint32_t ini_func1(uint32_t x)
static void convert_o0o1(w128_t *w)
const char * dsfmt_get_idstring(void)
static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *lung)
int dsfmt_get_min_array_size(void)
void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size)
static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, int size)
static void initial_mask(dsfmt_t *dsfmt)
void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size)