00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #include <limits.h>
00041 #include <float.h>
00042 #include <stdlib.h>
00043 #include <stdio.h>
00044 #include <math.h>
00045
00046 #if ((defined SSE)||(defined SSE2))
00047
00048 typedef struct
00049 {
00050 float c1,c2,c3,c4;
00051 } vec_t __attribute__ ((aligned (16)));
00052
00053 typedef struct
00054 {
00055 vec_t c1,c2;
00056 } dble_vec_t __attribute__ ((aligned (16)));
00057
00058 static int init=0,pr,prm,ir,jr,is,is_old,next[96];
00059 static vec_t one,one_bit,carry;
00060
00061 static union
00062 {
00063 dble_vec_t vec[12];
00064 float num[96];
00065 } x __attribute__ ((aligned (16)));
00066
00067 #define STEP(pi,pj) \
00068 __asm__ __volatile__ ("movaps %2, %%xmm4 \n\t" \
00069 "movaps %%xmm2, %%xmm3 \n\t" \
00070 "subps %0, %%xmm4 \n\t" \
00071 "movaps %%xmm1, %%xmm5 \n\t" \
00072 "cmpps $0x6, %%xmm4, %%xmm2 \n\t" \
00073 "andps %%xmm2, %%xmm5 \n\t" \
00074 "subps %%xmm3, %%xmm4 \n\t" \
00075 "andps %%xmm0, %%xmm2 \n\t" \
00076 "addps %%xmm4, %%xmm5 \n\t" \
00077 "movaps %%xmm5, %0 \n\t" \
00078 "movaps %3, %%xmm6 \n\t" \
00079 "movaps %%xmm2, %%xmm3 \n\t" \
00080 "subps %1, %%xmm6 \n\t" \
00081 "movaps %%xmm1, %%xmm7 \n\t" \
00082 "cmpps $0x6, %%xmm6, %%xmm2 \n\t" \
00083 "andps %%xmm2, %%xmm7 \n\t" \
00084 "subps %%xmm3, %%xmm6 \n\t" \
00085 "andps %%xmm0, %%xmm2 \n\t" \
00086 "addps %%xmm6, %%xmm7 \n\t" \
00087 "movaps %%xmm7, %1" \
00088 : \
00089 "+m" ((*pi).c1), \
00090 "+m" ((*pi).c2) \
00091 : \
00092 "m" ((*pj).c1), \
00093 "m" ((*pj).c2))
00094
00095
00096 static void error(int no)
00097 {
00098 switch(no)
00099 {
00100 case 1:
00101 printf("Error in subroutine rlxd_init\n");
00102 printf("Bad choice of luxury level (should be 1 or 2)\n");
00103 break;
00104 case 2:
00105 printf("Error in subroutine rlxd_init\n");
00106 printf("Bad choice of seed (should be between 1 and 2^31-1)\n");
00107 break;
00108 case 3:
00109 printf("Error in rlxd_get\n");
00110 printf("Undefined state (ranlxd is not initialized\n");
00111 break;
00112 case 5:
00113 printf("Error in rlxd_reset\n");
00114 printf("Unexpected input data\n");
00115 break;
00116 }
00117 printf("Program aborted\n");
00118 exit(0);
00119 }
00120
00121
00122 static void update()
00123 {
00124 int k,kmax;
00125 dble_vec_t *pmin,*pmax,*pi,*pj;
00126
00127 kmax=pr;
00128 pmin=&x.vec[0];
00129 pmax=pmin+12;
00130 pi=&x.vec[ir];
00131 pj=&x.vec[jr];
00132
00133 __asm__ __volatile__ ("movaps %0, %%xmm0 \n\t"
00134 "movaps %1, %%xmm1 \n\t"
00135 "movaps %2, %%xmm2"
00136 :
00137 :
00138 "m" (one_bit),
00139 "m" (one),
00140 "m" (carry));
00141
00142 for (k=0;k<kmax;k++)
00143 {
00144 STEP(pi,pj);
00145 pi+=1;
00146 pj+=1;
00147 if (pi==pmax)
00148 pi=pmin;
00149 if (pj==pmax)
00150 pj=pmin;
00151 }
00152
00153 __asm__ __volatile__ ("movaps %%xmm2, %0"
00154 :
00155 :
00156 "m" (carry));
00157
00158 ir+=prm;
00159 jr+=prm;
00160 if (ir>=12)
00161 ir-=12;
00162 if (jr>=12)
00163 jr-=12;
00164 is=8*ir;
00165 is_old=is;
00166 }
00167
00168
00169 static void define_constants()
00170 {
00171 int k;
00172 float b;
00173
00174 one.c1=1.0f;
00175 one.c2=1.0f;
00176 one.c3=1.0f;
00177 one.c4=1.0f;
00178
00179 b=(float)(ldexp(1.0,-24));
00180 one_bit.c1=b;
00181 one_bit.c2=b;
00182 one_bit.c3=b;
00183 one_bit.c4=b;
00184
00185 for (k=0;k<96;k++)
00186 {
00187 next[k]=(k+1)%96;
00188 if ((k%4)==3)
00189 next[k]=(k+5)%96;
00190 }
00191 }
00192
00193
00194 void rlxd_init(int level,int seed)
00195 {
00196 int i,k,l;
00197 int ibit,jbit,xbit[31];
00198 int ix,iy;
00199
00200 define_constants();
00201
00202 if (level==1)
00203 pr=202;
00204 else if (level==2)
00205 pr=397;
00206 else
00207 error(1);
00208
00209 i=seed;
00210
00211 for (k=0;k<31;k++)
00212 {
00213 xbit[k]=i%2;
00214 i/=2;
00215 }
00216
00217 if ((seed<=0)||(i!=0))
00218 error(2);
00219
00220 ibit=0;
00221 jbit=18;
00222
00223 for (i=0;i<4;i++)
00224 {
00225 for (k=0;k<24;k++)
00226 {
00227 ix=0;
00228
00229 for (l=0;l<24;l++)
00230 {
00231 iy=xbit[ibit];
00232 ix=2*ix+iy;
00233
00234 xbit[ibit]=(xbit[ibit]+xbit[jbit])%2;
00235 ibit=(ibit+1)%31;
00236 jbit=(jbit+1)%31;
00237 }
00238
00239 if ((k%4)!=i)
00240 ix=16777215-ix;
00241
00242 x.num[4*k+i]=(float)(ldexp((double)(ix),-24));
00243 }
00244 }
00245
00246 carry.c1=0.0f;
00247 carry.c2=0.0f;
00248 carry.c3=0.0f;
00249 carry.c4=0.0f;
00250
00251 ir=0;
00252 jr=7;
00253 is=91;
00254 is_old=0;
00255 prm=pr%12;
00256 init=1;
00257 }
00258
00259
00260 void ranlxd(double r[],int n)
00261 {
00262 int k;
00263
00264 if (init==0)
00265 rlxd_init(1,1);
00266
00267 for (k=0;k<n;k++)
00268 {
00269 is=next[is];
00270 if (is==is_old)
00271 update();
00272 r[k]=(double)(x.num[is+4])+(double)(one_bit.c1*x.num[is]);
00273 }
00274 }
00275
00276
00277 int rlxd_size(void)
00278 {
00279 return(105);
00280 }
00281
00282
00283 void rlxd_get(int state[])
00284 {
00285 int k;
00286 float base;
00287
00288 if (init==0)
00289 error(3);
00290
00291 base=(float)(ldexp(1.0,24));
00292 state[0]=rlxd_size();
00293
00294 for (k=0;k<96;k++)
00295 state[k+1]=(int)(base*x.num[k]);
00296
00297 state[97]=(int)(base*carry.c1);
00298 state[98]=(int)(base*carry.c2);
00299 state[99]=(int)(base*carry.c3);
00300 state[100]=(int)(base*carry.c4);
00301
00302 state[101]=pr;
00303 state[102]=ir;
00304 state[103]=jr;
00305 state[104]=is;
00306 }
00307
00308
00309 void rlxd_reset(int state[])
00310 {
00311 int k;
00312
00313 define_constants();
00314
00315 if (state[0]!=rlxd_size())
00316 error(5);
00317
00318 for (k=0;k<96;k++)
00319 {
00320 if ((state[k+1]<0)||(state[k+1]>=167777216))
00321 error(5);
00322
00323 x.num[k]=(float)(ldexp((double)(state[k+1]),-24));
00324 }
00325
00326 if (((state[97]!=0)&&(state[97]!=1))||
00327 ((state[98]!=0)&&(state[98]!=1))||
00328 ((state[99]!=0)&&(state[99]!=1))||
00329 ((state[100]!=0)&&(state[100]!=1)))
00330 error(5);
00331
00332 carry.c1=(float)(ldexp((double)(state[97]),-24));
00333 carry.c2=(float)(ldexp((double)(state[98]),-24));
00334 carry.c3=(float)(ldexp((double)(state[99]),-24));
00335 carry.c4=(float)(ldexp((double)(state[100]),-24));
00336
00337 pr=state[101];
00338 ir=state[102];
00339 jr=state[103];
00340 is=state[104];
00341 is_old=8*ir;
00342 prm=pr%12;
00343 init=1;
00344
00345 if (((pr!=202)&&(pr!=397))||
00346 (ir<0)||(ir>11)||(jr<0)||(jr>11)||(jr!=((ir+7)%12))||
00347 (is<0)||(is>91))
00348 error(5);
00349 }
00350
00351 #else
00352
00353 #define BASE 0x1000000
00354 #define MASK 0xffffff
00355
00356 typedef struct
00357 {
00358 int c1,c2,c3,c4;
00359 } vec_t;
00360
00361 typedef struct
00362 {
00363 vec_t c1,c2;
00364 } dble_vec_t;
00365
00366 static int init=0,pr,prm,ir,jr,is,is_old,next[96];
00367 static double one_bit;
00368 static vec_t carry;
00369
00370 static union
00371 {
00372 dble_vec_t vec[12];
00373 int num[96];
00374 } x;
00375
00376 #define STEP(pi,pj) \
00377 d=(*pj).c1.c1-(*pi).c1.c1-carry.c1; \
00378 (*pi).c2.c1+=(d<0); \
00379 d+=BASE; \
00380 (*pi).c1.c1=d&MASK; \
00381 d=(*pj).c1.c2-(*pi).c1.c2-carry.c2; \
00382 (*pi).c2.c2+=(d<0); \
00383 d+=BASE; \
00384 (*pi).c1.c2=d&MASK; \
00385 d=(*pj).c1.c3-(*pi).c1.c3-carry.c3; \
00386 (*pi).c2.c3+=(d<0); \
00387 d+=BASE; \
00388 (*pi).c1.c3=d&MASK; \
00389 d=(*pj).c1.c4-(*pi).c1.c4-carry.c4; \
00390 (*pi).c2.c4+=(d<0); \
00391 d+=BASE; \
00392 (*pi).c1.c4=d&MASK; \
00393 d=(*pj).c2.c1-(*pi).c2.c1; \
00394 carry.c1=(d<0); \
00395 d+=BASE; \
00396 (*pi).c2.c1=d&MASK; \
00397 d=(*pj).c2.c2-(*pi).c2.c2; \
00398 carry.c2=(d<0); \
00399 d+=BASE; \
00400 (*pi).c2.c2=d&MASK; \
00401 d=(*pj).c2.c3-(*pi).c2.c3; \
00402 carry.c3=(d<0); \
00403 d+=BASE; \
00404 (*pi).c2.c3=d&MASK; \
00405 d=(*pj).c2.c4-(*pi).c2.c4; \
00406 carry.c4=(d<0); \
00407 d+=BASE; \
00408 (*pi).c2.c4=d&MASK
00409
00410
00411 static void error(int no)
00412 {
00413 switch(no)
00414 {
00415 case 0:
00416 printf("Error in rlxd_init\n");
00417 printf("Arithmetic on this machine is not suitable for ranlxd\n");
00418 break;
00419 case 1:
00420 printf("Error in subroutine rlxd_init\n");
00421 printf("Bad choice of luxury level (should be 1 or 2)\n");
00422 break;
00423 case 2:
00424 printf("Error in subroutine rlxd_init\n");
00425 printf("Bad choice of seed (should be between 1 and 2^31-1)\n");
00426 break;
00427 case 3:
00428 printf("Error in rlxd_get\n");
00429 printf("Undefined state (ranlxd is not initialized)\n");
00430 break;
00431 case 4:
00432 printf("Error in rlxd_reset\n");
00433 printf("Arithmetic on this machine is not suitable for ranlxd\n");
00434 break;
00435 case 5:
00436 printf("Error in rlxd_reset\n");
00437 printf("Unexpected input data\n");
00438 break;
00439 }
00440 printf("Program aborted\n");
00441 exit(0);
00442 }
00443
00444
00445 static void update()
00446 {
00447 int k,kmax,d;
00448 dble_vec_t *pmin,*pmax,*pi,*pj;
00449
00450 kmax=pr;
00451 pmin=&x.vec[0];
00452 pmax=pmin+12;
00453 pi=&x.vec[ir];
00454 pj=&x.vec[jr];
00455
00456 for (k=0;k<kmax;k++)
00457 {
00458 STEP(pi,pj);
00459 pi+=1;
00460 pj+=1;
00461 if (pi==pmax)
00462 pi=pmin;
00463 if (pj==pmax)
00464 pj=pmin;
00465 }
00466
00467 ir+=prm;
00468 jr+=prm;
00469 if (ir>=12)
00470 ir-=12;
00471 if (jr>=12)
00472 jr-=12;
00473 is=8*ir;
00474 is_old=is;
00475 }
00476
00477
00478 static void define_constants()
00479 {
00480 int k;
00481
00482 one_bit=ldexp(1.0,-24);
00483
00484 for (k=0;k<96;k++)
00485 {
00486 next[k]=(k+1)%96;
00487 if ((k%4)==3)
00488 next[k]=(k+5)%96;
00489 }
00490 }
00491
00492
00493 void rlxd_init(int level,int seed)
00494 {
00495 int i,k,l;
00496 int ibit,jbit,xbit[31];
00497 int ix,iy;
00498
00499 if ((INT_MAX<2147483647)||(FLT_RADIX!=2)||(FLT_MANT_DIG<24)||
00500 (DBL_MANT_DIG<48))
00501 error(0);
00502
00503 define_constants();
00504
00505 if (level==1)
00506 pr=202;
00507 else if (level==2)
00508 pr=397;
00509 else
00510 error(1);
00511
00512 i=seed;
00513
00514 for (k=0;k<31;k++)
00515 {
00516 xbit[k]=i%2;
00517 i/=2;
00518 }
00519
00520 if ((seed<=0)||(i!=0))
00521 error(2);
00522
00523 ibit=0;
00524 jbit=18;
00525
00526 for (i=0;i<4;i++)
00527 {
00528 for (k=0;k<24;k++)
00529 {
00530 ix=0;
00531
00532 for (l=0;l<24;l++)
00533 {
00534 iy=xbit[ibit];
00535 ix=2*ix+iy;
00536
00537 xbit[ibit]=(xbit[ibit]+xbit[jbit])%2;
00538 ibit=(ibit+1)%31;
00539 jbit=(jbit+1)%31;
00540 }
00541
00542 if ((k%4)!=i)
00543 ix=16777215-ix;
00544
00545 x.num[4*k+i]=ix;
00546 }
00547 }
00548
00549 carry.c1=0;
00550 carry.c2=0;
00551 carry.c3=0;
00552 carry.c4=0;
00553
00554 ir=0;
00555 jr=7;
00556 is=91;
00557 is_old=0;
00558 prm=pr%12;
00559 init=1;
00560 }
00561
00562
00563 void ranlxd(double r[],int n)
00564 {
00565 int k;
00566
00567 if (init==0)
00568 rlxd_init(0,1);
00569
00570 for (k=0;k<n;k++)
00571 {
00572 is=next[is];
00573 if (is==is_old)
00574 update();
00575 r[k]=one_bit*((double)(x.num[is+4])+one_bit*(double)(x.num[is]));
00576 }
00577 }
00578
00579
00580 int rlxd_size(void)
00581 {
00582 return(105);
00583 }
00584
00585
00586 void rlxd_get(int state[])
00587 {
00588 int k;
00589
00590 if (init==0)
00591 error(3);
00592
00593 state[0]=rlxd_size();
00594
00595 for (k=0;k<96;k++)
00596 state[k+1]=x.num[k];
00597
00598 state[97]=carry.c1;
00599 state[98]=carry.c2;
00600 state[99]=carry.c3;
00601 state[100]=carry.c4;
00602
00603 state[101]=pr;
00604 state[102]=ir;
00605 state[103]=jr;
00606 state[104]=is;
00607 }
00608
00609
00610 void rlxd_reset(int state[])
00611 {
00612 int k;
00613
00614 if ((INT_MAX<2147483647)||(FLT_RADIX!=2)||(FLT_MANT_DIG<24)||
00615 (DBL_MANT_DIG<48))
00616 error(4);
00617
00618 define_constants();
00619
00620 if (state[0]!=rlxd_size())
00621 error(5);
00622
00623 for (k=0;k<96;k++)
00624 {
00625 if ((state[k+1]<0)||(state[k+1]>=167777216))
00626 error(5);
00627
00628 x.num[k]=state[k+1];
00629 }
00630
00631 if (((state[97]!=0)&&(state[97]!=1))||
00632 ((state[98]!=0)&&(state[98]!=1))||
00633 ((state[99]!=0)&&(state[99]!=1))||
00634 ((state[100]!=0)&&(state[100]!=1)))
00635 error(5);
00636
00637 carry.c1=state[97];
00638 carry.c2=state[98];
00639 carry.c3=state[99];
00640 carry.c4=state[100];
00641
00642 pr=state[101];
00643 ir=state[102];
00644 jr=state[103];
00645 is=state[104];
00646 is_old=8*ir;
00647 prm=pr%12;
00648 init=1;
00649
00650 if (((pr!=202)&&(pr!=397))||
00651 (ir<0)||(ir>11)||(jr<0)||(jr>11)||(jr!=((ir+7)%12))||
00652 (is<0)||(is>91))
00653 error(5);
00654 }
00655
00656 #endif
00657