Borqueror: tryrand3.cpp Source File

00001 #define WANT_STREAM
00002 #define WANT_MATH
00003 #include "include.h"
00004 #include "newran.h"
00005 
00006 #ifdef use_namespace
00007 using namespace NEWRAN;
00008 #endif
00009 
00010 void SortAscending(Real* data, int max);
00011 Real KS(Real* data, int n);
00012 Real NormalDF(Real x);
00013 double invchi95(int N);
00014 double invchi99(int N);
00015 void ChiSquaredTest(int* Observed, Real* Prob, int N, int n);
00016 void TestBinomial(int N, Real p, int n);
00017 void TestPoisson(Real mu, int n);
00018 void TestNegativeBinomial(Real NX, Real p, int n);
00019 void TestDiscreteGen(int N, Real* prob, int n);
00020 
00021 inline Real square(Real x) { return x*x; }
00022 inline Real cube(Real x) { return x*x*x; }
00023 
00024 
00025 void test3(int n)
00026 {
00027    cout << endl;
00028 
00029    // Do chi-squared tests to discrete data
00030    cout << "ChiSquared tests" << endl;
00031 
00032    {
00033       Real p[] = { 0.05, 0.10, 0.05, 0.5, 0.01, 0.01, 0.03, 0.20, 0.05 };
00034       TestDiscreteGen(9, p, n);
00035    }
00036 
00037    {
00038       Real p[] = { 0.4, 0.2, 0.1, 0.05, 0.025, 0.0125, 0.00625, 0.00625, 0.2 };
00039       TestDiscreteGen(9, p, n);
00040    }
00041 
00042 
00043    TestNegativeBinomial(200.3, 0.05, n);
00044    TestNegativeBinomial(150.3, 0.15, n);
00045    TestNegativeBinomial(100.8, 0.18, n);
00046    TestNegativeBinomial(100.8, 1.22, n);
00047    TestNegativeBinomial(100.8, 9.0, n);
00048    TestNegativeBinomial(10.5, 0.18, n);
00049    TestNegativeBinomial(10.5, 1.22, n);
00050    TestNegativeBinomial(10.5, 9.0, n);
00051    TestNegativeBinomial(0.35, 0.18, n);
00052    TestNegativeBinomial(0.35, 1.22, n);
00053    TestNegativeBinomial(0.35, 9.0, n);
00054 
00055    TestBinomial(100, 0.45, n);
00056    TestBinomial(100, 0.25, n);
00057    TestBinomial(100, 0.02, n);
00058    TestBinomial(100, 0.01, n);
00059    TestBinomial(49, 0.60, n);
00060    TestBinomial(21, 0.70, n);
00061    TestBinomial(10, 0.90, n);
00062    TestBinomial(10, 0.25, n);
00063    TestBinomial(10, 0.10, n);
00064 
00065    TestPoisson(0.75, n);
00066    TestPoisson(4.3, n);
00067    TestPoisson(10, n);
00068    TestPoisson(100, n);
00069 
00070    Real* data = new Real[n];
00071    if (!data) Throw(Bad_alloc());
00072 
00073 // Apply KS test to a variety of continuous distributions
00074 //    - use cdf transform to convert to uniform
00075 
00076    cout << endl;
00077    cout << "Kolmogorov-Smirnoff tests" << endl;
00078    cout << "25%, 5%, 1%, .1% upper points are 1.019, 1.358, 1.628, 1.950"
00079       << endl;
00080    cout << "5% lower point is 0.520" << endl;
00081 
00082    {
00083       ChiSq X(1, 1.44);
00084       for (int i = 0; i < n; i++)
00085       {
00086          Real x = sqrt(X.Next());
00087          data[i] = NormalDF(x - 1.2) - NormalDF(-x - 1.2);
00088       }
00089       cout << X.Name() << ":   "  << KS(data, n) << endl;
00090    }
00091 
00092    {
00093       ChiSq X(4);
00094       for (int i = 0; i < n; i++)
00095          { Real x = 0.5 * X.Next(); data[i] = (1+x)*exp(-x); }
00096       cout << X.Name() << ":   "  << KS(data, n) << endl;
00097    }
00098 
00099    {
00100       ChiSq X(2);
00101       for (int i = 0; i < n; i++) data[i] = exp(-0.5 * X.Next());
00102       cout << X.Name() << ":   "  << KS(data, n) << endl;
00103    }
00104 
00105    {
00106       Pareto X(0.5);
00107       for (int i = 0; i < n; i++)
00108          { Real x = X.Next(); data[i] = 1.0 / sqrt(x); }
00109       cout << X.Name() << ":   "  << KS(data, n) << endl;
00110    }
00111 
00112    {
00113       Pareto X(1.5);
00114       for (int i = 0; i < n; i++)
00115          { Real x = X.Next(); data[i] = 1.0 / (x * sqrt(x)); }
00116       cout << X.Name() << ":   "  << KS(data, n) << endl;
00117    }
00118 
00119    {
00120       Normal X;
00121       for (int i = 0; i < n; i++)
00122          { Real x = X.Next(); data[i] = NormalDF(x); }
00123       cout << X.Name() << ":   "  << KS(data, n) << endl;
00124    }
00125 
00126    {
00127       Normal N; SumRandom X = 10 + 5 * N;
00128       for (int i = 0; i < n; i++)
00129          { Real x = X.Next(); data[i] = NormalDF((x-10)/5); }
00130       cout << X.Name() << ":   "  << KS(data, n) << endl;
00131    }
00132 
00133    {
00134       Normal N; Cauchy C; MixedRandom X = N(0.9) + C(0.1);
00135       for (int i = 0; i < n; i++)
00136       {
00137          Real x = X.Next();
00138          data[i] = 0.9*NormalDF(x)+0.1*(atan(x)/3.141592654 + 0.5);
00139       }
00140       cout << X.Name() << ":   "  << KS(data, n) << endl;
00141    }
00142 
00143    {
00144       Normal N; MixedRandom X = N(0.9) + (10*N)(0.1);
00145       for (int i = 0; i < n; i++)
00146       {
00147          Real x = X.Next();
00148          data[i] = 0.9*NormalDF(x)+0.1*NormalDF(x/10);
00149       }
00150       cout << X.Name() << ":   "  << KS(data, n) << endl;
00151    }
00152 
00153    {
00154       Normal  X0; SumRandom X = X0 * 0.6 + X0 * 0.8;
00155       for (int i = 0; i < n; i++)
00156          { Real x = X.Next(); data[i] = NormalDF(x); }
00157       cout << X.Name() << ":   "  << KS(data, n) << endl;
00158    }
00159 
00160    {
00161       Normal X1;
00162       MixedRandom X = X1(0.2) + (X1 * 2.5 + 1.1)(0.35) + (X1 + 2.3)(0.45);
00163       for (int i = 0; i < n; i++)
00164       {
00165          Real x = X.Next();
00166          data[i] = 0.20 * NormalDF(x)
00167                  + 0.35 * NormalDF((x - 1.1) / 2.5)
00168                  + 0.45 * NormalDF(x - 2.3);
00169       }
00170       cout << X.Name() << ":   "  << KS(data, n) << endl;
00171    }
00172 
00173    {
00174       Gamma X(0.5);
00175       for (int i = 0; i < n; i++)
00176          { Real x = X.Next(); data[i] = 2.0 * NormalDF(-sqrt(2 * x)); }
00177       cout << X.Name() << ":   "  << KS(data, n) << endl;
00178    }
00179 
00180    {
00181       Gamma X(3);
00182       for (int i = 0; i < n; i++)
00183          { Real x = X.Next(); data[i] = (1+x+0.5*x*x)*exp(-x); }
00184       cout << X.Name() << ":   "  << KS(data, n) << endl;
00185    }
00186 
00187    {
00188       Gamma X1(0.85); Gamma X2(2.15); SumRandom X = X1 + X2;
00189       for (int i = 0; i < n; i++)
00190          { Real x = X.Next(); data[i] = (1+x+0.5*x*x)*exp(-x); }
00191       cout << X.Name() << ":   "  << KS(data, n) << endl;
00192    }
00193 
00194    {
00195       Gamma X1(0.75); Gamma X2(0.25); SumRandom X = X1 + X2;
00196       for (int i = 0; i < n; i++) data[i] = exp(-X.Next());
00197       cout << X.Name() << ":   "  << KS(data, n) << endl;
00198    }
00199 
00200    {
00201       Gamma X(2);
00202       for (int i = 0; i < n; i++)
00203          { Real x = X.Next(); data[i] = (1+x)*exp(-x); }
00204       cout << X.Name() << ":   "  << KS(data, n) << endl;
00205    }
00206 
00207    {
00208       Exponential X;
00209       for (int i = 0; i < n; i++) data[i] = exp(-X.Next());
00210       cout << X.Name() << ":   "  << KS(data, n) << endl;
00211    }
00212 
00213    {
00214       Cauchy X;
00215       for (int i = 0; i < n; i++) data[i] = atan(X.Next())/3.141592654 + 0.5;
00216       cout << X.Name() << ":   "  << KS(data, n) << endl;
00217    }
00218 
00219    {
00220       Cauchy X0; SumRandom X = X0 * 0.3 + X0 * 0.7;
00221       for (int i = 0; i < n; i++) data[i] = atan(X.Next())/3.141592654 + 0.5;
00222       cout << X.Name() << ":   "  << KS(data, n) << endl;
00223    }
00224 
00225    {
00226       Uniform X;
00227       for (int i = 0; i < n; i++) data[i] = X.Next();
00228       cout << X.Name() << ":   "  << KS(data, n) << endl;
00229    }
00230 
00231    delete [] data;
00232 
00233 
00234 }
00235 
00236 /*************************** Kolmogorov Smirnov Test ************************/
00237 
00238 // test the data in the array (length n) for being uniform (0,1)
00239 
00240 Real KS(Real* data, int n)
00241 {
00242    SortAscending(data, n);
00243    Real D = 0.0;
00244    for (int i = 0; i < n; i++)
00245    {
00246       Real d1 = (Real)(i+1) / (Real)n - data[i];
00247       Real d2 = data[i] - (Real)i / (Real)n;
00248       if (D < d1) D = d1; if (D < d2) D = d2;
00249    }
00250    return D * (sqrt(n) + 0.12 + 0.11 / sqrt(n));
00251 }
00252 
00253 
00254 
00255 
00256 /******************************** Quick sort ********************************/
00257 
00258 // Quicksort.
00259 // Essentially the method described in Sedgewick's algorithms in C++
00260 // My version is still partially recursive, unlike Segewick's, but the
00261 // smallest segment of each split is used in the recursion, so it should
00262 // not overlead the stack.
00263 
00264 // If the process does not seems to be converging an exception is thrown.
00265 
00266 
00267 #define DoSimpleSort 17            // when to switch to insert sort
00268 #define MaxDepth 50                // maximum recursion depth
00269 
00270 
00271 static Real SortThreeDescending(Real* a, Real* b, Real* c);
00272 static void MyQuickSortAscending(Real* first, Real* last, int depth);
00273 static void InsertionSortAscending(Real* first, const int length, int guard);
00274 
00275 
00276 
00277 static Real SortThreeDescending(Real* a, Real* b, Real* c)
00278 {
00279    // sort *a, *b, *c; return *b; optimise for already sorted
00280    if (*a >= *b)
00281    {
00282       if (*b >= *c) return *b;
00283       else if (*a >= *c) { Real x = *c; *c = *b; *b = x; return x; }
00284       else { Real x = *a; *a = *c; *c = *b; *b = x; return x; }
00285    }
00286    else if (*c >= *b) { Real x = *c; *c = *a; *a = x; return *b; }
00287    else if (*a >= *c) { Real x = *a; *a = *b; *b = x; return x; }
00288    else { Real x = *c; *c = *a; *a = *b; *b = x; return x; }
00289 }
00290 
00291 
00292 
00293 void SortAscending(Real* data, int max)
00294 {
00295    if (max > DoSimpleSort) MyQuickSortAscending(data, data + max - 1, 0);
00296    InsertionSortAscending(data, max, DoSimpleSort);
00297 }
00298 
00299 static void InsertionSortAscending(Real* first, const int length,
00300    int guard)
00301 // guard gives the length of the sequence to scan to find first
00302 // element (eg guard = length)
00303 {
00304    if (length <= 1) return;
00305 
00306    // scan for first element
00307    Real* f = first; Real v = *f; Real* h = f;
00308    if (guard > length) guard = length; int i = guard - 1;
00309    while (i--) if (v > *(++f)) { v = *f; h = f; }
00310    *h = *first; *first = v;
00311 
00312    // do the sort
00313    i = length - 1; f = first;
00314    while (i--)
00315    {
00316       Real* g = f++; h = f; v = *h;
00317       while (*g > v) *h-- = *g--;
00318       *h = v;
00319    }
00320 }
00321 
00322 static void MyQuickSortAscending(Real* first, Real* last, int depth)
00323 {
00324    for (;;)
00325    {
00326       const int length = last - first + 1;
00327       if (length < DoSimpleSort) return;
00328       if (depth++ > MaxDepth)
00329          Throw(Exception("QuickSortAscending fails"));
00330       Real* centre = first + length/2;
00331       const Real test = SortThreeDescending(last, centre, first);
00332       Real* f = first; Real* l = last;
00333       for (;;)
00334       {
00335          while (*(++f) < test) {}
00336          while (*(--l) > test) {}
00337          if (l <= f) break;
00338          const Real temp = *f; *f = *l; *l = temp;
00339       }
00340       if (f > centre) { MyQuickSortAscending(l+1, last, depth); last = f-1; }
00341       else { MyQuickSortAscending(first, f-1, depth); first = l+1; }
00342    }
00343 }
00344 
00345 Real NormalDF(Real x)
00346 {
00347    // from Abramowitz and Stegun - accuracy 7.5E-8
00348    // accuracy is absolute; not relative
00349    // eventually will need a better method
00350    // but good enough here
00351    Real t = 1.0 / (1.0 + 0.2316419 * fabs(x));
00352    t = ( 0.319381530
00353      + (-0.356563782
00354      + ( 1.781477937
00355      + (-1.821255978
00356      +   1.330274429 * t) * t) * t) * t) * t;
00357    t = 0.3989422804014326779399461 * exp(-0.5 * x * x) * t;
00358    return (x < 0) ? t : 1.0 - t;
00359 }
00360 
00361 void ChiSquaredTest(int* Observed, Real* Prob, int N, int n)
00362 {
00363    // go for at least two expected observations per cell
00364    // work in from ends
00365 
00366    if (N <= 0) { cout << "no categories" << endl; return; }
00367    if (n <= 0) { cout << "no data" << endl; return; }
00368 
00369    int O1 = 0; Real E1 = 0.0; int O2 = 0; Real E2 = 0.0;
00370    Real CS = 0.0; int df = 0; int i = 0; int Ni = N; Real ToGo = n;
00371    for (;;)
00372    {
00373       O1 += Observed[i]; Real e1 = n * Prob[i]; E1 += e1; ToGo -= e1;
00374       if (E1 >= 2.0 && ToGo + E2 >= 2.0)
00375          { CS += square(O1 - E1) / E1; df += 1; O1 = 0; E1 = 0.0; }
00376 
00377       if (i == Ni) break;
00378       ++i;
00379 
00380       O2 += Observed[Ni]; Real e2 = n * Prob[Ni]; E2 += e2; ToGo -= e2;
00381       if (E2 >= 2.0 && ToGo + E1 >= 2.0)
00382          { CS += square(O2 - E2) / E2; df += 1; O2 = 0; E2 = 0.0; }
00383 
00384       if (i == Ni) break;
00385       --Ni;
00386    }
00387 
00388    E1 += E2; O1 += O2;
00389    if (E1 > 0.0) { CS += square(O1 - E1) / E1; df += 1; }
00390    if (fabs(ToGo) >= 0.01) cout << "chi-squared program fails  - ";
00391 
00392    cout << "chisq = " << CS << "; df = " << (df-1)
00393       << "; 95% pt. = " << invchi95(df-1)
00394       << "; 99% pt. = " << invchi99(df-1) << endl;
00395 }
00396 
00397 
00398 void TestBinomial(int N, Real p, int n)
00399 {
00400    Binomial X(N, p);
00401    Real q = 1.0 - p; Real ln_p = log(p); Real ln_q = log(q);
00402    int* obs = new int [N+1]; if (!obs) Throw(Bad_alloc());
00403    Real* prob = new Real [N+1]; if (!prob) Throw(Bad_alloc());
00404    int i;
00405    for (i = 0; i <= N; i++)
00406    {
00407       obs[i] = 0;
00408       prob[i] = exp(ln_gamma(N+1) - ln_gamma(i+1) - ln_gamma(N-i+1)
00409          + i * ln_p + (N-i) * ln_q);
00410    }
00411    for (i = 0; i < n; i++)
00412    {
00413       int b = (int)X.Next();
00414       if (b < 0 || b > N) Throw(Logic_error("Binomial error"));
00415       obs[b]++;
00416    }
00417    cout << "Binomial: "; ChiSquaredTest(obs, prob, N, n);
00418 
00419    delete [] obs; delete [] prob;
00420 }
00421 
00422 void TestPoisson(Real mu, int n)
00423 {
00424    Poisson X(mu);
00425    Real ln_mu = log(mu);
00426    int N = (int)(20 + mu + 10 * sqrt(mu));         // set upper bound
00427    if (N > n)
00428    {
00429       cout << "Poisson: range too large" << endl;
00430       return;
00431    }
00432    int* obs = new int [N+1]; if (!obs) Throw(Bad_alloc());
00433    Real* prob = new Real [N+1]; if (!prob) Throw(Bad_alloc());
00434    int i;
00435    for (i = 0; i <= N; i++)
00436       { obs[i] = 0; prob[i] = exp(i * ln_mu - mu - ln_gamma(i+1)); }
00437    for (i = 0; i < n; i++)
00438    {
00439       int b = (int)(X.Next());
00440       if (b < 0 || b > N) Throw(Logic_error("Poisson error"));
00441       obs[b]++;
00442    }
00443    cout << "Poisson: "; ChiSquaredTest(obs, prob, N, n);
00444 
00445    delete [] obs; delete [] prob;
00446 }
00447 
00448 void TestNegativeBinomial(Real NX, Real P, int n)
00449 {
00450    NegativeBinomial X(NX, P);
00451    Real Q = 1.0 + P; Real p = 1.0 / Q; Real q = 1.0 - p;
00452    Real ln_p = log(p); Real ln_q = log(q);
00453    Real mean = NX * P; Real var = mean * Q;
00454    int N = (int)(20 + mean + 100 * sqrt(var));         // set upper bound
00455       // won't be good enough for large P
00456    if (N > n)
00457    {
00458       cout << "NegativeBinomial: range too large" << endl;
00459       return;
00460    }
00461    int* obs = new int [N+1]; if (!obs) Throw(Bad_alloc());
00462    Real* prob = new Real [N+1]; if (!prob) Throw(Bad_alloc());
00463    int i;
00464    for (i = 0; i <= N; i++)
00465    {
00466       obs[i] = 0;
00467       prob[i] = exp(ln_gamma(NX+i) - ln_gamma(i+1) - ln_gamma(NX)
00468          + NX * ln_p + i * ln_q);
00469    }
00470    for (i = 0; i < n; i++)
00471    {
00472       int b = (int)X.Next();
00473       if (b < 0 || b > N) Throw(Logic_error("NegativeBinomial error"));
00474       obs[b]++;
00475    }
00476    cout << "NegativeBinomial: "; ChiSquaredTest(obs, prob, N, n);
00477 
00478    delete [] obs; delete [] prob;
00479 }
00480 
00481 void TestDiscreteGen(int N, Real* prob, int n)
00482 {
00483    DiscreteGen X(N, prob);
00484    int* obs = new int [N]; if (!obs) Throw(Bad_alloc());
00485    int i;
00486    for (i = 0; i < N; i++) obs[i] = 0;
00487    for (i = 0; i < n; i++)
00488    {
00489       int b = (int)X.Next();
00490       if (b < 0 || b >= N) Throw(Logic_error("DiscreteGen error"));
00491       obs[b]++;
00492    }
00493    cout << "DiscreteGen: "; ChiSquaredTest(obs, prob, N-1, n);
00494 
00495    delete [] obs;
00496 }
00497 
00498 // Calculate 95% point of chi-squared distribution
00499 
00500 double invchi95(int N)
00501 // upper 95% point of chi-squared distribution
00502 {
00503    if (N < 0) Throw(Logic_error("Error in invchi95 arg"));
00504    if (N < 30)
00505    {
00506       double Q[] = { 0, 3.841459, 5.991465, 7.814728, 9.487729, 11.0705,
00507          12.59159, 14.06714, 15.50731, 16.91898, 18.30704, 19.67506,
00508          21.02601, 22.36199, 23.68475, 24.99576, 26.2962, 27.58709,
00509          28.86928, 30.14351, 31.4104, 32.6705, 33.9244, 35.1725,
00510          36.4151, 37.6525, 38.8852, 40.1133, 41.3372, 42.5569 };
00511       return Q[N];
00512    }
00513    else
00514    {
00515       double A = 1.0/(4.5 * N); double H = (-0.0002 * 60)/N;
00516       double Q = N * cube(1 - A + (1.645 - H) * sqrt(A));
00517       return Q;
00518    }
00519 }
00520 
00521 // Calculate 99% point of chi-squared distribution
00522 
00523 double invchi99(int N)
00524 // upper 99% point of chi-squared distribution
00525 {
00526    if (N < 0) Throw(Logic_error("Error in invchi99 arg"));
00527    if (N < 30)
00528    {
00529       double Q[] = { 0, 6.63490, 9.21034, 11.3449, 13.2767, 15.0863,
00530          16.8119, 18.4753, 20.0902, 21.6660, 23.2093, 24.7250,
00531          26.2170, 27.6883, 29.1413, 30.5779, 31.9999, 33.4087,
00532          34.8053, 36.1908, 37.5662, 38.9321, 40.2894, 41.6384,
00533          42.9798, 44.3141, 45.6417, 46.9630, 48.2782, 49.5879 };
00534       return Q[N];
00535    }
00536    else
00537    {
00538       double A = 1.0/(4.5 * N); double H = (0.0008 * 60)/N;
00539       double Q = N * cube(1 - A + (2.326 - H) * sqrt(A));
00540       return Q;
00541    }
00542 }
00543