/*
 * gcc   -fopenmp -Wall pi-monte-carlo.parallel.c -o pi-monte-carlo.parallel.c.exe -lm
 * clang -fopenmp -Wall pi-monte-carlo.parallel.c -o pi-monte-carlo.parallel.c.exe -lm
 * icc   -qopenmp -Wall pi-monte-carlo.parallel.c -o pi-monte-carlo.parallel.c.exe
 * icx   -qopenmp -Wall pi-monte-carlo.parallel.c -o pi-monte-carlo.parallel.c.exe
 */
#define _GNU_SOURCE

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <omp.h>

/*
 * Return the current CLOCK_MONOTONIC reading, expressed in seconds.
 *
 * The value is only meaningful as a difference between two calls.
 * If clock_gettime() reports failure, errno and its description are
 * written to stderr and the process exits with EXIT_FAILURE.
 */
static inline double
get_time()
{
    struct timespec now;

    if (clock_gettime(CLOCK_MONOTONIC, &now) == -1) {
        fprintf(stderr,
                "ERROR: clock_gettime(CLOCK_MONOTONIC) failed: %d - %s\n",
                errno, strerror(errno));
        exit(EXIT_FAILURE);
    }

    /* Combine the integral seconds with the nanosecond remainder. */
    const double whole_seconds = (double)now.tv_sec;
    const double fractional_seconds = (double)now.tv_nsec * 1e-9;

    return whole_seconds + fractional_seconds;
}


/*
 * Estimate pi with an OpenMP-parallel Monte Carlo simulation.
 *
 * n_repetitions points (x, y) are drawn with both coordinates in [0, 1].
 * The fraction of points whose distance from the origin is below 1
 * approximates pi / 4.  The program prints the hit count, the throughput
 * in millions of iterations per second, the elapsed wall time and the
 * relative error of the estimate with respect to M_PI.
 *
 * Returns 0 on completion.
 */
int
main(void)
{
    int count = 0;
    int n_repetitions = 342000000;
    double t_start = get_time();

    #pragma omp parallel default(none) \
                shared(n_repetitions, count)
    {
        /*
         * rand_r() keeps its state in the caller-supplied seed, so every
         * thread advances a generator of its own.  Seeding with the thread
         * number gives each thread a different starting state.
         */
        unsigned int seed = (unsigned int)omp_get_thread_num();

        /*
         * reduction(+:count) gives each thread a private, zero-initialized
         * copy of count and adds the copies into the shared variable when
         * the loop ends, so no explicit critical section is required.
         */
        #pragma omp for reduction(+:count)
        for (int i = 0; i < n_repetitions; ++i) {
            double x = rand_r(&seed) / (double)RAND_MAX;
            double y = rand_r(&seed) / (double)RAND_MAX;

            /* Count the point when it lies inside the quarter circle. */
            if (sqrt(x * x + y * y) < 1.0) {
                ++count;
            }
        }
    }

    double pi = 4.0 * (double)count / ((double)n_repetitions);
    /* The timed interval spans the parallel region and the pi computation. */
    double duration = get_time() - t_start;

    printf("count: %d  repetitions: %d\n", count, n_repetitions);
    printf("perf: %.3lf Mit/sec, duration: %f s, accuracy: %.12e\n",
         (double)n_repetitions / duration / 1.0e6,
         duration,
         fabs(M_PI - pi) / M_PI);

    return 0;
}
