/* gcc -std=c11 -fopenmp -Wall main.c poly-eval.c -o simd.c.exe -lr -lm*/
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/time.h>

/*
 * Polynomial evaluation kernel under test.  Only declared here; the
 * definition lives in another translation unit (the build line at the top of
 * this file links poly-eval.c).  It takes the same arguments as
 * poly_eval_ref() below, and test() expects both to return the same value
 * for identical inputs -- presumably it uses the same coefficient ordering
 * (highest-order coefficient first); confirm against the implementation.
 */
extern double
poly_eval(double x, int degree, const double *coefficients);

/**
 * @brief Read the current wall-clock time.
 *
 * Queries gettimeofday() and folds the seconds and microseconds fields into
 * a single double.  If gettimeofday() reports failure, an error message is
 * written to stderr and the process exits with EXIT_FAILURE.
 *
 * @return Time in seconds as reported by gettimeofday(), including the
 *         fractional microsecond part.
 */
static double
timing(void)
{
    struct timeval tp;

    if (gettimeofday(&tp, NULL) == -1) {
        fprintf(stderr, "ERROR: gettimeofday failed.\n");
        exit(EXIT_FAILURE);
    }

    /* Convert both fields explicitly so the arithmetic is done in double. */
    return (double)tp.tv_sec + (double)tp.tv_usec * 1e-6;
}

/**
 * @brief Reference evaluation of a polynomial at @p x using Horner's scheme.
 * @param x             Point at which the polynomial is evaluated.
 * @param degree        Degree of the polynomial.
 * @param coefficients  Pointer to an array of @p degree + 1 coefficients,
 *                      stored highest order first: coefficients[0] is
 *                      a_degree, coefficients[1] is a_(degree - 1), ...,
 *                      and coefficients[degree] is a_0.
 * @return Value of the polynomial at @p x.
 */
static double
poly_eval_ref(double x, int degree, const double * coefficients)
{
    const double *cursor = coefficients;
    double acc = *cursor;

    /* One multiply-add per remaining coefficient, walking towards a_0. */
    for (int remaining = degree; remaining > 0; --remaining) {
        ++cursor;
        acc = x * acc + *cursor;
    }

    return acc;
}

/**
 * @brief Sanity check of poly_eval() and poly_eval_ref() on a fixed input.
 *
 * Evaluates the polynomial with coefficients 2, 3, 4, 5, 6, 7 at x = 1,
 * where the result is simply the sum of the coefficients (27).  A mismatch
 * is reported on stdout; the program keeps running either way.
 */
static void
test(void)
{
    const double x = 1.0;
    const double coeffs[] = { 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 };
    /* Derived from the table so degree and array length cannot drift apart. */
    const int degree = (int)(sizeof coeffs / sizeof coeffs[0]) - 1;
    const double ref = 27;

    /*
     * The comparisons below are exact on purpose: Horner evaluation of these
     * coefficients at x = 1 only produces small integers, which a double
     * represents exactly.
     */
    {
        double result = poly_eval(x, degree, coeffs);
        if (result != ref) {
            printf("poly_eval: expected %f, got %f\n", ref, result);
        }
    }

    {
        double result = poly_eval_ref(x, degree, coeffs);
        if (result != ref) {
            printf("poly_eval_ref: expected %f, got %f\n", ref, result);
        }
    }
}

/**
 * @brief Allocate a zero-initialised array of doubles or terminate.
 *
 * Uses calloc() so that an overflowing count * element-size product is
 * rejected instead of silently wrapping.
 *
 * @param count  Number of elements to allocate.
 * @param what   Short label of the array, used only in the error message.
 * @return Pointer to the new array; never NULL.  The caller frees it.
 */
static double *
alloc_doubles(size_t count, const char *what)
{
    double *p = calloc(count, sizeof *p);

    if (p == NULL) {
        fprintf(stderr, "ERROR: failed to allocate %zu doubles for %s.\n",
                count, what);
        exit(EXIT_FAILURE);
    }

    return p;
}

/**
 * @brief Benchmark driver for poly_eval().
 *
 * Usage: prog [N [degree]]
 *   - N:      number of evaluation points (default 1000, clamped to >= 1)
 *   - degree: polynomial degree           (default 10,   clamped to >= 1)
 *
 * Runs the fixed sanity check, fills the coefficient and x arrays with
 * reproducible pseudo-random values, computes a reference sum with
 * poly_eval_ref(), then times poly_eval() over all points with a doubling
 * repetition count until one timed round lasts at least 0.2 s.  Finally
 * prints the reference sum, the measured sum, their relative error and the
 * achieved MFLOP/s.
 *
 * @return EXIT_SUCCESS on completion; the process exits with EXIT_FAILURE on
 *         allocation or timer failure.
 */
int
main(int argc, char **argv)
{
    int N = 1000;
    int degree = 10;

    if (argc > 1) {
        N = atoi(argv[1]);
    }
    if (argc > 2) {
        degree = atoi(argv[2]);
    }

    if (N < 1) {
        N = 1;
    }
    if (degree < 1) {
        degree = 1;
    }

    test();

    /* Count in size_t so that degree + 1 cannot overflow int. */
    const size_t n_coeffs = (size_t)degree + 1;

    double *coefficients = alloc_doubles(n_coeffs, "coefficients");
    double *x = alloc_doubles((size_t)N, "x");
    double *f = alloc_doubles((size_t)N, "f");

    srand(2);

    for (size_t i = 0; i < n_coeffs; ++i) {
        coefficients[i] = rand() / (double)RAND_MAX * 10.0;
        /* Keep every coefficient strictly positive. */
        if (coefficients[i] < DBL_MIN) {
            coefficients[i] = DBL_MIN;
        }
    }

    srand(1);

    for (int i = 0; i < N; ++i) {
        /* x values lie in [1.0, 1.01]. */
        x[i] = (double)rand() / (double)RAND_MAX / 100.0 + 1.0;
        f[i] = 0.0;
    }

    // reference solution
    double sum_ref = 0.0;

    for (int i = 0; i < N; ++i) {
        sum_ref += poly_eval_ref(x[i], degree, coefficients);
    }

    int n_repetitions = 2;

    double duration_s;
    do {
        double t_start = timing();

        for (int r = 0; r < n_repetitions; r++) {
            // main loop
            for (int i = 0; i < N; ++i) {
                f[i] = poly_eval(x[i], degree, coefficients);
            }

            /*
             * Reads a result between repetitions; with positive
             * coefficients and x >= 1 the branch is not expected to fire.
             */
            if (f[N / 2] < 0.0) {
                fprintf(stderr, "ERROR: should never happen\n");
                exit(EXIT_FAILURE);
            }
        }

        duration_s = timing() - t_start;
        n_repetitions *= 2;
        /*
         * n_repetitions == 4 holds only right after the first round, so at
         * least two rounds are always timed.
         */
    } while (duration_s < 0.2 || n_repetitions == 4);

    /* Undo the final doubling: this is the count that was actually timed. */
    n_repetitions /= 2;

    double sum_simd = 0.0;
    // for correctness check
    for (int i = 0; i < N; ++i) {
        sum_simd += f[i];
    }

    printf("reference sum:  %19.12e\n", sum_ref);
    printf("simd sum:       %19.12e\n", sum_simd);
    printf("relative error: %19.12e\n", fabs((sum_simd - sum_ref) / sum_ref));

    /* Horner's scheme costs one multiply and one add per degree. */
    double perf_MFLOP_s = (double)n_repetitions * (double)N * (double)degree * 2.0 / duration_s / 1.e6;
    printf("N: %d  degree: %d  repetitions: %d  performance: %7.2f  MFLOP/s"
           "  duration: %7.2f s\n",
           N, degree, n_repetitions, perf_MFLOP_s,
           duration_s);

    free(coefficients);
    free(x);
    free(f);

    return EXIT_SUCCESS;
}
