#include #include "../util.h" #include "mat-vec-mult-util.h" inline void matVecMult(size_t nx, const double *__restrict__ mat, const double *__restrict__ src, double *__restrict__ dest) { #pragma omp target teams distribute parallel for schedule(static) for (size_t r = 0; r < nx; ++r) { dest[r] = 0.; for (size_t c = 0; c < nx; ++c) dest[r] += mat[r * nx + c] * src[c]; } } int main(int argc, char *argv[]) { size_t nx, nItWarmUp, nIt; parseCLA_1d(argc, argv, nx, nItWarmUp, nIt); auto mat = new double[nx * nx]; auto src = new double[nx]; auto dest = new double[nx]; // init initMatVecMult(mat, src, nx); #pragma omp target enter data map(to : mat[0 : nx*nx], src[0 : nx], dest[0 : nx]) // warm-up for (size_t i = 0; i < nItWarmUp; ++i) { matVecMult(nx, mat, src, dest); std::swap(src, dest); } // measurement auto start = std::chrono::steady_clock::now(); for (size_t i = 0; i < nIt; ++i) { matVecMult(nx, mat, src, dest); std::swap(src, dest); } auto end = std::chrono::steady_clock::now(); #pragma omp target exit data map(from : src[0 : nx], dest[0 : nx]) printStatsMatVecMult(end - start, nx, nIt); // check solution checkSolutionMatVecMult(src, nx, nIt + nItWarmUp); delete[] src; delete[] dest; return 0; }