#ifndef spMV_HPP #define spMV_HPP #include "SellCSigma.hpp" #include "CSRMatrix.hpp" #ifdef USE_LIKWID extern "C" { #include } #endif /*****CSR_MATRIX**************************************************************/ /** * sparse Matrix-Vector multiplication * y= y + A*x * using the CSR Format * y and x musst be allocated and valid * if accelorators are used (openACC) data have to be preent on the device */ void spMV( CSR_Matrix const & A, double const *x, double *y ) { double const *val = A.getValues(); int const *colInd = A.getColInd(); int const *rowPtr = A.getRowPtr(); int const numRows = A.getRows(); int const numNonZeros = A.getNonZeros(); // loop over all rows #pragma omp parallel for schedule(runtime) #pragma acc parallel present(val[0:numNonZeros], \ colInd[0:numNonZeros], \ rowPtr[0:numRows+1], \ x[0:numRows], \ y[0:numRows]) \ loop for (int rowID=0; rowID void spMV( SellCSigma_Matrix const & A, double const * x, double * y ) { double const * val = A.getValues(); int const * chunkPtr = A.getChankPtr(); int const * chunkLength = A.getChankLength(); int const * colInd = A.getColInd(); int const numberOfChunks = A.getNumberOfChunks(); int const chunkSize = C; int const paddedRows = A.getPaddedRows(); int const capacity = A.getCapasety(); #pragma omp parallel for schedule(runtime) #pragma acc parallel present(val[0 : capacity], \ colInd[0 : capacity], \ chunkPtr[0 : numberOfChunks], \ chunkLength[0 : numberOfChunks], \ x[0 : paddedRows], \ y[0 : paddedRows]) \ vector_length(C) \ loop // loop over all chunks for (int chunk=0; chunk < numberOfChunks; ++chunk) { int chunkOffset = chunkPtr[chunk]; int rowOffset = chunk*chunkSize; #pragma acc loop vector #pragma omp simd for (int chunkRow=0; chunkRow(A,x,y); else if (2 == C) return spMV<2>(A,x,y); else if (4 == C) return spMV<4>(A,x,y); else if (16 == C) return spMV<16>(A,x,y); else if (32 == C) return spMV<32>(A,x,y); else if (64 == C) return spMV<64>(A,x,y); else if (128 == C) return spMV<128>(A,x,y); #ifdef SET_C else if (SET_C == C) return spMV(A,x,y); #endif else { std::cout << "spMV Kernel for C="<< C << " is not compiled." << " Use 'SET_C=C' as compile time flag to creat this function." << "\nC=1 is used as a fall back function." << std::endl; return spMV<1>(A,x,y); } } #endif