PaStiX4CalculiX Segmentation fault cLightSpMV.cu:99

This is a duplicate of PaStiX4CalculiX issue 6.
I’m not sure whether the people who are active here also follow the issues on GitHub for PaStiX4CalculiX,
so I would like to post it here too.
If such “double posts” are regarded as misbehaviour, I apologize and will delete the post.

Hello,
I get a segmentation fault while running ccx_2.17 with the PaStiX4CalculiX solver, with CUDA enabled and PASTIX_GPU=1.
The input file is beam10p from the test tarball.

System:
CENTOS 7.8
gcc 9.3.0
cuda 11.0.3

gdb shows:

Thread 1 "ccx_2.17_i8" received signal SIGSEGV, Segmentation fault.
0x000000000100167e in performLightLsMV (alpha=<optimized out>, dval=0xfffffffffffffff8, drowptr=0x0, dcolind=0xfffffffffffffff8, dx=0x1ef90, beta=0, dy=0x5c78)
at /tmp/work/Calculix_ccx_i8_pastix/PaStiX4CalculiX/kernels/gpus/LightSpMV-1.0/src/cLightSpMV.cu:99
99 spmv->_rowOffsets[0] = drowptr;

(gdb) l
94 CHECK_CUSPARSE( cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
95 &alpha, matA, vecX, &beta, vecY, CUDA_R_64F,
96 CUSPARSE_MV_ALG_DEFAULT, dBuffer) )
97 */
98
99 spmv->_rowOffsets[0] = drowptr;
100 spmv->_colIndexValues[0] = dcolind;
101 spmv->_numericalValues[0] = dval;
102 spmv->_vectorX[0] = dx;
103 spmv->_vectorY[0] = dy;

(gdb) p drowptr
$1 = (int64_t *) 0x0

(gdb) where
#0 0x000000000100167e in performLightLsMV (alpha=<optimized out>, dval=0xfffffffffffffff8, drowptr=0x0, dcolind=0xfffffffffffffff8, dx=0x1ef90, beta=0, dy=0x5c78)
at /tmp/work/Calculix_ccx_i8_pastix/PaStiX4CalculiX/kernels/gpus/LightSpMV-1.0/src/cLightSpMV.cu:99
#1 0x0000000000dad7b9 in gpu_d_spmv (n=258, alpha=1, beta=0, A=0x0, x=0x1ef98, y=0x5c78, rowptr=0x0, colind=0x0)
at /tmp/work/Calculix_ccx_i8_pastix/PaStiX4CalculiX/build/kernels/gpu_d_spmv.c:35
#2 0x0000000000a268a3 in d_gmres_gpu_smp (pastix_data=0xfd9d620, x=0x382b8, b=0x38ac8)
at /tmp/work/Calculix_ccx_i8_pastix/PaStiX4CalculiX/build/refinement/d_refine_gmres_gpu.c:270
#3 0x0000000000a0b186 in pastix_subtask_refine (pastix_data=0xfd9d620, n=258, nrhs=1, b=0x160d5e30, ldb=258, x=0xf8edf80, ldx=258)
at /tmp/work/Calculix_ccx_i8_pastix/PaStiX4CalculiX/refinement/pastix_task_refine.c:182
#4 0x0000000000a0b45d in pastix_task_refine (pastix_data=0xfd9d620, n=258, nrhs=1, b=0x160d5e30, ldb=258, x=0xf8edf80, ldx=258)
at /tmp/work/Calculix_ccx_i8_pastix/PaStiX4CalculiX/refinement/pastix_task_refine.c:303
#5 0x00000000005f21fb in pastix_solve_generic (symmetryflag=<optimized out>, nrhs=<optimized out>, neq=<optimized out>, x=<optimized out>) at pastix.c:696
#6 pastix_solve_generic (x=0xf8edf80, neq=0x7fffffffaaa0, symmetryflag=<optimized out>, nrhs=0x7fffffff95a0) at pastix.c:644
#7 0x00000000005f2d0d in pastix_main_generic (nrhs=0x7fffffff95a0, nzs3=0x7fffffffaad0, jq=0xf8ebab0, inputformat=0x7fffffff9460, symmetryflag=0x7fffffff9458,
nzs=0x7fffffffaac0, neq=0x7fffffffaaa0, irow=0xf8f8c70, icol=0xf8e9c40, b=0xf8edf80, sigma=0x7fffffff95b0, aub=0x0, adb=0x0, au=0xf914440, ad=0xf8ecf40) at pastix.c:837
#8 pastix_main_generic (ad=0xf8ecf40, au=0xf914440, adb=0x0, aub=0x0, sigma=0x7fffffff95b0, b=0xf8edf80, icol=0xf8e9c40, irow=0xf8f8c70, neq=0x7fffffffaaa0,
nzs=0x7fffffffaac0, symmetryflag=0x7fffffff9458, inputformat=0x7fffffff9460, jq=0xf8ebab0, nzs3=0x7fffffffaad0, nrhs=0x7fffffff95a0) at pastix.c:756
#9 0x00000000005cec53 in linstatic (co=0xf8e6e50, nk=nk@entry=0x7fffffffa518, konp=konp@entry=0x7fffffffa358, ipkonp=ipkonp@entry=0x7fffffffa460,
lakonp=lakonp@entry=0x7fffffffa310, ne=ne@entry=0x7fffffffa520, nodeboun=0xf8e69a0, ndirboun=0xf8e6a10, xboun=0xf8e8bc0, nboun=0x7fffffffa528, ipompc=0x0,
nodempc=<optimized out>, coefmpc=<optimized out>, labmpc=0xf8e8d30 “”, nmpc=0x7fffffffa530, nodeforc=0xf8e8db0, ndirforc=0xf8e8e50, xforc=0xf8e8f40,
nforc=0x7fffffffa538, nelemload=0xf8e9780, sideload=0xf8e97a0 “\270缬\252*”, xload=0xf8e97c0, nload=0x7fffffffa540, nactdof=0xf8f0040, icolp=0x7fffffffa3a0,
jq=0xf8ebab0, irowp=0x7fffffffa3c0, neq=0x7fffffffaaa0, nzl=0x7fffffffa570, nmethod=0x7fffffffa560, ikmpc=0x0, ilmpc=0x0, ikboun=0xf8e8c30, ilboun=0xf8e8ca0,
elcon=0xf8e9a30, nelcon=0xf8e9a50, rhcon=0xf8e9a70, nrhcon=0xf8e9a90, alcon=0xf8e9b00, nalcon=0xf8e9b40, alzero=0xf8e9b60, ielmatp=0x7fffffffa408,
ielorienp=0x7fffffffa410, norien=0x7fffffffa610, orab=0x0, ntmat_=0x7fffffffa608, t0=0xf8ea9d0, t1=0xf8eaf00, t1old=0x0, ithermal=0x7fffffffaa00, prestr=0xf8eb960,
iprestr=0x7fffffffa618, vold=0xf8eebb0, iperturb=0x7fffffffa9f0, sti=0xf8f27d0, nzs=0x7fffffffaac0, kode=0x7fffffffa620, filab=0xf8f1750 ’ ’ <repeats 200 times>…,
eme=0xf8f5a20, iexpl=0x7fffffffa660, plicon=0x0, nplicon=0x0, plkcon=0x0, nplkcon=0x0, xstatep=0x7fffffffa950, npmat_=0x7fffffffa680,
matname=0xf8f16f0 “EL”, ’ ’ <repeats 78 times>, isolver=0x7fffffffa628, mi=0x7fffffffaae0, ncmat_=0x7fffffffa838, nstate_=0x7fffffffa830, cs=0x0, mcs=0x7fffffffa708,
nkon=0x7fffffffa650, enerp=0x7fffffffa960, xbounold=0xf8eba40, xforcold=0xf8e8ef0, xloadold=0x0, amname=0xf8ea8b0 “”, amta=0xf8ea960, namta=0xf8ea990,
nam=0x7fffffffa578, iamforc=0xf8e8ea0, iamload=0x0, iamt1=0xf8eb430, iamboun=0xf8e6aa0, ttime=0x7fffffffa9e8, output=0x7fffffffa2db "asc ",
set=0xf8e9150 “NALLN”, ’ ’ <repeats 76 times>, “EALLE”, ’ ’ <repeats 76 times>, “FIXN”, ’ ’ <repeats 34 times>…, nset=0x7fffffffa550, istartset=0xf8e92a0,
iendset=0xf8e92d0, ialset=0xf8e9300, nprint=0x7fffffffa548, prlab=0xf8e9030 “U LRF LS L”,
prset=0xf8e9050 “NALLN”, ’ ’ <repeats 76 times>, “NALLN”, ’ ’ <repeats 76 times>, “EALLE”, ’ ’ <repeats 33 times>…, nener=0x7fffffffa6d8, trab=0x0, inotr=0xf8e9e60,
ntrans=0x7fffffffa688, fmpc=0x0, ipobody=0x0, ibody=0xf8e9800, xbody=0xf8e9820, nbody=0x7fffffffa598, xbodyold=0xf8e9840, timepar=0x7fffffffabd0, thicke=0x0,
jobnamec=0x7fffffffaf60 “beam10p”, tieset=0x0, ntie=0x7fffffffa6f8, istep=0x7fffffffa5e0, nmat=0x7fffffffa600, ielprop=0x0, prop=0x0,
typeboun=0xf8e6a80 ‘B’ <repeats 12 times>, mortar=0x7fffffffa640, mpcinfo=0x7fffffffab60, tietol=0x0, ics=0x0, icontact=0x7fffffffa760, orname=0x0,
itempuser=0x7fffffffaa60) at linstatic.c:985
#10 0x00000000004179ae in main (argc=<optimized out>, argv=<optimized out>) at ccx_2.17.c:1176

Is this a bug, or a mistake on my side while setting up or compiling?

Any help is appreciated.
Kind Regards, and thanks in advance

The problem is solved by setting PASTIX_REFINE_GPU=1 as an additional environment variable.

1 Like