FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_solver_las_44.f90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
7 use hecmw_util
8 implicit none
9
10 private
11
12 public :: hecmw_matvec_44
15 public :: hecmw_matresid_44
16 public :: hecmw_rel_resid_l2_44
17 public :: hecmw_tvec_44
18 public :: hecmw_ttvec_44
19 public :: hecmw_ttmattvec_44
20 public :: hecmw_mat_diag_sr_44
21
22 ! ! for communication hiding in matvec
23 ! integer(kind=kint), save, allocatable :: index_o(:), item_o(:)
24 ! real(kind=kreal), save, allocatable :: A_o(:)
25 logical, save :: async_matvec_flg = .false.
26
27contains
28
29 !C
30 !C***
31 !C*** hecmw_matvec_44
32 !C***
33 !C
34 subroutine hecmw_matvec_44 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
35 use hecmw_util
37 implicit none
38 type (hecmwst_local_mesh), intent(in) :: hecmesh
39 type (hecmwst_matrix), intent(in), target :: hecmat
40 real(kind=kreal), intent(in) :: x(:)
41 real(kind=kreal), intent(out) :: y(:)
42 real(kind=kreal), intent(inout), optional :: time_ax
43 real(kind=kreal), intent(inout), optional :: commtime
44
45 real(kind=kreal) :: tcomm
46 real(kind=kreal), allocatable :: wk(:)
47
48 tcomm = 0.d0
49
50 if (hecmw_mat_get_flag_mpcmatvec(hecmat) /= 0) then
51 allocate(wk(hecmat%NP * hecmat%NDOF))
52 call hecmw_ttmattvec_44(hecmesh, hecmat, x, y, wk, tcomm)
53 deallocate(wk)
54 else
55 call hecmw_matvec_44_inner(hecmesh, hecmat, x, y, time_ax, tcomm)
56 endif
57
58 if (present(commtime)) commtime = commtime + tcomm
59 end subroutine hecmw_matvec_44
60
61 !C
62 !C***
63 !C*** hecmw_matvec_44_set_async
64 !C***
65 !C
66 subroutine hecmw_matvec_44_set_async (hecMAT)
67 use hecmw_util
68 implicit none
69 type (hecmwst_matrix), intent(in) :: hecmat
70
71 end subroutine hecmw_matvec_44_set_async
72
73 !C
74 !C***
75 !C*** hecmw_matvec_44_unset_async
76 !C***
77 !C
79 implicit none
80
81 end subroutine hecmw_matvec_44_unset_async
82
83 !C
84 !C***
85 !C*** hecmw_matvec_44_inner ( private subroutine )
86 !C***
87 !C
88 subroutine hecmw_matvec_44_inner (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
89 use hecmw_util
95 !$ use omp_lib
96
97 implicit none
98 type (hecmwst_local_mesh), intent(in) :: hecmesh
99 type (hecmwst_matrix), intent(in), target :: hecmat
100 real(kind=kreal), intent(in) :: x(:)
101 real(kind=kreal), intent(out) :: y(:)
102 real(kind=kreal), intent(inout) :: time_ax
103 real(kind=kreal), intent(inout), optional :: commtime
104
105 real(kind=kreal) :: start_time, end_time, tcomm
106 integer(kind=kint) :: i, j, js, je, in
107 real(kind=kreal) :: yv1, yv2, yv3, yv4, x1, x2, x3, x4
108
109 integer(kind=kint) :: n, np
110 integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:)
111 real(kind=kreal), pointer :: al(:), au(:), d(:)
112
113 ! added for turning >>>
114 integer, parameter :: numofblockperthread = 100
115 logical, save :: isfirst = .true.
116 integer, save :: numofthread = 1
117 integer, save, allocatable :: startpos(:), endpos(:)
118 integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
119 integer(kind=kint) :: threadnum, blocknum, numofblock
120 integer(kind=kint) :: numofelement, elementcount, blockindex
121 real(kind=kreal) :: numofelementperblock
122 ! <<< added for turning
123
124 if (hecmw_jad_is_initialized().ne.0) then
125 tcomm = 0.d0
126 start_time = hecmw_wtime()
127 call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
128 end_time = hecmw_wtime()
129 time_ax = time_ax + end_time - start_time - tcomm
130 if (present(commtime)) commtime = commtime + tcomm
131 else
132
133 n = hecmat%N
134 np = hecmat%NP
135 indexl => hecmat%indexL
136 indexu => hecmat%indexU
137 iteml => hecmat%itemL
138 itemu => hecmat%itemU
139 al => hecmat%AL
140 au => hecmat%AU
141 d => hecmat%D
142
143 ! added for turning >>>
144 if (.not. isfirst) then
145 numofblock = numofthread * numofblockperthread
146 if (endpos(numofblock-1) .ne. n-1) then
147 deallocate(startpos, endpos)
148 isfirst = .true.
149 endif
150 endif
151 if (isfirst) then
152 !$ numOfThread = omp_get_max_threads()
153 numofblock = numofthread * numofblockperthread
154 allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
155 numofelement = n + indexl(n) + indexu(n)
156 numofelementperblock = dble(numofelement) / numofblock
157 blocknum = 0
158 elementcount = 0
159 startpos(blocknum) = 1
160 do i= 1, n
161 elementcount = elementcount + 1
162 elementcount = elementcount + (indexl(i) - indexl(i-1))
163 elementcount = elementcount + (indexu(i) - indexu(i-1))
164 if (elementcount > (blocknum + 1) * numofelementperblock) then
165 endpos(blocknum) = i
166 ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
167 ! startPos(blockNum), endPos(blockNum)
168 blocknum = blocknum + 1
169 startpos(blocknum) = i + 1
170 if (blocknum == (numofblock - 1)) exit
171 endif
172 enddo
173 endpos(blocknum) = n
174 ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
175 ! startPos(blockNum), endPos(blockNum)
176 ! for irregular data
177 do i= blocknum+1, numofblock-1
178 startpos(i) = n
179 endpos(i) = n-1
180 ! write(9000+hecMESH%my_rank,*) mod(i, numOfThread), &
181 ! startPos(i), endPos(i)
182 end do
183
185 sectorcachesize0, sectorcachesize1)
186
187 isfirst = .false.
188 endif
189 ! <<< added for turning
190
191 start_time= hecmw_wtime()
192
193 call hecmw_update_4_r (hecmesh, x, np)
194
195 ! endif
196 end_time= hecmw_wtime()
197 if (present(commtime)) commtime = commtime + end_time - start_time
198
199 start_time = hecmw_wtime()
200
201 !call fapp_start("loopInMatvec44", 1, 0)
202 !call start_collection("loopInMatvec44")
203
204 !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
205 !OCL CACHE_SUBSECTOR_ASSIGN(X)
206
207 !$OMP PARALLEL DEFAULT(NONE) &
208 !$OMP&PRIVATE(i,X1,X2,X3,X4,YV1,YV2,YV3,YV4,jS,jE,j,in,threadNum,blockNum,blockIndex) &
209 !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread,N,async_matvec_flg)
210 threadnum = 0
211 !$ threadNum = omp_get_thread_num()
212 do blocknum = 0 , numofblockperthread - 1
213 blockindex = blocknum * numofthread + threadnum
214 do i = startpos(blockindex), endpos(blockindex)
215 x1= x(4*i-3)
216 x2= x(4*i-2)
217 x3= x(4*i-1)
218 x4= x(4*i )
219 yv1= d(16*i-15)*x1 + d(16*i-14)*x2 + d(16*i-13)*x3 + d(16*i-12)*x4
220 yv2= d(16*i-11)*x1 + d(16*i-10)*x2 + d(16*i- 9)*x3 + d(16*i- 8)*x4
221 yv3= d(16*i- 7)*x1 + d(16*i- 6)*x2 + d(16*i- 5)*x3 + d(16*i- 4)*x4
222 yv4= d(16*i- 3)*x1 + d(16*i- 2)*x2 + d(16*i- 1)*x3 + d(16*i )*x4
223
224 js= indexl(i-1) + 1
225 je= indexl(i )
226 do j= js, je
227 in = iteml(j)
228 x1= x(4*in-3)
229 x2= x(4*in-2)
230 x3= x(4*in-1)
231 x4= x(4*in )
232 yv1= yv1 + al(16*j-15)*x1 + al(16*j-14)*x2 + al(16*j-13)*x3 + al(16*j-12)*x4
233 yv2= yv2 + al(16*j-11)*x1 + al(16*j-10)*x2 + al(16*j- 9)*x3 + al(16*j- 8)*x4
234 yv3= yv3 + al(16*j- 7)*x1 + al(16*j- 6)*x2 + al(16*j- 5)*x3 + al(16*j- 4)*x4
235 yv4= yv4 + al(16*j- 3)*x1 + al(16*j- 2)*x2 + al(16*j- 1)*x3 + al(16*j )*x4
236 enddo
237 js= indexu(i-1) + 1
238 je= indexu(i )
239 do j= js, je
240 in = itemu(j)
241 ! if (async_matvec_flg .and. in > N) cycle
242 x1= x(4*in-3)
243 x2= x(4*in-2)
244 x3= x(4*in-1)
245 x4= x(4*in )
246 yv1= yv1 + au(16*j-15)*x1 + au(16*j-14)*x2 + au(16*j-13)*x3 + au(16*j-12)*x4
247 yv2= yv2 + au(16*j-11)*x1 + au(16*j-10)*x2 + au(16*j- 9)*x3 + au(16*j- 8)*x4
248 yv3= yv3 + au(16*j- 7)*x1 + au(16*j- 6)*x2 + au(16*j- 5)*x3 + au(16*j- 4)*x4
249 yv4= yv4 + au(16*j- 3)*x1 + au(16*j- 2)*x2 + au(16*j- 1)*x3 + au(16*j )*x4
250 enddo
251 y(4*i-3)= yv1
252 y(4*i-2)= yv2
253 y(4*i-1)= yv3
254 y(4*i )= yv4
255 enddo
256 enddo
257 !$OMP END PARALLEL
258
259 !OCL END_CACHE_SUBSECTOR
260 !OCL END_CACHE_SECTOR_SIZE
261
262 !call stop_collection("loopInMatvec44")
263 !call fapp_stop("loopInMatvec44", 1, 0)
264
265 end_time = hecmw_wtime()
266 time_ax = time_ax + end_time - start_time
267
268 endif
269
270 if (hecmat%cmat%n_val > 0) then
271 call hecmw_cmat_multvec_add( hecmat%cmat, x, y, np * hecmat%NDOF )
272 end if
273
274 end subroutine hecmw_matvec_44_inner
275
276 !C
277 !C***
278 !C*** hecmw_matresid_44
279 !C***
280 !C
281 subroutine hecmw_matresid_44 (hecMESH, hecMAT, X, B, R, COMMtime)
282 use hecmw_util
283 implicit none
284 type (hecmwst_local_mesh), intent(in) :: hecmesh
285 type (hecmwst_matrix), intent(in) :: hecmat
286 real(kind=kreal), intent(in) :: x(:), b(:)
287 real(kind=kreal), intent(out) :: r(:)
288 real(kind=kreal), intent(inout), optional :: commtime
289
290 integer(kind=kint) :: i
291 real(kind=kreal) :: tcomm
292
293 tcomm = 0.d0
294 call hecmw_matvec_44 (hecmesh, hecmat, x, r, tcomm)
295 if (present(commtime)) commtime = commtime + tcomm
296 !$omp parallel default(none),private(i),shared(hecMAT,R,B)
297 !$omp do
298 do i = 1, hecmat%N * 4
299 r(i) = b(i) - r(i)
300 enddo
301 !$omp end do
302 !$omp end parallel
303 end subroutine hecmw_matresid_44
304
305 !C
306 !C***
307 !C*** hecmw_rel_resid_L2_44
308 !C***
309 !C
310 function hecmw_rel_resid_l2_44 (hecMESH, hecMAT, COMMtime)
311 use hecmw_util
313 implicit none
314 real(kind=kreal) :: hecmw_rel_resid_l2_44
315 type ( hecmwst_local_mesh ), intent(in) :: hecmesh
316 type ( hecmwst_matrix ), intent(in) :: hecmat
317 real(kind=kreal), intent(inout), optional :: commtime
318
319 real(kind=kreal), allocatable :: r(:)
320 real(kind=kreal) :: bnorm2, rnorm2
321 real(kind=kreal) :: tcomm
322
323 allocate(r(hecmat%NDOF*hecmat%NP))
324
325 tcomm = 0.d0
326 call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, &
327 hecmat%B, hecmat%B, bnorm2, tcomm)
328 if (bnorm2 == 0.d0) then
329 bnorm2 = 1.d0
330 endif
331 call hecmw_matresid_44(hecmesh, hecmat, hecmat%X, hecmat%B, r, tcomm)
332 call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
333 hecmw_rel_resid_l2_44 = sqrt(rnorm2 / bnorm2)
334
335 if (present(commtime)) commtime = commtime + tcomm
336
337 deallocate(r)
338 end function hecmw_rel_resid_l2_44
339
340 !C
341 !C***
342 !C*** hecmw_Tvec_44
343 !C***
344 !C
345 subroutine hecmw_tvec_44 (hecMESH, X, Y, COMMtime)
346 use hecmw_util
348 implicit none
349 type (hecmwst_local_mesh), intent(in) :: hecmesh
350 real(kind=kreal), intent(in) :: x(:)
351 real(kind=kreal), intent(out) :: y(:)
352 real(kind=kreal), intent(inout) :: commtime
353
354 real(kind=kreal) :: start_time, end_time
355 integer(kind=kint) :: i, j, jj, k, kk
356
357 start_time= hecmw_wtime()
358 call hecmw_update_4_r (hecmesh, x, hecmesh%n_node)
359 end_time= hecmw_wtime()
360 commtime = commtime + end_time - start_time
361
362 !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
363 !$omp do
364 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
365 y(i)= x(i)
366 enddo
367 !$omp end do
368
369 !$omp do
370 outer: do i= 1, hecmesh%mpc%n_mpc
371 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
372 if (hecmesh%mpc%mpc_dof(j) > 4) cycle outer
373 enddo
374 k = hecmesh%mpc%mpc_index(i-1) + 1
375 kk = 4 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
376 y(kk) = 0.d0
377 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
378 jj = 4 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
379 y(kk) = y(kk) - hecmesh%mpc%mpc_val(j) * x(jj)
380 enddo
381 enddo outer
382 !$omp end do
383 !$omp end parallel
384
385 end subroutine hecmw_tvec_44
386
387 !C
388 !C***
389 !C*** hecmw_Ttvec_44
390 !C***
391 !C
392 subroutine hecmw_ttvec_44 (hecMESH, X, Y, COMMtime)
393 use hecmw_util
395 implicit none
396 type (hecmwst_local_mesh), intent(in) :: hecmesh
397 real(kind=kreal), intent(in) :: x(:)
398 real(kind=kreal), intent(out) :: y(:)
399 real(kind=kreal), intent(inout) :: commtime
400
401 real(kind=kreal) :: start_time, end_time
402 integer(kind=kint) :: i, j, jj, k, kk
403
404 start_time= hecmw_wtime()
405 call hecmw_update_4_r (hecmesh, x, hecmesh%n_node)
406 end_time= hecmw_wtime()
407 commtime = commtime + end_time - start_time
408
409 !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
410 !$omp do
411 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
412 y(i)= x(i)
413 enddo
414 !$omp end do
415
416 !$omp do
417 outer: do i= 1, hecmesh%mpc%n_mpc
418 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
419 if (hecmesh%mpc%mpc_dof(j) > 4) cycle outer
420 enddo
421 k = hecmesh%mpc%mpc_index(i-1) + 1
422 kk = 4 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
423 y(kk) = 0.d0
424 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
425 jj = 4 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
426 y(jj) = y(jj) - hecmesh%mpc%mpc_val(j) * x(kk)
427 enddo
428 enddo outer
429 !$omp end do
430 !$omp end parallel
431
432 end subroutine hecmw_ttvec_44
433
434 !C
435 !C***
436 !C*** hecmw_TtmatTvec_44
437 !C***
438 !C
439 subroutine hecmw_ttmattvec_44 (hecMESH, hecMAT, X, Y, W, COMMtime)
440 use hecmw_util
441 implicit none
442 type (hecmwst_local_mesh), intent(in) :: hecmesh
443 type (hecmwst_matrix), intent(in) :: hecmat
444 real(kind=kreal), intent(in) :: x(:)
445 real(kind=kreal), intent(out) :: y(:), w(:)
446 real(kind=kreal), intent(inout) :: commtime
447
448 call hecmw_tvec_44(hecmesh, x, y, commtime)
449 call hecmw_matvec_44_inner(hecmesh, hecmat, y, w, commtime)
450 call hecmw_ttvec_44(hecmesh, w, y, commtime)
451
452 end subroutine hecmw_ttmattvec_44
453
454
455 !C
456 !C***
457 !C*** hecmw_mat_diag_sr_44
458 !C***
459 !C
460 subroutine hecmw_mat_diag_sr_44(hecMESH, hecMAT, COMMtime)
461 use hecmw_util
463 implicit none
464 type (hecmwst_local_mesh), intent(in) :: hecmesh
465 type (hecmwst_matrix), intent(inout), target :: hecmat
466 real(kind=kreal), intent(inout), optional :: commtime
467 real(kind=kreal), allocatable :: w(:,:)
468 real(kind=kreal), pointer :: d(:)
469 integer(kind=kint) :: ip
470 real(kind=kreal) :: start_time, end_time
471 allocate(w(4*hecmat%NP,4))
472 d => hecmat%D
473 do ip= 1, hecmat%N
474 w(4*ip-3,1)= d(16*ip-15); w(4*ip-3,2)= d(16*ip-14); w(4*ip-3,3)= d(16*ip-13); w(4*ip-3,4)= d(16*ip-12)
475 w(4*ip-2,1)= d(16*ip-11); w(4*ip-2,2)= d(16*ip-10); w(4*ip-2,3)= d(16*ip- 9); w(4*ip-2,4)= d(16*ip- 8)
476 w(4*ip-1,1)= d(16*ip- 7); w(4*ip-1,2)= d(16*ip- 6); w(4*ip-1,3)= d(16*ip- 5); w(4*ip-1,4)= d(16*ip- 4)
477 w(4*ip ,1)= d(16*ip- 3); w(4*ip ,2)= d(16*ip- 2); w(4*ip ,3)= d(16*ip- 1); w(4*ip ,4)= d(16*ip )
478 enddo
479 start_time= hecmw_wtime()
480 call hecmw_update_4_r (hecmesh, w(:,1), hecmat%NP)
481 call hecmw_update_4_r (hecmesh, w(:,2), hecmat%NP)
482 call hecmw_update_4_r (hecmesh, w(:,3), hecmat%NP)
483 call hecmw_update_4_r (hecmesh, w(:,4), hecmat%NP)
484 end_time= hecmw_wtime()
485 if (present(commtime)) commtime = commtime + end_time - start_time
486 do ip= hecmat%N+1, hecmat%NP
487 d(16*ip-15)= w(4*ip-3,1); d(16*ip-14)= w(4*ip-3,2); d(16*ip-13)= w(4*ip-3,3); d(16*ip-12)= w(4*ip-3,4)
488 d(16*ip-11)= w(4*ip-2,1); d(16*ip-10)= w(4*ip-2,2); d(16*ip- 9)= w(4*ip-2,3); d(16*ip- 8)= w(4*ip-2,4)
489 d(16*ip- 7)= w(4*ip-1,1); d(16*ip- 6)= w(4*ip-1,2); d(16*ip- 5)= w(4*ip-1,3); d(16*ip- 4)= w(4*ip-1,4)
490 d(16*ip- 3)= w(4*ip ,1); d(16*ip- 2)= w(4*ip ,2); d(16*ip- 1)= w(4*ip ,3); d(16*ip )= w(4*ip ,4)
491 enddo
492 deallocate(w)
493 end subroutine hecmw_mat_diag_sr_44
494
495end module hecmw_solver_las_44
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
integer(kind=kint) function, public hecmw_jad_is_initialized()
Definition: hecmw_jadm.f90:56
subroutine, public hecmw_jad_matvec(hecmesh, hecmat, x, y, commtime)
Definition: hecmw_jadm.f90:61
subroutine, public hecmw_cmat_multvec_add(cmat, x, y, len)
integer(kind=kint) function, public hecmw_mat_get_flag_mpcmatvec(hecmat)
subroutine, public hecmw_tvec_44(hecmesh, x, y, commtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_44(hecmesh, hecmat, commtime)
subroutine, public hecmw_mat_diag_sr_44(hecmesh, hecmat, commtime)
subroutine, public hecmw_matvec_44_unset_async
subroutine, public hecmw_ttmattvec_44(hecmesh, hecmat, x, y, w, commtime)
subroutine, public hecmw_matresid_44(hecmesh, hecmat, x, b, r, commtime)
subroutine, public hecmw_matvec_44_set_async(hecmat)
subroutine, public hecmw_matvec_44(hecmesh, hecmat, x, y, time_ax, commtime)
subroutine, public hecmw_ttvec_44(hecmesh, x, y, commtime)
subroutine hecmw_innerproduct_r(hecmesh, ndof, x, y, sum, commtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_4_r(hecmesh, val, n)