111
|
1 ! { dg-do compile }
|
|
2 ! { dg-options "-fcoarray=lib" }
|
|
3 !
|
|
4 ! PR fortran/64771
|
|
5 !
|
|
6 ! Contributed by Alessandro Fanfarill
|
|
7 !
|
|
8 ! Reduced version of the full NAS CG benchmark
|
|
9 !
|
|
10
|
|
11 !-------------------------------------------------------------------------!
|
|
12 ! !
|
|
13 ! N A S P A R A L L E L B E N C H M A R K S 3.3 !
|
|
14 ! !
|
|
15 ! C G !
|
|
16 ! !
|
|
17 !-------------------------------------------------------------------------!
|
|
18 ! !
|
|
19 ! This benchmark is part of the NAS Parallel Benchmark 3.3 suite. !
|
|
20 ! It is described in NAS Technical Reports 95-020 and 02-007 !
|
|
21 ! !
|
|
22 ! Permission to use, copy, distribute and modify this software !
|
|
23 ! for any purpose with or without fee is hereby granted. We !
|
|
24 ! request, however, that all derived work reference the NAS !
|
|
25 ! Parallel Benchmarks 3.3. This software is provided "as is" !
|
|
26 ! without express or implied warranty. !
|
|
27 ! !
|
|
28 ! Information on NPB 3.3, including the technical report, the !
|
|
29 ! original specifications, source code, results and information !
|
|
30 ! on how to submit new results, is available at: !
|
|
31 ! !
|
|
32 ! http://www.nas.nasa.gov/Software/NPB/ !
|
|
33 ! !
|
|
34 ! Send comments or suggestions to npb@nas.nasa.gov !
|
|
35 ! !
|
|
36 ! NAS Parallel Benchmarks Group !
|
|
37 ! NASA Ames Research Center !
|
|
38 ! Mail Stop: T27A-1 !
|
|
39 ! Moffett Field, CA 94035-1000 !
|
|
40 ! !
|
|
41 ! E-mail: npb@nas.nasa.gov !
|
|
42 ! Fax: (650) 604-3957 !
|
|
43 ! !
|
|
44 !-------------------------------------------------------------------------!
|
|
45
|
|
46
|
|
47 c---------------------------------------------------------------------
|
|
48 c
|
|
49 c Authors: M. Yarrow
|
|
50 c C. Kuszmaul
|
|
51 c R. F. Van der Wijngaart
|
|
52 c H. Jin
|
|
53 c
|
|
54 c---------------------------------------------------------------------
|
|
55
|
|
56
|
|
57 c---------------------------------------------------------------------
|
|
58 c---------------------------------------------------------------------
|
|
59 program cg
|
|
60 c---------------------------------------------------------------------
|
|
61 c---------------------------------------------------------------------
|
|
62 implicit none
|
|
63
|
|
64 integer na, nonzer, niter
|
|
65 double precision shift, rcond
|
|
66 parameter( na=75000,
|
|
67 > nonzer=13,
|
|
68 > niter=75,
|
|
69 > shift=60.,
|
|
70 > rcond=1.0d-1 )
|
|
71
|
|
72
|
|
73
|
|
74 integer num_proc_rows, num_proc_cols
|
|
75 parameter( num_proc_rows = 2, num_proc_cols = 2)
|
|
76 integer num_procs
|
|
77 parameter( num_procs = num_proc_cols * num_proc_rows )
|
|
78
|
|
79 integer nz
|
|
80 parameter( nz = na*(nonzer+1)/num_procs*(nonzer+1)+nonzer
|
|
81 > + na*(nonzer+2+num_procs/256)/num_proc_cols )
|
|
82
|
|
83 common / partit_size / naa, nzz,
|
|
84 > npcols, nprows,
|
|
85 > proc_col, proc_row,
|
|
86 > firstrow,
|
|
87 > lastrow,
|
|
88 > firstcol,
|
|
89 > lastcol,
|
|
90 > exch_proc,
|
|
91 > exch_recv_length,
|
|
92 > send_start,
|
|
93 > send_len
|
|
94 integer naa, nzz,
|
|
95 > npcols, nprows,
|
|
96 > proc_col, proc_row,
|
|
97 > firstrow,
|
|
98 > lastrow,
|
|
99 > firstcol,
|
|
100 > lastcol,
|
|
101 > exch_proc,
|
|
102 > exch_recv_length,
|
|
103 > send_start,
|
|
104 > send_len
|
|
105
|
|
106
|
|
107 common / main_int_mem / colidx, rowstr,
|
|
108 > iv, arow, acol
|
|
109 integer colidx(nz), rowstr(na+1),
|
|
110 > iv(2*na+1), arow(nz), acol(nz)
|
|
111
|
|
112
|
|
113 c---------------------------------
|
|
114 c Coarray Decalarations
|
|
115 c---------------------------------
|
|
116 double precision v(na+1)[0:*], aelt(nz)[0:*], a(nz)[0:*],
|
|
117 > x(na/num_proc_rows+2)[0:*],
|
|
118 > z(na/num_proc_rows+2)[0:*],
|
|
119 > p(na/num_proc_rows+2)[0:*],
|
|
120 > q(na/num_proc_rows+2)[0:*],
|
|
121 > r(na/num_proc_rows+2)[0:*],
|
|
122 > w(na/num_proc_rows+2)[0:*]
|
|
123
|
|
124
|
|
125 common /urando/ amult, tran
|
|
126 double precision amult, tran
|
|
127
|
|
128
|
|
129
|
|
130 integer l2npcols
|
|
131 integer reduce_exch_proc(num_proc_cols)
|
|
132 integer reduce_send_starts(num_proc_cols)
|
|
133 integer reduce_send_lengths(num_proc_cols)
|
|
134 integer reduce_recv_lengths(num_proc_cols)
|
|
135 integer reduce_rrecv_starts(num_proc_cols)
|
|
136 c---------------------------------
|
|
137 c Coarray Decalarations
|
|
138 c---------------------------------
|
|
139 integer reduce_recv_starts(num_proc_cols)[0:*]
|
|
140
|
|
141 integer i, j, k, it, me, nprocs, root
|
|
142
|
|
143 double precision zeta, randlc
|
|
144 external randlc
|
|
145 double precision rnorm
|
|
146 c---------------------------------
|
|
147 c Coarray Decalarations
|
|
148 c---------------------------------
|
|
149 double precision norm_temp1(2)[0:*], norm_temp2(2)[0:*]
|
|
150
|
|
151 double precision t, tmax, mflops
|
|
152 double precision u(1), umax(1)
|
|
153 external timer_read
|
|
154 double precision timer_read
|
|
155 character class
|
|
156 logical verified
|
|
157 double precision zeta_verify_value, epsilon, err
|
|
158
|
|
159 c---------------------------------------------------------------------
|
|
160 c Explicit interface for conj_grad, due to coarray args
|
|
161 c---------------------------------------------------------------------
|
|
162 interface
|
|
163
|
|
164 subroutine conj_grad ( colidx,
|
|
165 > rowstr,
|
|
166 > x,
|
|
167 > z,
|
|
168 > a,
|
|
169 > p,
|
|
170 > q,
|
|
171 > r,
|
|
172 > w,
|
|
173 > rnorm,
|
|
174 > l2npcols,
|
|
175 > reduce_exch_proc,
|
|
176 > reduce_send_starts,
|
|
177 > reduce_send_lengths,
|
|
178 > reduce_recv_starts,
|
|
179 > reduce_recv_lengths,
|
|
180 > reduce_rrecv_starts )
|
|
181
|
|
182 common / partit_size / naa, nzz,
|
|
183 > npcols, nprows,
|
|
184 > proc_col, proc_row,
|
|
185 > firstrow,
|
|
186 > lastrow,
|
|
187 > firstcol,
|
|
188 > lastcol,
|
|
189 > exch_proc,
|
|
190 > exch_recv_length,
|
|
191 > send_start,
|
|
192 > send_len
|
|
193
|
|
194 integer naa, nzz,
|
|
195 > npcols, nprows,
|
|
196 > proc_col, proc_row,
|
|
197 > firstrow,
|
|
198 > lastrow,
|
|
199 > firstcol,
|
|
200 > lastcol,
|
|
201 > exch_proc,
|
|
202 > exch_recv_length,
|
|
203 > send_start,
|
|
204 > send_len
|
|
205
|
|
206 double precision x(*),
|
|
207 > z(*),
|
|
208 > a(nzz)
|
|
209 integer colidx(nzz), rowstr(naa+1)
|
|
210
|
|
211 double precision p(*),
|
|
212 > q(*)[0:*],
|
|
213 > r(*)[0:*],
|
|
214 > w(*)[0:*] ! used as work temporary
|
|
215
|
|
216 integer l2npcols
|
|
217 integer reduce_exch_proc(l2npcols)
|
|
218 integer reduce_send_starts(l2npcols)
|
|
219 integer reduce_send_lengths(l2npcols)
|
|
220 integer reduce_recv_starts(l2npcols)[0:*]
|
|
221 integer reduce_recv_lengths(l2npcols)
|
|
222 integer reduce_rrecv_starts(l2npcols)
|
|
223
|
|
224 double precision rnorm
|
|
225
|
|
226 end subroutine
|
|
227
|
|
228 end interface
|
|
229
|
|
230 c---------------------------------------------------------------------
|
|
231 c The call to the conjugate gradient routine:
|
|
232 c---------------------------------------------------------------------
|
|
233 call conj_grad ( colidx,
|
|
234 > rowstr,
|
|
235 > x,
|
|
236 > z,
|
|
237 > a,
|
|
238 > p,
|
|
239 > q,
|
|
240 > r,
|
|
241 > w,
|
|
242 > rnorm,
|
|
243 > l2npcols,
|
|
244 > reduce_exch_proc,
|
|
245 > reduce_send_starts,
|
|
246 > reduce_send_lengths,
|
|
247 > reduce_recv_starts,
|
|
248 > reduce_recv_lengths,
|
|
249 > reduce_rrecv_starts )
|
|
250
|
|
251
|
|
252 sync all
|
|
253
|
|
254 end ! end main
|
|
255
|
|
256 c---------------------------------------------------------------------
|
|
257 c---------------------------------------------------------------------
|
|
258 subroutine conj_grad ( colidx,
|
|
259 > rowstr,
|
|
260 > x,
|
|
261 > z,
|
|
262 > a,
|
|
263 > p,
|
|
264 > q,
|
|
265 > r,
|
|
266 > w,
|
|
267 > rnorm,
|
|
268 > l2npcols,
|
|
269 > reduce_exch_proc,
|
|
270 > reduce_send_starts,
|
|
271 > reduce_send_lengths,
|
|
272 > reduce_recv_starts,
|
|
273 > reduce_recv_lengths,
|
|
274 > reduce_rrecv_starts )
|
|
275 c---------------------------------------------------------------------
|
|
276 c---------------------------------------------------------------------
|
|
277
|
|
278 c---------------------------------------------------------------------
|
|
279 c Floaging point arrays here are named as in NPB1 spec discussion of
|
|
280 c CG algorithm
|
|
281 c---------------------------------------------------------------------
|
|
282
|
|
283 implicit none
|
|
284
|
|
285 c include 'cafnpb.h'
|
|
286
|
|
287 common / partit_size / naa, nzz,
|
|
288 > npcols, nprows,
|
|
289 > proc_col, proc_row,
|
|
290 > firstrow,
|
|
291 > lastrow,
|
|
292 > firstcol,
|
|
293 > lastcol,
|
|
294 > exch_proc,
|
|
295 > exch_recv_length,
|
|
296 > send_start,
|
|
297 > send_len
|
|
298 integer naa, nzz,
|
|
299 > npcols, nprows,
|
|
300 > proc_col, proc_row,
|
|
301 > firstrow,
|
|
302 > lastrow,
|
|
303 > firstcol,
|
|
304 > lastcol,
|
|
305 > exch_proc,
|
|
306 > exch_recv_length,
|
|
307 > send_start,
|
|
308 > send_len
|
|
309
|
|
310
|
|
311
|
|
312 double precision x(*),
|
|
313 > z(*),
|
|
314 > a(nzz)
|
|
315 integer colidx(nzz), rowstr(naa+1)
|
|
316
|
|
317 double precision p(*),
|
|
318 > q(*)[0:*],
|
|
319 > r(*)[0:*],
|
|
320 > w(*)[0:*] ! used as work temporary
|
|
321
|
|
322 integer l2npcols
|
|
323 integer reduce_exch_proc(l2npcols)
|
|
324 integer reduce_send_starts(l2npcols)
|
|
325 integer reduce_send_lengths(l2npcols)
|
|
326 integer reduce_recv_starts(l2npcols)[0:*]
|
|
327 integer reduce_recv_lengths(l2npcols)
|
|
328 integer reduce_rrecv_starts(l2npcols)
|
|
329
|
|
330 integer recv_start_idx, recv_end_idx, send_start_idx,
|
|
331 > send_end_idx, recv_length
|
|
332
|
|
333 integer i, j, k, ierr
|
|
334 integer cgit, cgitmax
|
|
335
|
|
336 double precision, save :: d[0:*], rho[0:*]
|
|
337 double precision sum, rho0, alpha, beta, rnorm
|
|
338
|
|
339 external timer_read
|
|
340 double precision timer_read
|
|
341
|
|
342 data cgitmax / 25 /
|
|
343
|
|
344
|
|
345 return
|
|
346 end ! end of routine conj_grad
|
|
347
|