Package solvcon :: Package kerpak :: Module cuse
[hide private]
[frames] | no frames]

Source Code for Module solvcon.kerpak.cuse

   1  # -*- coding: UTF-8 -*- 
   2  # 
   3  # Copyright (C) 2008-2011 Yung-Yu Chen <yyc@solvcon.net>. 
   4  # 
   5  # This program is free software; you can redistribute it and/or modify 
   6  # it under the terms of the GNU General Public License as published by 
   7  # the Free Software Foundation; either version 2 of the License, or 
   8  # (at your option) any later version. 
   9  # 
  10  # This program is distributed in the hope that it will be useful, 
  11  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  13  # GNU General Public License for more details. 
  14  # 
  15  # You should have received a copy of the GNU General Public License along 
  16  # with this program; if not, write to the Free Software Foundation, Inc., 
  17  # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
  18   
  19  """ 
  20  Second-order, multi-dimensional CESE method with CUDA enabled. 
  21   
  22  Four functionalities are defined: (i) CuseGlue, (ii) CFL (CflHook), (iii) 
  23  Convergence (ConvergeAnchor, ConvergeHook), and (iv) Prober (Probe, 
  24  ProbeAnchor, ProbeHook). 
  25  """ 
  26   
  27  CUDA_RAISE_ON_FAIL = False 
  28   
  29  from ctypes import Structure 
  30  from solvcon.gendata import AttributeDict 
  31  from solvcon.solver import BlockSolver 
  32  from solvcon.case import BlockCase 
  33  from solvcon.boundcond import BC, periodic 
  34  from solvcon.anchor import Anchor, GlueAnchor 
  35  from solvcon.hook import Hook, BlockHook 
36 37 -class CudaDataManager(AttributeDict):
38 """ 39 Customized dictionary managing solver data on GPU memory. Each item in 40 the object represents a block of allocated memory on GPU, i.e., 41 solvcon.scuda.GpuMemory object. 42 """
43 - def __init__(self, *args, **kw):
44 from ctypes import sizeof 45 self.svr = kw.pop('svr') 46 super(CudaDataManager, self).__init__(*args, **kw) 47 self.exd = self.svr._exedatatype_(svr=self.svr) 48 self.gexd = self.svr.scu.alloc(sizeof(self.exd)) 49 # allocate all arrays. 50 for name in self.svr.cuarr_map: 51 shf = self.svr.cuarr_map[name] 52 carr = getattr(self.svr, name) 53 self[name] = self.svr.scu.alloc(carr.nbytes)
54 - def __set_cuda_pointer(self, exc, svr, aname, shf):
55 from ctypes import c_void_p 56 arr = getattr(svr, aname) 57 gptr = self[aname].gptr 58 nt = 1 59 if len(arr.shape) > 1: 60 for width in arr.shape[1:]: 61 nt *= width 62 shf *= nt * arr.itemsize 63 addr = gptr.value + shf if gptr.value is not None else None 64 setattr(exc, aname, c_void_p(addr))
65
66 - def update_exd(self, exd=None):
67 if exd is None: 68 exd = self.svr.exd 69 # copy everything. 70 for key, ctp in exd._fields_: 71 setattr(self.exd, key, getattr(exd, key)) 72 # shift pointers. 73 for name in self.svr.cuarr_map: 74 shf = self.svr.cuarr_map[name] 75 self.__set_cuda_pointer(self.exd, self.svr, name, shf) 76 # send to GPU. 77 self.exd_to_gpu()
78
79 - def exd_to_gpu(self):
80 from ctypes import byref, sizeof 81 scu = self.svr.scu 82 scu.cudaMemcpy(self.gexd.gptr, byref(self.exd), 83 sizeof(self.exd), scu.cudaMemcpyHostToDevice)
84
85 - def arr_to_gpu(self, *args):
86 scu = self.svr.scu 87 names = self if not args else args 88 for name in names: 89 scu.memcpy(self[name], getattr(self.svr, name))
90 - def arr_from_gpu(self, *args):
91 scu = self.svr.scu 92 names = self if not args else args 93 for name in names: 94 scu.memcpy(getattr(self.svr, name), self[name])
95
96 - def free_all(self):
97 scu = self.svr.scu 98 scu.free(self.gexd) 99 for name in self: 100 scu.free(self[name])
101
102 ############################################################################### 103 # Solver. 104 ############################################################################### 105 106 -class CuseSolverExedata(Structure):
107 """ 108 Data structure to interface with C. 109 """ 110 from ctypes import c_int, c_double, c_void_p 111 _fields_ = [ 112 # inherited. 113 ('ncore', c_int), ('neq', c_int), 114 ('time', c_double), ('time_increment', c_double), 115 # mesh shape. 116 ('ndim', c_int), ('nnode', c_int), ('nface', c_int), ('ncell', c_int), 117 ('nbound', c_int), 118 ('ngstnode', c_int), ('ngstface', c_int), ('ngstcell', c_int), 119 # group shape. 120 ('ngroup', c_int), ('gdlen', c_int), 121 # parameter shape. 122 ('nsca', c_int), ('nvec', c_int), 123 # scheme. 124 ('alpha', c_int), ('sigma0', c_double), 125 ('taylor', c_double), ('cnbfac', c_double), ('sftfac', c_double), 126 ('taumin', c_double), ('tauscale', c_double), 127 # function pointer. 128 ('jacofunc', c_void_p), 129 # meta array. 130 ('fctpn', c_void_p), ('cltpn', c_void_p), ('clgrp', c_void_p), 131 ('grpda', c_void_p), 132 # metric array. 133 ('ndcrd', c_void_p), ('fccnd', c_void_p), ('fcnml', c_void_p), 134 ('clcnd', c_void_p), ('clvol', c_void_p), 135 ('cecnd', c_void_p), ('cevol', c_void_p), ('sfmrc', c_void_p), 136 # connectivity array. 137 ('fcnds', c_void_p), ('fccls', c_void_p), 138 ('clnds', c_void_p), ('clfcs', c_void_p), 139 # solution array. 140 ('amsca', c_void_p), ('amvec', c_void_p), 141 ('sol', c_void_p), ('dsol', c_void_p), ('solt', c_void_p), 142 ('soln', c_void_p), ('dsoln', c_void_p), 143 ('cfl', c_void_p), ('ocfl', c_void_p), 144 ] 145 del c_int, c_double, c_void_p 146
147 - def __set_pointer(self, svr, aname, shf):
148 from ctypes import c_void_p 149 ptr = getattr(svr, aname)[shf:].ctypes._as_parameter_ 150 setattr(self, aname, ptr)
151
152 - def __init__(self, *args, **kw):
153 from ctypes import c_int, c_double, POINTER, c_void_p, byref, cast 154 svr = kw.pop('svr', None) 155 super(CuseSolverExedata, self).__init__(*args, **kw) 156 if svr == None: 157 return 158 # function pointer. 159 self.jacofunc = cast(svr._jacofunc_, c_void_p).value 160 for key in ('ncore', 'neq', 'time', 'time_increment', 161 'ndim', 'nnode', 'nface', 'ncell', 'nbound', 'ngstnode', 162 'ngstface', 'ngstcell', 'ngroup', 'gdlen', 'nsca', 'nvec', 163 'alpha', 'sigma0', 'taylor', 'cnbfac', 'sftfac', 164 'taumin', 'tauscale'): 165 setattr(self, key, getattr(svr, key)) 166 # arrays. 167 for aname in ('grpda', 'sfmrc'): 168 self.__set_pointer(svr, aname, 0) 169 for aname in ('ndcrd',): 170 self.__set_pointer(svr, aname, svr.ngstnode) 171 for aname in ('fctpn', 'fcnds', 'fccls', 'fccnd', 'fcnml'): 172 self.__set_pointer(svr, aname, svr.ngstface) 173 for aname in ('cltpn', 'clgrp', 'clcnd', 'clvol', 'cecnd', 'cevol', 174 'clnds', 'clfcs', 'amsca', 'amvec', 175 'sol', 'dsol', 'solt', 'soln', 'dsoln', 'cfl', 'ocfl'): 176 self.__set_pointer(svr, aname, svr.ngstcell)
177
178 -class CuseSolver(BlockSolver):
179 """ 180 The base solver class for second-order, multi-dimensional CESE code with 181 CUDA enabled. 182 183 @cvar _gdlen_: length per group data. Must be overridden. 184 @ctype _gdlen_: int 185 @cvar _jacofunc_: ctypes function to Jacobian calculator. Must be 186 overridden. 187 @ctype _jacofunc_: ctypes.FuncPtr 188 @cvar _clib_mcu: ctypes library for physical model on GPU. 189 @ctype _clib_mcu: ctypes.CDLL 190 191 @ivar debug: flag for debugging. 192 @itype debug: bool 193 194 @ivar scu: CUDA wrapper. 195 @itype scu: solvcon.scuda.Scuda 196 @ivar ncuth: number of thread per block for CUDA. 197 @itype ncuth: int 198 199 @ivar alpha: parameter to the weighting function. 200 @itype alpha: int 201 @ivar sigma0: constant parameter for W-3 scheme; should be of order of 1. 202 Default is 3.0. 203 @itype sigma0: float 204 @ivar taylor: factor for Taylor's expansion; 0 off, 1 on. 205 @itype taylor: float 206 @ivar cnbfac: factor to use BCE centroid, othersize midpoint; 0 off, 1 on. 207 @itype cnbfac: float 208 @ivar sftfac: factor to shift gradient shape; 0 off, 1 on. 209 @itype sftfac: float 210 @ivar taumin: the lower bound of tau. 211 @itype taumin: float 212 @ivar tauscale: scaling of tau. 213 @itype tauscale: float 214 215 @ivar grpda: group data. 216 @ivar cecnd: solution points for CCEs and BCEs. 217 @ivar cevol: CCE and BCE volumes. 218 @ivar sfmrc: sub-face geometry. It is a 5-dimensional array, and the shape 219 is (ncell, CLMFC, FCMND, 2, NDIM). sfmrc[...,0,:] are centers, 220 while sfmrc[...,1,:] are normal vectors. 221 @ivar amsca: Parameter scalar array. 222 @ivar amvec: Parameter vector array. 223 @ivar solt: temporal diffrentiation of solution. 224 @ivar sol: current solution. 225 @ivar soln: next solution. 226 @ivar dsol: current gradient of solution. 227 @ivar dsoln: next gradient of solution. 228 @ivar cfl: CFL number. 229 @ivar ocfl: original CFL number. 230 """ 231 232 _exedatatype_ = CuseSolverExedata 233 _interface_init_ = ['cecnd', 'cevol'] 234 _solution_array_ = ['sol', 'soln', 'dsol', 'dsoln'] 235 236 _gdlen_ = None 237 _jacofunc_ = None 238 _clib_mcu = None 239
240 - def __init__(self, blk, *args, **kw):
241 from numpy import empty 242 self.debug = kw.pop('debug', False) 243 # shape parameters. 244 nsca = kw.pop('nsca', 0) 245 nvec = kw.pop('nvec', 0) 246 # CUDA parameters. 247 self.ncuth = kw.pop('ncuth', 0) 248 self.scu = None 249 # scheme parameters. 250 self.alpha = int(kw.pop('alpha', 0)) 251 self.sigma0 = int(kw.pop('sigma0', 3.0)) 252 self.taylor = float(kw.pop('taylor', 1.0)) # dirty hack. 253 self.cnbfac = float(kw.pop('cnbfac', 1.0)) # dirty hack. 254 self.sftfac = float(kw.pop('sftfac', 1.0)) # dirty hack. 255 self.taumin = float(kw.pop('taumin', 0.0)) 256 self.tauscale = float(kw.pop('tauscale', 1.0)) 257 # super call. 258 kw.setdefault('enable_tpool', False) 259 super(CuseSolver, self).__init__(blk, *args, **kw) 260 fpdtype = self.fpdtype 261 ndim = self.ndim 262 ncell = self.ncell 263 ngstcell = self.ngstcell 264 neq = self.neq 265 ngroup = self.ngroup 266 # CUDA manager. 267 self.cumgr = None 268 self.cuarr_map = dict() 269 for key in ('ndcrd',): 270 self.cuarr_map[key] = self.ngstnode 271 for key in ('fcnds', 'fccls', 'fccnd', 'fcnml'): 272 self.cuarr_map[key] = self.ngstface 273 for key in ('clfcs', 'clcnd', 'cltpn'): 274 self.cuarr_map[key] = self.ngstcell 275 # meta array. 276 self.grpda = empty((ngroup, self._gdlen_), dtype=fpdtype) 277 self.cuarr_map['grpda'] = 0 278 # dual mesh. 279 self.cecnd = empty((ngstcell+ncell, self.CLMFC+1, ndim), dtype=fpdtype) 280 self.cevol = empty((ngstcell+ncell, self.CLMFC+1), dtype=fpdtype) 281 self.cuarr_map['cecnd'] = self.cuarr_map['cevol'] = self.ngstcell 282 self.sfmrc = empty((ncell, self.CLMFC, self.FCMND, 2, ndim), 283 dtype=fpdtype) 284 self.cuarr_map['sfmrc'] = 0 285 # solutions. 286 self.amsca = empty((ngstcell+ncell, nsca), dtype=fpdtype) 287 self.amvec = empty((ngstcell+ncell, nvec, ndim), dtype=fpdtype) 288 self.solt = empty((ngstcell+ncell, neq), dtype=fpdtype) 289 self.sol = empty((ngstcell+ncell, neq), dtype=fpdtype) 290 self.soln = empty((ngstcell+ncell, neq), dtype=fpdtype) 291 self.dsol = empty((ngstcell+ncell, neq, ndim), dtype=fpdtype) 292 self.dsoln = empty((ngstcell+ncell, neq, ndim), dtype=fpdtype) 293 self.cfl = empty(ngstcell+ncell, dtype=fpdtype) 294 self.ocfl = empty(ngstcell+ncell, dtype=fpdtype) 295 for key in ('amsca', 'amvec', 'solt', 'sol', 'soln', 'dsol', 'dsoln', 296 'cfl', 'ocfl'): 297 self.cuarr_map[key] = self.ngstcell
298 299 @property
300 - def gdlen(self):
301 """ 302 Length per group data. 303 """ 304 return self._gdlen_
305 @property
306 - def nsca(self):
307 return self.amsca.shape[1]
308 @property
309 - def nvec(self):
310 return self.amvec.shape[1]
311
312 - def bind(self):
313 from solvcon.conf import env 314 self.scu = env.scu if self.ncuth else None 315 if self.scu: 316 self.cumgr = CudaDataManager(svr=self) 317 super(CuseSolver, self).bind()
318 - def unbind(self):
319 if self.scu and self.cumgr is not None: 320 self.cumgr.free_all() 321 self.cumgr = None 322 super(CuseSolver, self).unbind()
323
324 - def init(self, **kw):
325 from ctypes import byref 326 self._clib_cuse_c.prepare_ce(byref(self.exd)) 327 super(CuseSolver, self).init(**kw) 328 self._clib_cuse_c.prepare_sf(byref(self.exd)) 329 if self.scu: self.cumgr.arr_to_gpu() 330 self.mesg('cuda is %s\n'%('on' if self.scu else 'off'))
331
332 - def boundcond(self):
333 if self.scu: 334 self.cumgr.update_exd() 335 self.cumgr.arr_to_gpu('sol', 'soln', 'dsol', 'dsoln') 336 if self.nsca: self.cumgr.arr_to_gpu('amsca') 337 if self.nvec: self.cumgr.arr_to_gpu('amvec') 338 super(CuseSolver, self).boundcond() 339 self.call_non_interface_bc('soln') 340 if self.scu: 341 self.cumgr.arr_from_gpu('sol', 'soln') 342 self.call_non_interface_bc('dsoln') 343 if self.scu: 344 self.cumgr.arr_from_gpu('dsol', 'dsoln')
345 346 ########################################################################### 347 # parallelization. 348 ###########################################################################
349 - def pushibc(self, arrname, bc, recvn, worker=None):
350 """ 351 Push data toward selected interface which connect to blocks with larger 352 serial number than myself. If CUDA is present, data are first uploaded 353 to and then downloaded from GPU. 354 355 @param arrname: name of the array in the object to exchange. 356 @type arrname: str 357 @param bc: the interface BC to push. 358 @type bc: solvcon.boundcond.interface 359 @param recvn: serial number of the peer to exchange data with. 360 @type recvn: int 361 @keyword worker: the wrapping worker object for parallel processing. 362 @type worker: solvcon.rpc.Worker 363 """ 364 from numpy import empty 365 conn = worker.pconns[bc.rblkn] 366 ngstcell = self.ngstcell 367 arr = getattr(self, arrname) 368 shape = list(arr.shape) 369 shape[0] = bc.rclp.shape[0] 370 # for CUDA up/download. 371 if self.scu: 372 stride = arr.itemsize 373 for size in arr.shape[1:]: 374 stride *= size 375 # ask the receiver for data. 376 rarr = empty(shape, dtype=arr.dtype) 377 conn.recvarr(rarr) # comm. 378 # set array and upload to GPU. 379 slct = bc.rclp[:,0] + ngstcell 380 if self.scu: 381 gslct = self.scu.alloc(slct.nbytes) 382 self.scu.memcpy(gslct, slct) 383 gbrr = self.scu.alloc(rarr.nbytes) 384 self.scu.memcpy(gbrr, rarr) 385 garr = self.cumgr[arrname] 386 self._clib_cuse_cu.slct_io(self.ncuth, 0, len(slct), stride, 387 gslct.gptr, garr.gptr, gbrr.gptr) 388 else: 389 arr[slct] = rarr[:] 390 # download from GPU and get array. 391 slct = bc.rclp[:,2] + ngstcell 392 if self.scu: 393 self.scu.memcpy(gslct, slct) 394 self._clib_cuse_cu.slct_io(self.ncuth, 1, len(slct), stride, 395 gslct.gptr, garr.gptr, gbrr.gptr) 396 self.scu.memcpy(rarr, gbrr) 397 self.scu.free(gbrr) 398 self.scu.free(gslct) 399 else: 400 rarr[:] = arr[slct] 401 # provide the receiver with data. 402 conn.sendarr(rarr) # comm.
403
404 - def pullibc(self, arrname, bc, sendn, worker=None):
405 """ 406 Pull data from the interface determined by the serial of peer. If CUDA 407 is present, data are first downloaded from GPU and then uploaded to 408 GPU. 409 410 @param arrname: name of the array in the object to exchange. 411 @type arrname: str 412 @param bc: the interface BC to pull. 413 @type bc: solvcon.boundcond.interface 414 @param sendn: serial number of the peer to exchange data with. 415 @type sendn: int 416 @keyword worker: the wrapping worker object for parallel processing. 417 @type worker: solvcon.rpc.Worker 418 """ 419 from numpy import empty 420 conn = worker.pconns[bc.rblkn] 421 ngstcell = self.ngstcell 422 arr = getattr(self, arrname) 423 shape = list(arr.shape) 424 shape[0] = bc.rclp.shape[0] 425 # for CUDA up/download. 426 if self.scu: 427 stride = arr.itemsize 428 for size in arr.shape[1:]: 429 stride *= size 430 # download from GPU and get array. 431 slct = bc.rclp[:,2] + ngstcell 432 rarr = empty(shape, dtype=arr.dtype) 433 if self.scu: 434 gslct = self.scu.alloc(slct.nbytes) 435 self.scu.memcpy(gslct, slct) 436 gbrr = self.scu.alloc(rarr.nbytes) 437 garr = self.cumgr[arrname] 438 self._clib_cuse_cu.slct_io(self.ncuth, 1, len(slct), stride, 439 gslct.gptr, garr.gptr, gbrr.gptr) 440 self.scu.memcpy(rarr, gbrr) 441 else: 442 rarr[:] = arr[slct] 443 # provide sender the data. 444 conn.sendarr(rarr) # comm. 445 # ask data from sender. 446 conn.recvarr(rarr) # comm. 447 # set array and upload to GPU. 448 slct = bc.rclp[:,0] + ngstcell 449 if self.scu: 450 self.scu.memcpy(gslct, slct) 451 self.scu.memcpy(gbrr, rarr) 452 self._clib_cuse_cu.slct_io(self.ncuth, 0, len(slct), stride, 453 gslct.gptr, garr.gptr, gbrr.gptr) 454 self.scu.free(gbrr) 455 self.scu.free(gslct) 456 else: 457 arr[slct] = rarr[:]
458 459 ########################################################################### 460 # utility. 461 ###########################################################################
462 - def locate_point(self, *args):
463 """ 464 Locate the cell index where the input coordinate is. 465 """ 466 from ctypes import byref, c_int 467 from numpy import array 468 crd = array(args, dtype='float64') 469 picl = c_int(0) 470 pifl = c_int(0) 471 pjcl = c_int(0) 472 pjfl = c_int(0) 473 self._clib_cuse_c.locate_point(byref(self.exd), 474 crd.ctypes._as_parameter_, 475 byref(picl), byref(pifl), byref(pjcl), byref(pjfl)) 476 return picl.value, pifl.value, pjcl.value, pjfl.value
477 478 ########################################################################### 479 # library. 480 ########################################################################### 481 from solvcon.dependency import getcdll 482 __clib_cuse_c = { 483 2: getcdll('cuse2d_c'), 484 3: getcdll('cuse3d_c'), 485 } 486 __clib_cuse_cu = { 487 2: getcdll('cuse2d_cu', raise_on_fail=CUDA_RAISE_ON_FAIL), 488 3: getcdll('cuse3d_cu', raise_on_fail=CUDA_RAISE_ON_FAIL), 489 } 490 del getcdll 491 @property
492 - def _clib_cuse_c(self):
493 return self.__clib_cuse_c[self.ndim]
494 @property
495 - def _clib_cuse_cu(self):
496 return self.__clib_cuse_cu[self.ndim]
497 498 ########################################################################### 499 # marching algorithm. 500 ########################################################################### 501 MMNAMES = list() 502 503 MMNAMES.append('update')
504 - def update(self, worker=None):
505 if self.debug: self.mesg('update') 506 # exchange solution and gradient. 507 self.sol, self.soln = self.soln, self.sol 508 self.dsol, self.dsoln = self.dsoln, self.dsol 509 # exchange pointers in execution data. 510 exd = self.exd 511 exd.sol, exd.soln = exd.soln, exd.sol 512 exd.dsol, exd.dsoln = exd.dsoln, exd.dsol 513 # exchange items in GPU execution data. 514 if self.scu: 515 cumgr = self.cumgr 516 cumgr.sol, cumgr.soln = cumgr.soln, cumgr.sol 517 cumgr.dsol, cumgr.dsoln = cumgr.dsoln, cumgr.dsol 518 cumgr.update_exd() 519 if self.debug: self.mesg(' done.\n')
520 521 MMNAMES.append('ibcam')
522 - def ibcam(self, worker=None):
523 if self.debug: self.mesg('ibcam') 524 if worker: 525 if self.nsca: self.exchangeibc('amsca', worker=worker) 526 if self.nvec: self.exchangeibc('amvec', worker=worker) 527 if self.debug: self.mesg(' done.\n')
528 529 MMNAMES.append('calcsolt')
530 - def calcsolt(self, worker=None):
531 from ctypes import byref 532 if self.debug: self.mesg('calcsolt') 533 if self.scu: 534 self._clib_mcu.calc_solt(self.ncuth, 535 byref(self.cumgr.exd), self.cumgr.gexd.gptr) 536 else: 537 self._clib_cuse_c.calc_solt(byref(self.exd), 538 -self.ngstcell, self.ncell) 539 if self.debug: self.mesg(' done.\n')
540 MMNAMES.append('calcsoln')
541 - def calcsoln(self, worker=None):
542 from ctypes import byref 543 if self.debug: self.mesg('calcsoln') 544 if self.scu: 545 self._clib_mcu.calc_soln(self.ncuth, 546 byref(self.cumgr.exd), self.cumgr.gexd.gptr) 547 else: 548 self._clib_cuse_c.calc_soln(byref(self.exd)) 549 if self.debug: self.mesg(' done.\n')
550 551 MMNAMES.append('ibcsoln')
552 - def ibcsoln(self, worker=None):
553 if self.debug: self.mesg('ibcsoln') 554 if worker: self.exchangeibc('soln', worker=worker) 555 if self.debug: self.mesg(' done.\n')
556 MMNAMES.append('bcsoln')
557 - def bcsoln(self, worker=None):
558 if self.debug: self.mesg('bcsoln') 559 self.call_non_interface_bc('soln') 560 if self.debug: self.mesg(' done.\n')
561 562 MMNAMES.append('calccfl')
563 - def calccfl(self, worker=None):
564 raise NotImplementedError
565 566 MMNAMES.append('calcdsoln')
567 - def calcdsoln(self, worker=None):
568 from ctypes import byref 569 if self.debug: self.mesg('calcdsoln') 570 if self.scu: 571 self._clib_mcu.calc_dsoln_w3(self.ncuth, 572 byref(self.cumgr.exd), self.cumgr.gexd.gptr) 573 else: 574 self._clib_cuse_c.calc_dsoln_w3(byref(self.exd)) 575 if self.debug: self.mesg(' done.\n')
576 577 MMNAMES.append('ibcdsoln')
578 - def ibcdsoln(self, worker=None):
579 if self.debug: self.mesg('ibcdsoln') 580 if worker: self.exchangeibc('dsoln', worker=worker) 581 if self.debug: self.mesg(' done.\n')
582 MMNAMES.append('bcdsoln')
583 - def bcdsoln(self, worker=None):
584 if self.debug: self.mesg('bcdsoln') 585 self.call_non_interface_bc('dsoln') 586 if self.debug: self.mesg(' done.\n')
587
588 ############################################################################### 589 # Case. 590 ############################################################################### 591 592 -class CuseCase(BlockCase):
593 """ 594 Inviscid aerodynamic case for the Euler equations. 595 """ 596 defdict = { 597 'execution.verified_norm': -1.0, 598 'solver.debug_cese': False, 599 'solver.ncuth': 0, 600 'solver.alpha': 1, 601 'solver.sigma0': 3.0, 602 'solver.taylor': 1.0, 603 'solver.cnbfac': 1.0, 604 'solver.sftfac': 1.0, 605 'solver.taumin': None, 606 'solver.tauscale': None, 607 }
608 - def make_solver_keywords(self):
609 kw = super(CuseCase, self).make_solver_keywords() 610 kw['debug'] = self.solver.debug_cese 611 kw['ncuth'] = int(self.solver.ncuth) 612 kw['alpha'] = int(self.solver.alpha) 613 for key in ('sigma0', 'taylor', 'cnbfac', 'sftfac', 614 'taumin', 'tauscale',): 615 val = self.solver.get(key) 616 if val != None: kw[key] = float(val) 617 return kw
618
619 ############################################################################### 620 # Boundary conditions. 621 ############################################################################### 622 623 -class CuseBC(BC):
624 """ 625 Basic BC class for the cuse series solvers. This class support glue BCs. 626 627 @cvar _ghostgeom_: indicate which ghost geometry processor to use. 628 @ctype _ghostgeom_: str 629 """ 630 631 _ghostgeom_ = None 632 633 ########################################################################### 634 # library. 635 ########################################################################### 636 from solvcon.dependency import getcdll 637 __clib_cuseb_c = { 638 2: getcdll('cuseb2d_c'), 639 3: getcdll('cuseb3d_c'), 640 } 641 __clib_cuseb_cu = { 642 2: getcdll('cuseb2d_cu', raise_on_fail=CUDA_RAISE_ON_FAIL), 643 3: getcdll('cuseb3d_cu', raise_on_fail=CUDA_RAISE_ON_FAIL), 644 } 645 del getcdll 646 @property
647 - def _clib_cuseb_c(self):
648 return self.__clib_cuseb_c[self.svr.ndim]
649 @property
650 - def _clib_cuseb_cu(self):
651 return self.__clib_cuseb_cu[self.svr.ndim]
652
653 - def bind(self):
654 super(CuseBC, self).bind() 655 if self.svr.scu: 656 for key in ('facn', 'value',): 657 nbytes = getattr(self, key).nbytes 658 setattr(self, 'cu'+key, self.svr.scu.alloc(nbytes))
659 - def unbind(self):
660 if self.svr.scu: 661 for key in ('facn', 'value',): 662 gptr = getattr(self, 'cu'+key, None) 663 if gptr is not None: 664 self.svr.scu.free(getattr(self, 'cu'+key))
665
666 - def init(self, **kw):
667 from ctypes import byref, c_int 668 super(CuseBC, self).init(**kw) 669 # process ghost geometry. 670 getattr(self._clib_cuseb_c, 'ghostgeom_'+self._ghostgeom_)( 671 byref(self.svr.exd), c_int(self.facn.shape[0]), 672 self.facn.ctypes._as_parameter_) 673 # initialize GPU data. 674 if self.svr.scu: 675 for key in ('facn', 'value',): 676 self.svr.scu.memcpy(getattr(self, 'cu'+key), getattr(self, key))
677
678 - def soln(self):
679 """ 680 Update ghost cells after self.svr.calcsoln. 681 """ 682 pass
683 - def dsoln(self):
684 """ 685 Update ghost cells after self.svr.calcdsoln. 686 """ 687 pass
688
689 -class CuseNonrefl(CuseBC):
690 _ghostgeom_ = 'mirror'
691 - def soln(self):
692 from ctypes import byref 693 svr = self.svr 694 if svr.scu: 695 self._clib_cuseb_cu.bound_nonrefl_soln(svr.ncuth, 696 svr.cumgr.gexd.gptr, self.facn.shape[0], self.cufacn.gptr) 697 else: 698 self._clib_cuseb_c.bound_nonrefl_soln(byref(svr.exd), 699 self.facn.shape[0], self.facn.ctypes._as_parameter_)
700 - def dsoln(self):
701 from ctypes import byref 702 svr = self.svr 703 if svr.scu: 704 self._clib_cuseb_cu.bound_nonrefl_dsoln(svr.ncuth, 705 byref(svr.cumgr.exd), 706 svr.cumgr.gexd.gptr, self.facn.shape[0], self.cufacn.gptr) 707 else: 708 self._clib_cuseb_c.bound_nonrefl_dsoln(byref(svr.exd), 709 self.facn.shape[0], self.facn.ctypes._as_parameter_)
710
711 -class CusePeriodic(periodic):
712 """ 713 General periodic boundary condition for sequential runs. 714 """
715 - def init(self, **kw):
716 svr = self.svr 717 ngstcell = svr.ngstcell 718 ngstface = svr.ngstface 719 facn = self.facn 720 slctm = self.rclp[:,0] + ngstcell 721 slctr = self.rclp[:,1] + ngstcell 722 # move coordinates. 723 shf = svr.cecnd[slctr,0,:] - svr.fccnd[facn[:,2]+ngstface,:] 724 svr.cecnd[slctm,0,:] = svr.fccnd[facn[:,0]+ngstface,:] + shf
725 - def soln(self):
726 svr = self.svr 727 slctm = self.rclp[:,0] + svr.ngstcell 728 slctr = self.rclp[:,1] + svr.ngstcell 729 if svr.scu: 730 svr.cumgr.arr_from_gpu('soln') 731 svr.soln[slctm,:] = svr.soln[slctr,:] 732 if svr.scu: 733 svr.cumgr.arr_to_gpu('soln')
734 - def dsoln(self):
735 svr = self.svr 736 slctm = self.rclp[:,0] + svr.ngstcell 737 slctr = self.rclp[:,1] + svr.ngstcell 738 if svr.scu: 739 svr.cumgr.arr_from_gpu('dsoln') 740 svr.dsoln[slctm,:,:] = svr.dsoln[slctr,:,:] 741 if svr.scu: 742 svr.cumgr.arr_to_gpu('dsoln')
743
744 ################################################################################ 745 # CUDA downloader. 746 ################################################################################ 747 748 -class CudaUpDownAnchor(Anchor):
749 """ 750 Upload and download variable arrays between GPU device memory and CPU host 751 memory. By default preloop() and premarch() callbacks do uploading, and 752 postmarch() and postloop() do downloading. The default behavior 753 is compatible to solvcon.anchor.VtkAnchor. Subclasses can override 754 callback methods for more complicated operations. 755 756 @ivar rsteps: steps to run. 757 @itype rsteps: int 758 @ivar uparrs: names of arrays to be uploaded from host to device. 759 @itype uparrs: list 760 @ivar downarrs: names of arrays to be downloaded from device to host. 761 @itype downarrs: list 762 """
763 - def __init__(self, svr, **kw):
764 self.rsteps = kw.pop('rsteps', 1) 765 self.uparrs = kw.pop('uparrs', list()) 766 self.downarrs = kw.pop('downarrs', list()) 767 super(CudaUpDownAnchor, self).__init__(svr, **kw)
768 - def _upload(self):
769 if self.svr.scu and self.uparrs: 770 self.svr.cumgr.arr_to_gpu(*self.uparrs)
771 - def _download(self):
772 if self.svr.scu and self.downarrs: 773 print self.downarrs 774 self.svr.cumgr.arr_from_gpu(*self.downarrs)
775 - def preloop(self):
776 self._upload()
777 - def premarch(self):
778 istep = self.svr.step_global 779 rsteps = self.rsteps 780 if istep > 0 and istep%rsteps == 0: 781 self._upload()
782 - def postmarch(self):
783 istep = self.svr.step_global 784 rsteps = self.rsteps 785 if istep > 0 and istep%rsteps == 0: 786 self._download()
787 - def postloop(self):
788 istep = self.svr.step_global 789 rsteps = self.rsteps 790 if istep%rsteps != 0: 791 self.process(istep)
792
793 ################################################################################ 794 # Glue. 795 ################################################################################ 796 797 -class CuseGlue(GlueAnchor):
798 """ 799 Use Glue class to glue specified BC objects of a solver object. The class 800 is only valid for CuseSolver. 801 """ 802 KEYS_ENABLER = ('cecnd',)
803 - def __init__(self, svr, **kw):
804 assert isinstance(svr, CuseSolver) 805 super(CuseGlue, self).__init__(svr, **kw)
806
807 ################################################################################ 808 # CFL. 809 ################################################################################ 810 811 -class CflAnchor(Anchor):
812 """ 813 Counting CFL numbers. Use svr.marchret to return results. Implements 814 postmarch() method. 815 816 @ivar rsteps: steps to run. 817 @itype rsteps: int 818 """
819 - def __init__(self, svr, **kw):
820 self.rsteps = kw.pop('rsteps') 821 super(CflAnchor, self).__init__(svr, **kw)
822 - def postmarch(self):
823 svr = self.svr 824 istep = svr.step_global 825 rsteps = self.rsteps 826 if istep > 0 and istep%rsteps == 0: 827 # download data. 828 if svr.scu: 829 svr.cumgr.arr_from_gpu('cfl', 'ocfl') 830 ocfl = svr.ocfl[svr.ngstcell:] 831 cfl = svr.cfl[svr.ngstcell:] 832 # determine extremum. 833 mincfl = ocfl.min() 834 maxcfl = ocfl.max() 835 nadj = (cfl==1).sum() 836 # store. 837 lst = svr.marchret.setdefault('cfl', [0.0, 0.0, 0, 0]) 838 lst[0] = mincfl 839 lst[1] = maxcfl 840 lst[2] = nadj 841 lst[3] += nadj
842
843 -class CflHook(Hook):
844 """ 845 Makes sure CFL number is bounded and print averaged CFL number over time. 846 Reports CFL information per time step and on finishing. Implements (i) 847 postmarch() and (ii) postloop() methods. 848 849 @ivar name: name of the CFL tool. 850 @itype name: str 851 @ivar rsteps: steps to run. 852 @itype rsteps: int 853 @ivar cflmin: CFL number should be greater than or equal to the value. 854 @itype cflmin: float 855 @ivar cflmax: CFL number should be less than the value. 856 @itype cflmax: float 857 @ivar fullstop: flag to stop when CFL is out of bound. Default True. 858 @itype fullstop: bool 859 @ivar aCFL: accumulated CFL. 860 @itype aCFL: float 861 @ivar mCFL: mean CFL. 862 @itype mCFL: float 863 @ivar hnCFL: hereditary minimal CFL. 864 @itype hnCFL: float 865 @ivar hxCFL: hereditary maximal CFL. 866 @itype hxCFL: float 867 @ivar aadj: number of adjusted CFL accumulated since last report. 868 @itype aadj: int 869 @ivar haadj: total number of adjusted CFL since simulation started. 870 @itype haadj: int 871 """
872 - def __init__(self, cse, **kw):
873 self.name = kw.pop('name', 'cfl') 874 self.cflmin = kw.pop('cflmin', 0.0) 875 self.cflmax = kw.pop('cflmax', 1.0) 876 self.fullstop = kw.pop('fullstop', True) 877 self.aCFL = 0.0 878 self.mCFL = 0.0 879 self.hnCFL = 1.0 880 self.hxCFL = 0.0 881 self.aadj = 0 882 self.haadj = 0 883 rsteps = kw.pop('rsteps', None) 884 super(CflHook, self).__init__(cse, **kw) 885 self.rsteps = self.psteps if rsteps == None else rsteps 886 self.ankkw = kw
887 - def drop_anchor(self, svr):
888 ankkw = self.ankkw.copy() 889 ankkw['name'] = self.name 890 ankkw['rsteps'] = self.rsteps 891 self._deliver_anchor(svr, CflAnchor, ankkw)
892 - def _notify(self, msg):
893 from warnings import warn 894 if self.fullstop: 895 raise RuntimeError(msg) 896 else: 897 warn(msg)
898 - def postmarch(self):
899 from numpy import isnan 900 info = self.info 901 istep = self.cse.execution.step_current 902 mr = self.cse.execution.marchret 903 isp = self.cse.is_parallel 904 rsteps = self.rsteps 905 psteps = self.psteps 906 # collect CFL. 907 if istep > 0 and istep%rsteps == 0: 908 nCFL = max([m['cfl'][0] for m in mr]) if isp else mr['cfl'][0] 909 xCFL = max([m['cfl'][1] for m in mr]) if isp else mr['cfl'][1] 910 nadj = sum([m['cfl'][2] for m in mr]) if isp else mr['cfl'][2] 911 aadj = sum([m['cfl'][3] for m in mr]) if isp else mr['cfl'][3] 912 hnCFL = min([nCFL, self.hnCFL]) 913 self.hnCFL = hnCFL if not isnan(hnCFL) else self.hnCFL 914 hxCFL = max([xCFL, self.hxCFL]) 915 self.hxCFL = hxCFL if not isnan(hxCFL) else self.hxCFL 916 self.aCFL += xCFL*rsteps 917 self.mCFL = self.aCFL/istep 918 self.aadj += aadj 919 self.haadj += aadj 920 # check. 921 if self.cflmin != None and nCFL < self.cflmin: 922 self._notify("CFL = %g < %g after step: %d" % ( 923 nCFL, self.cflmin, istep)) 924 if self.cflmax != None and xCFL >= self.cflmax: 925 self._notify("CFL = %g >= %g after step: %d" % ( 926 xCFL, self.cflmax, istep)) 927 # output information. 928 if istep > 0 and istep%psteps == 0: 929 info("CFL = %.2f/%.2f - %.2f/%.2f adjusted: %d/%d/%d\n" % ( 930 nCFL, xCFL, self.hnCFL, self.hxCFL, nadj, 931 self.aadj, self.haadj)) 932 self.aadj = 0
933 - def postloop(self):
934 self.info("Averaged maximum CFL = %g.\n" % self.mCFL)
935
936 ################################################################################ 937 # Convergence. 938 ################################################################################ 939 940 -class ConvergeAnchor(Anchor):
941 """ 942 Performs calculation for convergence on Solver. Implements (i) preloop() 943 and (ii) postfull() methods. 944 945 @ivar rsteps: steps to run. 946 @itype rsteps: int 947 @ivar norm: container of calculated norm. 948 @itype norm: dict 949 """
950 - def __init__(self, svr, **kw):
951 self.rsteps = kw.pop('rsteps') 952 super(ConvergeAnchor, self).__init__(svr, **kw) 953 self.norm = {}
954 - def preloop(self):
955 from numpy import empty 956 svr = self.svr 957 der = svr.der 958 der['diff'] = empty((svr.ngstcell+svr.ncell, svr.neq), 959 dtype=svr.fpdtype)
960 - def postfull(self):
961 from ctypes import byref, c_double 962 svr = self.svr 963 istep = svr.step_global 964 rsteps = self.rsteps 965 if istep > 0 and istep%rsteps == 0: 966 diff = svr.der['diff'] 967 svr._clib_cuse_c.process_norm_diff(byref(svr.exd), 968 diff[svr.ngstcell:].ctypes._as_parameter_) 969 # Linf norm. 970 Linf = [] 971 Linf.extend(diff.max(axis=0)) 972 self.norm['Linf'] = Linf 973 # L1 norm. 974 svr._clib_cuse_c.process_norm_L1.restype = c_double 975 L1 = [] 976 for ieq in range(svr.neq): 977 vals = svr._clib_cuse_c.process_norm_L1(byref(svr.exd), 978 diff[svr.ngstcell:].ctypes._as_parameter_, ieq) 979 L1.append(vals) 980 self.norm['L1'] = L1 981 # L2 norm. 982 svr._clib_cuse_c.process_norm_L2.restype = c_double 983 L2 = [] 984 for ieq in range(svr.neq): 985 vals = svr._clib_cuse_c.process_norm_L2(byref(svr.exd), 986 diff[svr.ngstcell:].ctypes._as_parameter_, ieq) 987 L2.append(vals) 988 self.norm['L2'] = L2
989
990 -class ConvergeHook(BlockHook):
991 """ 992 Initiates and controls the remote ConvergeAnchor. Implements (i) 993 drop_anchor() and (ii) postmarch() methods. 994 995 @ivar name: name of the converge tool. 996 @itype name: str 997 @ivar keys: kinds of norms to output; Linf, L1, and L2. 998 @itype keys: list 999 @ivar eqs: indices of unknowns (associated with the equations). 1000 @itype eqs: list 1001 @ivar csteps: steps to collect; default to psteps. 1002 @itype csteps: int 1003 @ivar rsteps: steps to run (Anchor); default to csteps. 1004 @itype rsteps: int 1005 @ivar ankkw: hold the remaining keywords for Anchor. 1006 @itype ankkw: dict 1007 """
1008 - def __init__(self, cse, **kw):
1009 self.name = kw.pop('name', 'converge') 1010 self.keys = kw.pop('keys', None) 1011 self.eqs = kw.pop('eqs', None) 1012 csteps = kw.pop('csteps', None) 1013 rsteps = kw.pop('rsteps', None) 1014 super(ConvergeHook, self).__init__(cse, **kw) 1015 self.csteps = self.psteps if csteps == None else csteps 1016 self.rsteps = self.csteps if rsteps == None else rsteps 1017 self.ankkw = kw
1018 - def drop_anchor(self, svr):
1019 ankkw = self.ankkw.copy() 1020 ankkw['name'] = self.name 1021 ankkw['rsteps'] = self.rsteps 1022 self._deliver_anchor(svr, ConvergeAnchor, ankkw)
1023 - def _collect(self):
1024 from numpy import sqrt 1025 cse = self.cse 1026 neq = cse.execution.neq 1027 if cse.is_parallel: 1028 dom = cse.solver.domainobj 1029 dealer = cse.solver.dealer 1030 allnorm = list() 1031 for iblk in range(dom.nblk): 1032 dealer[iblk].cmd.pullank(self.name, 'norm', with_worker=True) 1033 allnorm.append(dealer[iblk].recv()) 1034 norm = {'Linf': [0.0]*neq, 'L1': [0.0]*neq, 'L2': [0.0]*neq} 1035 for ieq in range(neq): 1036 norm['Linf'][ieq] = max([nm['Linf'][ieq] for nm in allnorm]) 1037 norm['L1'][ieq] = sum([nm['L1'][ieq] for nm in allnorm]) 1038 norm['L2'][ieq] = sum([nm['L2'][ieq] for nm in allnorm]) 1039 else: 1040 svr = self.cse.solver.solverobj 1041 norm = svr.runanchors[self.name].norm 1042 for ieq in range(neq): 1043 norm['L2'][ieq] = sqrt(norm['L2'][ieq]) 1044 self.norm = norm
1045 - def postmarch(self):
1046 info = self.info 1047 cse = self.cse 1048 istep = cse.execution.step_current 1049 csteps = self.csteps 1050 psteps = self.psteps 1051 if istep > 0 and istep%csteps == 0: 1052 self._collect() 1053 if istep > 0 and istep%psteps == 0: 1054 norm = self.norm 1055 keys = self.keys if self.keys != None else self.norm.keys() 1056 keys.sort() 1057 eqs = self.eqs if self.eqs != None else range(cse.execution.neq) 1058 for key in keys: 1059 info("Converge/%-4s [ %s ]:\n %s\n" % (key, 1060 ', '.join(['%d'%ieq for ieq in eqs]), 1061 ' '.join(['%.4e'%norm[key][ieq] for ieq in eqs]), 1062 ))
1063
1064 ################################################################################ 1065 # Probe. 1066 ################################################################################ 1067 1068 -class Probe(object):
1069 """ 1070 Represent a point in the mesh. 1071 """
1072 - def __init__(self, *args, **kw):
1073 from numpy import array 1074 self.speclst = kw.pop('speclst') 1075 self.name = kw.pop('name', None) 1076 self.crd = array(args, dtype='float64') 1077 self.pcl = -1 1078 self.vals = list()
1079 - def __str__(self):
1080 crds = ','.join(['%g'%val for val in self.crd]) 1081 return 'Pt/%s#%d(%s)%d' % (self.name, self.pcl, crds, len(self.vals))
1082 - def locate_cell(self, svr):
1083 idx = svr.locate_point(*self.crd) 1084 self.pcl = idx[0]
1085 - def __call__(self, svr, time):
1086 ngstcell = svr.ngstcell 1087 vlist = [time] 1088 for spec in self.speclst: 1089 arr = None 1090 if isinstance(spec, str): 1091 arr = svr.der[spec] 1092 elif isinstance(spec, int): 1093 if spec >= 0 and spec < svr.neq: 1094 arr = svr.soln[:,spec] 1095 elif spec < 0 and -1-spec < svr.neq: 1096 spec = -1-spec 1097 arr = svr.sol[:,spec] 1098 if arr == None: 1099 raise IndexError, 'spec %s incorrect'%str(spec) 1100 vlist.append(arr[ngstcell+self.pcl]) 1101 self.vals.append(vlist)
1102
1103 -class ProbeAnchor(Anchor):
1104 """ 1105 Anchor for probe. 1106 """
1107 - def __init__(self, svr, **kw):
1108 speclst = kw.pop('speclst') 1109 self.points = list() 1110 for data in kw.pop('coords'): 1111 pkw = {'speclst': speclst, 'name': data[0]} 1112 self.points.append(Probe(*data[1:], **pkw)) 1113 super(ProbeAnchor, self).__init__(svr, **kw)
1114 - def preloop(self):
1115 for point in self.points: point.locate_cell(self.svr) 1116 for point in self.points: point(self.svr, self.svr.time)
1117 - def postfull(self):
1118 for point in self.points: point(self.svr, self.svr.time)
1119
1120 -class ProbeHook(BlockHook):
1121 """ 1122 Point probe. 1123 """
1124 - def __init__(self, cse, **kw):
1125 self.name = kw.pop('name', 'ppank') 1126 super(ProbeHook, self).__init__(cse, **kw) 1127 self.ankkw = kw 1128 self.points = None
1129 - def drop_anchor(self, svr):
1130 ankkw = self.ankkw.copy() 1131 ankkw['name'] = self.name 1132 self._deliver_anchor(svr, ProbeAnchor, ankkw)
1133 - def _collect(self):
1134 cse = self.cse 1135 if cse.is_parallel: 1136 dom = cse.solver.domainobj 1137 dealer = cse.solver.dealer 1138 allpoints = list() 1139 for iblk in range(dom.nblk): 1140 dealer[iblk].cmd.pullank(self.name, 'points', with_worker=True) 1141 allpoints.append(dealer[iblk].recv()) 1142 npt = len(allpoints[0]) 1143 points = [None]*npt 1144 for rpoints in allpoints: 1145 ipt = 0 1146 while ipt < npt: 1147 if points[ipt] == None and rpoints[ipt].pcl >=0: 1148 points[ipt] = rpoints[ipt] 1149 ipt += 1 1150 else: 1151 svr = self.cse.solver.solverobj 1152 points = [pt for pt in svr.runanchors[self.name].points 1153 if pt.pcl >= 0] 1154 self.points = points
1155 - def postmarch(self):
1156 psteps = self.psteps 1157 istep = self.cse.execution.step_current 1158 if istep%psteps != 0: return False 1159 self._collect() 1160 return True
1161 - def postloop(self):
1162 import os 1163 from numpy import array, save 1164 for point in self.points: 1165 ptfn = '%s_pt_%s_%s.npy' % ( 1166 self.cse.io.basefn, self.name, point.name) 1167 ptfn = os.path.join(self.cse.io.basedir, ptfn) 1168 save(ptfn, array(point.vals, dtype='float64'))
1169