Package solvcon :: Package kerpak :: Module cuse
[hide private]
[frames] | no frames]

Source Code for Module solvcon.kerpak.cuse

   1  # -*- coding: UTF-8 -*- 
   2  # 
   3  # Copyright (C) 2008-2011 Yung-Yu Chen <yyc@solvcon.net>. 
   4  # 
   5  # This program is free software; you can redistribute it and/or modify 
   6  # it under the terms of the GNU General Public License as published by 
   7  # the Free Software Foundation; either version 2 of the License, or 
   8  # (at your option) any later version. 
   9  # 
  10  # This program is distributed in the hope that it will be useful, 
  11  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  13  # GNU General Public License for more details. 
  14  # 
  15  # You should have received a copy of the GNU General Public License along 
  16  # with this program; if not, write to the Free Software Foundation, Inc., 
  17  # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
  18   
  19  """ 
  20  Second-order, multi-dimensional CESE method with CUDA enabled. 
  21   
  22  Three functionalities are defined: (i) CFL (CflHook), (ii) Convergence 
  23  (ConvergeAnchor, ConvergeHook), and (iii) Prober (Probe, ProbeAnchor, 
  24  ProbeHook). 
  25  """ 
  26   
  27  CUDA_RAISE_ON_FAIL = False 
  28   
  29  from ctypes import Structure 
  30  from solvcon.gendata import AttributeDict 
  31  from solvcon.solver import BlockSolver 
  32  from solvcon.case import BlockCase 
  33  from solvcon.boundcond import BC, periodic 
  34  from solvcon.anchor import Anchor 
  35  from solvcon.hook import Hook, BlockHook 
36 37 -class CudaDataManager(AttributeDict):
38 """ 39 Manage solver data on GPU memory. 40 """
41 - def __init__(self, *args, **kw):
42 from ctypes import sizeof 43 self.svr = kw.pop('svr') 44 super(CudaDataManager, self).__init__(*args, **kw) 45 self.exd = self.svr._exedatatype_(svr=self.svr) 46 self.gexd = self.svr.scu.alloc(sizeof(self.exd)) 47 # allocate all arrays. 48 for name in self.svr.cuarr_map: 49 shf = self.svr.cuarr_map[name] 50 carr = getattr(self.svr, name) 51 self[name] = self.svr.scu.alloc(carr.nbytes)
52 - def __set_cuda_pointer(self, exc, svr, aname, shf):
53 from ctypes import c_void_p 54 arr = getattr(svr, aname) 55 gptr = self[aname].gptr 56 nt = 1 57 if len(arr.shape) > 1: 58 for width in arr.shape[1:]: 59 nt *= width 60 shf *= nt * arr.itemsize 61 addr = gptr.value + shf if gptr.value is not None else None 62 setattr(exc, aname, c_void_p(addr))
63
64 - def update_exd(self, exd=None):
65 if exd is None: 66 exd = self.svr.exd 67 # copy everything. 68 for key, ctp in exd._fields_: 69 setattr(self.exd, key, getattr(exd, key)) 70 # shift pointers. 71 for name in self.svr.cuarr_map: 72 shf = self.svr.cuarr_map[name] 73 self.__set_cuda_pointer(self.exd, self.svr, name, shf) 74 # send to GPU. 75 self.exd_to_gpu()
76
77 - def exd_to_gpu(self):
78 from ctypes import byref, sizeof 79 scu = self.svr.scu 80 scu.cudaMemcpy(self.gexd.gptr, byref(self.exd), 81 sizeof(self.exd), scu.cudaMemcpyHostToDevice)
82
83 - def arr_to_gpu(self, *args):
84 scu = self.svr.scu 85 names = self if not args else args 86 for name in names: 87 scu.memcpy(self[name], getattr(self.svr, name))
88 - def arr_from_gpu(self, *args):
89 scu = self.svr.scu 90 names = self if not args else args 91 for name in names: 92 scu.memcpy(getattr(self.svr, name), self[name])
93
94 - def free_all(self):
95 scu = self.svr.scu 96 scu.free(self.gexd) 97 for name in self: 98 scu.free(self[name])
99
100 ############################################################################### 101 # Solver. 102 ############################################################################### 103 104 -class CuseSolverExedata(Structure):
105 """ 106 Data structure to interface with C. 107 """ 108 from ctypes import c_int, c_double, c_void_p 109 _fields_ = [ 110 # inherited. 111 ('ncore', c_int), ('neq', c_int), 112 ('time', c_double), ('time_increment', c_double), 113 # mesh shape. 114 ('ndim', c_int), ('nnode', c_int), ('nface', c_int), ('ncell', c_int), 115 ('nbound', c_int), 116 ('ngstnode', c_int), ('ngstface', c_int), ('ngstcell', c_int), 117 # group shape. 118 ('ngroup', c_int), ('gdlen', c_int), 119 # parameter shape. 120 ('nsca', c_int), ('nvec', c_int), 121 # scheme. 122 ('alpha', c_int), ('taylor', c_double), 123 ('cnbfac', c_double), ('sftfac', c_double), 124 ('taumin', c_double), ('taumax', c_double), ('tauscale', c_double), 125 ('omegamin', c_double), ('omegascale', c_double), 126 # function pointer. 127 ('jacofunc', c_void_p), ('taufunc', c_void_p), ('omegafunc', c_void_p), 128 # meta array. 129 ('fctpn', c_void_p), ('cltpn', c_void_p), ('clgrp', c_void_p), 130 ('grpda', c_void_p), 131 # metric array. 132 ('ndcrd', c_void_p), ('fccnd', c_void_p), ('fcnml', c_void_p), 133 ('clcnd', c_void_p), ('clvol', c_void_p), 134 ('cecnd', c_void_p), ('cevol', c_void_p), 135 # connectivity array. 136 ('fcnds', c_void_p), ('fccls', c_void_p), 137 ('clnds', c_void_p), ('clfcs', c_void_p), 138 # solution array. 139 ('amsca', c_void_p), ('amvec', c_void_p), 140 ('sol', c_void_p), ('dsol', c_void_p), ('solt', c_void_p), 141 ('soln', c_void_p), ('dsoln', c_void_p), 142 ('cfl', c_void_p), ('ocfl', c_void_p), 143 ] 144 del c_int, c_double, c_void_p 145
146 - def __set_pointer(self, svr, aname, shf):
147 from ctypes import c_void_p 148 ptr = getattr(svr, aname)[shf:].ctypes._as_parameter_ 149 setattr(self, aname, ptr)
150
151 - def __init__(self, *args, **kw):
152 from ctypes import c_int, c_double, POINTER, c_void_p, byref, cast 153 svr = kw.pop('svr', None) 154 super(CuseSolverExedata, self).__init__(*args, **kw) 155 if svr == None: 156 return 157 # function pointer. 158 self.jacofunc = cast(svr._jacofunc_, c_void_p).value 159 self.taufunc = cast(getattr(svr._clib_cuse_c, 160 'tau_'+svr.tauname), c_void_p).value 161 self.omegafunc = cast(getattr(svr._clib_cuse_c, 162 'omega_'+svr.omeganame), c_void_p).value 163 # scalar. 164 for key in ('ncore', 'neq', 'time', 'time_increment', 165 'ndim', 'nnode', 'nface', 'ncell', 'nbound', 'ngstnode', 166 'ngstface', 'ngstcell', 'ngroup', 'gdlen', 'nsca', 'nvec', 167 'alpha', 'taylor', 'cnbfac', 'sftfac', 168 'taumin', 'taumax', 'tauscale', 'omegamin', 'omegascale'): 169 setattr(self, key, getattr(svr, key)) 170 # arrays. 171 for aname in ('grpda',): 172 self.__set_pointer(svr, aname, 0) 173 for aname in ('ndcrd',): 174 self.__set_pointer(svr, aname, svr.ngstnode) 175 for aname in ('fctpn', 'fcnds', 'fccls', 'fccnd', 'fcnml'): 176 self.__set_pointer(svr, aname, svr.ngstface) 177 for aname in ('cltpn', 'clgrp', 'clcnd', 'clvol', 'cecnd', 'cevol', 178 'clnds', 'clfcs', 'amsca', 'amvec', 179 'sol', 'dsol', 'solt', 'soln', 'dsoln', 'cfl', 'ocfl'): 180 self.__set_pointer(svr, aname, svr.ngstcell)
181
182 -class CuseSolver(BlockSolver):
183 """ 184 The base solver class for second-order, multi-dimensional CESE code with 185 CUDA enabled. 186 187 @cvar _gdlen_: length per group data. Must be overridden. 188 @ctype _gdlen_: int 189 @cvar _jacofunc_: ctypes function to Jacobian calculator. Must be 190 overridden. 191 @ctype _jacofunc_: ctypes.FuncPtr 192 @cvar _clib_mcu: ctypes library for physical model on GPU. 193 @ctype _clib_mcu: ctypes.CDLL 194 195 @ivar debug: flag for debugging. 196 @itype debug: bool 197 198 @ivar scu: CUDA wrapper. 199 @itype scu: solvcon.scuda.Scuda 200 @ivar ncuth: number of thread per block for CUDA. 201 @itype ncuth: int 202 203 @ivar diffname: name of gradient calculation function; tau is default, 204 omega is selectable. 205 @itype diffname: str 206 @ivar tauname: name of tau function; default linear. 207 @itype tauname: str 208 @ivar omeganame: name of omega function; default scale. 209 @itype omeganame: str 210 @ivar alpha: parameter to the weighting function. 211 @itype alpha: int 212 @ivar taylor: factor for Taylor's expansion; 0 off, 1 on. 213 @itype taylor: float 214 @ivar cnbfac: factor to use BCE centroid, othersize midpoint; 0 off, 1 on. 215 @itype cnbfac: float 216 @ivar sftfac: factor to shift gradient shape; 0 off, 1 on. 217 @itype sftfac: float 218 @ivar taumin: the lower bound of tau. 219 @itype taumin: float 220 @ivar taumax: the upper bound of tau. 221 @itype taumax: float 222 @ivar tauscale: scaling of tau. 223 @itype tauscale: float 224 @ivar omegamin: the lower bound of omega. 225 @itype omegamin: float 226 @ivar omegascale: scaling of omega. 227 @itype omegascale: float 228 229 @ivar grpda: group data. 230 @ivar cecnd: solution points for CCEs and BCEs. 231 @ivar cevol: CCE and BCE volumes. 232 @ivar amsca: Parameter scalar array. 233 @ivar amvec: Parameter vector array. 234 @ivar solt: temporal diffrentiation of solution. 235 @ivar sol: current solution. 236 @ivar soln: next solution. 237 @ivar dsol: current gradient of solution. 238 @ivar dsoln: next gradient of solution. 239 @ivar cfl: CFL number. 240 @ivar ocfl: original CFL number. 241 """ 242 243 _exedatatype_ = CuseSolverExedata 244 _interface_init_ = ['cecnd', 'cevol'] 245 _solution_array_ = ['sol', 'soln', 'dsol', 'dsoln'] 246 247 _gdlen_ = None 248 _jacofunc_ = None 249 _clib_mcu = None 250
251 - def __init__(self, blk, *args, **kw):
252 from numpy import empty 253 from solvcon.conf import env 254 self.debug = kw.pop('debug', False) 255 # shape parameters. 256 nsca = kw.pop('nsca', 0) 257 nvec = kw.pop('nvec', 0) 258 # CUDA parameters. 259 self.ncuth = kw.pop('ncuth', 0) 260 self.scu = scu = env.scu if self.ncuth else None 261 # scheme parameters. 262 diffname = kw.pop('diffname', None) 263 self.diffname = diffname if diffname != None else 'tau' 264 tauname = kw.pop('tauname', None) 265 self.tauname = tauname if tauname != None else 'linear' 266 omeganame = kw.pop('omeganame', None) 267 self.omeganame = omeganame if omeganame != None else 'scale' 268 self.alpha = int(kw.pop('alpha', 0)) 269 self.taylor = float(kw.pop('taylor', 1.0)) 270 self.cnbfac = float(kw.pop('cnbfac', 1.0)) 271 self.sftfac = float(kw.pop('sftfac', 1.0)) 272 self.taumin = float(kw.pop('taumin', 0.0)) 273 self.taumax = float(kw.pop('taumax', 1.0)) 274 self.tauscale = float(kw.pop('tauscale', 0.0)) 275 self.omegamin = float(kw.pop('omegamin', 1.1)) 276 self.omegascale = float(kw.pop('omegascale', 0.0)) 277 # super call. 278 super(CuseSolver, self).__init__(blk, *args, **kw) 279 fpdtype = self.fpdtype 280 ndim = self.ndim 281 ncell = self.ncell 282 ngstcell = self.ngstcell 283 neq = self.neq 284 ngroup = self.ngroup 285 # CUDA manager. 286 self.cumgr = None 287 self.cuarr_map = dict() 288 for key in ('ndcrd',): 289 self.cuarr_map[key] = self.ngstnode 290 for key in ('fcnds', 'fccls', 'fccnd', 'fcnml'): 291 self.cuarr_map[key] = self.ngstface 292 for key in ('clfcs', 'clcnd', 'cltpn'): 293 self.cuarr_map[key] = self.ngstcell 294 # meta array. 295 self.grpda = empty((ngroup, self._gdlen_), dtype=fpdtype) 296 self.cuarr_map['grpda'] = 0 297 # dual mesh. 298 self.cecnd = empty((ngstcell+ncell, self.CLMFC+1, ndim), dtype=fpdtype) 299 self.cevol = empty((ngstcell+ncell, self.CLMFC+1), dtype=fpdtype) 300 self.cuarr_map['cecnd'] = self.cuarr_map['cevol'] = self.ngstcell 301 # solutions. 302 self.amsca = empty((ngstcell+ncell, nsca), dtype=fpdtype) 303 self.amvec = empty((ngstcell+ncell, nvec, ndim), dtype=fpdtype) 304 self.solt = empty((ngstcell+ncell, neq), dtype=fpdtype) 305 self.sol = empty((ngstcell+ncell, neq), dtype=fpdtype) 306 self.soln = empty((ngstcell+ncell, neq), dtype=fpdtype) 307 self.dsol = empty((ngstcell+ncell, neq, ndim), dtype=fpdtype) 308 self.dsoln = empty((ngstcell+ncell, neq, ndim), dtype=fpdtype) 309 self.cfl = empty(ngstcell+ncell, dtype=fpdtype) 310 self.ocfl = empty(ngstcell+ncell, dtype=fpdtype) 311 for key in ('amsca', 'amvec', 'solt', 'sol', 'soln', 'dsol', 'dsoln', 312 'cfl', 'ocfl'): 313 self.cuarr_map[key] = self.ngstcell
314 315 @property
316 - def gdlen(self):
317 """ 318 Length per group data. 319 """ 320 return self._gdlen_
321 @property
322 - def nsca(self):
323 return self.amsca.shape[1]
324 @property
325 - def nvec(self):
326 return self.amvec.shape[1]
327
328 - def bind(self):
329 if self.scu: 330 self.cumgr = CudaDataManager(svr=self) 331 super(CuseSolver, self).bind()
332 - def unbind(self):
333 if self.scu and self.cumgr is not None: 334 self.cumgr.free_all() 335 self.cumgr = None 336 super(CuseSolver, self).unbind()
337
338 - def init(self, **kw):
339 self._tcall(self._clib_cuse_c.prepare_ce, 0, self.ncell) 340 super(CuseSolver, self).init(**kw) 341 if self.scu: self.cumgr.arr_to_gpu()
342
343 - def boundcond(self):
344 if self.scu: 345 self.cumgr.update_exd() 346 self.cumgr.arr_to_gpu('sol', 'soln', 'dsol', 'dsoln') 347 if self.nsca: self.cumgr.arr_to_gpu('amsca') 348 if self.nvec: self.cumgr.arr_to_gpu('amvec') 349 super(CuseSolver, self).boundcond() 350 self.call_non_interface_bc('soln') 351 if self.scu: 352 self.cumgr.arr_from_gpu('sol', 'soln') 353 self.call_non_interface_bc('dsoln') 354 if self.scu: 355 self.cumgr.arr_from_gpu('dsol', 'dsoln')
356 357 ########################################################################### 358 # utility. 359 ###########################################################################
360 - def locate_point(self, *args):
361 """ 362 Locate the cell index where the input coordinate is. 363 """ 364 from ctypes import byref 365 from numpy import array 366 crd = array(args, dtype='float64') 367 picl = c_int(0) 368 pifl = c_int(0) 369 pjcl = c_int(0) 370 pjfl = c_int(0) 371 self._clib_cuse_c.locate_point(byref(self.exd), 372 crd.ctypes._as_parameter_, 373 byref(picl), byref(pifl), byref(pjcl), byref(pjfl)) 374 return picl.value, pifl.value, pjcl.value, pjfl.value
375 376 ########################################################################### 377 # library. 378 ########################################################################### 379 from solvcon.dependency import getcdll 380 __clib_cuse_c = { 381 2: getcdll('cuse2d_c'), 382 3: getcdll('cuse3d_c'), 383 } 384 __clib_cuse_cu = { 385 2: getcdll('cuse2d_cu', raise_on_fail=CUDA_RAISE_ON_FAIL), 386 3: getcdll('cuse3d_cu', raise_on_fail=CUDA_RAISE_ON_FAIL), 387 } 388 del getcdll 389 @property
390 - def _clib_cuse_c(self):
391 return self.__clib_cuse_c[self.ndim]
392 @property
393 - def _clib_cuse_cu(self):
394 return self.__clib_cuse_cu[self.ndim]
395 396 ########################################################################### 397 # marching algorithm. 398 ########################################################################### 399 MMNAMES = list() 400 401 MMNAMES.append('update')
402 - def update(self, worker=None):
403 if self.debug: self.mesg('update') 404 # exchange solution and gradient. 405 self.sol, self.soln = self.soln, self.sol 406 self.dsol, self.dsoln = self.dsoln, self.dsol 407 # exchange pointers in execution data. 408 exd = self.exd 409 exd.sol, exd.soln = exd.soln, exd.sol 410 exd.dsol, exd.dsoln = exd.dsoln, exd.dsol 411 # exchange items in GPU execution data. 412 if self.scu: 413 cumgr = self.cumgr 414 cumgr.sol, cumgr.soln = cumgr.soln, cumgr.sol 415 cumgr.dsol, cumgr.dsoln = cumgr.dsoln, cumgr.dsol 416 cumgr.update_exd() 417 #self.cumgr.arr_from_gpu('solt') # DEBUG. 418 #print self.solt.sum() # DEBUG. 419 if self.debug: self.mesg(' done.\n')
420 421 MMNAMES.append('ibcam')
422 - def ibcam(self, worker=None):
423 if self.debug: self.mesg('ibcam') 424 if worker: # FIXME: not working with CUDA. 425 if self.nsca: self.exchangeibc('amsca', worker=worker) 426 if self.nvec: self.exchangeibc('amvec', worker=worker) 427 if self.debug: self.mesg(' done.\n')
428 429 MMNAMES.append('calcsolt')
430 - def calcsolt(self, worker=None):
431 if self.debug: self.mesg('calcsolt') 432 if self.scu: 433 from ctypes import byref 434 self._clib_mcu.calc_solt(self.ncuth, 435 byref(self.cumgr.exd), self.cumgr.gexd.gptr) 436 else: 437 self._tcall(self._clib_cuse_c.calc_solt, -self.ngstcell, self.ncell, 438 tickerkey='calcsolt') 439 if self.debug: self.mesg(' done.\n')
440 MMNAMES.append('calcsoln')
441 - def calcsoln(self, worker=None):
442 if self.debug: self.mesg('calcsoln') 443 if self.scu: 444 from ctypes import byref 445 self._clib_mcu.calc_soln(self.ncuth, 446 byref(self.cumgr.exd), self.cumgr.gexd.gptr) 447 else: 448 func = self._clib_cuse_c.calc_soln 449 self._tcall(func, 0, self.ncell, tickerkey='calcsoln') 450 if self.debug: self.mesg(' done.\n')
451 452 MMNAMES.append('ibcsoln')
453 - def ibcsoln(self, worker=None):
454 if self.debug: self.mesg('ibcsoln') 455 if worker: self.exchangeibc('soln', worker=worker) 456 if self.debug: self.mesg(' done.\n')
457 MMNAMES.append('bcsoln')
458 - def bcsoln(self, worker=None):
459 if self.debug: self.mesg('bcsoln') 460 self.call_non_interface_bc('soln') 461 if self.debug: self.mesg(' done.\n')
462 463 MMNAMES.append('calccfl')
464 - def calccfl(self, worker=None):
465 raise NotImplementedError
466 467 MMNAMES.append('calcdsoln')
468 - def calcdsoln(self, worker=None):
469 if self.debug: self.mesg('calcdsoln') 470 if self.scu: 471 from ctypes import byref 472 self._clib_mcu.calc_dsoln(self.ncuth, 473 byref(self.cumgr.exd), self.cumgr.gexd.gptr) 474 else: 475 func = self._clib_cuse_c.calc_dsoln 476 self._tcall(func, 0, self.ncell, tickerkey='calcdsoln') 477 if self.debug: self.mesg(' done.\n')
478 479 MMNAMES.append('ibcdsoln')
480 - def ibcdsoln(self, worker=None):
481 if self.debug: self.mesg('ibcdsoln') 482 if worker: self.exchangeibc('dsoln', worker=worker) 483 if self.debug: self.mesg(' done.\n')
484 MMNAMES.append('bcdsoln')
485 - def bcdsoln(self, worker=None):
486 if self.debug: self.mesg('bcdsoln') 487 self.call_non_interface_bc('dsoln') 488 if self.debug: self.mesg(' done.\n')
489
490 ############################################################################### 491 # Case. 492 ############################################################################### 493 494 -class CuseCase(BlockCase):
495 """ 496 Inviscid aerodynamic case for the Euler equations. 497 """ 498 defdict = { 499 'execution.verified_norm': -1.0, 500 'solver.debug_cese': False, 501 'solver.ncuth': 0, 502 'solver.diffname': None, 503 'solver.tauname': None, 504 'solver.omeganame': None, 505 'solver.alpha': 1, 506 'solver.taylor': 1.0, 507 'solver.cnbfac': 1.0, 508 'solver.sftfac': 1.0, 509 'solver.taumin': None, 510 'solver.taumax': None, 511 'solver.tauscale': None, 512 'solver.omegamin': None, 513 'solver.omegascale': None, 514 }
515 - def make_solver_keywords(self):
516 kw = super(CuseCase, self).make_solver_keywords() 517 kw['debug'] = self.solver.debug_cese 518 kw['ncuth'] = int(self.solver.ncuth) 519 for key in 'diffname', 'tauname', 'omeganame': 520 val = self.solver.get(key) 521 if val != None: kw[key] = val 522 kw['alpha'] = int(self.solver.alpha) 523 for key in ('taylor', 'cnbfac', 'sftfac', 524 'taumin', 'taumax', 'tauscale', 'omegamin', 'omegascale',): 525 val = self.solver.get(key) 526 if val != None: kw[key] = float(val) 527 return kw
528
529 ############################################################################### 530 # Boundary conditions. 531 ############################################################################### 532 533 -class CuseBC(BC):
534 """ 535 Basic BC class for the Euler equations. 536 537 @cvar _ghostgeom_: indicate which ghost geometry processor to use. 538 @ctype _ghostgeom_: str 539 """ 540 541 _ghostgeom_ = None 542 543 ########################################################################### 544 # library. 545 ########################################################################### 546 from solvcon.dependency import getcdll 547 __clib_cuseb_c = { 548 2: getcdll('cuseb2d_c'), 549 3: getcdll('cuseb3d_c'), 550 } 551 __clib_cuseb_cu = { 552 2: getcdll('cuseb2d_cu', raise_on_fail=CUDA_RAISE_ON_FAIL), 553 3: getcdll('cuseb3d_cu', raise_on_fail=CUDA_RAISE_ON_FAIL), 554 } 555 del getcdll 556 @property
557 - def _clib_cuseb_c(self):
558 return self.__clib_cuseb_c[self.svr.ndim]
559 @property
560 - def _clib_cuseb_cu(self):
561 return self.__clib_cuseb_cu[self.svr.ndim]
562
563 - def bind(self):
564 super(CuseBC, self).bind() 565 if self.svr.scu: 566 for key in ('facn', 'value',): 567 nbytes = getattr(self, key).nbytes 568 setattr(self, 'cu'+key, self.svr.scu.alloc(nbytes))
569 - def unbind(self):
570 if self.svr.scu: 571 for key in ('facn', 'value',): 572 self.svr.scu.free(getattr(self, 'cu'+key))
573
574 - def init(self, **kw):
575 from ctypes import byref, c_int 576 super(CuseBC, self).init(**kw) 577 # process ghost geometry. 578 getattr(self._clib_cuseb_c, 'ghostgeom_'+self._ghostgeom_)( 579 byref(self.svr.exd), c_int(self.facn.shape[0]), 580 self.facn.ctypes._as_parameter_) 581 # initialize GPU data. 582 if self.svr.scu: 583 for key in ('facn', 'value',): 584 self.svr.scu.memcpy(getattr(self, 'cu'+key), getattr(self, key))
585
586 - def soln(self):
587 """ 588 Update ghost cells after self.svr.calcsoln. 589 """ 590 pass
591 - def dsoln(self):
592 """ 593 Update ghost cells after self.svr.calcdsoln. 594 """ 595 pass
596
597 -class CuseNonrefl(CuseBC):
598 _ghostgeom_ = 'mirror'
599 - def soln(self):
600 from ctypes import byref 601 svr = self.svr 602 if svr.scu: 603 self._clib_cuseb_cu.bound_nonrefl_soln(svr.ncuth, 604 svr.cumgr.gexd.gptr, self.facn.shape[0], self.cufacn.gptr) 605 else: 606 self._clib_cuseb_c.bound_nonrefl_soln(byref(svr.exd), 607 self.facn.shape[0], self.facn.ctypes._as_parameter_)
608 - def dsoln(self):
609 from ctypes import byref 610 svr = self.svr 611 if svr.scu: 612 self._clib_cuseb_cu.bound_nonrefl_dsoln(svr.ncuth, 613 byref(svr.cumgr.exd), 614 svr.cumgr.gexd.gptr, self.facn.shape[0], self.cufacn.gptr) 615 else: 616 self._clib_cuseb_c.bound_nonrefl_dsoln(byref(svr.exd), 617 self.facn.shape[0], self.facn.ctypes._as_parameter_)
618
619 -class CusePeriodic(periodic):
620 """ 621 General periodic boundary condition for sequential runs. 622 """
623 - def init(self, **kw):
624 svr = self.svr 625 ngstcell = svr.ngstcell 626 ngstface = svr.ngstface 627 facn = self.facn 628 slctm = self.rclp[:,0] + ngstcell 629 slctr = self.rclp[:,1] + ngstcell 630 # move coordinates. 631 shf = svr.cecnd[slctr,0,:] - svr.fccnd[facn[:,2]+ngstface,:] 632 svr.cecnd[slctm,0,:] = svr.fccnd[facn[:,0]+ngstface,:] + shf
633 - def soln(self):
634 svr = self.svr 635 slctm = self.rclp[:,0] + svr.ngstcell 636 slctr = self.rclp[:,1] + svr.ngstcell 637 svr.cumgr.arr_from_gpu('soln') 638 svr.soln[slctm,:] = svr.soln[slctr,:] 639 svr.cumgr.arr_to_gpu('soln')
640 - def dsoln(self):
641 svr = self.svr 642 slctm = self.rclp[:,0] + svr.ngstcell 643 slctr = self.rclp[:,1] + svr.ngstcell 644 svr.cumgr.arr_from_gpu('dsoln') 645 svr.dsoln[slctm,:,:] = svr.dsoln[slctr,:,:] 646 svr.cumgr.arr_to_gpu('dsoln')
647
648 ################################################################################ 649 # CUDA downloader. 650 ################################################################################ 651 652 -class CudaUpDownAnchor(Anchor):
653 """ 654 Upload and download variable arrays between GPU device memory and CPU host 655 memory. By default preloop() and premarch() callbacks do uploading, and 656 postmarch() and postloop() do downloading. The default behavior 657 is compatible to solvcon.anchor.VtkAnchor. Subclasses can override 658 callback methods for more complicated operations. 659 660 @ivar rsteps: steps to run. 661 @itype rsteps: int 662 @ivar uparrs: names of arrays to be uploaded from host to device. 663 @itype uparrs: list 664 @ivar downarrs: names of arrays to be downloaded from device to host. 665 @itype downarrs: list 666 """
667 - def __init__(self, svr, **kw):
668 self.rsteps = kw.pop('rsteps', 1) 669 self.uparrs = kw.pop('uparrs', list()) 670 self.downarrs = kw.pop('downarrs', list()) 671 super(CudaUpDownAnchor, self).__init__(svr, **kw)
672 - def _upload(self):
673 if self.svr.scu and self.uparrs: 674 self.svr.cumgr.arr_to_gpu(*self.uparrs)
675 - def _download(self):
676 if self.svr.scu and self.downarrs: 677 print self.downarrs 678 self.svr.cumgr.arr_from_gpu(*self.downarrs)
679 - def preloop(self):
680 self._upload()
681 - def premarch(self):
682 istep = self.svr.step_global 683 rsteps = self.rsteps 684 if istep > 0 and istep%rsteps == 0: 685 self._upload()
686 - def postmarch(self):
687 istep = self.svr.step_global 688 rsteps = self.rsteps 689 if istep > 0 and istep%rsteps == 0: 690 self._download()
691 - def postloop(self):
692 istep = self.svr.step_global 693 rsteps = self.rsteps 694 if istep%rsteps != 0: 695 self.process(istep)
696
697 ################################################################################ 698 # CFL. 699 ################################################################################ 700 701 -class CflAnchor(Anchor):
702 """ 703 Counting CFL numbers. Use svr.marchret to return results. Implements 704 postmarch() method. 705 706 @ivar rsteps: steps to run. 707 @itype rsteps: int 708 """
709 - def __init__(self, svr, **kw):
710 self.rsteps = kw.pop('rsteps') 711 super(CflAnchor, self).__init__(svr, **kw)
712 - def postmarch(self):
713 svr = self.svr 714 istep = svr.step_global 715 rsteps = self.rsteps 716 if istep > 0 and istep%rsteps == 0: 717 # download data. 718 if svr.scu: 719 svr.cumgr.arr_from_gpu('cfl', 'ocfl') 720 ocfl = svr.ocfl[svr.ngstcell:] 721 cfl = svr.cfl[svr.ngstcell:] 722 # determine extremum. 723 mincfl = ocfl.min() 724 maxcfl = ocfl.max() 725 nadj = (cfl==1).sum() 726 # store. 727 lst = svr.marchret.setdefault('cfl', [0.0, 0.0, 0, 0]) 728 lst[0] = mincfl 729 lst[1] = maxcfl 730 lst[2] = nadj 731 lst[3] += nadj
732
733 -class CflHook(Hook):
734 """ 735 Makes sure CFL number is bounded and print averaged CFL number over time. 736 Reports CFL information per time step and on finishing. Implements (i) 737 postmarch() and (ii) postloop() methods. 738 739 @ivar name: name of the CFL tool. 740 @itype name: str 741 @ivar rsteps: steps to run. 742 @itype rsteps: int 743 @ivar cflmin: CFL number should be greater than or equal to the value. 744 @itype cflmin: float 745 @ivar cflmax: CFL number should be less than the value. 746 @itype cflmax: float 747 @ivar fullstop: flag to stop when CFL is out of bound. Default True. 748 @itype fullstop: bool 749 @ivar aCFL: accumulated CFL. 750 @itype aCFL: float 751 @ivar mCFL: mean CFL. 752 @itype mCFL: float 753 @ivar hnCFL: hereditary minimal CFL. 754 @itype hnCFL: float 755 @ivar hxCFL: hereditary maximal CFL. 756 @itype hxCFL: float 757 @ivar aadj: number of adjusted CFL accumulated since last report. 758 @itype aadj: int 759 @ivar haadj: total number of adjusted CFL since simulation started. 760 @itype haadj: int 761 """
762 - def __init__(self, cse, **kw):
763 self.name = kw.pop('name', 'cfl') 764 self.cflmin = kw.pop('cflmin', 0.0) 765 self.cflmax = kw.pop('cflmax', 1.0) 766 self.fullstop = kw.pop('fullstop', True) 767 self.aCFL = 0.0 768 self.mCFL = 0.0 769 self.hnCFL = 1.0 770 self.hxCFL = 0.0 771 self.aadj = 0 772 self.haadj = 0 773 rsteps = kw.pop('rsteps', None) 774 super(CflHook, self).__init__(cse, **kw) 775 self.rsteps = self.psteps if rsteps == None else rsteps 776 self.ankkw = kw
777 - def drop_anchor(self, svr):
778 ankkw = self.ankkw.copy() 779 ankkw['name'] = self.name 780 ankkw['rsteps'] = self.rsteps 781 self._deliver_anchor(svr, CflAnchor, ankkw)
782 - def _notify(self, msg):
783 from warnings import warn 784 if self.fullstop: 785 raise RuntimeError(msg) 786 else: 787 warn(msg)
788 - def postmarch(self):
789 from numpy import isnan 790 info = self.info 791 istep = self.cse.execution.step_current 792 mr = self.cse.execution.marchret 793 isp = self.cse.is_parallel 794 rsteps = self.rsteps 795 psteps = self.psteps 796 # collect CFL. 797 if istep > 0 and istep%rsteps == 0: 798 nCFL = max([m['cfl'][0] for m in mr]) if isp else mr['cfl'][0] 799 xCFL = max([m['cfl'][1] for m in mr]) if isp else mr['cfl'][1] 800 nadj = sum([m['cfl'][2] for m in mr]) if isp else mr['cfl'][2] 801 aadj = sum([m['cfl'][3] for m in mr]) if isp else mr['cfl'][3] 802 hnCFL = min([nCFL, self.hnCFL]) 803 self.hnCFL = hnCFL if not isnan(hnCFL) else self.hnCFL 804 hxCFL = max([xCFL, self.hxCFL]) 805 self.hxCFL = hxCFL if not isnan(hxCFL) else self.hxCFL 806 self.aCFL += xCFL*rsteps 807 self.mCFL = self.aCFL/istep 808 self.aadj += aadj 809 self.haadj += aadj 810 # check. 811 if self.cflmin != None and nCFL < self.cflmin: 812 self._notify("CFL = %g < %g after step: %d" % ( 813 nCFL, self.cflmin, istep)) 814 if self.cflmax != None and xCFL >= self.cflmax: 815 self._notify("CFL = %g >= %g after step: %d" % ( 816 xCFL, self.cflmax, istep)) 817 # output information. 818 if istep > 0 and istep%psteps == 0: 819 info("CFL = %.2f/%.2f - %.2f/%.2f adjusted: %d/%d/%d\n" % ( 820 nCFL, xCFL, self.hnCFL, self.hxCFL, nadj, 821 self.aadj, self.haadj)) 822 self.aadj = 0
823 - def postloop(self):
824 self.info("Averaged maximum CFL = %g.\n" % self.mCFL)
825
826 ################################################################################ 827 # Convergence. 828 ################################################################################ 829 830 -class ConvergeAnchor(Anchor):
831 """ 832 Performs calculation for convergence on Solver. Implements (i) preloop() 833 and (ii) postfull() methods. 834 835 @ivar rsteps: steps to run. 836 @itype rsteps: int 837 @ivar norm: container of calculated norm. 838 @itype norm: dict 839 """
840 - def __init__(self, svr, **kw):
841 self.rsteps = kw.pop('rsteps') 842 super(ConvergeAnchor, self).__init__(svr, **kw) 843 self.norm = {}
844 - def preloop(self):
845 from numpy import empty 846 svr = self.svr 847 der = svr.der 848 der['diff'] = empty((svr.ngstcell+svr.ncell, svr.neq), 849 dtype=svr.fpdtype)
850 - def postfull(self):
851 from ctypes import c_double 852 svr = self.svr 853 istep = svr.step_global 854 rsteps = self.rsteps 855 if istep > 0 and istep%rsteps == 0: 856 diff = svr.der['diff'] 857 svr._tcall(svr._clib_cuse_c.process_norm_diff, -svr.ngstcell, 858 svr.ncell, diff[svr.ngstcell:].ctypes._as_parameter_) 859 # Linf norm. 860 Linf = [] 861 Linf.extend(diff.max(axis=0)) 862 self.norm['Linf'] = Linf 863 # L1 norm. 864 svr._clib_cuse_c.process_norm_L1.restype = c_double 865 L1 = [] 866 for ieq in range(svr.neq): 867 vals = svr._tcall(svr._clib_cuse_c.process_norm_L1, 0, 868 svr.ncell, diff[svr.ngstcell:].ctypes._as_parameter_, ieq) 869 L1.append(sum(vals)) 870 self.norm['L1'] = L1 871 # L2 norm. 872 svr._clib_cuse_c.process_norm_L2.restype = c_double 873 L2 = [] 874 for ieq in range(svr.neq): 875 vals = svr._tcall(svr._clib_cuse_c.process_norm_L2, 0, 876 svr.ncell, diff[svr.ngstcell:].ctypes._as_parameter_, ieq) 877 L2.append(sum(vals)) 878 self.norm['L2'] = L2
879
880 -class ConvergeHook(BlockHook):
881 """ 882 Initiates and controls the remote ConvergeAnchor. Implements (i) 883 drop_anchor() and (ii) postmarch() methods. 884 885 @ivar name: name of the converge tool. 886 @itype name: str 887 @ivar keys: kinds of norms to output; Linf, L1, and L2. 888 @itype keys: list 889 @ivar eqs: indices of unknowns (associated with the equations). 890 @itype eqs: list 891 @ivar csteps: steps to collect; default to psteps. 892 @itype csteps: int 893 @ivar rsteps: steps to run (Anchor); default to csteps. 894 @itype rsteps: int 895 @ivar ankkw: hold the remaining keywords for Anchor. 896 @itype ankkw: dict 897 """
898 - def __init__(self, cse, **kw):
899 self.name = kw.pop('name', 'converge') 900 self.keys = kw.pop('keys', None) 901 self.eqs = kw.pop('eqs', None) 902 csteps = kw.pop('csteps', None) 903 rsteps = kw.pop('rsteps', None) 904 super(ConvergeHook, self).__init__(cse, **kw) 905 self.csteps = self.psteps if csteps == None else csteps 906 self.rsteps = self.csteps if rsteps == None else rsteps 907 self.ankkw = kw
908 - def drop_anchor(self, svr):
909 ankkw = self.ankkw.copy() 910 ankkw['name'] = self.name 911 ankkw['rsteps'] = self.rsteps 912 self._deliver_anchor(svr, ConvergeAnchor, ankkw)
913 - def _collect(self):
914 from numpy import sqrt 915 cse = self.cse 916 neq = cse.execution.neq 917 if cse.is_parallel: 918 dom = cse.solver.domainobj 919 dealer = cse.solver.dealer 920 allnorm = list() 921 for iblk in range(dom.nblk): 922 dealer[iblk].cmd.pullank(self.name, 'norm', with_worker=True) 923 allnorm.append(dealer[iblk].recv()) 924 norm = {'Linf': [0.0]*neq, 'L1': [0.0]*neq, 'L2': [0.0]*neq} 925 for ieq in range(neq): 926 norm['Linf'][ieq] = max([nm['Linf'][ieq] for nm in allnorm]) 927 norm['L1'][ieq] = sum([nm['L1'][ieq] for nm in allnorm]) 928 norm['L2'][ieq] = sum([nm['L2'][ieq] for nm in allnorm]) 929 else: 930 svr = self.cse.solver.solverobj 931 norm = svr.runanchors[self.name].norm 932 for ieq in range(neq): 933 norm['L2'][ieq] = sqrt(norm['L2'][ieq]) 934 self.norm = norm
935 - def postmarch(self):
936 info = self.info 937 cse = self.cse 938 istep = cse.execution.step_current 939 csteps = self.csteps 940 psteps = self.psteps 941 if istep > 0 and istep%csteps == 0: 942 self._collect() 943 if istep > 0 and istep%psteps == 0: 944 norm = self.norm 945 keys = self.keys if self.keys != None else self.norm.keys() 946 keys.sort() 947 eqs = self.eqs if self.eqs != None else range(cse.execution.neq) 948 for key in keys: 949 info("Converge/%-4s [ %s ]:\n %s\n" % (key, 950 ', '.join(['%d'%ieq for ieq in eqs]), 951 ' '.join(['%.4e'%norm[key][ieq] for ieq in eqs]), 952 ))
953
954 ################################################################################ 955 # Probe. 956 ################################################################################ 957 958 -class Probe(object):
959 """ 960 Represent a point in the mesh. 961 """
962 - def __init__(self, *args, **kw):
963 from numpy import array 964 self.speclst = kw.pop('speclst') 965 self.name = kw.pop('name', None) 966 self.crd = array(args, dtype='float64') 967 self.pcl = -1 968 self.vals = list()
969 - def __str__(self):
970 crds = ','.join(['%g'%val for val in self.crd]) 971 return 'Pt/%s#%d(%s)%d' % (self.name, self.pcl, crds, len(self.vals))
972 - def locate_cell(self, svr):
973 idx = svr.locate_point(*self.crd) 974 self.pcl = idx[0]
975 - def __call__(self, svr, time):
976 ngstcell = svr.ngstcell 977 vlist = [time] 978 for spec in self.speclst: 979 arr = None 980 if isinstance(spec, str): 981 arr = svr.der[spec] 982 elif isinstance(spec, int): 983 if spec >= 0 and spec < svr.neq: 984 arr = svr.soln[:,spec] 985 elif spec < 0 and -1-spec < svr.neq: 986 spec = -1-spec 987 arr = svr.sol[:,spec] 988 if arr == None: 989 raise IndexError, 'spec %s incorrect'%str(spec) 990 vlist.append(arr[ngstcell+self.pcl]) 991 self.vals.append(vlist)
992
993 -class ProbeAnchor(Anchor):
994 """ 995 Anchor for probe. 996 """
997 - def __init__(self, svr, **kw):
998 speclst = kw.pop('speclst') 999 self.points = list() 1000 for data in kw.pop('coords'): 1001 pkw = {'speclst': speclst, 'name': data[0]} 1002 self.points.append(Probe(*data[1:], **pkw)) 1003 super(ProbeAnchor, self).__init__(svr, **kw)
1004 - def preloop(self):
1005 for point in self.points: point.locate_cell(self.svr) 1006 for point in self.points: point(self.svr, self.svr.time)
1007 - def postfull(self):
1008 for point in self.points: point(self.svr, self.svr.time)
1009
1010 -class ProbeHook(BlockHook):
1011 """ 1012 Point probe. 1013 """
1014 - def __init__(self, cse, **kw):
1015 self.name = kw.pop('name', 'ppank') 1016 super(ProbeHook, self).__init__(cse, **kw) 1017 self.ankkw = kw 1018 self.points = None
1019 - def drop_anchor(self, svr):
1020 ankkw = self.ankkw.copy() 1021 ankkw['name'] = self.name 1022 self._deliver_anchor(svr, ProbeAnchor, ankkw)
1023 - def _collect(self):
1024 cse = self.cse 1025 if cse.is_parallel: 1026 dom = cse.solver.domainobj 1027 dealer = cse.solver.dealer 1028 allpoints = list() 1029 for iblk in range(dom.nblk): 1030 dealer[iblk].cmd.pullank(self.name, 'points', with_worker=True) 1031 allpoints.append(dealer[iblk].recv()) 1032 npt = len(allpoints[0]) 1033 points = [None]*npt 1034 for rpoints in allpoints: 1035 ipt = 0 1036 while ipt < npt: 1037 if points[ipt] == None and rpoints[ipt].pcl >=0: 1038 points[ipt] = rpoints[ipt] 1039 ipt += 1 1040 else: 1041 svr = self.cse.solver.solverobj 1042 points = [pt for pt in svr.runanchors[self.name].points 1043 if pt.pcl >= 0] 1044 self.points = points
1045 - def postmarch(self):
1046 psteps = self.psteps 1047 istep = self.cse.execution.step_current 1048 if istep%psteps != 0: return False 1049 self._collect() 1050 return True
1051 - def postloop(self):
1052 import os 1053 from numpy import array, save 1054 for point in self.points: 1055 ptfn = '%s_pt_%s_%s.npy' % ( 1056 self.cse.io.basefn, self.name, point.name) 1057 ptfn = os.path.join(self.cse.io.basedir, ptfn) 1058 save(ptfn, array(point.vals, dtype='float64'))
1059