Package solvcon :: Module scuda
[hide private]
[frames] | [no frames]

Source Code for Module solvcon.scuda

  1  # -*- coding: UTF-8 -*- 
  2  # 
  3  # Copyright (C) 2011 Yung-Yu Chen <yyc@solvcon.net>. 
  4  # 
  5  # This program is free software; you can redistribute it and/or modify 
  6  # it under the terms of the GNU General Public License as published by 
  7  # the Free Software Foundation; either version 2 of the License, or 
  8  # (at your option) any later version. 
  9  # 
 10  # This program is distributed in the hope that it will be useful, 
 11  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13  # GNU General Public License for more details. 
 14  # 
 15  # You should have received a copy of the GNU General Public License along 
 16  # with this program; if not, write to the Free Software Foundation, Inc., 
 17  # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
 18   
 19  """ 
 20  A wrapper to CUDA shared library by using ctypes. 
 21  """ 
 22   
 23  _libs = dict() 
24 -def get_lib(path):
25 from ctypes import CDLL 26 if path not in _libs: 27 _libs[path] = CDLL(path) 28 lib = _libs[path] 29 return lib
30
class GpuMemory(object):
    """
    Bookkeeping record for one block of memory on the GPU.

    @ivar gptr: handle of the memory block.
    @ivar nbytes: size of the memory block in bytes.
    """

    def __init__(self, gptr, nbytes):
        """
        @param gptr: handle of the memory block.
        @param nbytes: size of the memory block in bytes.
        """
        self.gptr, self.nbytes = gptr, nbytes
38 39 from ctypes import Structure, c_uint, c_char, c_size_t, c_int
class CudaDim3(Structure):
    """
    ctypes structure of three unsigned integers 'x', 'y' and 'z'
    (presumably the counterpart of CUDA's dim3 -- confirm against the
    kernel launch code that uses it).

    Components that are given neither positionally nor by keyword default
    to 1.
    """
    _fields_ = [
        ('x', c_uint), ('y', c_uint), ('z', c_uint),
    ]

    def __init__(self, *args, **kw):
        """
        @param args: values for x, y, z in that order (all optional).
        @keyword x: value of the x component.
        @keyword y: value of the y component.
        @keyword z: value of the z component.
        """
        super(CudaDim3, self).__init__(*args, **kw)
        # Positional arguments fill the fields in declaration order, so only
        # the fields after them can still be unset; defaulting all fields
        # that are missing from kw would overwrite the positional values.
        names = [field[0] for field in self._fields_]
        for key in names[len(args):]:
            if key not in kw:
                setattr(self, key, 1)
class CudaDeviceProp(Structure):
    """
    ctypes structure that receives the device properties filled in by
    cudaGetDeviceProperties().

    NOTE(review): the field list has to match the cudaDeviceProp layout of
    the CUDA runtime that is actually loaded -- confirm against the
    driver_types.h of the targeted CUDA version.
    """
    _fields_ = [
        ('name', c_char*256),
        ('totalGlobalMem', c_size_t),
        ('sharedMemPerBlock', c_size_t),
        # NOTE: misspelled (CUDA calls it regsPerBlock); the name is kept so
        # that existing users do not break, see the regsPerBlock alias below.
        ('regsPerPerBlock', c_int),
        ('warpSize', c_int),
        ('memPitch', c_size_t),
        ('maxThreadsPerBlock', c_int),
        ('maxThreadsDim', c_int*3),
        ('maxGridSize', c_int*3),
        ('totalConstMem', c_size_t),
        ('major', c_int),
        ('minor', c_int),
        ('clockRate', c_int),
        ('textureAlignment', c_size_t),
        ('deviceOverlap', c_int),
        ('multiProcessorCount', c_int),
        ('kernelExecTimeoutEnabled', c_int),
        ('integrated', c_int),
        ('canMapHostMemory', c_int),
        ('computeMode', c_int),
        ('maxTexture1D', c_int),
        ('maxTexture2D', c_int*2),
        ('maxTexture3D', c_int*3),
        ('maxTexture2DArray', c_int*3),
        ('surfaceAlignment', c_size_t),
        ('concurrentKernels', c_int),
        ('ECCEnabled', c_int),
        ('pciBusID', c_int),
        ('pciDeviceID', c_int),
        ('tccDriver', c_int),
        ('__cudaReserved', c_int*21),
    ]

    @property
    def regsPerBlock(self):
        """
        Correctly spelled, read-only alias of the 'regsPerPerBlock' field.
        """
        return self.regsPerPerBlock

    def __str__(self):
        """
        @return: the device name.
        @rtype: str
        """
        name = self.name
        # On Python 3 a c_char array is read back as bytes, which __str__
        # must not return.
        if not isinstance(name, str):
            name = name.decode('utf-8', 'replace')
        return name

    def get_compute_capability(self):
        """
        @return: the compute capability formatted as 'major.minor'.
        @rtype: str
        """
        return '%d.%d' % (self.major, self.minor)

    def has_compute_capability(self, *args):
        """
        Determine if the device has the compute capability specified by the
        arguments.  Arguments can be in the format of (i) 'x.y' or (ii) x, y.

        @return: has the compute capability or not.
        @rtype: bool
        @raise ValueError: the arguments match neither format.
        """
        # basestring exists only on Python 2.
        try:
            string_types = basestring
        except NameError:
            string_types = str
        # parse input.
        if len(args) == 1 and isinstance(args[0], string_types):
            major, minor = [int(val) for val in args[0].split('.')]
        elif len(args) == 2:
            major, minor = args
        else:
            raise ValueError('incompatible arguments.')
        # determine capability: compare major first, then minor.
        return (self.major, self.minor) >= (major, minor)
108 del Structure, c_uint, c_char, c_size_t, c_int
class Scuda(object):
    """
    Wrapper for CUDA library by using ctypes.

    Attributes that are not defined on the object are forwarded to the
    loaded libraries: names starting with 'cuda' are looked up in the runtime
    library and the remaining names starting with 'cu' in the driver library.

    @ivar cudart: CUDA runtime library.
    @type cudart: ctypes.CDLL
    @ivar cuda: CUDA driver library.
    @type cuda: ctypes.CDLL
    @ivar device: the ID of device to use.
    @type device: int
    @ivar devprop: properties of the device in use.
    @type devprop: CudaDeviceProp
    @ivar _alloc_gpumem: all allocated GpuMemory objects.
    @type _alloc_gpumem: set
    """

    @staticmethod
    def has_cuda():
        """
        Test whether both the CUDA runtime and driver libraries can be
        loaded.

        @return: True if both libraries are loadable.
        @rtype: bool
        """
        try:
            get_lib('libcudart.so')
            get_lib('libcuda.so')
        except OSError:
            return False
        return True

    # cudaMemcpyKind enum in driver_types.h
    cudaMemcpyHostToHost = 0  # host -> host.
    cudaMemcpyHostToDevice = 1  # host -> device.
    cudaMemcpyDeviceToHost = 2  # device -> host.
    cudaMemcpyDeviceToDevice = 3  # device -> device.

    # cudaError enum in driver_types.h
    cudaSuccess = 0
    cudaErrorMissingConfiguration = 1
    cudaErrorMemoryAllocation = 2
    cudaErrorInitializationError = 3
    cudaErrorLaunchFailure = 4
    cudaErrorPriorLaunchFailure = 5
    cudaErrorLaunchTimeout = 6
    cudaErrorLaunchOutOfResources = 7
    cudaErrorInvalidDeviceFunction = 8
    cudaErrorInvalidConfiguration = 9
    cudaErrorInvalidDevice = 10
    cudaErrorInvalidValue = 11
    cudaErrorInvalidPitchValue = 12
    cudaErrorInvalidSymbol = 13
    cudaErrorMapBufferObjectFailed = 14
    cudaErrorUnmapBufferObjectFailed = 15
    cudaErrorInvalidHostPointer = 16
    cudaErrorInvalidDevicePointer = 17
    cudaErrorInvalidTexture = 18
    cudaErrorInvalidTextureBinding = 19
    cudaErrorInvalidChannelDescriptor = 20
    cudaErrorInvalidMemcpyDirection = 21
    cudaErrorAddressOfConstant = 22
    cudaErrorTextureFetchFailed = 23
    cudaErrorTextureNotBound = 24
    cudaErrorSynchronizationError = 25
    cudaErrorInvalidFilterSetting = 26
    cudaErrorInvalidNormSetting = 27
    cudaErrorMixedDeviceExecution = 28
    cudaErrorCudartUnloading = 29
    cudaErrorUnknown = 30
    cudaErrorNotYetImplemented = 31
    cudaErrorMemoryValueTooLarge = 32
    cudaErrorInvalidResourceHandle = 33
    cudaErrorNotReady = 34
    cudaErrorInsufficientDriver = 35
    cudaErrorSetOnActiveProcess = 36
    cudaErrorInvalidSurface = 37
    cudaErrorNoDevice = 38
    cudaErrorECCUncorrectable = 39
    cudaErrorSharedObjectSymbolNotFound = 40
    cudaErrorSharedObjectInitFailed = 41
    cudaErrorUnsupportedLimit = 42
    cudaErrorDuplicateVariableName = 43
    cudaErrorDuplicateTextureName = 44
    cudaErrorDuplicateSurfaceName = 45
    cudaErrorDevicesUnavailable = 46
    cudaErrorInvalidKernelImage = 47
    cudaErrorNoKernelImageForDevice = 48
    cudaErrorIncompatibleDriverContext = 49
    cudaErrorStartupFailure = 0x7f
    cudaErrorApiFailureBase = 10000

    def __init__(self, libname_cudart='libcudart.so',
                 libname_cuda='libcuda.so'):
        """
        Load the libraries and select the first device that can be used.

        @keyword libname_cudart: name of the CUDA runtime library.
        @type libname_cudart: str
        @keyword libname_cuda: name of the CUDA driver library.
        @type libname_cuda: str
        """
        self.cudart = get_lib(libname_cudart)
        self.cuda = get_lib(libname_cuda)
        self.device = None
        self.devprop = CudaDeviceProp()
        self._alloc_gpumem = set()
        super(Scuda, self).__init__()
        # cached device count; filled by the first call to len(self).
        self._dcnt = None
        self.use_first_valid_device()

    def __del__(self):
        """
        Release all GPU memory that is still recorded as allocated.
        """
        # _alloc_gpumem does not exist if __init__ failed while loading the
        # libraries; there is nothing to free in that case.
        for gmem in list(getattr(self, '_alloc_gpumem', ())):
            self.free(gmem, do_remove=False)

    def __getattr__(self, key):
        """
        Forward the lookup of names that are not found on the object to the
        CUDA libraries.

        @param key: attribute name.
        @type key: str
        @raise AttributeError: the name cannot be resolved.
        """
        # __getattr__ is only invoked when the normal lookup has failed.  If
        # the library handles themselves are missing, forwarding would look
        # them up through this method again and recurse without end.
        if key in ('cudart', 'cuda'):
            raise AttributeError(key)
        if key.startswith('cuda'):
            return getattr(self.cudart, key)
        elif key.startswith('cu'):
            return getattr(self.cuda, key)
        # The attribute protocol (hasattr, getattr with a default, copy, ...)
        # relies on AttributeError rather than KeyError.
        raise AttributeError(key)

    def __len__(self):
        """
        @return: the device count reported by cudaGetDeviceCount(); the value
            is queried once and cached.
        @rtype: int
        """
        from ctypes import byref, c_int
        if self._dcnt is None:
            dcnt = c_int()
            # the status code is not checked; the count stays 0 if the call
            # does not write to it.
            self.cudaGetDeviceCount(byref(dcnt))
            self._dcnt = dcnt.value
        return self._dcnt

    def download_device_properties(self):
        """
        Use CUDA runtime API to download the properties of the device
        self.device into self.devprop.

        @return: nothing
        @raise ValueError: the runtime reported an error; the error code is
            the argument of the exception.
        """
        from ctypes import byref, c_int
        ret = self.cudaGetDeviceProperties(byref(self.devprop),
                                           c_int(self.device))
        if ret != self.cudaSuccess:
            raise ValueError(ret)

    def use_device(self, idx):
        """
        Use the specified device ID.  Set self.device and refresh
        self.devprop.

        @param idx: device ID to use.
        @type idx: int
        @return: the device ID.
        @rtype: int
        @raise ValueError: the ID is out of range or the runtime reported an
            error.
        """
        from ctypes import c_int
        # explicit check instead of assert, which is stripped under -O.
        if not 0 <= idx < len(self):
            raise ValueError('invalid device ID: %r' % (idx,))
        cidx = c_int(idx)
        ret = self.cudaSetDevice(cidx)
        if ret != self.cudaSuccess:
            raise ValueError(ret)
        self.device = cidx.value
        self.download_device_properties()
        # return the device ID as documented, not the status code (which is
        # always cudaSuccess at this point).
        return self.device

    def use_first_valid_device(self):
        """
        Try the devices in order and use the first one that can be selected.

        @return: the ID of the device in use, or None if no device could be
            selected.
        @rtype: int
        """
        for idx in range(len(self)):
            try:
                return self.use_device(idx)
            except ValueError:
                # this device is not usable; try the next one.
                continue
        return None

    def alloc(self, nbytes):
        """
        Allocate memory on the GPU and record the allocation.

        @param nbytes: number of bytes to allocate.
        @type nbytes: int
        @return: the record of the allocated memory.
        @rtype: GpuMemory
        @raise ValueError: the allocation failed; the error code is the
            argument of the exception.
        """
        from ctypes import byref, c_size_t, c_void_p
        gptr = c_void_p()
        # cudaMalloc takes a size_t; a bare Python int would be passed as a
        # C int and masked to its width.
        ret = self.cudaMalloc(byref(gptr), c_size_t(nbytes))
        if ret != self.cudaSuccess:
            raise ValueError(ret)
        gmem = GpuMemory(gptr, nbytes)
        self._alloc_gpumem.add(gmem)
        return gmem

    def free(self, gmem, do_remove=True):
        """
        Free memory on the GPU.

        @param gmem: the memory to free.
        @type gmem: GpuMemory
        @keyword do_remove: also drop the record from self._alloc_gpumem.
        @type do_remove: bool
        @return: nothing
        """
        # cudaFree takes the device pointer by value; passing byref() would
        # hand over the address of the host-side pointer object instead.
        self.cudaFree(gmem.gptr)
        if do_remove:
            self._alloc_gpumem.discard(gmem)

    def memcpy(self, tgt, src):
        """
        Copy memory between host and device, or between two device memory
        blocks.  Device memory is given as GpuMemory; anything else is
        treated as a host-side object that provides 'ctypes._as_parameter_'
        and 'nbytes' (e.g. a numpy array).  For copies between host and
        device the byte count is taken from the host-side object.

        @param tgt: target of the copy.
        @param src: source of the copy.
        @return: nothing
        @raise TypeError: neither tgt nor src is a GpuMemory.
        @raise ValueError: the target device memory is smaller than the
            source device memory, or the runtime reported an error (the
            error code is the argument of the exception).
        """
        from ctypes import c_size_t
        src_on_gpu = isinstance(src, GpuMemory)
        tgt_on_gpu = isinstance(tgt, GpuMemory)
        if src_on_gpu and tgt_on_gpu:
            # sizes are kept on the GpuMemory records, not on the raw
            # pointers.
            if tgt.nbytes < src.nbytes:
                raise ValueError(
                    'target GPU memory is smaller than source GPU memory')
            dkey = self.cudaMemcpyDeviceToDevice
            psrc = src.gptr
            ptgt = tgt.gptr
            nbytes = src.nbytes
        elif src_on_gpu:
            dkey = self.cudaMemcpyDeviceToHost
            psrc = src.gptr
            ptgt = tgt.ctypes._as_parameter_
            nbytes = tgt.nbytes
        elif tgt_on_gpu:
            dkey = self.cudaMemcpyHostToDevice
            psrc = src.ctypes._as_parameter_
            ptgt = tgt.gptr
            nbytes = src.nbytes
        else:
            raise TypeError('don\'t do host to host memcpy')
        # the byte count is a size_t in the C prototype.
        ret = self.cudaMemcpy(ptgt, psrc, c_size_t(nbytes), dkey)
        if ret != self.cudaSuccess:
            raise ValueError(ret)
307