1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 """
A ctypes-based wrapper around the CUDA runtime and driver shared libraries.
21 """
22
23 _libs = dict()
25 from ctypes import CDLL
26 if path not in _libs:
27 _libs[path] = CDLL(path)
28 lib = _libs[path]
29 return lib
30
32 """
33 Record-keeper for memory on GPU.
34 """
36 self.gptr = gptr
37 self.nbytes = nbytes
38
39 from ctypes import Structure, c_uint, c_char, c_size_t, c_int
41 _fields_ = [
42 ('x', c_uint), ('y', c_uint), ('z', c_uint),
43 ]
45 super(CudaDim3, self).__init__(*args, **kw)
46 for key in ['x', 'y', 'z']:
47 if key not in kw: setattr(self, key, 1)
49 _fields_ = [
50 ('name', c_char*256),
51 ('totalGlobalMem', c_size_t),
52 ('sharedMemPerBlock', c_size_t),
53 ('regsPerPerBlock', c_int),
54 ('warpSize', c_int),
55 ('memPitch', c_size_t),
56 ('maxThreadsPerBlock', c_int),
57 ('maxThreadsDim', c_int*3),
58 ('maxGridSize', c_int*3),
59 ('totalConstMem', c_size_t),
60 ('major', c_int),
61 ('minor', c_int),
62 ('clockRate', c_int),
63 ('textureAlignment', c_size_t),
64 ('deviceOverlap', c_int),
65 ('multiProcessorCount', c_int),
66 ('kernelExecTimeoutEnabled', c_int),
67 ('integrated', c_int),
68 ('canMapHostMemory', c_int),
69 ('computeMode', c_int),
70 ('maxTexture1D', c_int),
71 ('maxTexture2D', c_int*2),
72 ('maxTexture3D', c_int*3),
73 ('maxTexture2DArray', c_int*3),
74 ('surfaceAlignment', c_size_t),
75 ('concurrentKernels', c_int),
76 ('ECCEnabled', c_int),
77 ('pciBusID', c_int),
78 ('pciDeviceID', c_int),
79 ('tccDriver', c_int),
80 ('__cudaReserved', c_int*21),
81 ]
87 """
88 Determine if the device has the compute capability specified by the
89 arguments. Arguments can be in the format of (i) 'x.y' or (ii) x, y.
90
91 @return: has the compute capability or not.
92 @rtype: bool
93 """
94
95 if len(args) == 1 and isinstance(args[0], basestring):
96 major, minor = [int(val) for val in args[0].split('.')]
97 elif len(args) == 2:
98 major, minor = args
99 else:
100 raise ValueError('incompatible arguments.')
101
102 if self.major > major:
103 return True
104 elif self.major == major and self.minor >= minor:
105 return True
106 else:
107 return False
108 del Structure, c_uint, c_char, c_size_t, c_int
111 """
112 Wrapper for CUDA library by using ctypes.
113
114 @ivar cudart: CUDA runtime library.
115 @itype cudart: ctypes.CDLL
116 @ivar cuda: CUDA driver library.
117 @itype cuda: ctypes.CDLL
118 @ivar device: the ID of device to use.
119 @itype device: int
120 @ivar _alloc_gpumem: all allocated GpuMemory objects.
121 @itype _alloc_gpumem: set
122 """
123
124 @staticmethod
126 ret = False
127 try:
128 get_lib('libcudart.so')
129 get_lib('libcuda.so')
130 except OSError:
131 ret = False
132 else:
133 ret = True
134 return ret
135
136
137 cudaMemcpyHostToHost = 0
138 cudaMemcpyHostToDevice = 1
139 cudaMemcpyDeviceToHost = 2
140 cudaMemcpyDeviceToDevice = 3
141
142
143 cudaSuccess = 0
144 cudaErrorMissingConfiguration = 1
145 cudaErrorMemoryAllocation = 2
146 cudaErrorInitializationError = 3
147 cudaErrorLaunchFailure = 4
148 cudaErrorPriorLaunchFailure = 5
149 cudaErrorLaunchTimeout = 6
150 cudaErrorLaunchOutOfResources = 7
151 cudaErrorInvalidDeviceFunction = 8
152 cudaErrorInvalidConfiguration = 9
153 cudaErrorInvalidDevice = 10
154 cudaErrorInvalidValue = 11
155 cudaErrorInvalidPitchValue = 12
156 cudaErrorInvalidSymbol = 13
157 cudaErrorMapBufferObjectFailed = 14
158 cudaErrorUnmapBufferObjectFailed = 15
159 cudaErrorInvalidHostPointer = 16
160 cudaErrorInvalidDevicePointer = 17
161 cudaErrorInvalidTexture = 18
162 cudaErrorInvalidTextureBinding = 19
163 cudaErrorInvalidChannelDescriptor = 20
164 cudaErrorInvalidMemcpyDirection = 21
165 cudaErrorAddressOfConstant = 22
166 cudaErrorTextureFetchFailed = 23
167 cudaErrorTextureNotBound = 24
168 cudaErrorSynchronizationError = 25
169 cudaErrorInvalidFilterSetting = 26
170 cudaErrorInvalidNormSetting = 27
171 cudaErrorMixedDeviceExecution = 28
172 cudaErrorCudartUnloading = 29
173 cudaErrorUnknown = 30
174 cudaErrorNotYetImplemented = 31
175 cudaErrorMemoryValueTooLarge = 32
176 cudaErrorInvalidResourceHandle = 33
177 cudaErrorNotReady = 34
178 cudaErrorInsufficientDriver = 35
179 cudaErrorSetOnActiveProcess = 36
180 cudaErrorInvalidSurface = 37
181 cudaErrorNoDevice = 38
182 cudaErrorECCUncorrectable = 39
183 cudaErrorSharedObjectSymbolNotFound = 40
184 cudaErrorSharedObjectInitFailed = 41
185 cudaErrorUnsupportedLimit = 42
186 cudaErrorDuplicateVariableName = 43
187 cudaErrorDuplicateTextureName = 44
188 cudaErrorDuplicateSurfaceName = 45
189 cudaErrorDevicesUnavailable = 46
190 cudaErrorInvalidKernelImage = 47
191 cudaErrorNoKernelImageForDevice = 48
192 cudaErrorIncompatibleDriverContext = 49
193 cudaErrorStartupFailure = 0x7f
194 cudaErrorApiFailureBase = 10000
195
def __init__(self,
             libname_cudart='libcudart.so',
             libname_cuda='libcuda.so'):
    """
    Load both CUDA shared libraries, reset the bookkeeping attributes and
    try to select the first usable device.

    @keyword libname_cudart: name of the CUDA runtime library.
    @type libname_cudart: str
    @keyword libname_cuda: name of the CUDA driver library.
    @type libname_cuda: str
    """
    # The runtime library is loaded before the driver library.
    runtime_lib = get_lib(libname_cudart)
    driver_lib = get_lib(libname_cuda)
    self.cudart, self.cuda = runtime_lib, driver_lib
    # No device is selected yet; devprop is an empty property structure
    # until a device gets selected.
    self.device, self.devprop = None, CudaDeviceProp()
    # Records of every device allocation handed out by this object.
    self._alloc_gpumem = set()
    super(Scuda, self).__init__()
    # Cached device count; None means it has not been queried yet.
    self._dcnt = None
    self.use_first_valid_device()
213 for gmem in self._alloc_gpumem:
214 self.free(gmem, do_remove=False)
216 if key.startswith('cuda'):
217 return getattr(self.cudart, key)
218 elif key.startswith('cu'):
219 return getattr(self.cuda, key)
220 else:
221 raise KeyError
222
224 from ctypes import byref, c_int
225 if self._dcnt is None:
226 dcnt = c_int()
227 self.cudaGetDeviceCount(byref(dcnt))
228 self._dcnt = dcnt.value
229 return self._dcnt
230
232 """
Use the CUDA runtime API to download the properties of the selected
device (self.device) into self.devprop.
235
236 @return: nothing
237 """
238 from ctypes import byref, c_int
239 ret = self.cudaGetDeviceProperties(byref(self.devprop),
240 c_int(self.device))
241 if ret != self.cudaSuccess:
242 raise ValueError(ret)
244 """
245 Use the specified device ID. Set self.device.
246
247 @param idx: device ID to use.
248 @type idx: int
249 @return: the device ID.
250 @rtype: int
251 """
252 from ctypes import c_int, byref
253 assert idx < len(self)
254 idx = c_int(idx)
255 ret = self.cudaSetDevice(idx)
256 if ret != self.cudaSuccess:
257 raise ValueError(ret)
258 self.device = idx.value
259 self.download_device_properties()
260 return ret
262 dev = None
263 for idx in range(len(self)):
264 try:
265 dev = self.use_device(idx)
266 break
267 except ValueError:
268 dev = None
269 pass
270 return dev
271
def alloc(self, nbytes):
    """
    Allocate device memory with cudaMalloc and keep a record of it in
    self._alloc_gpumem.

    @param nbytes: number of bytes to allocate.
    @type nbytes: int
    @return: record of the newly allocated device memory.
    @rtype: GpuMemory
    @raise ValueError: nbytes is negative, or cudaMalloc returned a status
        other than cudaSuccess (the status code is the exception argument).
    """
    from ctypes import byref, c_size_t, c_void_p
    nbytes = int(nbytes)
    if nbytes < 0:
        raise ValueError('nbytes must be non-negative.')
    gptr = c_void_p()
    # cudaMalloc expects a size_t.  A bare Python int would be converted to
    # a C int by ctypes, so wrap the size explicitly.
    ret = self.cudaMalloc(byref(gptr), c_size_t(nbytes))
    # Do not record an allocation that did not happen.
    if ret != self.cudaSuccess:
        raise ValueError(ret)
    gmem = GpuMemory(gptr, nbytes)
    self._alloc_gpumem.add(gmem)
    return gmem
def free(self, gmem, do_remove=True):
    """
    Release device memory with cudaFree and optionally drop its record.

    The status returned by cudaFree is not checked.

    @param gmem: record of the device memory to release.
    @type gmem: GpuMemory
    @keyword do_remove: also remove gmem from self._alloc_gpumem.  Pass
        False when the caller is iterating over that set.
    @type do_remove: bool
    @return: nothing
    """
    # cudaFree takes the device pointer itself (void*), not the address of
    # the host variable holding it, so pass gptr by value.
    self.cudaFree(gmem.gptr)
    if do_remove:
        # discard() ignores records that are not tracked.
        self._alloc_gpumem.discard(gmem)
288 if isinstance(src, GpuMemory) and isinstance(tgt, GpuMemory):
289 dkey = self.cudaMemcpyDeviceToDevice
290 psrc = src.gptr
291 ptgt = tgt.gptr
292 assert ptgt.nbytes > psrc.nbytes
293 nbytes = psrc.nbytes
294 elif isinstance(src, GpuMemory):
295 dkey = self.cudaMemcpyDeviceToHost
296 psrc = src.gptr
297 ptgt = tgt.ctypes._as_parameter_
298 nbytes = tgt.nbytes
299 elif isinstance(tgt, GpuMemory):
300 dkey = self.cudaMemcpyHostToDevice
301 psrc = src.ctypes._as_parameter_
302 ptgt = tgt.gptr
303 nbytes = src.nbytes
304 else:
305 raise TypeError('don\'t do host to host memcpy')
306 self.cudaMemcpy(ptgt, psrc, nbytes, dkey)
307