Euler appears to work now

This commit is contained in:
André R. Brodtkorb
2018-11-05 16:46:37 +01:00
parent 0671bd747a
commit e38885d39b
13 changed files with 702 additions and 19 deletions

View File

@@ -29,16 +29,17 @@ import io
import hashlib
import logging
import gc
import netCDF4
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
"""
Class which keeps track of time spent for a section of code
"""
class Timer(object):
"""
Class which keeps track of time spent for a section of code
"""
def __init__(self, tag, log_level=logging.DEBUG):
self.tag = tag
self.log_level = log_level
@@ -63,9 +64,116 @@ class Timer(object):
class DataDumper(object):
"""
Simple class for holding a netCDF4 object
(handles opening and closing in a nice way)
Use as
with DataDumper("filename") as data:
...
"""
def __init__(self, filename, *args, **kwargs):
self.logger = logging.getLogger(__name__)
#Create directory if needed
dirname = os.path.dirname(filename)
if not os.path.isdir(dirname):
self.logger.info("Creating directory " + dirname)
os.makedirs(dirname)
#Get mode of file if we have that
mode = None
if (args):
mode = args[0]
elif (kwargs and 'mode' in kwargs.keys()):
mode = kwargs['mode']
#Create new unique file if writing
if (mode):
if (("w" in mode) or ("+" in mode) or ("a" in mode)):
i = 0
stem, ext = os.path.splitext(filename)
while (os.path.isfile(filename)):
filename = "{:s}_{:04d}{:s}".format(stem, i, ext)
i = i+1
self.filename = os.path.abspath(filename)
#Save arguments
self.args = args
self.kwargs = kwargs
#Log output
self.logger.info("Writing output to " + self.filename)
def __enter__(self):
self.logger.info("Opening " + self.filename)
if (self.args):
self.logger.info("Arguments: " + str(self.args))
if (self.kwargs):
self.logger.info("Keyword arguments: " + str(self.kwargs))
self.ncfile = netCDF4.Dataset(self.filename, *self.args, **self.kwargs)
return self
def __exit__(self, *args):
self.logger.info("Closing " + self.filename)
self.ncfile.close()
class ProgressPrinter(object):
"""
Small helper class for
"""
def __init__(self, total_steps, print_every=5):
self.logger = logging.getLogger(__name__)
self.start = time.time()
self.total_steps = total_steps
self.print_every = print_every
self.next_print_time = self.print_every
self.last_step = 0
self.secs_per_iter = None
def getPrintString(self, step):
elapsed = time.time() - self.start
if (elapsed > self.next_print_time):
dt = elapsed - (self.next_print_time - self.print_every)
dsteps = step - self.last_step
steps_remaining = self.total_steps - step
if (dsteps == 0):
return
self.last_step = step
self.next_print_time = elapsed + self.print_every
if not self.secs_per_iter:
self.secs_per_iter = dt / dsteps
self.secs_per_iter = 0.2*self.secs_per_iter + 0.8*(dt / dsteps)
remaining_time = steps_remaining * self.secs_per_iter
return "{:s}. Total: {:s}, elapsed: {:s}, remaining: {:s}".format(
ProgressPrinter.progressBar(step, self.total_steps),
ProgressPrinter.timeString(elapsed + remaining_time),
ProgressPrinter.timeString(elapsed),
ProgressPrinter.timeString(remaining_time))
def timeString(seconds):
seconds = int(max(seconds, 1))
minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
periods = [('h', hours), ('m', minutes), ('s', seconds)]
time_string = ' '.join('{}{}'.format(value, name)
for name, value in periods
if value)
return time_string
def progressBar(step, total_steps, width=30):
progress = np.round(width * step / total_steps).astype(np.int32)
progressbar = "0% [" + "#"*(progress) + "="*(width-progress) + "] 100%"
return progressbar

View File

@@ -113,6 +113,7 @@ class EE2D_KP07_dimsplit (Simulator.BaseSimulator):
self.u1[3].data.gpudata, self.u1[3].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@@ -131,6 +132,7 @@ class EE2D_KP07_dimsplit (Simulator.BaseSimulator):
self.u1[3].data.gpudata, self.u1[3].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@@ -105,6 +105,7 @@ class FORCE (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@@ -100,6 +100,7 @@ class HLL (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@@ -110,6 +110,7 @@ class HLL2 (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@@ -126,6 +127,7 @@ class HLL2 (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@@ -110,6 +110,7 @@ class KP07 (Simulator.BaseSimulator):
def stepEuler(self, dt):
self.substepRK(dt, 0)
self.t += dt
self.nt += 1
def stepRK(self, dt, order):
if (order != 2):
@@ -117,6 +118,7 @@ class KP07 (Simulator.BaseSimulator):
self.substepRK(dt, 0)
self.substepRK(dt, 1)
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@@ -110,6 +110,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@@ -126,6 +127,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@@ -101,6 +101,7 @@ class LxF (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@@ -80,8 +80,9 @@ class BaseSimulator:
#Create a CUDA stream
self.stream = cuda.Stream()
#Keep track of simulation time
self.t = 0.0;
#Keep track of simulation time and number of timesteps
self.t = 0.0
self.nt = 0
#Log progress every n seconds during simulation
self.log_every = 5
@@ -209,14 +210,16 @@ class BaseSimulator:
def download(self):
raise(NotImplementedError("Needs to be implemented in subclass"))
def check(self):
raise(NotImplementedError("Needs to be implemented in subclass"))
def sim_time(self):
return self.t
def synchronize(self):
self.stream.synchronize()
def check(self):
raise(NotImplementedError("Needs to be implemented in subclass"))
def simTime(self):
return self.t
def simSteps(self):
return self.nt

View File

@@ -103,6 +103,7 @@ class WAF (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@@ -118,6 +119,7 @@ class WAF (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@@ -154,10 +154,10 @@ __global__ void KP07DimsplitKernel(
//Read into shared memory
readBlock<w, h, gc>( rho0_ptr_, rho0_pitch_, Q[0], nx_+4, ny_+4);
readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_+4, ny_+4);
readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_+4, ny_+4);
readBlock<w, h, gc>( E0_ptr_, E0_pitch_, Q[3], nx_+4, ny_+4);
readBlock<w, h, gc>( rho0_ptr_, rho0_pitch_, Q[0], nx_, ny_);
readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_);
readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_);
readBlock<w, h, gc>( E0_ptr_, E0_pitch_, Q[3], nx_, ny_);
__syncthreads();
//Fix boundary conditions
@@ -226,6 +226,26 @@ __global__ void KP07DimsplitKernel(
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
__syncthreads();
//This is the RK2-part
const int tx = threadIdx.x + gc;
const int ty = threadIdx.y + gc;
const float q1 = Q[0][ty][tx];
const float q2 = Q[1][ty][tx];
const float q3 = Q[2][ty][tx];
const float q4 = Q[3][ty][tx];
__syncthreads();
readBlock<w, h, gc>( rho1_ptr_, rho1_pitch_, Q[0], nx_, ny_);
readBlock<w, h, gc>(rho_u1_ptr_, rho_u1_pitch_, Q[1], nx_, ny_);
readBlock<w, h, gc>(rho_v1_ptr_, rho_v1_pitch_, Q[2], nx_, ny_);
readBlock<w, h, gc>( E1_ptr_, E1_pitch_, Q[3], nx_, ny_);
__syncthreads();
Q[0][ty][tx] = 0.5f*( Q[0][ty][tx] + q1 );
Q[1][ty][tx] = 0.5f*( Q[1][ty][tx] + q2 );
Q[2][ty][tx] = 0.5f*( Q[2][ty][tx] + q3 );
Q[3][ty][tx] = 0.5f*( Q[3][ty][tx] + q4 );
}

View File

@@ -92,19 +92,29 @@ __device__ float desingularize(float x_, float eps_) {
template<int block_width, int block_height, int ghost_cells>
inline __device__ void readBlock(float* ptr_, int pitch_,
float shmem[block_height+2*ghost_cells][block_width+2*ghost_cells],
const int max_x_, const int max_y_) {
const int nx_, const int ny_) {
//Index of block within domain
const int bx = blockDim.x * blockIdx.x;
const int by = blockDim.y * blockIdx.y;
const int gc_pad = 4;
//Read into shared memory
//Loop over all variables
for (int j=threadIdx.y; j<block_height+2*ghost_cells; j+=block_height) {
const int l = min(by + j, max_y_-1);
//const int l = min(by + j, ny_+2*ghost_cells-1);
const int y = by + j;
const int y_offset = ( (int) (y < gc_pad) - (int) (y >= ny_+2*ghost_cells-gc_pad) ) * (ny_+2*ghost_cells - 2*gc_pad);
const int l = y + y_offset;
float* row = (float*) ((char*) ptr_ + pitch_*l);
for (int i=threadIdx.x; i<block_width+2*ghost_cells; i+=block_width) {
const int k = min(bx + i, max_x_-1);
//const int k = min(bx + i, nx_+2*ghost_cells-1);
const int x = bx + i;
const int gc_pad = 4;
const int x_offset = ( (int) (x < gc_pad) - (int) (x >= nx_+2*ghost_cells-gc_pad) ) * (nx_+2*ghost_cells - 2*gc_pad);
const int k = x + x_offset;
shmem[j][i] = row[k];
}
@@ -248,6 +258,16 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2
template<int block_width, int block_height, int ghost_cells, int vars>