mirror of
				https://github.com/smyalygames/FiniteVolumeGPU.git
				synced 2025-10-31 20:17:41 +01:00 
			
		
		
		
	Implemented variable timestep
This commit is contained in:
		
							parent
							
								
									7592ad5b9f
								
							
						
					
					
						commit
						ddac53271c
					
				| @ -62,7 +62,7 @@ class EE2D_KP07_dimsplit (BaseSimulator): | ||||
|                  g,  | ||||
|                  gamma,  | ||||
|                  theta=1.3,  | ||||
|                  cfl_scale=0.25*0.9, | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  block_width=16, block_height=8): | ||||
|                   | ||||
| @ -140,4 +140,4 @@ class EE2D_KP07_dimsplit (BaseSimulator): | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*self.cfl_scale | ||||
|         return max_dt*0.5*self.cfl_scale | ||||
| @ -25,6 +25,7 @@ from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| 
 | ||||
|          | ||||
| @ -58,6 +59,7 @@ class FORCE (Simulator.BaseSimulator): | ||||
|                  nx, ny,  | ||||
|                  dx, dy, dt,  | ||||
|                  g,  | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  block_width=16, block_height=16): | ||||
|                   | ||||
| @ -65,8 +67,9 @@ class FORCE (Simulator.BaseSimulator): | ||||
|         super().__init__(context,  | ||||
|             nx, ny,  | ||||
|             dx, dy, dt,  | ||||
|             block_width, block_height); | ||||
|             block_width, block_height) | ||||
|         self.g = np.float32(g)  | ||||
|         self.cfl_scale = cfl_scale | ||||
|         self.boundary_conditions = boundary_conditions.asCodedInt() | ||||
| 
 | ||||
|         #Get kernels | ||||
| @ -81,7 +84,7 @@ class FORCE (Simulator.BaseSimulator): | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         self.kernel = module.get_function("FORCEKernel") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPi") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPiP") | ||||
|      | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
| @ -92,6 +95,8 @@ class FORCE (Simulator.BaseSimulator): | ||||
|                         nx, ny,  | ||||
|                         1, 1,  | ||||
|                         [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|          | ||||
|     def step(self, dt): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
| @ -104,7 +109,8 @@ class FORCE (Simulator.BaseSimulator): | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0]) | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|         self.t += dt | ||||
|         self.nt += 1 | ||||
| @ -115,4 +121,7 @@ class FORCE (Simulator.BaseSimulator): | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|                  | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5*self.cfl_scale | ||||
| @ -24,6 +24,7 @@ from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @ -53,6 +54,7 @@ class HLL (Simulator.BaseSimulator): | ||||
|                  nx, ny,  | ||||
|                  dx, dy, dt,  | ||||
|                  g,  | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  block_width=16, block_height=16): | ||||
|                   | ||||
| @ -62,6 +64,7 @@ class HLL (Simulator.BaseSimulator): | ||||
|             dx, dy, dt,  | ||||
|             block_width, block_height); | ||||
|         self.g = np.float32(g)  | ||||
|         self.cfl_scale = cfl_scale | ||||
|         self.boundary_conditions = boundary_conditions.asCodedInt() | ||||
| 
 | ||||
|         #Get kernels | ||||
| @ -76,7 +79,7 @@ class HLL (Simulator.BaseSimulator): | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         self.kernel = module.get_function("HLLKernel") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPi") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPiP") | ||||
|      | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
| @ -87,6 +90,8 @@ class HLL (Simulator.BaseSimulator): | ||||
|                         nx, ny,  | ||||
|                         1, 1,  | ||||
|                         [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|          | ||||
|     def step(self, dt): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
| @ -99,10 +104,19 @@ class HLL (Simulator.BaseSimulator): | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0]) | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|         self.t += dt | ||||
|         self.nt += 1 | ||||
|          | ||||
|     def download(self): | ||||
|         return self.u0.download(self.stream) | ||||
|         return self.u0.download(self.stream) | ||||
|                          | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5*self.cfl_scale | ||||
| @ -24,6 +24,7 @@ from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
|          | ||||
|          | ||||
| @ -56,6 +57,7 @@ class HLL2 (Simulator.BaseSimulator): | ||||
|                  dx, dy, dt,  | ||||
|                  g,  | ||||
|                  theta=1.8,  | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  block_width=16, block_height=16): | ||||
|                   | ||||
| @ -66,6 +68,7 @@ class HLL2 (Simulator.BaseSimulator): | ||||
|             block_width, block_height); | ||||
|         self.g = np.float32(g)  | ||||
|         self.theta = np.float32(theta) | ||||
|         self.cfl_scale = cfl_scale | ||||
|         self.boundary_conditions = boundary_conditions.asCodedInt() | ||||
|          | ||||
|         #Get kernels | ||||
| @ -80,7 +83,7 @@ class HLL2 (Simulator.BaseSimulator): | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         self.kernel = module.get_function("HLL2Kernel") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPi") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPiP") | ||||
|          | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
| @ -91,6 +94,8 @@ class HLL2 (Simulator.BaseSimulator): | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|          | ||||
|     def step(self, dt): | ||||
|         self.substepDimsplit(dt*0.5, 0) | ||||
| @ -112,8 +117,17 @@ class HLL2 (Simulator.BaseSimulator): | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0]) | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|      | ||||
|     def download(self): | ||||
|         return self.u0.download(self.stream) | ||||
|         return self.u0.download(self.stream) | ||||
|          | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5*self.cfl_scale | ||||
| @ -29,6 +29,7 @@ from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @ -57,6 +58,7 @@ class KP07 (Simulator.BaseSimulator): | ||||
|                  dx, dy, dt,  | ||||
|                  g,  | ||||
|                  theta=1.3,  | ||||
|                  cfl_scale=0.9, | ||||
|                  order=2, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  block_width=16, block_height=16): | ||||
| @ -67,7 +69,8 @@ class KP07 (Simulator.BaseSimulator): | ||||
|             dx, dy, dt,  | ||||
|             block_width, block_height); | ||||
|         self.g = np.float32(g)              | ||||
|         self.theta = np.float32(theta) | ||||
|         self.theta = np.float32(theta)  | ||||
|         self.cfl_scale = cfl_scale | ||||
|         self.order = np.int32(order) | ||||
|         self.boundary_conditions = boundary_conditions.asCodedInt() | ||||
| 
 | ||||
| @ -83,7 +86,7 @@ class KP07 (Simulator.BaseSimulator): | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07Kernel") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPi") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPiP") | ||||
|          | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
| @ -94,6 +97,8 @@ class KP07 (Simulator.BaseSimulator): | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|                          | ||||
|          | ||||
|     def step(self, dt): | ||||
| @ -121,9 +126,18 @@ class KP07 (Simulator.BaseSimulator): | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0]) | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
| 
 | ||||
|     def download(self): | ||||
|         return self.u0.download(self.stream) | ||||
|         return self.u0.download(self.stream) | ||||
|          | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5**self.order*self.cfl_scale | ||||
| @ -29,6 +29,8 @@ from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @ -57,6 +59,7 @@ class KP07_dimsplit (Simulator.BaseSimulator): | ||||
|                  dx, dy, dt,  | ||||
|                  g,  | ||||
|                  theta=1.3,  | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  block_width=16, block_height=16): | ||||
|                   | ||||
| @ -69,6 +72,7 @@ class KP07_dimsplit (Simulator.BaseSimulator): | ||||
|         self.gc_y = 2 | ||||
|         self.g = np.float32(g) | ||||
|         self.theta = np.float32(theta) | ||||
|         self.cfl_scale = cfl_scale | ||||
|         self.boundary_conditions = boundary_conditions.asCodedInt() | ||||
| 
 | ||||
|         #Get kernels | ||||
| @ -83,7 +87,7 @@ class KP07_dimsplit (Simulator.BaseSimulator): | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07DimsplitKernel") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPi") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPiP") | ||||
|      | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
| @ -94,6 +98,8 @@ class KP07_dimsplit (Simulator.BaseSimulator): | ||||
|                         nx, ny,  | ||||
|                         self.gc_x, self.gc_y,  | ||||
|                         [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|      | ||||
|     def step(self, dt): | ||||
|         self.substepDimsplit(dt*0.5, 0) | ||||
| @ -115,7 +121,8 @@ class KP07_dimsplit (Simulator.BaseSimulator): | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0]) | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|      | ||||
|          | ||||
| @ -124,4 +131,8 @@ class KP07_dimsplit (Simulator.BaseSimulator): | ||||
|          | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5*self.cfl_scale | ||||
| @ -25,6 +25,7 @@ from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @ -54,6 +55,7 @@ class LxF (Simulator.BaseSimulator): | ||||
|                  nx, ny,  | ||||
|                  dx, dy, dt,  | ||||
|                  g,  | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=16): | ||||
|                   | ||||
| @ -63,6 +65,7 @@ class LxF (Simulator.BaseSimulator): | ||||
|             dx, dy, dt,  | ||||
|             block_width, block_height); | ||||
|         self.g = np.float32(g)  | ||||
|         self.cfl_scale = cfl_scale | ||||
|         self.boundary_conditions = boundary_conditions.asCodedInt() | ||||
| 
 | ||||
|         # Get kernels | ||||
| @ -77,7 +80,7 @@ class LxF (Simulator.BaseSimulator): | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         self.kernel = module.get_function("LxFKernel") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPi") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPiP") | ||||
| 
 | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
| @ -88,6 +91,8 @@ class LxF (Simulator.BaseSimulator): | ||||
|                         nx, ny,  | ||||
|                         1, 1,  | ||||
|                         [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|          | ||||
|     def step(self, dt): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
| @ -100,10 +105,15 @@ class LxF (Simulator.BaseSimulator): | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0]) | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|         self.t += dt | ||||
|         self.nt += 1 | ||||
|    | ||||
|     def download(self): | ||||
|         return self.u0.download(self.stream) | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5*self.cfl_scale | ||||
| @ -25,6 +25,7 @@ from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @ -52,6 +53,7 @@ class WAF (Simulator.BaseSimulator): | ||||
|                  nx, ny,  | ||||
|                  dx, dy, dt,  | ||||
|                  g,  | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  block_width=16, block_height=16): | ||||
|                   | ||||
| @ -61,6 +63,7 @@ class WAF (Simulator.BaseSimulator): | ||||
|             dx, dy, dt*2,  | ||||
|             block_width, block_height); | ||||
|         self.g = np.float32(g)  | ||||
|         self.cfl_scale = cfl_scale | ||||
|         self.boundary_conditions = boundary_conditions.asCodedInt() | ||||
| 
 | ||||
|         #Get kernels | ||||
| @ -86,6 +89,8 @@ class WAF (Simulator.BaseSimulator): | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|      | ||||
|     def step(self, dt): | ||||
|         self.substepDimsplit(dt*0.5, substep=0) | ||||
| @ -109,4 +114,12 @@ class WAF (Simulator.BaseSimulator): | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def download(self): | ||||
|         return self.u0.download(self.stream) | ||||
|         return self.u0.download(self.stream) | ||||
|          | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5*self.cfl_scale | ||||
| @ -97,7 +97,10 @@ __global__ void FORCEKernel( | ||||
|         //Output h^{n+1} | ||||
|         float* h1_ptr_, int h1_pitch_, | ||||
|         float* hu1_ptr_, int hu1_pitch_, | ||||
|         float* hv1_ptr_, int hv1_pitch_) { | ||||
|         float* hv1_ptr_, int hv1_pitch_, | ||||
|          | ||||
|         //Output CFL | ||||
|         float* cfl_) { | ||||
|      | ||||
|     const unsigned int w = BLOCK_WIDTH; | ||||
|     const unsigned int h = BLOCK_HEIGHT; | ||||
| @ -130,6 +133,11 @@ __global__ void FORCEKernel( | ||||
|     writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1); | ||||
|      | ||||
|     //Compute the CFL for this block | ||||
|     if (cfl_ != NULL) { | ||||
|         writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // extern "C" | ||||
| @ -113,7 +113,10 @@ __global__ void HLLKernel( | ||||
|         //Output h^{n+1} | ||||
|         float* h1_ptr_, int h1_pitch_, | ||||
|         float* hu1_ptr_, int hu1_pitch_, | ||||
|         float* hv1_ptr_, int hv1_pitch_) { | ||||
|         float* hv1_ptr_, int hv1_pitch_, | ||||
|          | ||||
|         //Output CFL | ||||
|         float* cfl_) { | ||||
|      | ||||
|     const unsigned int w = BLOCK_WIDTH; | ||||
|     const unsigned int h = BLOCK_HEIGHT; | ||||
| @ -148,6 +151,11 @@ __global__ void HLLKernel( | ||||
|     writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1); | ||||
|      | ||||
|     //Compute the CFL for this block | ||||
|     if (cfl_ != NULL) { | ||||
|         writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // extern "C" | ||||
| @ -141,7 +141,10 @@ __global__ void HLL2Kernel( | ||||
|         //Output h^{n+1} | ||||
|         float* h1_ptr_, int h1_pitch_, | ||||
|         float* hu1_ptr_, int hu1_pitch_, | ||||
|         float* hv1_ptr_, int hv1_pitch_) { | ||||
|         float* hv1_ptr_, int hv1_pitch_, | ||||
|          | ||||
|         //Output CFL | ||||
|         float* cfl_) { | ||||
|      | ||||
|     const unsigned int w = BLOCK_WIDTH; | ||||
|     const unsigned int h = BLOCK_HEIGHT; | ||||
| @ -203,6 +206,11 @@ __global__ void HLL2Kernel( | ||||
|     writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1); | ||||
|      | ||||
|     //Compute the CFL for this block | ||||
|     if (cfl_ != NULL) { | ||||
|         writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // extern "C" | ||||
| @ -151,8 +151,10 @@ __global__ void KP07Kernel( | ||||
|         //Output h^{n+1} | ||||
|         float* h1_ptr_, int h1_pitch_, | ||||
|         float* hu1_ptr_, int hu1_pitch_, | ||||
|         float* hv1_ptr_, int hv1_pitch_) { | ||||
|              | ||||
|         float* hv1_ptr_, int hv1_pitch_, | ||||
|          | ||||
|         //Output CFL | ||||
|         float* cfl_) { | ||||
|     const unsigned int w = BLOCK_WIDTH; | ||||
|     const unsigned int h = BLOCK_HEIGHT; | ||||
|     const unsigned int gc_x = 2; | ||||
| @ -222,5 +224,10 @@ __global__ void KP07Kernel( | ||||
|             hv_row[ti] = hv1; | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     //Compute the CFL for this block | ||||
|     if (cfl_ != NULL) { | ||||
|         writeCfl<w, h, gc_x, gc_y, vars>(Q, Q[0], nx_, ny_, dx_, dy_, g_, cfl_); | ||||
|     } | ||||
| } | ||||
| } //extern "C" | ||||
| @ -138,7 +138,10 @@ __global__ void KP07DimsplitKernel( | ||||
|         //Output h^{n+1} | ||||
|         float* h1_ptr_, int h1_pitch_, | ||||
|         float* hu1_ptr_, int hu1_pitch_, | ||||
|         float* hv1_ptr_, int hv1_pitch_) { | ||||
|         float* hv1_ptr_, int hv1_pitch_,  | ||||
|          | ||||
|         //Output CFL | ||||
|         float* cfl_) { | ||||
|     const unsigned int w = BLOCK_WIDTH; | ||||
|     const unsigned int h = BLOCK_HEIGHT; | ||||
|     const unsigned int gc_x = 2; | ||||
| @ -194,6 +197,11 @@ __global__ void KP07DimsplitKernel( | ||||
|     writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1); | ||||
|      | ||||
|     //Compute the CFL for this block | ||||
|     if (cfl_ != NULL) { | ||||
|         writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -114,7 +114,10 @@ void LxFKernel( | ||||
|         //Output h^{n+1} | ||||
|         float* h1_ptr_, int h1_pitch_, | ||||
|         float* hu1_ptr_, int hu1_pitch_, | ||||
|         float* hv1_ptr_, int hv1_pitch_) { | ||||
|         float* hv1_ptr_, int hv1_pitch_, | ||||
|          | ||||
|         //Output CFL | ||||
|         float* cfl_) { | ||||
|      | ||||
|     const unsigned int w = BLOCK_WIDTH; | ||||
|     const unsigned int h = BLOCK_HEIGHT; | ||||
| @ -154,6 +157,11 @@ void LxFKernel( | ||||
|     writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1); | ||||
|     writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1); | ||||
|      | ||||
|     //Compute the CFL for this block | ||||
|     if (cfl_ != NULL) { | ||||
|         writeCfl<w, h, gc_x, gc_y, vars>(Q, Q[0], nx_, ny_, dx_, dy_, g_, cfl_); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // extern "C" | ||||
|  | ||||
| @ -468,3 +468,66 @@ __device__ float3 FORCE_1D_flux(const float3 Q_l, const float3 Q_r, const float | ||||
|     const float3 F_lw2 = LxW2_1D_flux(Q_l, Q_r, g_, dx_, dt_); | ||||
|     return 0.5f*(F_lf + F_lw2); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| template<int w, int h, int gc_x, int gc_y, int vars> | ||||
| __device__ void writeCfl(float Q[vars][h+2*gc_y][w+2*gc_x], | ||||
|         float shmem[h+2*gc_y][w+2*gc_x], | ||||
|         const int nx_, const int ny_, | ||||
|         const float dx_, const float dy_, const float g_, | ||||
|         float* output_) { | ||||
|     //Index of thread within block
 | ||||
|     const int tx = threadIdx.x + gc_x; | ||||
|     const int ty = threadIdx.y + gc_y; | ||||
|      | ||||
|     //Index of cell within domain
 | ||||
|     const int ti = blockDim.x*blockIdx.x + tx; | ||||
|     const int tj = blockDim.y*blockIdx.y + ty; | ||||
|      | ||||
|     //Only internal cells
 | ||||
|     if (ti < nx_+gc_x && tj < ny_+gc_y) { | ||||
|         const float h = Q[0][ty][tx]; | ||||
|         const float u   = Q[1][ty][tx] / h; | ||||
|         const float v   = Q[2][ty][tx] / h; | ||||
|          | ||||
|         const float max_u = dx_ / (fabsf(u) + sqrtf(g_*h)); | ||||
|         const float max_v = dy_ / (fabsf(v) + sqrtf(g_*h)); | ||||
|          | ||||
|         shmem[ty][tx] = fminf(max_u, max_v); | ||||
|     } | ||||
|     __syncthreads(); | ||||
|      | ||||
|     //One row of threads loop over all rows
 | ||||
|     if (ti < nx_+gc_x && tj < ny_+gc_y) { | ||||
|         if (ty == gc_y) { | ||||
|             float min_val = shmem[ty][tx]; | ||||
|             const int max_y = min(h, ny_+gc_y - tj); | ||||
|             for (int j=gc_y; j<max_y+gc_y; j++) { | ||||
|                 min_val = fminf(min_val, shmem[j][tx]); | ||||
|             } | ||||
|             shmem[ty][tx] = min_val; | ||||
|         } | ||||
|     } | ||||
|     __syncthreads(); | ||||
|      | ||||
|     //One thread loops over first row to find global max
 | ||||
|     if (tx == gc_x && ty == gc_y) { | ||||
|         float min_val = shmem[ty][tx]; | ||||
|         const int max_x = min(w, nx_+gc_x - ti); | ||||
|         for (int i=gc_x; i<max_x+gc_x; ++i) { | ||||
|             min_val = fminf(min_val, shmem[ty][i]); | ||||
|         } | ||||
|          | ||||
|         const int idx = gridDim.x*blockIdx.y + blockIdx.x; | ||||
|         output_[idx] = min_val; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 André R. Brodtkorb
						André R. Brodtkorb