from ...core.CUDA import *
from ...geom.CUDA._mesh import CuStructuredMesh2D, CuStructuredMesh3D, CuGeneralMesh
import cupy as cp
from typing import Union
class StiffnessKernel:
"""
Base class for CUDA-accelerated stiffness matrix assembly kernels.
Abstract base class defining interface for GPU-accelerated finite element stiffness
matrix operations. Subclasses implement specific assembly strategies (structured,
uniform, general) optimized for GPU computation using CuPy.
Attributes
----------
has_rho : bool
Whether density variables have been set
rho : cupy.ndarray
Element density variables on GPU, shape (n_elements,)
shape : tuple
Matrix shape (n_dof, n_dof)
matvec : callable
Alias for dot() method
rmatvec : callable
Alias for dot() method (symmetric matrices)
matmat : callable
Alias for dot() method
rmatmat : callable
Alias for dot() method (symmetric matrices)
mat_vec : cupy.ndarray
Cached matrix-vector product result
diag : cupy.ndarray
Cached diagonal entries
dk : cupy.ndarray
Cached derivative matrices
Notes
-----
- All data stored as CuPy arrays on GPU
- Matrix-free operations (no explicit matrix storage)
- Supports matrix-vector products via dot()
- Subclasses must implement construct(), _matvec(), dot(), diagonal()
"""
def __init__(self):
"""
Initialize base stiffness kernel.
Sets up attributes for density storage and matrix operation aliases.
Subclasses should call super().__init__() and set mesh-specific attributes.
"""
self.has_rho = False
self.rho = None
self.shape = None
self.matvec = self.dot
self.rmatvec = self.dot
self.matmat = self.dot
self.rmatmat = self.dot
self.mat_vec = None
self.diag = None
self.dk = None
def construct(self, rho):
"""
Construct stiffness matrix representation (abstract method).
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
Notes
-----
Subclasses must implement this to assemble stiffness matrix based on
density distribution. May precompute matrix entries or prepare for
matrix-free operations.
"""
pass
def _matvec(self, rho, vec):
"""
Matrix-vector product K(rho) @ vec (abstract method).
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
vec : cupy.ndarray
Input vector, shape (n_dof,)
Returns
-------
cupy.ndarray
Output vector K(rho) @ vec, shape (n_dof,)
Notes
-----
Subclasses must implement this for matrix-free operations.
Called internally by dot() method.
"""
pass
def _rmatvec(self, rho, vec):
"""
Transpose matrix-vector product K(rho)^T @ vec (abstract method).
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
vec : cupy.ndarray
Input vector, shape (n_dof,)
Returns
-------
cupy.ndarray
Output vector K(rho)^T @ vec, shape (n_dof,)
Notes
-----
For symmetric stiffness matrices, same as _matvec(). Subclasses
should implement accordingly.
"""
pass
def _matmat(self, rho, mat):
"""
Matrix-matrix product K(rho) @ mat (abstract method).
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
mat : cupy.ndarray
Input matrix, shape (n_dof, n_cols)
Returns
-------
cupy.ndarray
Output matrix K(rho) @ mat, shape (n_dof, n_cols)
Notes
-----
Subclasses may implement via repeated _matvec() calls or optimized
batch operations.
"""
pass
def _rmatmat(self, rho, mat):
"""
Transpose matrix-matrix product K(rho)^T @ mat (abstract method).
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
mat : cupy.ndarray
Input matrix, shape (n_dof, n_cols)
Returns
-------
cupy.ndarray
Output matrix K(rho)^T @ mat, shape (n_dof, n_cols)
Notes
-----
For symmetric matrices, same as _matmat(). Subclasses should
implement accordingly.
"""
pass
def set_rho(self, rho):
"""
Set element density variables.
Parameters
----------
rho : ndarray or cupy.ndarray
Element density variables, shape (n_elements,)
Notes
-----
Converts input to CuPy array if needed. Does not trigger matrix
reconstruction; call construct() after setting rho.
"""
self.rho = cp.array(rho, dtype=rho.dtype, copy=False)
self.has_rho = True
def dot(self, rhs):
"""
Matrix-vector product K @ rhs (abstract method).
Parameters
----------
rhs : cupy.ndarray
Right-hand side vector, shape (n_dof,) or (n_dof, n_cols)
Returns
-------
cupy.ndarray
Result K @ rhs, same shape as rhs
Notes
-----
Subclasses must implement this. Should use cached rho if available,
otherwise raise error. Handles both vectors and matrices.
"""
pass
def reset(self):
"""
Reset kernel state to initial condition.
Clears density variables, cached matrices, and constraint information.
Useful when reinitializing optimization or changing problem configuration.
"""
self.has_rho = False
self.rho = None
self.CSR = None
self.ptr = None
self.has_been_constructed = False
self.constraints[:] = False
self.has_con = False
self.mat_vec = None
self.diag = None
self.dk = None
def diagonal(self):
"""
Get diagonal entries of stiffness matrix (abstract method).
Returns
-------
cupy.ndarray
Diagonal entries, shape (n_dof,)
Notes
-----
Subclasses must implement this. May compute on-the-fly or return
cached diagonal if available. Used for preconditioning and diagnostics.
"""
pass
def __matmul__(self, rhs):
"""
Matrix multiplication operator (K @ rhs).
Parameters
----------
rhs : cupy.ndarray
Right-hand side vector or matrix
Returns
-------
cupy.ndarray
Result of matrix multiplication
Notes
-----
Enables Python @ operator syntax: result = kernel @ vector
"""
return self.dot(rhs)
[docs]
class StructuredStiffnessKernel(StiffnessKernel):
"""
CUDA-accelerated stiffness kernel for structured meshes.
GPU version of CPU StructuredStiffnessKernel using CuPy. Uses node-basis assembly with
a single precomputed element stiffness matrix for maximum efficiency on uniform grid meshes.
Identical API to CPU version but operates entirely on GPU memory for maximum performance.
Parameters
----------
mesh : CuStructuredMesh2D or CuStructuredMesh3D
CUDA-accelerated structured mesh with uniform elements
Attributes
----------
K_single : cupy.ndarray
Single element stiffness matrix used for all elements (on GPU)
shape : tuple
Global stiffness matrix dimensions (n_dof, n_dof)
constraints : cupy.ndarray (bool)
Boolean array marking constrained DOFs (on GPU)
has_cons : bool
True if boundary conditions have been applied
non_con_map : cupy.ndarray
Index map to non-constrained DOFs for reduced system (on GPU)
Methods
-------
set_rho(rho)
Set design variables for subsequent matrix-free operations
dot(rhs)
Matrix-vector product K @ rhs (requires rho to be set)
construct(rho)
Build explicit CSR matrix representation
diagonal(rho=None)
Extract diagonal of stiffness matrix
add_constraints(dof_indices)
Apply Dirichlet boundary conditions
process_grad(U)
Compute element-wise sensitivities dK/drho : U for adjoint method
Notes
-----
- Fastest kernel for structured meshes (2-3x faster than UniformStiffnessKernel on GPU)
- All arrays stored as CuPy arrays on GPU
- 5-10x faster than CPU for large 3D problems
- Automatically handles constrained DOFs by zeroing rows/columns
- Matrix-free dot() operation uses optimized CUDA kernels
- Use construct() only when explicit matrix is needed (e.g., for direct solvers)
- Requires CUDA-capable GPU and CuPy
- Use with CUDA solvers (CG, MultiGrid)
- Memory limited by GPU VRAM
Examples
--------
>>> from pyFANTOM.CUDA import StructuredMesh2D, StructuredStiffnessKernel
>>> mesh = StructuredMesh2D(nx=256, ny=128, lx=2.0, ly=1.0)
>>> kernel = StructuredStiffnessKernel(mesh=mesh)
>>> kernel.set_rho(rho)
>>> u = kernel.dot(f) # Matrix-free K @ f on GPU
"""
[docs]
def __init__(self, mesh: Union[CuStructuredMesh2D, CuStructuredMesh3D]):
super().__init__()
self.mesh = mesh
self.nodes, self.elements = mesh.nodes, mesh.elements
self.dof = mesh.dof
self.K_single = mesh.K_single
self.dtype = mesh.dtype
# move to gpu and cast to 32 bit
self.nodes = cp.array(self.nodes, dtype=self.nodes.dtype, copy=False)
self.elements = cp.array(self.elements, dtype=self.elements.dtype, copy=False)
self.K_single = cp.array(self.K_single, dtype=self.K_single.dtype, copy=False)
self.elements_flat = self.elements.flatten()
self.el_ids = cp.arange(self.elements.shape[0], dtype=cp.int32).repeat(
self.elements.shape[1]
)
self.sorter = cp.argsort(self.elements_flat).astype(cp.int32)
self.node_ids = cp.searchsorted(
self.elements_flat,
cp.arange(self.nodes.shape[0], dtype=cp.int32),
sorter=self.sorter,
side="left",
).astype(cp.int32)
self.elements_size = self.elements.shape[1]
self.n_nodes = self.nodes.shape[0]
self.has_been_constructed = False
self.ptr = None
self.max_con_count = (self.elements_size * int(self.dof)) * cp.unique(
self.elements_flat, return_counts=True
)[1].max()
self.constraints = cp.zeros(self.n_nodes * self.dof, dtype=cp.bool_)
self.shape = (self.n_nodes * self.dof, self.n_nodes * self.dof)
self.has_con = False
self.idx_map = cp.arange(self.n_nodes * self.dof, dtype=cp.int32)
[docs]
def diagonal(self, rho=None):
"""
Extract diagonal of assembled stiffness matrix.
Parameters
----------
rho : cp.ndarray, optional
Design densities. If None, uses stored rho
Returns
-------
diag : cp.ndarray
Diagonal entries of K(rho)
Notes
-----
- Used for Jacobi preconditioning in iterative solvers
- Cached on subsequent calls with same rho
"""
if rho is None and not self.has_rho:
raise ValueError(
"Rho has not been set. diagonal works only after setting rho or if rho is provided."
)
elif rho is None:
self.diag = get_diagonal_node_basis(
self.K_single,
self.elements_flat,
self.el_ids,
self.rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
self.elements_size,
self.constraints,
self.diag
)
return self.diag
else:
self.diag = get_diagonal_node_basis(
self.K_single,
self.elements_flat,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
self.elements_size,
self.constraints,
self.diag
)
return self.diag
[docs]
def set_constraints(self, constraints):
"""
Set Dirichlet boundary conditions (replaces existing constraints).
Parameters
----------
constraints : cp.ndarray
DOF indices to constrain on GPU, shape (n_constrained_dofs,)
Notes
-----
Clears all existing constraints and sets only the specified DOFs.
Use add_constraints() to add constraints incrementally.
"""
self.constraints[:] = False
self.constraints[constraints] = True
self.has_con = True
self.non_con_map = self.idx_map[~self.constraints]
[docs]
def add_constraints(self, constraints):
"""
Add Dirichlet boundary conditions (accumulates with existing constraints).
Parameters
----------
constraints : cp.ndarray
DOF indices to constrain on GPU, shape (n_constrained_dofs,)
Notes
-----
Adds to existing constraints. Use set_constraints() to replace all constraints.
"""
self.constraints[constraints] = True
self.has_con = True
self.non_con_map = self.idx_map[~self.constraints]
[docs]
def process_grad(self, U):
"""
Compute element-wise compliance sensitivities dC/drho.
Parameters
----------
U : cp.ndarray
Displacement vector from FEA solve on GPU, shape (n_dof,)
Returns
-------
dk : cp.ndarray
Element sensitivities on GPU (compliance derivative per element), shape (n_elements,)
Notes
-----
- Computes: dk[e] = -U[e]^T @ K_e @ U[e]
- Used for compliance minimization gradient
- Negative sign convention for minimization
- All operations performed on GPU
"""
self.dk = process_dk(
self.K_single, self.elements_flat, U, self.dof, self.elements_size, self.dk
)
return self.dk
[docs]
def construct(self, rho):
"""
Build explicit CSR sparse matrix representation on GPU.
Parameters
----------
rho : cp.ndarray
Design variables on GPU, shape (n_elements,)
Returns
-------
cupyx.scipy.sparse.csr_matrix
Sparse stiffness matrix in CSR format on GPU
Notes
-----
- Memory-intensive: O(nnz) storage on GPU
- Required for direct solvers (SPSOLVE)
- Reuses sparsity pattern on subsequent calls (faster)
"""
size = self.n_nodes * self.dof
if not self.has_been_constructed:
self.CSR = self._matmat(
rho, cp.sparse.eye(size, format="csr", dtype=self.dtype)
)
self.ptr = self.CSR.indptr
else:
self.CSR = self._matmat(
rho, cp.sparse.eye(size, format="csr", dtype=self.dtype), self.ptr
)
self.has_been_constructed = True
return self.CSR
def _matvec(self, rho, vec):
if not self.has_con:
self.mat_vec = mat_vec_node_basis_parallel(
self.K_single,
self.elements_flat,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
vec,
self.dof,
self.elements_size,
out=self.mat_vec
)
return self.mat_vec
self.mat_vec = mat_vec_node_basis_parallel(
self.K_single,
self.elements_flat,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
vec,
self.dof,
self.elements_size,
self.constraints,
self.mat_vec
)
return self.mat_vec
def _rmatvec(self, rho, vec):
return self._matvec(rho, vec)
def _matmat(self, rho, mat, Cp=None):
if Cp is None:
if not self.has_con:
return matmat_node_basis_parallel(
self.K_single,
self.elements_flat,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
self.elements_size,
mat,
self.max_con_count,
)
else:
out = matmat_node_basis_parallel(
self.K_single,
self.elements_flat,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
self.elements_size,
mat,
self.max_con_count,
self.constraints,
)
out.indices[out.indices < 0] = 0
return out
else:
if not self.has_con:
return matmat_node_basis_parallel_(
self.K_single,
self.elements_flat,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
self.elements_size,
mat,
Cp
)
else:
out = matmat_node_basis_parallel_(
self.K_single,
self.elements_flat,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
self.elements_size,
mat,
Cp,
self.constraints
)
out.indices[out.indices < 0] = 0
return out
def _rmatmat(self, rho, mat, Cp=None):
return self._matmat(rho, mat, Cp)
[docs]
def dot(self, rhs):
"""
Matrix-free matrix-vector or matrix-matrix product K(rho) @ rhs.
Parameters
----------
rhs : cp.ndarray or cp.sparse.csr_matrix
Right-hand side vector or matrix
Returns
-------
result : cp.ndarray or cp.sparse.csr_matrix
Product K(rho) @ rhs
Raises
------
ValueError
If rho not set or shape mismatch
NotImplementedError
If rhs type not supported
Notes
-----
- Requires rho set via set_rho() first
- Matrix-free: No explicit matrix storage
- Faster than constructing full matrix for large problems
"""
if self.has_rho:
if isinstance(rhs, cp.ndarray):
if rhs.shape[0] == self.n_nodes * self.dof:
return self._matvec(self.rho, rhs)
else:
raise ValueError(
"Shape of the input vector does not match the number of nodes and dof."
)
elif cp.sparse.issparse(rhs):
if rhs.shape[0] == self.n_nodes * self.dof:
if isinstance(rhs, cp.sparse.csr_matrix):
return self._matmat(self.rho, rhs)
else:
return self._matmat(self.rho, rhs.tocsr())
else:
raise ValueError(
"Shape of the input matrix does not match the number of nodes and dof."
)
else:
raise NotImplementedError(
"Only numpy arrays as vectors and scipy sparse matrices are supported."
)
else:
raise ValueError("Rho has not been set. dot works only after setting rho.")
[docs]
class GeneralStiffnessKernel(StiffnessKernel):
"""
CUDA-accelerated stiffness kernel for heterogeneous meshes.
GPU version of CPU GeneralStiffnessKernel. Most general kernel supporting meshes with
elements of varying sizes and topologies. Uses flat storage with pointer arrays for
memory-efficient representation. Identical API to CPU version but operates entirely on GPU.
Parameters
----------
mesh : CuGeneralMesh
CUDA general mesh with potentially heterogeneous elements (mesh.is_uniform can be False)
Attributes
----------
K_flat : cupy.ndarray
Flattened element stiffness matrices (on GPU)
K_ptr : cupy.ndarray
Pointer array for accessing K_flat: K_e = K_flat[K_ptr[e]:K_ptr[e+1]] (on GPU)
elements_flat : cupy.ndarray
Flattened element connectivity (on GPU)
elements_ptr : cupy.ndarray
Pointer array for accessing elements (on GPU)
element_sizes : cupy.ndarray
Number of nodes per element, shape (n_elements,) (on GPU)
shape : tuple
Global stiffness matrix dimensions (n_dof, n_dof)
constraints : cupy.ndarray (bool)
Boolean array for constrained DOFs (on GPU)
has_cons : bool
True if boundary conditions have been applied
Methods
-------
Same interface as StructuredStiffnessKernel (set_rho, dot, construct, diagonal, etc.)
Notes
-----
- Supports mixed element types (triangles + quads, tets + hexes, etc.)
- Flat storage with pointers enables variable element sizes
- Slower than Uniform/Structured kernels due to indirection
- Automatically used if mesh.is_uniform == False
- All arrays stored as CuPy arrays on GPU
- All operations on GPU using CuPy
- Use with CUDA solvers (CG, MultiGrid) for best performance
- Memory limited by GPU VRAM
Examples
--------
>>> from pyFANTOM.CUDA import GeneralMesh, GeneralStiffnessKernel
>>> import numpy as np
>>> # Mixed triangular and quad mesh
>>> nodes = np.array([[0,0], [1,0], [0,1], [1,1], [2,0]])
>>> elements = [np.array([0,1,2]), np.array([1,4,3,2])] # Triangle + Quad
>>> mesh = GeneralMesh(nodes, elements)
>>> kernel = GeneralStiffnessKernel(mesh=mesh)
"""
[docs]
def __init__(self, mesh: CuGeneralMesh):
super().__init__()
if mesh.is_uniform:
raise ValueError(
"Mesh is uniform, you should use UniformStiffnessKernel instead."
)
self.mesh = mesh
self.nodes, self.elements = mesh.nodes, mesh.elements
self.dof = mesh.dof
self.dtype = mesh.dtype
# Initialize lists for flattening
elements_flat = cp.array(mesh.elements_flat, dtype=cp.int32, copy=False)
element_sizes = cp.array(mesh.element_sizes, dtype=cp.int32, copy=False)
K_flat = mesh.K_flat
# Move everything to GPU with appropriate types
self.nodes = cp.array(self.nodes, dtype=self.dtype, copy=False)
self.elements_flat = cp.array(elements_flat, dtype=cp.int32, copy=False)
self.K_flat = cp.array(K_flat, dtype=self.dtype, copy=False)
# Calculate pointers
self.K_ptr = cp.array(mesh.K_ptr, dtype=cp.int32, copy=False)
self.elements_ptr = cp.array(mesh.elements_ptr, dtype=cp.int32, copy=False)
self.el_ids = cp.arange(self.elements.shape[0], dtype=cp.int32).repeat(
element_sizes.get().tolist()
)
self.sorter = cp.argsort(self.elements_flat).astype(cp.int32)
self.node_ids = cp.searchsorted(
self.elements_flat,
cp.arange(self.nodes.shape[0], dtype=cp.int32),
sorter=self.sorter,
side="left",
).astype(cp.int32)
self.n_nodes = self.nodes.shape[0]
self.has_been_constructed = False
self.ptr = None
self.indices = None
self.data = None
self.max_con_count = (
cp.diff(self.elements_ptr).max() * int(self.dof)
) * cp.unique(self.elements_flat, return_counts=True)[1].max()
self.constraints = cp.zeros(self.n_nodes * self.dof, dtype=cp.bool_)
self.shape = (self.n_nodes * self.dof, self.n_nodes * self.dof)
self.has_con = False
self.idx_map = cp.arange(self.n_nodes * self.dof, dtype=cp.int32)
[docs]
def diagonal(self, rho=None):
"""
Compute diagonal entries of stiffness matrix.
Parameters
----------
rho : cupy.ndarray, optional
Element density variables, shape (n_elements,). If None, uses cached self.rho
Returns
-------
cupy.ndarray
Diagonal entries of K(rho), shape (n_dof,)
Notes
-----
- Uses GPU-accelerated flat storage assembly
- Respects Dirichlet boundary conditions (constrained DOFs zeroed)
- Caches result in self.diag for reuse
- Used for preconditioning and diagnostics
"""
if rho is None and not self.has_rho:
raise ValueError(
"Rho has not been set. diagonal works only after setting rho or if rho is provided."
)
elif rho is None:
self.diag = get_diagonal_node_basis_flat(
self.K_flat,
self.elements_flat,
self.K_ptr,
self.elements_ptr,
self.el_ids,
self.rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
self.constraints,
self.diag
)
return self.diag
else:
self.diag = get_diagonal_node_basis_flat(
self.K_flat,
self.elements_flat,
self.K_ptr,
self.elements_ptr,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
self.constraints,
self.diag
)
return self.diag
[docs]
def set_constraints(self, constraints):
"""
Set Dirichlet boundary condition DOF (replaces existing constraints).
Parameters
----------
constraints : cp.ndarray or array-like
Indices of constrained DOF
Notes
-----
- Resets all previous constraints
- Use add_constraints() to append to existing constraints
- Constrained rows/columns are zeroed in matrix operations
"""
self.constraints[:] = False
self.constraints[constraints] = True
self.has_con = True
self.non_con_map = self.idx_map[~self.constraints]
[docs]
def add_constraints(self, constraints):
"""
Add Dirichlet boundary conditions (accumulates with existing constraints).
Parameters
----------
constraints : cp.ndarray
DOF indices to constrain on GPU, shape (n_constrained_dofs,)
Notes
-----
Adds to existing constraints. Use set_constraints() to replace all constraints.
- Use set_constraints() to replace all constraints
"""
self.constraints[constraints] = True
self.has_con = True
self.non_con_map = self.idx_map[~self.constraints]
[docs]
def process_grad(self, U):
"""
Compute element-wise compliance sensitivities dC/drho.
Parameters
----------
U : cp.ndarray
Displacement vector from FEA solve
Returns
-------
dk : cp.ndarray
Element sensitivities (compliance derivative per element)
Notes
-----
- Computes: dk[e] = -U[e]^T @ K_e @ U[e]
- Used for compliance minimization gradient
- Negative sign convention for minimization
"""
self.dk = process_dk(
self.K_single, self.elements_flat, U, self.dof, self.elements_size, self.dk
)
return self.dk
[docs]
def construct(self, rho):
"""
Construct CSR stiffness matrix representation.
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
Returns
-------
cupy.sparse.csr_matrix
Sparse CSR stiffness matrix K(rho), shape (n_dof, n_dof)
Notes
-----
- Assembles full sparse matrix structure (first call) or updates values (subsequent)
- Uses matrix-matrix product with identity to extract sparsity pattern
- Caches sparsity pattern (ptr) for efficient updates
- Sets has_been_constructed flag after first call
- More memory-intensive than matrix-free operations but needed for some solvers
"""
size = self.n_nodes * self.dof
if not self.has_been_constructed:
self.CSR = self._matmat(
rho, cp.sparse.eye(size, format="csr", dtype=self.dtype)
)
self.ptr = self.CSR.indptr
else:
self.CSR = self._matmat(
rho, cp.sparse.eye(size, format="csr", dtype=self.dtype), self.ptr
)
self.has_been_constructed = True
return self.CSR
def _matvec(self, rho, vec):
"""
Matrix-vector product K(rho) @ vec using GPU-accelerated flat storage.
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
vec : cupy.ndarray
Input vector, shape (n_dof,)
Returns
-------
cupy.ndarray
Output vector K(rho) @ vec, shape (n_dof,)
Notes
-----
- Uses parallel GPU kernel for element-wise assembly
- Handles Dirichlet constraints (zeroed rows/columns)
- Caches result in self.mat_vec for reuse
- Optimized for flat storage with pointer arrays
"""
if not self.has_con:
self.mat_vec = mat_vec_node_basis_parallel_flat(
self.K_flat,
self.elements_flat,
self.K_ptr,
self.elements_ptr,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
vec,
self.dof,
out=self.mat_vec
)
return self.mat_vec
else:
self.mat_vec = mat_vec_node_basis_parallel_flat(
self.K_flat,
self.elements_flat,
self.K_ptr,
self.elements_ptr,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
vec,
self.dof,
self.constraints,
self.mat_vec
)
return self.mat_vec
def _rmatvec(self, rho, vec):
"""
Transpose matrix-vector product K(rho)^T @ vec.
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
vec : cupy.ndarray
Input vector, shape (n_dof,)
Returns
-------
cupy.ndarray
Output vector K(rho)^T @ vec, shape (n_dof,)
Notes
-----
For symmetric stiffness matrices, same as _matvec(). Delegates to _matvec().
"""
return self._matvec(rho, vec)
def _matmat(self, rho, mat, Cp=None):
"""
Matrix-matrix product K(rho) @ mat using GPU-accelerated assembly.
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
mat : cupy.sparse.csr_matrix or cupy.ndarray
Input matrix, shape (n_dof, n_cols)
Cp : cupy.sparse.csr_matrix, optional
Pre-allocated output sparsity pattern. If None, computes new pattern
Returns
-------
cupy.sparse.csr_matrix
Output matrix K(rho) @ mat, shape (n_dof, n_cols)
Notes
-----
- Uses parallel GPU kernel for batch element assembly
- Handles Dirichlet constraints (zeroed rows/columns)
- If Cp provided, reuses sparsity pattern for efficiency
- Used by construct() to build full stiffness matrix
- Negative indices set to 0 (constraint handling artifact)
"""
if Cp is None:
if not self.has_con:
return matmat_node_basis_flat_parallel(
self.K_flat,
self.elements_flat,
self.K_ptr,
self.elements_ptr,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
mat,
self.max_con_count,
)
else:
out = matmat_node_basis_flat_parallel(
self.K_flat,
self.elements_flat,
self.K_ptr,
self.elements_ptr,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
mat,
self.max_con_count,
self.constraints,
)
out.indices[out.indices < 0] = 0
return out
else:
if not self.has_con:
return matmat_node_basis_flat_parallel_(
self.K_flat,
self.elements_flat,
self.K_ptr,
self.elements_ptr,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
mat,
Cp,
)
else:
out = matmat_node_basis_flat_parallel(
self.K_flat,
self.elements_flat,
self.K_ptr,
self.elements_ptr,
self.el_ids,
rho,
self.sorter,
self.node_ids,
self.n_nodes,
self.dof,
mat,
self.max_con_count,
self.constraints,
)
out.indices[out.indices < 0] = 0
return out
def _rmatmat(self, rho, mat, Cp=None):
"""
Transpose matrix-matrix product K(rho)^T @ mat.
Parameters
----------
rho : cupy.ndarray
Element density variables, shape (n_elements,)
mat : cupy.sparse.csr_matrix or cupy.ndarray
Input matrix, shape (n_dof, n_cols)
Cp : cupy.sparse.csr_matrix, optional
Pre-allocated output sparsity pattern
Returns
-------
cupy.sparse.csr_matrix
Output matrix K(rho)^T @ mat, shape (n_dof, n_cols)
Notes
-----
For symmetric stiffness matrices, same as _matmat(). Delegates to _matmat().
"""
return self._matmat(rho, mat, Cp)
[docs]
def dot(self, rhs):
"""
Matrix-vector or matrix-matrix product K @ rhs.
Parameters
----------
rhs : cupy.ndarray or cupy.sparse.csr_matrix
Right-hand side vector or matrix
- Vector: shape (n_dof,)
- Matrix: shape (n_dof, n_cols) or sparse CSR matrix
Returns
-------
cupy.ndarray or cupy.sparse.csr_matrix
Result K @ rhs, same type and shape as rhs (except columns)
Raises
------
ValueError
If rho not set or rhs shape mismatch
NotImplementedError
If rhs is not cupy array or sparse matrix
Notes
-----
- Uses cached self.rho if available
- Automatically detects vector vs matrix input
- Converts sparse matrices to CSR format if needed
- Main interface for matrix operations
"""
if self.has_rho:
if isinstance(rhs, cp.ndarray):
if rhs.shape[0] == self.n_nodes * self.dof:
return self._matvec(self.rho, rhs)
else:
raise ValueError(
"Shape of the input vector does not match the number of nodes and dof."
)
elif cp.sparse.issparse(rhs):
if rhs.shape[0] == self.n_nodes * self.dof:
if isinstance(rhs, cp.sparse.csr_matrix):
return self._matmat(self.rho, rhs)
else:
return self._matmat(self.rho, rhs.tocsr())
else:
raise ValueError(
"Shape of the input matrix does not match the number of nodes and dof."
)
else:
raise NotImplementedError(
"Only cupy arrays and sparse matrices are supported."
)
else:
raise ValueError("Rho has not been set. dot works only after setting rho.")