Source code for pyFANTOM.stiffness.CUDA._FEA

from ...core.CUDA import *
from ...geom.CUDA._mesh import CuStructuredMesh2D, CuStructuredMesh3D, CuGeneralMesh
import cupy as cp
from typing import Union

class StiffnessKernel:
    """
    Base class for CUDA-accelerated stiffness matrix assembly kernels.
    
    Abstract base class defining interface for GPU-accelerated finite element stiffness
    matrix operations. Subclasses implement specific assembly strategies (structured,
    uniform, general) optimized for GPU computation using CuPy.
    
    Attributes
    ----------
    has_rho : bool
        Whether density variables have been set
    rho : cupy.ndarray
        Element density variables on GPU, shape (n_elements,)
    shape : tuple
        Matrix shape (n_dof, n_dof)
    matvec : callable
        Alias for dot() method
    rmatvec : callable
        Alias for dot() method (symmetric matrices)
    matmat : callable
        Alias for dot() method
    rmatmat : callable
        Alias for dot() method (symmetric matrices)
    mat_vec : cupy.ndarray
        Cached matrix-vector product result
    diag : cupy.ndarray
        Cached diagonal entries
    dk : cupy.ndarray
        Cached derivative matrices
        
    Notes
    -----
    - All data stored as CuPy arrays on GPU
    - Matrix-free operations (no explicit matrix storage)
    - Supports matrix-vector products via dot()
    - Subclasses must implement construct(), _matvec(), dot(), diagonal()
    """
    def __init__(self):
        """
        Initialize base stiffness kernel.
        
        Sets up attributes for density storage and matrix operation aliases.
        Subclasses should call super().__init__() and set mesh-specific attributes.
        """
        self.has_rho = False
        self.rho = None
        self.shape = None
        self.matvec = self.dot
        self.rmatvec = self.dot
        self.matmat = self.dot
        self.rmatmat = self.dot
        
        self.mat_vec = None
        self.diag = None
        self.dk = None

    def construct(self, rho):
        """
        Construct stiffness matrix representation (abstract method).
        
        Parameters
        ----------
        rho : cupy.ndarray
            Element density variables, shape (n_elements,)
            
        Notes
        -----
        Subclasses must implement this to assemble stiffness matrix based on
        density distribution. May precompute matrix entries or prepare for
        matrix-free operations.
        """
        pass

    def _matvec(self, rho, vec):
        """
        Matrix-vector product K(rho) @ vec (abstract method).
        
        Parameters
        ----------
        rho : cupy.ndarray
            Element density variables, shape (n_elements,)
        vec : cupy.ndarray
            Input vector, shape (n_dof,)
            
        Returns
        -------
        cupy.ndarray
            Output vector K(rho) @ vec, shape (n_dof,)
            
        Notes
        -----
        Subclasses must implement this for matrix-free operations.
        Called internally by dot() method.
        """
        pass

    def _rmatvec(self, rho, vec):
        """
        Transpose matrix-vector product K(rho)^T @ vec (abstract method).
        
        Parameters
        ----------
        rho : cupy.ndarray
            Element density variables, shape (n_elements,)
        vec : cupy.ndarray
            Input vector, shape (n_dof,)
            
        Returns
        -------
        cupy.ndarray
            Output vector K(rho)^T @ vec, shape (n_dof,)
            
        Notes
        -----
        For symmetric stiffness matrices, same as _matvec(). Subclasses
        should implement accordingly.
        """
        pass

    def _matmat(self, rho, mat):
        """
        Matrix-matrix product K(rho) @ mat (abstract method).
        
        Parameters
        ----------
        rho : cupy.ndarray
            Element density variables, shape (n_elements,)
        mat : cupy.ndarray
            Input matrix, shape (n_dof, n_cols)
            
        Returns
        -------
        cupy.ndarray
            Output matrix K(rho) @ mat, shape (n_dof, n_cols)
            
        Notes
        -----
        Subclasses may implement via repeated _matvec() calls or optimized
        batch operations.
        """
        pass

    def _rmatmat(self, rho, mat):
        """
        Transpose matrix-matrix product K(rho)^T @ mat (abstract method).
        
        Parameters
        ----------
        rho : cupy.ndarray
            Element density variables, shape (n_elements,)
        mat : cupy.ndarray
            Input matrix, shape (n_dof, n_cols)
            
        Returns
        -------
        cupy.ndarray
            Output matrix K(rho)^T @ mat, shape (n_dof, n_cols)
            
        Notes
        -----
        For symmetric matrices, same as _matmat(). Subclasses should
        implement accordingly.
        """
        pass

    def set_rho(self, rho):
        """
        Set element density variables.
        
        Parameters
        ----------
        rho : ndarray or cupy.ndarray
            Element density variables, shape (n_elements,)
            
        Notes
        -----
        Converts input to CuPy array if needed. Does not trigger matrix
        reconstruction; call construct() after setting rho.
        """
        self.rho = cp.array(rho, dtype=rho.dtype, copy=False)
        self.has_rho = True

    def dot(self, rhs):
        """
        Matrix-vector product K @ rhs (abstract method).
        
        Parameters
        ----------
        rhs : cupy.ndarray
            Right-hand side vector, shape (n_dof,) or (n_dof, n_cols)
            
        Returns
        -------
        cupy.ndarray
            Result K @ rhs, same shape as rhs
            
        Notes
        -----
        Subclasses must implement this. Should use cached rho if available,
        otherwise raise error. Handles both vectors and matrices.
        """
        pass

    def reset(self):
        """
        Reset kernel state to initial condition.
        
        Clears density variables, cached matrices, and constraint information.
        Useful when reinitializing optimization or changing problem configuration.
        """
        self.has_rho = False
        self.rho = None
        self.CSR = None
        self.ptr = None
        self.has_been_constructed = False
        self.constraints[:] = False
        self.has_con = False
        self.mat_vec = None
        self.diag = None
        self.dk = None

    def diagonal(self):
        """
        Get diagonal entries of stiffness matrix (abstract method).
        
        Returns
        -------
        cupy.ndarray
            Diagonal entries, shape (n_dof,)
            
        Notes
        -----
        Subclasses must implement this. May compute on-the-fly or return
        cached diagonal if available. Used for preconditioning and diagnostics.
        """
        pass

    def __matmul__(self, rhs):
        """
        Matrix multiplication operator (K @ rhs).
        
        Parameters
        ----------
        rhs : cupy.ndarray
            Right-hand side vector or matrix
            
        Returns
        -------
        cupy.ndarray
            Result of matrix multiplication
            
        Notes
        -----
        Enables Python @ operator syntax: result = kernel @ vector
        """
        return self.dot(rhs)


[docs] class StructuredStiffnessKernel(StiffnessKernel): """ CUDA-accelerated stiffness kernel for structured meshes. GPU version of CPU StructuredStiffnessKernel using CuPy. Uses node-basis assembly with a single precomputed element stiffness matrix for maximum efficiency on uniform grid meshes. Identical API to CPU version but operates entirely on GPU memory for maximum performance. Parameters ---------- mesh : CuStructuredMesh2D or CuStructuredMesh3D CUDA-accelerated structured mesh with uniform elements Attributes ---------- K_single : cupy.ndarray Single element stiffness matrix used for all elements (on GPU) shape : tuple Global stiffness matrix dimensions (n_dof, n_dof) constraints : cupy.ndarray (bool) Boolean array marking constrained DOFs (on GPU) has_cons : bool True if boundary conditions have been applied non_con_map : cupy.ndarray Index map to non-constrained DOFs for reduced system (on GPU) Methods ------- set_rho(rho) Set design variables for subsequent matrix-free operations dot(rhs) Matrix-vector product K @ rhs (requires rho to be set) construct(rho) Build explicit CSR matrix representation diagonal(rho=None) Extract diagonal of stiffness matrix add_constraints(dof_indices) Apply Dirichlet boundary conditions process_grad(U) Compute element-wise sensitivities dK/drho : U for adjoint method Notes ----- - Fastest kernel for structured meshes (2-3x faster than UniformStiffnessKernel on GPU) - All arrays stored as CuPy arrays on GPU - 5-10x faster than CPU for large 3D problems - Automatically handles constrained DOFs by zeroing rows/columns - Matrix-free dot() operation uses optimized CUDA kernels - Use construct() only when explicit matrix is needed (e.g., for direct solvers) - Requires CUDA-capable GPU and CuPy - Use with CUDA solvers (CG, MultiGrid) - Memory limited by GPU VRAM Examples -------- >>> from pyFANTOM.CUDA import StructuredMesh2D, StructuredStiffnessKernel >>> mesh = StructuredMesh2D(nx=256, ny=128, lx=2.0, ly=1.0) >>> kernel = StructuredStiffnessKernel(mesh=mesh) >>> kernel.set_rho(rho) >>> u = kernel.dot(f) # Matrix-free K @ f on GPU """
[docs] def __init__(self, mesh: Union[CuStructuredMesh2D, CuStructuredMesh3D]): super().__init__() self.mesh = mesh self.nodes, self.elements = mesh.nodes, mesh.elements self.dof = mesh.dof self.K_single = mesh.K_single self.dtype = mesh.dtype # move to gpu and cast to 32 bit self.nodes = cp.array(self.nodes, dtype=self.nodes.dtype, copy=False) self.elements = cp.array(self.elements, dtype=self.elements.dtype, copy=False) self.K_single = cp.array(self.K_single, dtype=self.K_single.dtype, copy=False) self.elements_flat = self.elements.flatten() self.el_ids = cp.arange(self.elements.shape[0], dtype=cp.int32).repeat( self.elements.shape[1] ) self.sorter = cp.argsort(self.elements_flat).astype(cp.int32) self.node_ids = cp.searchsorted( self.elements_flat, cp.arange(self.nodes.shape[0], dtype=cp.int32), sorter=self.sorter, side="left", ).astype(cp.int32) self.elements_size = self.elements.shape[1] self.n_nodes = self.nodes.shape[0] self.has_been_constructed = False self.ptr = None self.max_con_count = (self.elements_size * int(self.dof)) * cp.unique( self.elements_flat, return_counts=True )[1].max() self.constraints = cp.zeros(self.n_nodes * self.dof, dtype=cp.bool_) self.shape = (self.n_nodes * self.dof, self.n_nodes * self.dof) self.has_con = False self.idx_map = cp.arange(self.n_nodes * self.dof, dtype=cp.int32)
[docs] def diagonal(self, rho=None): """ Extract diagonal of assembled stiffness matrix. Parameters ---------- rho : cp.ndarray, optional Design densities. If None, uses stored rho Returns ------- diag : cp.ndarray Diagonal entries of K(rho) Notes ----- - Used for Jacobi preconditioning in iterative solvers - Cached on subsequent calls with same rho """ if rho is None and not self.has_rho: raise ValueError( "Rho has not been set. diagonal works only after setting rho or if rho is provided." ) elif rho is None: self.diag = get_diagonal_node_basis( self.K_single, self.elements_flat, self.el_ids, self.rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, self.constraints, self.diag ) return self.diag else: self.diag = get_diagonal_node_basis( self.K_single, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, self.constraints, self.diag ) return self.diag
[docs] def set_constraints(self, constraints): """ Set Dirichlet boundary conditions (replaces existing constraints). Parameters ---------- constraints : cp.ndarray DOF indices to constrain on GPU, shape (n_constrained_dofs,) Notes ----- Clears all existing constraints and sets only the specified DOFs. Use add_constraints() to add constraints incrementally. """ self.constraints[:] = False self.constraints[constraints] = True self.has_con = True self.non_con_map = self.idx_map[~self.constraints]
[docs] def add_constraints(self, constraints): """ Add Dirichlet boundary conditions (accumulates with existing constraints). Parameters ---------- constraints : cp.ndarray DOF indices to constrain on GPU, shape (n_constrained_dofs,) Notes ----- Adds to existing constraints. Use set_constraints() to replace all constraints. """ self.constraints[constraints] = True self.has_con = True self.non_con_map = self.idx_map[~self.constraints]
[docs] def process_grad(self, U): """ Compute element-wise compliance sensitivities dC/drho. Parameters ---------- U : cp.ndarray Displacement vector from FEA solve on GPU, shape (n_dof,) Returns ------- dk : cp.ndarray Element sensitivities on GPU (compliance derivative per element), shape (n_elements,) Notes ----- - Computes: dk[e] = -U[e]^T @ K_e @ U[e] - Used for compliance minimization gradient - Negative sign convention for minimization - All operations performed on GPU """ self.dk = process_dk( self.K_single, self.elements_flat, U, self.dof, self.elements_size, self.dk ) return self.dk
[docs] def construct(self, rho): """ Build explicit CSR sparse matrix representation on GPU. Parameters ---------- rho : cp.ndarray Design variables on GPU, shape (n_elements,) Returns ------- cupyx.scipy.sparse.csr_matrix Sparse stiffness matrix in CSR format on GPU Notes ----- - Memory-intensive: O(nnz) storage on GPU - Required for direct solvers (SPSOLVE) - Reuses sparsity pattern on subsequent calls (faster) """ size = self.n_nodes * self.dof if not self.has_been_constructed: self.CSR = self._matmat( rho, cp.sparse.eye(size, format="csr", dtype=self.dtype) ) self.ptr = self.CSR.indptr else: self.CSR = self._matmat( rho, cp.sparse.eye(size, format="csr", dtype=self.dtype), self.ptr ) self.has_been_constructed = True return self.CSR
def _matvec(self, rho, vec): if not self.has_con: self.mat_vec = mat_vec_node_basis_parallel( self.K_single, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, vec, self.dof, self.elements_size, out=self.mat_vec ) return self.mat_vec self.mat_vec = mat_vec_node_basis_parallel( self.K_single, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, vec, self.dof, self.elements_size, self.constraints, self.mat_vec ) return self.mat_vec def _rmatvec(self, rho, vec): return self._matvec(rho, vec) def _matmat(self, rho, mat, Cp=None): if Cp is None: if not self.has_con: return matmat_node_basis_parallel( self.K_single, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, mat, self.max_con_count, ) else: out = matmat_node_basis_parallel( self.K_single, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, mat, self.max_con_count, self.constraints, ) out.indices[out.indices < 0] = 0 return out else: if not self.has_con: return matmat_node_basis_parallel_( self.K_single, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, mat, Cp ) else: out = matmat_node_basis_parallel_( self.K_single, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, mat, Cp, self.constraints ) out.indices[out.indices < 0] = 0 return out def _rmatmat(self, rho, mat, Cp=None): return self._matmat(rho, mat, Cp)
[docs] def dot(self, rhs): """ Matrix-free matrix-vector or matrix-matrix product K(rho) @ rhs. Parameters ---------- rhs : cp.ndarray or cp.sparse.csr_matrix Right-hand side vector or matrix Returns ------- result : cp.ndarray or cp.sparse.csr_matrix Product K(rho) @ rhs Raises ------ ValueError If rho not set or shape mismatch NotImplementedError If rhs type not supported Notes ----- - Requires rho set via set_rho() first - Matrix-free: No explicit matrix storage - Faster than constructing full matrix for large problems """ if self.has_rho: if isinstance(rhs, cp.ndarray): if rhs.shape[0] == self.n_nodes * self.dof: return self._matvec(self.rho, rhs) else: raise ValueError( "Shape of the input vector does not match the number of nodes and dof." ) elif cp.sparse.issparse(rhs): if rhs.shape[0] == self.n_nodes * self.dof: if isinstance(rhs, cp.sparse.csr_matrix): return self._matmat(self.rho, rhs) else: return self._matmat(self.rho, rhs.tocsr()) else: raise ValueError( "Shape of the input matrix does not match the number of nodes and dof." ) else: raise NotImplementedError( "Only numpy arrays as vectors and scipy sparse matrices are supported." ) else: raise ValueError("Rho has not been set. dot works only after setting rho.")
[docs] class UniformStiffnessKernel(StiffnessKernel): """ CUDA-accelerated stiffness kernel for uniform unstructured meshes. GPU version of CPU UniformStiffnessKernel. Handles unstructured meshes where all elements have the same number of nodes and identical geometry, using element-specific stiffness matrices. More general than StructuredStiffnessKernel but less efficient. Identical API to CPU version but operates entirely on GPU memory. Parameters ---------- mesh : CuGeneralMesh CUDA general mesh with uniform element topology (mesh.is_uniform must be True) Attributes ---------- Ks : cupy.ndarray Element stiffness matrices, shape (n_elements, dof_per_element, dof_per_element) (on GPU) shape : tuple Global stiffness matrix dimensions (n_dof, n_dof) constraints : cupy.ndarray (bool) Boolean array for constrained DOFs (on GPU) has_cons : bool True if boundary conditions have been applied Methods ------- Same interface as StructuredStiffnessKernel (set_rho, dot, construct, diagonal, etc.) Notes ----- - Requires mesh.is_uniform == True (all elements same size) - Stores per-element stiffness matrices (more memory than StructuredStiffnessKernel) - Use for unstructured uniform meshes (e.g., triangular or tetrahedral) - For structured grids, prefer StructuredStiffnessKernel for better performance - All arrays stored as CuPy arrays on GPU - All operations on GPU using CuPy - Use with CUDA solvers (CG, MultiGrid) for best performance - Memory limited by GPU VRAM Examples -------- >>> from pyFANTOM.CUDA import GeneralMesh, UniformStiffnessKernel >>> import numpy as np >>> # Triangular mesh >>> nodes = np.array([[0,0], [1,0], [0,1], [1,1]]) >>> elements = np.array([[0,1,2], [1,3,2]]) >>> mesh = GeneralMesh(nodes, elements) >>> kernel = UniformStiffnessKernel(mesh=mesh) """
[docs] def __init__(self, mesh: CuGeneralMesh): super().__init__() if not mesh.is_uniform: raise ValueError( "Mesh is not uniform, you should use GeneralStiffnessKernel instead." ) self.mesh = mesh self.nodes, self.elements = mesh.nodes, mesh.elements self.dof = mesh.dof self.dtype = mesh.dtype # move to gpu and cast appropriately self.nodes = cp.array(self.nodes, dtype=self.dtype, copy=False) self.elements = cp.array(self.elements, dtype=cp.int32, copy=False) self.Ks = cp.array(mesh.Ks, dtype=self.dtype, copy=False) self.elements_flat = self.elements.flatten() self.el_ids = cp.arange(self.elements.shape[0], dtype=cp.int32).repeat( self.elements.shape[1] ) self.sorter = cp.argsort(self.elements_flat).astype(cp.int32) self.node_ids = cp.searchsorted( self.elements_flat, cp.arange(self.nodes.shape[0], dtype=cp.int32), sorter=self.sorter, side="left", ).astype(cp.int32) self.elements_size = self.elements.shape[1] self.n_nodes = self.nodes.shape[0] self.has_been_constructed = False self.ptr = None self.indices = None self.data = None self.max_con_count = (self.elements_size * int(self.dof)) * cp.unique( self.elements_flat, return_counts=True )[1].max() self.constraints = cp.zeros(self.n_nodes * self.dof, dtype=cp.bool_) self.shape = (self.n_nodes * self.dof, self.n_nodes * self.dof) self.has_con = False self.idx_map = cp.arange(self.n_nodes * self.dof, dtype=cp.int32)
[docs] def diagonal(self, rho=None): """ Extract diagonal of stiffness matrix. Parameters ---------- rho : cp.ndarray, optional Design variables on GPU. If None, uses self.rho (must be set via set_rho()) Returns ------- cp.ndarray Diagonal entries on GPU, shape (n_dof,) Raises ------ ValueError If rho is None and has_rho is False Notes ----- Useful for Jacobi preconditioning or diagonal scaling in iterative solvers. """ if rho is None and not self.has_rho: raise ValueError( "Rho has not been set. diagonal works only after setting rho or if rho is provided." ) elif rho is None: self.diag = get_diagonal_node_basis( self.Ks, self.elements_flat, self.el_ids, self.rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, self.constraints, self.diag ) return self.diag else: self.diag = get_diagonal_node_basis( self.Ks, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, self.constraints, self.diag ) return self.diag
[docs] def set_constraints(self, constraints): """ Set Dirichlet boundary conditions (replaces existing constraints). Parameters ---------- constraints : cp.ndarray DOF indices to constrain on GPU, shape (n_constrained_dofs,) Notes ----- Clears all existing constraints and sets only the specified DOFs. Use add_constraints() to add constraints incrementally. """ self.constraints[:] = False self.constraints[constraints] = True self.has_con = True self.non_con_map = self.idx_map[~self.constraints]
[docs] def add_constraints(self, constraints): """ Add Dirichlet boundary conditions (accumulates with existing constraints). Parameters ---------- constraints : cp.ndarray DOF indices to constrain on GPU, shape (n_constrained_dofs,) Notes ----- Adds to existing constraints. Use set_constraints() to replace all constraints. """ self.constraints[constraints] = True self.has_con = True self.non_con_map = self.idx_map[~self.constraints]
[docs] def process_grad(self, U): """ Compute element-wise compliance sensitivities dC/drho. Parameters ---------- U : cp.ndarray Displacement vector from FEA solve on GPU, shape (n_dof,) Returns ------- dk : cp.ndarray Element sensitivities on GPU, shape (n_elements,) Notes ----- Computes -U^T @ dK/drho @ U for each element, used in compliance sensitivity analysis. All operations performed on GPU. """ self.dk = process_dk_full( self.Ks, self.elements_flat, U, self.dof, self.elements_size, self.dk ) return self.dk
[docs] def construct(self, rho): """ Build explicit CSR sparse matrix representation on GPU. Parameters ---------- rho : cp.ndarray Design variables on GPU, shape (n_elements,) Returns ------- cupyx.scipy.sparse.csr_matrix Sparse stiffness matrix in CSR format on GPU Notes ----- - Memory-intensive: O(nnz) storage on GPU - Required for direct solvers (SPSOLVE) - Reuses sparsity pattern on subsequent calls (faster) """ size = self.n_nodes * self.dof if not self.has_been_constructed: self.CSR = self._matmat( rho, cp.sparse.eye(size, format="csr", dtype=self.dtype) ) self.ptr = self.CSR.indptr else: self.CSR = self._matmat( rho, cp.sparse.eye(size, format="csr", dtype=self.dtype), self.ptr ) self.has_been_constructed = True return self.CSR
def _matvec(self, rho, vec): if not self.has_con: self.mat_vec = mat_vec_node_basis_parallel_full( self.Ks, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, vec, self.dof, self.elements_size, out=self.mat_vec ) return self.mat_vec else: self.mat_vec = mat_vec_node_basis_parallel_full( self.Ks, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, vec, self.dof, self.elements_size, self.constraints, self.mat_vec ) return self.mat_vec def _rmatvec(self, rho, vec): return self._matvec(rho, vec) def _matmat(self, rho, mat, Cp=None): if Cp is None: if not self.has_con: return matmat_node_basis_full_parallel( self.Ks, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, mat, self.max_con_count, ) else: out = matmat_node_basis_full_parallel( self.Ks, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, mat, self.max_con_count, self.constraints, ) out.indices[out.indices < 0] = 0 return out else: if not self.has_con: return matmat_node_basis_full_parallel_( self.Ks, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, mat, Cp, ) else: out = matmat_node_basis_full_parallel_( self.Ks, self.elements_flat, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.elements_size, mat, Cp, self.constraints, ) out.indices[out.indices < 0] = 0 return out def _rmatmat(self, rho, mat, Cp=None): """ Transpose matrix-matrix product K(rho)^T @ mat. Parameters ---------- rho : cupy.ndarray Element density variables, shape (n_elements,) mat : cupy.sparse.csr_matrix or cupy.ndarray Input matrix, shape (n_dof, n_cols) Cp : cupy.sparse.csr_matrix, optional Pre-allocated output sparsity pattern Returns ------- cupy.sparse.csr_matrix Output matrix K(rho)^T @ mat, shape (n_dof, n_cols) Notes ----- For symmetric stiffness matrices, same as _matmat(). Delegates to _matmat(). """ return self._matmat(rho, mat, Cp)
[docs] def dot(self, rhs): """ Matrix-vector or matrix-matrix product K @ rhs. Parameters ---------- rhs : cupy.ndarray or cupy.sparse.csr_matrix Right-hand side vector or matrix - Vector: shape (n_dof,) - Matrix: shape (n_dof, n_cols) or sparse CSR matrix Returns ------- cupy.ndarray or cupy.sparse.csr_matrix Result K @ rhs, same type and shape as rhs (except columns) Raises ------ ValueError If rho not set or rhs shape mismatch NotImplementedError If rhs is not cupy array or sparse matrix Notes ----- - Uses cached self.rho if available - Automatically detects vector vs matrix input - Converts sparse matrices to CSR format if needed - Main interface for matrix operations """ if self.has_rho: if isinstance(rhs, cp.ndarray): if rhs.shape[0] == self.n_nodes * self.dof: return self._matvec(self.rho, rhs) else: raise ValueError( "Shape of the input vector does not match the number of nodes and dof." ) elif cp.sparse.issparse(rhs): if rhs.shape[0] == self.n_nodes * self.dof: if isinstance(rhs, cp.sparse.csr_matrix): return self._matmat(self.rho, rhs) else: return self._matmat(self.rho, rhs.tocsr()) else: raise ValueError( "Shape of the input matrix does not match the number of nodes and dof." ) else: raise NotImplementedError( "Only cupy arrays and sparse matrices are supported." ) else: raise ValueError("Rho has not been set. dot works only after setting rho.")
[docs] class GeneralStiffnessKernel(StiffnessKernel): """ CUDA-accelerated stiffness kernel for heterogeneous meshes. GPU version of CPU GeneralStiffnessKernel. Most general kernel supporting meshes with elements of varying sizes and topologies. Uses flat storage with pointer arrays for memory-efficient representation. Identical API to CPU version but operates entirely on GPU. Parameters ---------- mesh : CuGeneralMesh CUDA general mesh with potentially heterogeneous elements (mesh.is_uniform can be False) Attributes ---------- K_flat : cupy.ndarray Flattened element stiffness matrices (on GPU) K_ptr : cupy.ndarray Pointer array for accessing K_flat: K_e = K_flat[K_ptr[e]:K_ptr[e+1]] (on GPU) elements_flat : cupy.ndarray Flattened element connectivity (on GPU) elements_ptr : cupy.ndarray Pointer array for accessing elements (on GPU) element_sizes : cupy.ndarray Number of nodes per element, shape (n_elements,) (on GPU) shape : tuple Global stiffness matrix dimensions (n_dof, n_dof) constraints : cupy.ndarray (bool) Boolean array for constrained DOFs (on GPU) has_cons : bool True if boundary conditions have been applied Methods ------- Same interface as StructuredStiffnessKernel (set_rho, dot, construct, diagonal, etc.) Notes ----- - Supports mixed element types (triangles + quads, tets + hexes, etc.) - Flat storage with pointers enables variable element sizes - Slower than Uniform/Structured kernels due to indirection - Automatically used if mesh.is_uniform == False - All arrays stored as CuPy arrays on GPU - All operations on GPU using CuPy - Use with CUDA solvers (CG, MultiGrid) for best performance - Memory limited by GPU VRAM Examples -------- >>> from pyFANTOM.CUDA import GeneralMesh, GeneralStiffnessKernel >>> import numpy as np >>> # Mixed triangular and quad mesh >>> nodes = np.array([[0,0], [1,0], [0,1], [1,1], [2,0]]) >>> elements = [np.array([0,1,2]), np.array([1,4,3,2])] # Triangle + Quad >>> mesh = GeneralMesh(nodes, elements) >>> kernel = GeneralStiffnessKernel(mesh=mesh) """
[docs] def __init__(self, mesh: CuGeneralMesh): super().__init__() if mesh.is_uniform: raise ValueError( "Mesh is uniform, you should use UniformStiffnessKernel instead." ) self.mesh = mesh self.nodes, self.elements = mesh.nodes, mesh.elements self.dof = mesh.dof self.dtype = mesh.dtype # Initialize lists for flattening elements_flat = cp.array(mesh.elements_flat, dtype=cp.int32, copy=False) element_sizes = cp.array(mesh.element_sizes, dtype=cp.int32, copy=False) K_flat = mesh.K_flat # Move everything to GPU with appropriate types self.nodes = cp.array(self.nodes, dtype=self.dtype, copy=False) self.elements_flat = cp.array(elements_flat, dtype=cp.int32, copy=False) self.K_flat = cp.array(K_flat, dtype=self.dtype, copy=False) # Calculate pointers self.K_ptr = cp.array(mesh.K_ptr, dtype=cp.int32, copy=False) self.elements_ptr = cp.array(mesh.elements_ptr, dtype=cp.int32, copy=False) self.el_ids = cp.arange(self.elements.shape[0], dtype=cp.int32).repeat( element_sizes.get().tolist() ) self.sorter = cp.argsort(self.elements_flat).astype(cp.int32) self.node_ids = cp.searchsorted( self.elements_flat, cp.arange(self.nodes.shape[0], dtype=cp.int32), sorter=self.sorter, side="left", ).astype(cp.int32) self.n_nodes = self.nodes.shape[0] self.has_been_constructed = False self.ptr = None self.indices = None self.data = None self.max_con_count = ( cp.diff(self.elements_ptr).max() * int(self.dof) ) * cp.unique(self.elements_flat, return_counts=True)[1].max() self.constraints = cp.zeros(self.n_nodes * self.dof, dtype=cp.bool_) self.shape = (self.n_nodes * self.dof, self.n_nodes * self.dof) self.has_con = False self.idx_map = cp.arange(self.n_nodes * self.dof, dtype=cp.int32)
[docs] def diagonal(self, rho=None): """ Compute diagonal entries of stiffness matrix. Parameters ---------- rho : cupy.ndarray, optional Element density variables, shape (n_elements,). If None, uses cached self.rho Returns ------- cupy.ndarray Diagonal entries of K(rho), shape (n_dof,) Notes ----- - Uses GPU-accelerated flat storage assembly - Respects Dirichlet boundary conditions (constrained DOFs zeroed) - Caches result in self.diag for reuse - Used for preconditioning and diagnostics """ if rho is None and not self.has_rho: raise ValueError( "Rho has not been set. diagonal works only after setting rho or if rho is provided." ) elif rho is None: self.diag = get_diagonal_node_basis_flat( self.K_flat, self.elements_flat, self.K_ptr, self.elements_ptr, self.el_ids, self.rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.constraints, self.diag ) return self.diag else: self.diag = get_diagonal_node_basis_flat( self.K_flat, self.elements_flat, self.K_ptr, self.elements_ptr, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, self.constraints, self.diag ) return self.diag
[docs] def set_constraints(self, constraints): """ Set Dirichlet boundary condition DOF (replaces existing constraints). Parameters ---------- constraints : cp.ndarray or array-like Indices of constrained DOF Notes ----- - Resets all previous constraints - Use add_constraints() to append to existing constraints - Constrained rows/columns are zeroed in matrix operations """ self.constraints[:] = False self.constraints[constraints] = True self.has_con = True self.non_con_map = self.idx_map[~self.constraints]
[docs] def add_constraints(self, constraints): """ Add Dirichlet boundary conditions (accumulates with existing constraints). Parameters ---------- constraints : cp.ndarray DOF indices to constrain on GPU, shape (n_constrained_dofs,) Notes ----- Adds to existing constraints. Use set_constraints() to replace all constraints. - Use set_constraints() to replace all constraints """ self.constraints[constraints] = True self.has_con = True self.non_con_map = self.idx_map[~self.constraints]
[docs] def process_grad(self, U): """ Compute element-wise compliance sensitivities dC/drho. Parameters ---------- U : cp.ndarray Displacement vector from FEA solve Returns ------- dk : cp.ndarray Element sensitivities (compliance derivative per element) Notes ----- - Computes: dk[e] = -U[e]^T @ K_e @ U[e] - Used for compliance minimization gradient - Negative sign convention for minimization """ self.dk = process_dk( self.K_single, self.elements_flat, U, self.dof, self.elements_size, self.dk ) return self.dk
[docs] def construct(self, rho): """ Construct CSR stiffness matrix representation. Parameters ---------- rho : cupy.ndarray Element density variables, shape (n_elements,) Returns ------- cupy.sparse.csr_matrix Sparse CSR stiffness matrix K(rho), shape (n_dof, n_dof) Notes ----- - Assembles full sparse matrix structure (first call) or updates values (subsequent) - Uses matrix-matrix product with identity to extract sparsity pattern - Caches sparsity pattern (ptr) for efficient updates - Sets has_been_constructed flag after first call - More memory-intensive than matrix-free operations but needed for some solvers """ size = self.n_nodes * self.dof if not self.has_been_constructed: self.CSR = self._matmat( rho, cp.sparse.eye(size, format="csr", dtype=self.dtype) ) self.ptr = self.CSR.indptr else: self.CSR = self._matmat( rho, cp.sparse.eye(size, format="csr", dtype=self.dtype), self.ptr ) self.has_been_constructed = True return self.CSR
def _matvec(self, rho, vec): """ Matrix-vector product K(rho) @ vec using GPU-accelerated flat storage. Parameters ---------- rho : cupy.ndarray Element density variables, shape (n_elements,) vec : cupy.ndarray Input vector, shape (n_dof,) Returns ------- cupy.ndarray Output vector K(rho) @ vec, shape (n_dof,) Notes ----- - Uses parallel GPU kernel for element-wise assembly - Handles Dirichlet constraints (zeroed rows/columns) - Caches result in self.mat_vec for reuse - Optimized for flat storage with pointer arrays """ if not self.has_con: self.mat_vec = mat_vec_node_basis_parallel_flat( self.K_flat, self.elements_flat, self.K_ptr, self.elements_ptr, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, vec, self.dof, out=self.mat_vec ) return self.mat_vec else: self.mat_vec = mat_vec_node_basis_parallel_flat( self.K_flat, self.elements_flat, self.K_ptr, self.elements_ptr, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, vec, self.dof, self.constraints, self.mat_vec ) return self.mat_vec def _rmatvec(self, rho, vec): """ Transpose matrix-vector product K(rho)^T @ vec. Parameters ---------- rho : cupy.ndarray Element density variables, shape (n_elements,) vec : cupy.ndarray Input vector, shape (n_dof,) Returns ------- cupy.ndarray Output vector K(rho)^T @ vec, shape (n_dof,) Notes ----- For symmetric stiffness matrices, same as _matvec(). Delegates to _matvec(). """ return self._matvec(rho, vec) def _matmat(self, rho, mat, Cp=None): """ Matrix-matrix product K(rho) @ mat using GPU-accelerated assembly. Parameters ---------- rho : cupy.ndarray Element density variables, shape (n_elements,) mat : cupy.sparse.csr_matrix or cupy.ndarray Input matrix, shape (n_dof, n_cols) Cp : cupy.sparse.csr_matrix, optional Pre-allocated output sparsity pattern. If None, computes new pattern Returns ------- cupy.sparse.csr_matrix Output matrix K(rho) @ mat, shape (n_dof, n_cols) Notes ----- - Uses parallel GPU kernel for batch element assembly - Handles Dirichlet constraints (zeroed rows/columns) - If Cp provided, reuses sparsity pattern for efficiency - Used by construct() to build full stiffness matrix - Negative indices set to 0 (constraint handling artifact) """ if Cp is None: if not self.has_con: return matmat_node_basis_flat_parallel( self.K_flat, self.elements_flat, self.K_ptr, self.elements_ptr, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, mat, self.max_con_count, ) else: out = matmat_node_basis_flat_parallel( self.K_flat, self.elements_flat, self.K_ptr, self.elements_ptr, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, mat, self.max_con_count, self.constraints, ) out.indices[out.indices < 0] = 0 return out else: if not self.has_con: return matmat_node_basis_flat_parallel_( self.K_flat, self.elements_flat, self.K_ptr, self.elements_ptr, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, mat, Cp, ) else: out = matmat_node_basis_flat_parallel( self.K_flat, self.elements_flat, self.K_ptr, self.elements_ptr, self.el_ids, rho, self.sorter, self.node_ids, self.n_nodes, self.dof, mat, self.max_con_count, self.constraints, ) out.indices[out.indices < 0] = 0 return out def _rmatmat(self, rho, mat, Cp=None): """ Transpose matrix-matrix product K(rho)^T @ mat. Parameters ---------- rho : cupy.ndarray Element density variables, shape (n_elements,) mat : cupy.sparse.csr_matrix or cupy.ndarray Input matrix, shape (n_dof, n_cols) Cp : cupy.sparse.csr_matrix, optional Pre-allocated output sparsity pattern Returns ------- cupy.sparse.csr_matrix Output matrix K(rho)^T @ mat, shape (n_dof, n_cols) Notes ----- For symmetric stiffness matrices, same as _matmat(). Delegates to _matmat(). """ return self._matmat(rho, mat, Cp)
[docs] def dot(self, rhs): """ Matrix-vector or matrix-matrix product K @ rhs. Parameters ---------- rhs : cupy.ndarray or cupy.sparse.csr_matrix Right-hand side vector or matrix - Vector: shape (n_dof,) - Matrix: shape (n_dof, n_cols) or sparse CSR matrix Returns ------- cupy.ndarray or cupy.sparse.csr_matrix Result K @ rhs, same type and shape as rhs (except columns) Raises ------ ValueError If rho not set or rhs shape mismatch NotImplementedError If rhs is not cupy array or sparse matrix Notes ----- - Uses cached self.rho if available - Automatically detects vector vs matrix input - Converts sparse matrices to CSR format if needed - Main interface for matrix operations """ if self.has_rho: if isinstance(rhs, cp.ndarray): if rhs.shape[0] == self.n_nodes * self.dof: return self._matvec(self.rho, rhs) else: raise ValueError( "Shape of the input vector does not match the number of nodes and dof." ) elif cp.sparse.issparse(rhs): if rhs.shape[0] == self.n_nodes * self.dof: if isinstance(rhs, cp.sparse.csr_matrix): return self._matmat(self.rho, rhs) else: return self._matmat(self.rho, rhs.tocsr()) else: raise ValueError( "Shape of the input matrix does not match the number of nodes and dof." ) else: raise NotImplementedError( "Only cupy arrays and sparse matrices are supported." ) else: raise ValueError("Rho has not been set. dot works only after setting rho.")