Source code for topoembedx.classes.complexnetmf

"""Topological NetMF embedding algorithm."""

from collections.abc import Hashable
from typing import Literal

import networkx as nx
import numpy as np
import scipy.sparse as sp
import toponetx as tnx
from karateclub import NetMF

from topoembedx.neighborhood import neighborhood_from_complex


class ComplexNetMF(NetMF):
    """NetMF [1]_ embedding applied to the cells of a topological domain.

    The model builds a (co)adjacency neighborhood matrix of a complex,
    turns it into a graph, and hands that graph to the ``NetMF``
    implementation it inherits from.

    Parameters
    ----------
    dimensions : int, default=32
        Number of embedding dimensions.
    iteration : int, default=10
        Number of SVD iterations.
    order : int, default=2
        Number of PMI matrix powers.
    negative_samples : int, default=1
        Number of negative samples.
    seed : int, default=42
        Seed for the randomized singular value decomposition.

    References
    ----------
    .. [1] Qiu, Jiezhong, et al. "Network Embedding as Matrix Factorization:
       Unifying DeepWalk, LINE, PTE, and Node2vec". Proceedings of the
       Eleventh ACM International Conference on Web Search and Data Mining
       [Marina Del Rey CA USA], 2018, pp. 459-67.
       https://doi.org/10.1145/3159652.3159706.
    """

    # Neighborhood matrix produced by ``fit`` (its diagonal is set to 1).
    A: sp.csr_matrix
    # Cell identifiers returned alongside the matrix by
    # ``neighborhood_from_complex``; used as keys by ``get_embedding``.
    ind: list[Hashable]
    # NOTE(review): presumably populated by the parent ``NetMF.fit`` --
    # it is never assigned in this class; confirm against karateclub.
    _embedding: np.ndarray

    def __init__(
        self,
        dimensions: int = 32,
        iteration: int = 10,
        order: int = 2,
        negative_samples: int = 1,
        seed: int = 42,
    ) -> None:
        # Every hyper-parameter is forwarded untouched to ``NetMF``.
        super().__init__(
            dimensions=dimensions,
            iteration=iteration,
            order=order,
            negative_samples=negative_samples,
            seed=seed,
        )

    def fit(
        self,
        domain: tnx.Complex,
        neighborhood_type: Literal["adj", "coadj"] = "adj",
        neighborhood_dim=None,
    ) -> None:
        """Fit the model on a topological domain.

        Parameters
        ----------
        domain : toponetx.Complex
            The topological domain to be embedded.
        neighborhood_type : {"adj", "coadj"}, default="adj"
            The type of neighborhood to compute: "adj" for the adjacency
            matrix, "coadj" for the coadjacency matrix.
        neighborhood_dim : dict, optional
            The integer parameters that specify the neighborhood of the
            cells for which the embedding is generated. The value is
            forwarded unchanged to ``neighborhood_from_complex``; how
            ``None`` is interpreted is decided by that helper.

            In TopoNetX, (co)adjacency neighborhood matrices are specified
            via one or two parameters.

            - For Cell/Simplicial/Path complexes the (co)adjacency matrix
              is specified by a single parameter, namely
              ``neighborhood_dim["rank"]``.
            - For Combinatorial/ColoredHyperGraph the (co)adjacency matrix
              is specified by two parameters, namely
              ``neighborhood_dim["rank"]`` and
              ``neighborhood_dim["via_rank"]``.

        Notes
        -----
        ``neighborhood_dim={"rank": 1, "via_rank": -1}`` specifies the
        dimension for which the cell embeddings are computed: ``"rank": 1``
        means the embeddings are computed for the first dimension. The
        integer ``"via_rank": -1`` is ignored when the input is a
        cell/simplicial complex and must be specified when the input is a
        combinatorial complex or colored hypergraph.
        """
        cell_ids, neighborhood = neighborhood_from_complex(
            domain, neighborhood_type, neighborhood_dim
        )
        self.ind = cell_ids
        self.A = neighborhood

        # Put ones on the diagonal, i.e. a self-loop on every cell of the
        # graph built below. NOTE(review): presumably this keeps NetMF's
        # degree normalisation well-defined for isolated cells -- confirm.
        self.A.setdiag(1)

        # The parent class does the actual factorisation on a graph.
        super().fit(nx.from_scipy_sparse_array(self.A))

    def get_embedding(self, get_dict: bool = False) -> np.ndarray | dict:
        """Return the embedding computed by the parent ``NetMF`` model.

        Parameters
        ----------
        get_dict : bool, optional
            Whether to return a dictionary. Defaults to False.

        Returns
        -------
        dict or numpy.ndarray
            The embedding as returned by ``NetMF.get_embedding`` or, when
            ``get_dict`` is True, a dictionary pairing each entry of
            ``self.ind`` with the corresponding row of that embedding.

        Raises
        ------
        ValueError
            If ``get_dict`` is True and ``self.ind`` and the embedding do
            not have the same length (``zip(..., strict=True)``).
        """
        vectors = super().get_embedding()
        if not get_dict:
            return vectors
        return dict(zip(self.ind, vectors, strict=True))