# Source code for topoembedx.classes.complexnetmf

"""Topological NetMF embedding algorithm."""

from collections.abc import Hashable
from typing import Literal

import networkx as nx
import numpy as np
import scipy.sparse as sp
import toponetx as tnx
from karateclub import NetMF

from topoembedx.neighborhood import neighborhood_from_complex



# [docs]
class ComplexNetMF(NetMF):
    """Topological version of the NetMF [1] embedding algorithm.

    The cells of a topological domain are embedded by building a
    (co)adjacency neighborhood matrix, turning it into a graph, and running
    the parent NetMF implementation on that graph.

    Parameters
    ----------
    dimensions : int, default=32
        Number of embedding dimensions.
    iteration : int, default=10
        Number of SVD iterations.
    order : int, default=2
        Number of PMI matrix powers.
    negative_samples : int, default=1
        Number of negative samples.
    seed : int, default=42
        Seed for randomized singular value decomposition.

    References
    ----------
    .. [1] Qiu, Jiezhong, et al. "Network Embedding as Matrix Factorization: Unifying
           DeepWalk, LINE, PTE, and Node2vec". Proceedings of the Eleventh ACM
           International Conference on Web Search and Data Mining [Marina Del Rey CA
           USA], 2018, pp. 459-67. https://doi.org/10.1145/3159652.3159706.
    """

    # Neighborhood matrix returned by `neighborhood_from_complex`; `fit` sets
    # its diagonal to 1 in place.
    A: sp.csr_matrix
    # Cell identifiers returned by `neighborhood_from_complex`; used as the
    # keys of the dictionary produced by `get_embedding(get_dict=True)`.
    ind: list[Hashable]
    # Not assigned in this class; presumably populated by the parent `fit`.
    _embedding: np.ndarray

    def __init__(
        self,
        dimensions: int = 32,
        iteration: int = 10,
        order: int = 2,
        negative_samples: int = 1,
        seed: int = 42,
    ) -> None:
        super().__init__(
            dimensions=dimensions,
            iteration=iteration,
            order=order,
            negative_samples=negative_samples,
            seed=seed,
        )

    def fit(
        self,
        domain: tnx.Complex,
        neighborhood_type: Literal["adj", "coadj"] = "adj",
        neighborhood_dim: dict | None = None,
    ) -> None:
        """Fit the model.

        Parameters
        ----------
        domain : toponetx.Complex
            The topological domain to be embedded.
        neighborhood_type : {"adj", "coadj"}, default="adj"
            The type of neighborhood to compute. "adj" for adjacency matrix,
            "coadj" for coadjacency matrix.
        neighborhood_dim : dict, optional
            The integer parameters needed to specify the neighborhood of the
            cells to generate the embedding. In TopoNetX, (co)adjacency
            neighborhood matrices are specified via one or two parameters.

            - For Cell/Simplicial/Path complexes the (co)adjacency matrix is
              specified by a single parameter, this is precisely
              neighborhood_dim["rank"].
            - For Combinatorial/ColoredHyperGraph the (co)adjacency matrix is
              specified by two parameters, this is precisely
              neighborhood_dim["rank"] and neighborhood_dim["via_rank"].

            The value (including ``None``) is passed unchanged to
            ``neighborhood_from_complex``, which decides how a missing
            specification is handled.

        Notes
        -----
        Here neighborhood_dim={"rank": 1, "via_rank": -1} specifies the dimension for
        which the cell embeddings are going to be computed.
        "rank": 1 means that the embeddings will be computed for the first dimension.
        The integer "via_rank": -1 is ignored when the input is a cell/simplicial
        complex and must be specified when the input complex is a combinatorial
        complex or colored hypergraph.
        """
        self.ind, self.A = neighborhood_from_complex(
            domain, neighborhood_type, neighborhood_dim
        )
        # Set every diagonal entry to 1 (in place), so that each cell gets a
        # self-loop in the graph built from the matrix below.
        self.A.setdiag(1)

        g = nx.from_scipy_sparse_array(self.A)
        super().fit(g)

    def get_embedding(self, get_dict: bool = False) -> np.ndarray | dict:
        """Get embedding.

        Parameters
        ----------
        get_dict : bool, optional
            Whether to return a dictionary. Defaults to False.

        Returns
        -------
        dict or numpy.ndarray
            Embedding. When ``get_dict`` is True, the entries of the parent's
            embedding are paired, in order, with the identifiers stored in
            ``self.ind`` by ``fit``; otherwise the parent's embedding is
            returned unchanged.

        Raises
        ------
        ValueError
            If ``get_dict`` is True and the number of identifiers in
            ``self.ind`` differs from the number of embedding entries.
        """
        emb = super().get_embedding()
        if get_dict:
            # strict=True: fail loudly instead of silently truncating when the
            # identifiers and the embedding entries do not line up one-to-one.
            return dict(zip(self.ind, emb, strict=True))
        return emb