Source code for hypernetx.reports.descriptive_stats

"""
This module contains methods which compute various distributions for hypergraphs:
    * Edge size distribution
    * Node degree distribution
    * Component size distribution
    * Toplex size distribution
    * Diameter

Also computes general hypergraph information: number of nodes, edges, cells, aspect ratio, incidence matrix density
"""

from collections import Counter
import numpy as np
from hypernetx.utils.decorators import not_implemented_for


[docs] def centrality_stats(X): """ Computes basic centrality statistics for X Parameters ---------- X : an iterable of numbers Returns ------- [min, max, mean, median, standard deviation] : list List of centrality statistics for X """ return [ min(X), max(X), np.mean(X).tolist(), np.median(X).tolist(), np.std(X).tolist(), ]
[docs] def edge_size_dist(H, aggregated=False): """ Computes edge sizes of a hypergraph. Parameters ---------- H: hnx.Hypergraph aggregated : If aggregated is True, returns a dictionary of edge sizes and counts. If aggregated is False, returns a list of edge sizes in H. Returns ------- edge_size_dist : list or dict List of edge sizes or dictionary of edge size distribution. """ if aggregated: return Counter(H.edge_size_dist()) else: return H.edge_size_dist()
[docs] def degree_dist(H, aggregated=False): """ Computes degrees of nodes of a hypergraph. Parameters ---------- H: hnx.Hypergraph aggregated : If aggregated is True, returns a dictionary of degrees and counts. If aggregated is False, returns a list of degrees in H. Returns ------- degree_dist : list or dict List of degrees or dictionary of degree distribution """ distr = [H.degree(n) for n in H.nodes] if aggregated: return Counter(distr) else: return distr
[docs] def comp_dist(H, aggregated=False): """ Computes component sizes, number of nodes. Parameters ---------- H: hnx.Hypergraph aggregated : If aggregated is True, returns a dictionary of component sizes (number of nodes) and counts. If aggregated is False, returns a list of components sizes in H. Returns ------- comp_dist : list or dictionary List of component sizes or dictionary of component size distribution See Also -------- s_comp_dist """ distr = [len(c) for c in H.components()] if aggregated: return Counter(distr) else: return distr
[docs] def s_comp_dist(H, s=1, aggregated=False, edges=True, return_singletons=True): """ Computes s-component sizes, counting nodes or edges. Parameters ---------- H: hnx.Hypergraph s : positive integer, default is 1 aggregated : If aggregated is True, returns a dictionary of s-component sizes and counts in H. If aggregated is False, returns a list of s-component sizes in H. edges : If edges is True, the component size is number of edges. If edges is False, the component size is number of nodes. return_singletons : bool, optional, default=True Returns ------- s_comp_dist : list or dictionary List of component sizes or dictionary of component size distribution in H See Also -------- comp_dist """ distr = list() comps = H.s_connected_components( s=s, edges=edges, return_singletons=return_singletons ) distr = [len(c) for c in comps] if aggregated: return Counter(distr) else: return distr
[docs] @not_implemented_for("static") def toplex_dist(H, aggregated=False): """ Computes toplex sizes for hypergraph H. Parameters ---------- H: hnx.Hypergraph aggregated : If aggregated is True, returns a dictionary of toplex sizes and counts in H. If aggregated is False, returns a list of toplex sizes in H. Returns ------- toplex_dist : list or dictionary List of toplex sizes or dictionary of toplex size distribution in H """ distr = [H.size(e) for e in H.toplexes().edges] if aggregated: return Counter(distr) else: return distr
[docs] def s_node_diameter_dist(H): """ Parameters ---------- H: hnx.Hypergraph Returns ------- s_node_diameter_dist : list List of s-node-diameters for hypergraph H starting with s=1 and going up as long as the hypergraph is s-node-connected """ i = 1 diams = [] while H.is_connected(s=i): diams.append(H.diameter(s=i)) i += 1 return diams
[docs] def s_edge_diameter_dist(H): """ Parameters ---------- H: hnx.Hypergraph Returns ------- s_edge_diameter_dist : list List of s-edge-diameters for hypergraph H starting with s=1 and going up as long as the hypergraph is s-edge-connected """ i = 1 diams = [] while H.is_connected(s=i, edges=True): diams.append(H.edge_diameter(s=i)) i += 1 return diams
[docs] def info(H, node=None, edge=None): """ Print a summary of simple statistics for H Parameters ---------- H: hnx.Hypergraph obj : optional either a node or edge uid from the hypergraph dictionary : optional If True then returns the info as a dictionary rather than a string If False (default) returns the info as a string Returns ------- info : string Returns a string of statistics of the size, aspect ratio, and density of the hypergraph. Print the string to see it formatted. """ if not H.edges.elements: return f"Hypergraph {H.name} is empty." report = info_dict(H, node=node, edge=edge) info = "" if node: info += f"Node '{node}' has the following properties:\n" info += f"Degree: {report['degree']}\n" info += f"Contained in: {report['membs']}\n" info += f"Neighbors: {report['neighbors']}" elif edge: info += f"Edge '{edge}' has the following properties:\n" info += f"Size: {report['size']}\n" info += f"Elements: {report['elements']}" else: info += f"Number of Rows: {report['nrows']}\n" info += f"Number of Columns: {report['ncols']}\n" info += f"Aspect Ratio: {report['aspect ratio']}\n" info += f"Number of non-empty Cells: {report['ncells']}\n" info += f"Density: {report['density']}" return info
[docs] def info_dict(H, node=None, edge=None): """ Create a summary of simple statistics for H Parameters ---------- H: hnx.Hypergraph obj : optional either a node or edge uid from the hypergraph Returns ------- info_dict : dict Returns a dictionary of statistics of the size, aspect ratio, and density of the hypergraph. """ report = dict() if len(H.edges.elements) == 0: return {} if node: report["membs"] = list(H.dual().edges[node]) report["degree"] = len(report["membs"]) report["neighbors"] = H.neighbors(node) return report if edge: report["size"] = H.size(edge) report["elements"] = list(H.edges[edge]) return report else: lnodes, ledges = H.shape M = H.incidence_matrix(index=False) ncells = M.nnz report["nrows"] = lnodes report["ncols"] = ledges report["aspect ratio"] = lnodes / ledges report["ncells"] = ncells report["density"] = ncells / (lnodes * ledges) return report
[docs] def dist_stats(H): """ Computes many basic hypergraph stats and puts them all into a single dictionary object * nrows = number of nodes (rows in the incidence matrix) * ncols = number of edges (columns in the incidence matrix) * aspect ratio = nrows/ncols * ncells = number of filled cells in incidence matrix * density = ncells/(nrows*ncols) * node degree list = degree_dist(H) * node degree dist = centrality_stats(degree_dist(H)) * node degree hist = Counter(degree_dist(H)) * max node degree = max(degree_dist(H)) * edge size list = edge_size_dist(H) * edge size dist = centrality_stats(edge_size_dist(H)) * edge size hist = Counter(edge_size_dist(H)) * max edge size = max(edge_size_dist(H)) * comp nodes list = s_comp_dist(H, s=1, edges=False) * comp nodes dist = centrality_stats(s_comp_dist(H, s=1, edges=False)) * comp nodes hist = Counter(s_comp_dist(H, s=1, edges=False)) * comp edges list = s_comp_dist(H, s=1, edges=True) * comp edges dist = centrality_stats(s_comp_dist(H, s=1, edges=True)) * comp edges hist = Counter(s_comp_dist(H, s=1, edges=True)) * num comps = len(s_comp_dist(H)) Parameters ---------- H: hnx.Hypergraph Returns ------- dist_stats : dict Dictionary which keeps track of each of the above items (e.g., basic['nrows'] = the number of nodes in H) """ stats = H._state_dict.get("dist_stats", None) if stats is not None: return H._state_dict["dist_stats"] cstats = ["min", "max", "mean", "median", "std"] basic = dict() # Number of rows (nodes), columns (edges), and aspect ratio basic["nrows"] = len(H.nodes) basic["ncols"] = len(H.edges) basic["aspect ratio"] = basic["nrows"] / basic["ncols"] # Number of cells and density M = H.incidence_matrix(index=False) basic["ncells"] = M.nnz basic["density"] = basic["ncells"] / (basic["nrows"] * basic["ncols"]) # Node degree distribution basic["node degree list"] = sorted(degree_dist(H), reverse=True) basic["node degree centrality stats"] = dict( zip(cstats, centrality_stats(basic["node degree list"])) ) basic["node degree hist"] = Counter(basic["node degree list"]) basic["max node degree"] = max(basic["node degree list"]) # Edge size distribution basic["edge size list"] = sorted(H.edge_size_dist(), reverse=True) basic["edge size centrality stats"] = dict( zip(cstats, centrality_stats(basic["edge size list"])) ) basic["edge size hist"] = Counter(basic["edge size list"]) basic["max edge size"] = max(basic["edge size hist"]) # Component size distribution (nodes) basic["comp nodes list"] = sorted(s_comp_dist(H, edges=False), reverse=True) basic["comp nodes hist"] = Counter(basic["comp nodes list"]) basic["comp nodes centrality stats"] = dict( zip(cstats, centrality_stats(basic["comp nodes list"])) ) # Component size distribution (edges) basic["comp edges list"] = sorted(s_comp_dist(H, edges=True), reverse=True) basic["comp edges hist"] = Counter(basic["comp edges list"]) basic["comp edges centrality stats"] = dict( zip(cstats, centrality_stats(basic["comp edges list"])) ) # Number of components basic["num comps"] = len(basic["comp nodes list"]) # # Diameters # basic['s edge diam list'] = s_edge_diameter_dist(H) # basic['s node diam list'] = s_node_diameter_dist(H) H.set_state(dist_stats=basic) return basic