schematic.schemas.data_model_graph

DataModel Graph

  1"""DataModel Graph"""
  2
  3import logging
  4from typing import Any, Optional, Union, AbstractSet
  5
  6import graphviz  # type: ignore
  7import networkx as nx  # type: ignore
  8from opentelemetry import trace
  9
 10from schematic.schemas.data_model_edges import DataModelEdges
 11from schematic.schemas.data_model_nodes import DataModelNodes
 12from schematic.schemas.data_model_relationships import (
 13    DataModelRelationships,
 14    JSONSchemaType,
 15)
 16from schematic.utils.general import unlist
 17from schematic.utils.schema_utils import (
 18    DisplayLabelType,
 19    extract_component_validation_rules,
 20    get_class_label_from_display_name,
 21    get_property_label_from_display_name,
 22)
 23from schematic.utils.validate_utils import rule_in_rule_list
 24from schematic.utils.viz_utils import visualize
 25
 26logger = logging.getLogger(__name__)
 27
 28
 29logger = logging.getLogger(__name__)
 30tracer = trace.get_tracer("Schematic")
 31
 32
 33class DataModelGraphMeta:  # pylint: disable=too-few-public-methods
 34    """DataModelGraphMeta"""
 35
 36    _instances: dict = {}
 37
 38    def __call__(  # pylint: disable=no-self-argument
 39        cls, *args: Any, **kwargs: Any
 40    ) -> Any:
 41        """
 42        Possible changes to the value of the `__init__` argument do not affect
 43        the returned instance.
 44        """
 45        if cls not in cls._instances:
 46            instance = super().__call__(*args, **kwargs)  # type: ignore # pylint: disable=no-member
 47            cls._instances[cls] = instance
 48        return cls._instances[cls]
 49
 50
 51class DataModelGraph:  # pylint: disable=too-few-public-methods
 52    """
 53    Generate graph network (networkx) from the attributes and relationships returned
 54    from the data model parser.
 55
 56    Create a singleton.
 57    """
 58
 59    __metaclass__ = DataModelGraphMeta
 60
 61    def __init__(
 62        self,
 63        attribute_relationships_dict: dict,
 64        data_model_labels: DisplayLabelType = "class_label",
 65    ) -> None:
 66        """Load parsed data model.
 67        Args:
 68            attributes_relationship_dict, dict: generated in data_model_parser
 69                {Attribute Display Name: {
 70                        Relationships: {
 71                                    CSV Header: Value}}}
 72            data_model_labels: str, display_label or class_label.
 73                display_label, use the display name as a label, if it is valid
 74                (contains no blacklisted characters) otherwise will default to schema_label.
 75                class_label, default, use standard class or property label.
 76        Raises:
 77            ValueError, attribute_relationship_dict not loaded.
 78        """
 79        self.attribute_relationships_dict = attribute_relationships_dict
 80        self.dmn = DataModelNodes(self.attribute_relationships_dict)
 81        self.dme = DataModelEdges()
 82        self.dmr = DataModelRelationships()
 83        self.data_model_labels = data_model_labels
 84
 85        if not self.attribute_relationships_dict:
 86            raise ValueError(
 87                (
 88                    "Something has gone wrong, a data model was not loaded into the DataModelGraph "
 89                    "Class. Please check that your paths are correct"
 90                )
 91            )
 92        self.graph = self.generate_data_model_graph()
 93
 94    @tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
 95    def generate_data_model_graph(self) -> nx.MultiDiGraph:
 96        """
 97        Generate NetworkX Graph from the Relationships/attributes dictionary, the graph is built
 98          by first adding all nodes to the graph, then connecting nodes by the relationships defined
 99          in the attributes_relationship dictionary.
100        Returns:
101            G: nx.MultiDiGraph, networkx graph representation of the data model
102        """
103        # Get all relationships with edges
104        edge_relationships = self.dmr.retrieve_rel_headers_dict(edge=True)
105
106        # Find all nodes
107        all_nodes = self.dmn.gather_all_nodes_in_model(
108            attr_rel_dict=self.attribute_relationships_dict
109        )
110
111        # Instantiate NetworkX MultiDigraph
112        graph: nx.MultiDiGraph = nx.MultiDiGraph()
113
114        all_node_dict = {}
115
116        ## Fill in MultiDigraph with nodes
117        for node in all_nodes:
118            # Gather information for each node
119            node_dict = self.dmn.generate_node_dict(
120                node_display_name=node,
121                attr_rel_dict=self.attribute_relationships_dict,
122                data_model_labels=self.data_model_labels,
123            )
124
125            # Add each node to the all_node_dict to be used for generating edges
126            all_node_dict[node] = node_dict
127
128            # Generate node and attach information (attributes) to each node
129            graph = self.dmn.generate_node(graph, node_dict)
130
131        edge_list: list[tuple[str, str, dict[str, Union[str, int]]]] = []
132        ## Connect nodes via edges
133        for node in all_nodes:
134            # Generate edges
135            edge_list_2 = self.dme.generate_edge(
136                node,
137                all_node_dict,
138                self.attribute_relationships_dict,
139                edge_relationships,
140                edge_list,
141            )
142            edge_list = edge_list_2.copy()
143
144        # Add edges to the Graph
145        for node_1, node_2, edge_dict in edge_list:
146            graph.add_edge(
147                node_1, node_2, key=edge_dict["key"], weight=edge_dict["weight"]
148            )
149        return graph
150
151
152class DataModelGraphExplorer:  # pylint: disable=too-many-public-methods
153    """DataModelGraphExplorer"""
154
155    def __init__(
156        self,
157        graph: nx.MultiDiGraph,
158    ):
159        """Load data model graph as a singleton.
160        Args:
161            G: nx.MultiDiGraph, networkx graph representation of the data model
162        """
163        self.graph = graph  # At this point the graph is expected to be fully formed.
164        self.dmr = DataModelRelationships()
165
166    def find_properties(self) -> set[str]:
167        """
168        Identify all properties, as defined by the first node in a pair, connected with
169        'domainIncludes' edge type
170
171        Returns:
172            properties, set: All properties defined in the data model, each property name
173              is defined by its label.
174        """
175        properties_list: list[str] = []
176        for node_1, _, rel in self.graph.edges:
177            if rel == self.dmr.get_relationship_value("domainIncludes", "edge_key"):
178                properties_list.append(node_1)
179        properties_set = set(properties_list)
180        return properties_set
181
182    def find_classes(self) -> AbstractSet[str]:
183        """
184        Identify all classes, as defined but all nodes, minus all properties
185        (which are explicitly defined)
186        Returns:
187            classes, set:  All classes defined in the data model, each class
188              name is defined by its label.
189        """
190        nodes = self.graph.nodes
191        properties = self.find_properties()
192        classes = nodes - properties
193        return classes
194
195    def find_node_range(
196        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
197    ) -> list:
198        """Get valid values for the given node (attribute)
199        Args:
200            node_label, str, Optional[str]: label of the node for which to retrieve valid values
201            node_display_name, str, Optional[str]: Display Name of the node for which to
202              retrieve valid values
203        Returns:
204            valid_values, list: List of valid values associated with the provided node.
205        """
206        node_label = self._get_node_label(node_label, node_display_name)
207
208        valid_values = []
209        for node_1, node_2, rel in self.graph.edges:
210            if node_1 == node_label and rel == self.dmr.get_relationship_value(
211                "rangeIncludes", "edge_key"
212            ):
213                valid_values.append(node_2)
214        valid_values = list(set(valid_values))
215        return valid_values
216
217    def get_adjacent_nodes_by_relationship(
218        self, node_label: str, relationship: str
219    ) -> list[str]:
220        """Get a list of nodes that is / are adjacent to a given node, based on a relationship type.
221
222        Args:
223            node_label: label of the the node whose edges we need to look at.
224            relationship: the type of link(s) that the above node and its immediate neighbors share.
225
226        Returns:
227            List of nodes that are adjacent to the given node.
228        #checked
229        """
230        nodes = set()
231        for _, node_2, key, _ in self.graph.out_edges(node_label, data=True, keys=True):
232            if key == relationship:
233                nodes.add(node_2)
234
235        return list(nodes)
236
237    def get_component_node_required(
238        self,
239        manifest_component: str,
240        node_validation_rules: Optional[list[str]] = None,
241        node_label: Optional[str] = None,
242        node_display_name: Optional[str] = None,
243    ) -> bool:
244        """Check if a node is required taking into account the manifest component it is defined in
245        (requirements can be set in validation rule as well as required column)
246        Args:
247            manifest_component: str, manifest component display name that the node belongs to.
248            node_validation_rules: list[str], validation rules for a given node and component.
249            node_label: str, Label of the node you would want to get the comment for.
250            node_display_name: str, node display name for the node being queried.
251        Returns:
252            True, if node is required, False if not
253        """
254        node_required = False
255
256        if not node_validation_rules:
257            # Get node validation rules for a given component
258            node_validation_rules = self.get_component_node_validation_rules(
259                manifest_component=manifest_component,
260                node_label=node_label,
261                node_display_name=node_display_name,
262            )
263
264        # Check if the validation rule specifies that the node is required for this particular
265        # component.
266        if rule_in_rule_list("required", node_validation_rules):
267            node_required = True
268            # To prevent any unintended errors, ensure the Required field for this node is False
269            if self.get_node_required(
270                node_label=node_label, node_display_name=node_display_name
271            ):
272                if not node_display_name:
273                    assert node_label is not None
274                    node_display_name = self.graph.nodes[node_label][
275                        self.dmr.get_relationship_value("displayName", "node_label")
276                    ]
277                error_str = " ".join(
278                    [
279                        f"For component: {manifest_component} and attribute: {node_display_name}",
280                        "requirements are being specified in both the Required field and in the",
281                        "Validation Rules. If you desire to use validation rules to set component",
282                        "specific requirements for this attribute",
283                        "then the Required field needs to be set to False, or the validation may",
284                        "not work as intended, for other components where the attribute",
285                        "that should not be required.",
286                    ]
287                )
288
289                logger.error(error_str)
290        else:
291            # If requirements are not being set in the validation rule, then just pull the
292            # standard node requirements from the model
293            node_required = self.get_node_required(
294                node_label=node_label, node_display_name=node_display_name
295            )
296        return node_required
297
298    def get_component_node_validation_rules(
299        self,
300        manifest_component: str,
301        node_label: Optional[str] = None,
302        node_display_name: Optional[str] = None,
303    ) -> list:
304        """Get validation rules for a given node and component.
305        Args:
306            manifest_component: str, manifest component display name that the node belongs to.
307            node_label: str, Label of the node you would want to get the comment for.
308            node_display_name: str, node display name for the node being queried.
309        Returns:
310            validation_rules: list, validation rules list for a given node and component.
311        """
312        # get any additional validation rules associated with this node (e.g. can this node
313        # be mapped to a list of other nodes)
314        node_validation_rules = self.get_node_validation_rules(
315            node_label=node_label, node_display_name=node_display_name
316        )
317
318        # Parse the validation rules per component if applicable
319        if node_validation_rules and isinstance(node_validation_rules, dict):
320            node_validation_rules_list = extract_component_validation_rules(
321                manifest_component=manifest_component,
322                validation_rules_dict=node_validation_rules,  # type: ignore
323            )
324        else:
325            assert isinstance(node_validation_rules, list)
326            node_validation_rules_list = node_validation_rules
327        return node_validation_rules_list
328
329    def get_component_requirements(
330        self,
331        source_component: str,
332    ) -> list[str]:
333        """
334        Get all components that are associated with a given source component and are
335          required by it.
336
337        Args:
338            source_component: source component for which we need to find all required downstream
339              components.
340
341        Returns:
342            List of nodes that are descendants from the source component are are related to the
343              source through a specific component relationship.
344        """
345
346        req_components = list(
347            reversed(
348                self.get_descendants_by_edge_type(
349                    source_component,
350                    self.dmr.get_relationship_value("requiresComponent", "edge_key"),
351                    ordered=True,
352                )
353            )
354        )
355
356        return req_components
357
358    def get_component_requirements_graph(
359        self,
360        source_component: str,
361    ) -> nx.Graph:
362        """
363        Get all components that are associated with a given source component and are required by it;
364          return the components as a dependency graph (i.e. a DAG).
365
366        Args:
367            source_component, str: source component for which we need to find all required
368              downstream components.
369
370        Returns:
371            A subgraph of the schema graph induced on nodes that are descendants from the source
372              component and are related to the source through a specific component relationship.
373        """
374
375        # get a list of required component nodes
376        req_components = self.get_component_requirements(source_component)
377
378        # get the subgraph induced on required component nodes
379        req_components_graph = self.get_subgraph_by_edge_type(
380            self.dmr.get_relationship_value("requiresComponent", "edge_key"),
381        ).subgraph(req_components)
382
383        return req_components_graph
384
385    def get_descendants_by_edge_type(
386        self,
387        source_node: str,
388        relationship: str,
389        connected: bool = True,
390        ordered: bool = False,
391    ) -> list[str]:
392        """
393        Get all nodes that are descendants of a given source node, based on a specific
394          type of edge / relationship type.
395
396        Args:
397            source_node: The node whose descendants need to be retrieved.
398            relationship: Edge / link relationship type with possible values same as in above docs.
399            connected:
400              If True, we need to ensure that all descendant nodes are reachable from the source
401                node, i.e., they are part of the same connected component.
402              If False, the descendants could be in multiple connected components.
403              Default value is True.
404            ordered:
405              If True, the list of descendants will be topologically ordered.
406              If False, the list has no particular order (depends on the order in which the
407                descendants were traversed in the subgraph).
408
409        Returns:
410            List of nodes that are descendants from a particular node (sorted / unsorted)
411        """
412
413        root_descendants = nx.descendants(self.graph, source_node)
414
415        subgraph_nodes = list(root_descendants)
416        subgraph_nodes.append(source_node)
417        descendants_subgraph = self.graph.subgraph(subgraph_nodes)
418
419        # prune the descendants subgraph so as to include only those edges that match
420        # the relationship type
421        rel_edges = []
422        for node_1, node_2, key, _ in descendants_subgraph.edges(data=True, keys=True):
423            if key == relationship:
424                rel_edges.append((node_1, node_2))
425
426        relationship_subgraph: nx.DiGraph = nx.DiGraph()
427        relationship_subgraph.add_edges_from(rel_edges)
428
429        descendants = relationship_subgraph.nodes()
430
431        if not descendants:
432            # return empty list if there are no nodes that are reachable from the
433            # source node based on this relationship type
434            return []
435
436        if connected and ordered:
437            # get the set of reachable nodes from the source node
438            descendants = nx.descendants(relationship_subgraph, source_node)
439            descendants.add(source_node)
440
441            # normally, the descendants from a node are unordered (peculiarity
442            # of nx descendants call)
443            # form the subgraph on descendants and order it topologically
444            # this assumes an acyclic subgraph
445            descendants = nx.topological_sort(
446                relationship_subgraph.subgraph(descendants)
447            )
448        elif connected:
449            # get the nodes that are reachable from a given source node
450            # after the pruning process above some nodes in the
451            # root_descendants subgraph might have become disconnected and
452            # will be omitted
453            descendants = nx.descendants(relationship_subgraph, source_node)
454            descendants.add(source_node)
455        elif ordered:
456            # sort the nodes topologically
457            # this requires the graph to be an acyclic graph
458            descendants = nx.topological_sort(relationship_subgraph)
459
460        return list(descendants)
461
462    def get_digraph_by_edge_type(self, edge_type: str) -> nx.DiGraph:
463        """Get a networkx digraph of the nodes connected via a given edge_type.
464        Args:
465            edge_type:
466                Edge type to search for, possible types are defined by 'edge_key'
467                  in relationship class
468        Returns:
469        """
470
471        digraph: nx.DiGraph = nx.DiGraph()
472        for node_1, node_2, key, _ in self.graph.edges(data=True, keys=True):
473            if key == edge_type:
474                digraph.add_edge(node_1, node_2)
475        return digraph
476
477    def get_edges_by_relationship(
478        self,
479        node: str,
480        relationship: str,
481    ) -> list[tuple[str, str]]:
482        """Get a list of out-edges of a node where the edges match a specific type of relationship.
483
484        i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf"
485          (set of edges to children / sub-class nodes).
486
487        Args:
488            node: the node whose edges we need to look at.
489            relationship: the type of link(s) that the above node and its immediate neighbors share.
490
491        Returns:
492            List of edges that are connected to the node.
493        """
494        edges: list[tuple[str, str]] = []
495
496        for node_1, node_2, key, _ in self.graph.out_edges(node, data=True, keys=True):
497            if key == relationship:
498                edges.append((node_1, node_2))
499
500        return edges
501
502    def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
503        """
504        Order the values associated with a particular node and edge_key to
505          match original ordering in schema.
506
507        Args:
508            key (str): a key representing and edge relationship in
509              DataModelRelationships.relationships_dictionary
510            source_node_label (str): node to look for edges of and order
511
512        Raises:
513            KeyError: cannot find source node in graph
514
515        Returns:
516            list[str]:
517              list of sorted nodes, that share the specified relationship with the source node
518              For the example data model, for key='rangeIncludes', source_node_label='CancerType'
519                the return would be ['Breast, 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that
520                exact order.
521        """
522        # Check if node is in the graph, if not throw an error.
523        if not self.is_class_in_schema(node_label=source_node_label):
524            raise KeyError(
525                f"Cannot find node: {source_node_label} in the graph, please check entry."
526            )
527
528        edge_key = self.dmr.get_relationship_value(key, "edge_key")
529
530        # Handle out edges
531        if self.dmr.get_relationship_value(key, "jsonld_direction") == "out":
532            # use out edges
533
534            original_edge_weights_dict = {
535                attached_node: self.graph[source_node][attached_node][edge_key][
536                    "weight"
537                ]
538                for source_node, attached_node in self.graph.out_edges(
539                    source_node_label
540                )
541                if edge_key in self.graph[source_node][attached_node]
542            }
543        # Handle in edges
544        else:
545            # use in edges
546            original_edge_weights_dict = {
547                attached_node: self.graph[attached_node][source_node][edge_key][
548                    "weight"
549                ]
550                for attached_node, source_node in self.graph.in_edges(source_node_label)
551                if edge_key in self.graph[attached_node][source_node]
552            }
553
554        sorted_nodes = list(
555            dict(
556                sorted(original_edge_weights_dict.items(), key=lambda item: item[1])
557            ).keys()
558        )
559
560        return sorted_nodes
561
562    # Get values associated with a node
563    def get_nodes_ancestors(self, subgraph: nx.DiGraph, node_label: str) -> list[str]:
564        """Get a list of nodes reachable from source component in graph
565
566        Args:
567            subgraph (nx.DiGraph): networkx graph object
568            node_label (str): label of node to find ancestors for
569
570        Returns:
571            list[str]: nodes reachable from source in graph
572        """
573        all_ancestors = list(nx.ancestors(subgraph, node_label))
574
575        return all_ancestors
576
577    def get_node_comment(
578        self, node_display_name: Optional[str] = None, node_label: Optional[str] = None
579    ) -> str:
580        """Get the node definition, i.e., the "comment" associated with a given node display name.
581
582        Args:
583            node_display_name, str: Display name of the node which you want to get the comment for.
584            node_label, str: Label of the node you would want to get the comment for.
585        Returns:
586            Comment associated with node, as a string.
587        """
588        node_label = self._get_node_label(node_label, node_display_name)
589
590        if not node_label:
591            return ""
592
593        node_definition = self.graph.nodes[node_label][
594            self.dmr.get_relationship_value("comment", "node_label")
595        ]
596        return node_definition
597
598    def get_node_dependencies(
599        self,
600        source_node: str,
601        display_names: bool = True,
602        schema_ordered: bool = True,
603    ) -> list[str]:
604        """Get the immediate dependencies that are related to a given source node.
605
606        Args:
607            source_node: The node whose dependencies we need to compute.
608            display_names: if True, return list of display names of each of the dependencies.
609                           if False, return list of node labels of each of the dependencies.
610            schema_ordered:
611              if True, return the dependencies of the node following the order of the schema
612                (slower).
613              if False, return dependencies from graph without guaranteeing schema order (faster)
614
615        Returns:
616            List of nodes that are dependent on the source node.
617        """
618
619        if schema_ordered:
620            # get dependencies in the same order in which they are defined in the schema
621            required_dependencies = self.get_ordered_entry(
622                key=self.dmr.get_relationship_value("requiresDependency", "edge_key"),
623                source_node_label=source_node,
624            )
625        else:
626            required_dependencies = self.get_adjacent_nodes_by_relationship(
627                node_label=source_node,
628                relationship=self.dmr.get_relationship_value(
629                    "requiresDependency", "edge_key"
630                ),
631            )
632
633        if display_names:
634            # get display names of dependencies
635            dependencies_display_names = []
636
637            for req in required_dependencies:
638                dependencies_display_names.append(
639                    self.graph.nodes[req][
640                        self.dmr.get_relationship_value("displayName", "node_label")
641                    ]
642                )
643
644            return dependencies_display_names
645
646        return required_dependencies
647
648    def get_nodes_descendants(self, node_label: str) -> list[str]:
649        """Return a list of nodes reachable from source in graph
650        Args:
651            node_label, str: any given node
652        Return:
653            all_descendants, list: nodes reachable from source in graph
654        """
655        all_descendants = list(nx.descendants(self.graph, node_label))
656
657        return all_descendants
658
659    def get_nodes_display_names(
660        self,
661        node_list: list[str],
662    ) -> list[str]:
663        """Get display names associated with the given list of nodes.
664
665        Args:
666            node_list: List of nodes whose display names we need to retrieve.
667
668        Returns:
669            List of display names.
670        """
671        node_list_display_names = [
672            self.graph.nodes[node][
673                self.dmr.get_relationship_value("displayName", "node_label")
674            ]
675            for node in node_list
676        ]
677
678        return node_list_display_names
679
680    def get_node_label(self, node_display_name: str) -> str:
681        """Get the node label for a given display name.
682
683        Args:
684            node_display_name: Display name of the node which you want to get the label for.
685        Returns:
686            Node label associated with given node.
687            If display name not part of schema, return an empty string.
688        """
689
690        node_class_label = get_class_label_from_display_name(
691            display_name=node_display_name
692        )
693        node_property_label = get_property_label_from_display_name(
694            display_name=node_display_name
695        )
696
697        if node_class_label in self.graph.nodes:
698            node_label = node_class_label
699        elif node_property_label in self.graph.nodes:
700            node_label = node_property_label
701        else:
702            node_label = ""
703
704        return node_label
705
706    def get_node_range(
707        self,
708        node_label: Optional[str] = None,
709        node_display_name: Optional[str] = None,
710        display_names: bool = False,
711    ) -> list[str]:
712        """
713        Get the range, i.e., all the valid values that are associated with a node label.
714
715
716        Args:
717            node_label (Optional[str], optional): Node for which you need to retrieve the range.
718              Defaults to None.
719            node_display_name (Optional[str], optional): _description_. Defaults to None.
720            display_names (bool, optional): _description_. Defaults to False.
721
722        Raises:
723            ValueError: If the node cannot be found in the graph.
724
725        Returns:
726            list[str]:
727              If display_names=False, a list of valid values (labels) associated with a given node.
728              If display_names=True, a list of valid values (display names) associated
729                with a given node
730        """
731        node_label = self._get_node_label(node_label, node_display_name)
732        try:
733            # get node range in the order defined in schema for given node
734            required_range = self.find_node_range(node_label=node_label)
735        except KeyError as exc:
736            raise ValueError(
737                f"The source node {node_label} does not exist in the graph. "
738                "Please use a different node."
739            ) from exc
740
741        if display_names:
742            # get the display name(s) of all dependencies
743            dependencies_display_names = []
744
745            for req in required_range:
746                dependencies_display_names.append(self.graph.nodes[req]["displayName"])
747
748            return dependencies_display_names
749
750        return required_range
751
752    def get_node_required(
753        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
754    ) -> bool:
755        """Check if a given node is required or not.
756
757        Note: The possible options that a node can be associated with -- "required" / "optional".
758
759        Args:
760            node_label: Label of the node for which you need to look up.
761            node_display_name: Display name of the node for which you want look up.
762        Returns:
763            True: If the given node is a "required" node.
764            False: If the given node is not a "required" (i.e., an "optional") node.
765        """
766        node_label = self._get_node_label(node_label, node_display_name)
767        rel_node_label = self.dmr.get_relationship_value("required", "node_label")
768        node_required = self.graph.nodes[node_label][rel_node_label]
769        return node_required
770
771    def get_node_validation_rules(
772        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
773    ) -> Union[list, dict[str, str]]:
774        """Get validation rules associated with a node,
775
776        Args:
777            node_label: Label of the node for which you need to look up.
778            node_display_name: Display name of the node which you want to get the label for.
779        Returns:
780            A set of validation rules associated with node, as a list or a dictionary.
781        """
782        node_label = self._get_node_label(node_label, node_display_name)
783
784        if not node_label:
785            return []
786
787        try:
788            node_validation_rules = self.graph.nodes[node_label]["validationRules"]
789        except KeyError as key_error:
790            raise ValueError(
791                f"{node_label} is not in the graph, please provide a proper node label"
792            ) from key_error
793
794        return node_validation_rules
795
796    def get_subgraph_by_edge_type(self, relationship: str) -> nx.DiGraph:
797        """Get a subgraph containing all edges of a given type (aka relationship).
798
799        Args:
800            relationship: edge / link relationship type with possible values same as in above docs.
801
802        Returns:
803            Directed graph on edges of a particular type (aka relationship)
804        """
805
806        # prune the metadata model graph so as to include only those edges that
807        # match the relationship type
808        rel_edges = []
809        for node_1, node_2, key, _ in self.graph.out_edges(data=True, keys=True):
810            if key == relationship:
811                rel_edges.append((node_1, node_2))
812
813        relationship_subgraph: nx.DiGraph = nx.DiGraph()
814        relationship_subgraph.add_edges_from(rel_edges)
815
816        return relationship_subgraph
817
818    def find_adjacent_child_classes(
819        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
820    ) -> list[str]:
821        """Find child classes of a given node.
822        Args:
823            node_display_name: Display name of the node to look up.
824            node_label: Label of the node to look up.
825        Returns:
826            List of nodes that are adjacent to the given node, by SubclassOf relationship.
827        """
828        node_label = self._get_node_label(node_label, node_display_name)
829        return self.get_adjacent_nodes_by_relationship(
830            node_label=node_label,
831            relationship=self.dmr.get_relationship_value("subClassOf", "edge_key"),
832        )
833
834    def find_child_classes(self, schema_class: str) -> list:
835        """Find schema classes that inherit from the given class
836        Args:
837            schema_class: node label for the class to from which to look for children.
838        Returns:
839            list of children to the schema_class.
840        """
841        child_classes = unlist(list(self.graph.successors(schema_class)))
842        assert isinstance(child_classes, list)
843        return child_classes
844
845    def find_class_specific_properties(self, schema_class: str) -> list[str]:
846        """Find properties specifically associated with a given class
847        Args:
848            schema_class, str: node/class label, to identify properties for.
849        Returns:
850            properties, list: List of properties associate with a given schema class.
851        Raises:
852            KeyError: Key error is raised if the provided schema_class is not in the graph
853        """
854
855        if not self.is_class_in_schema(schema_class):
856            raise KeyError(
857                (
858                    f"Schema_class provided: {schema_class} is not in the data model, please check "
859                    "that you are providing the proper class/node label"
860                )
861            )
862
863        properties = []
864        for node1, node2 in self.graph.edges():
865            if (
866                node2 == schema_class
867                and "domainValue" in self.graph[node1][schema_class]
868            ):
869                properties.append(node1)
870        return properties
871
872    def find_parent_classes(self, node_label: str) -> list[list[str]]:
873        """Find all parents of the provided node
874        Args:
875            node_label: label of the node to find parents of
876        Returns:
877            List of list of Parents to the given node.
878        """
879        # Get digraph of nodes with parents
880        digraph = self.get_digraph_by_edge_type("parentOf")
881
882        # Get root node
883        root_node = list(nx.topological_sort(digraph))[0]
884
885        # Get paths between root_node and the target node.
886        paths = nx.all_simple_paths(self.graph, source=root_node, target=node_label)
887
888        return [_path[:-1] for _path in paths]
889
890    def full_schema_graph(self, size: Optional[int] = None) -> graphviz.Digraph:
891        """Create a graph of the data model.
892        Args:
893            size, float: max height and width of the graph, if one value provided
894               it is used for both.
895        Returns:
896            schema graph viz
897        """
898        edges = self.graph.edges()
899        return visualize(edges, size=size)
900
901    def is_class_in_schema(self, node_label: str) -> bool:
902        """Determine if provided node_label is in the schema graph/data model.
903        Args:
904            node_label: label of node to search for in the
905        Returns:
906            True, if node is in the graph schema
907            False, if node is not in graph schema
908        """
909        return node_label in self.graph.nodes()
910
911    def sub_schema_graph(
912        self, source: str, direction: str, size: Optional[float] = None
913    ) -> Optional[graphviz.Digraph]:
914        """Create a sub-schema graph
915        Args:
916            source, str: source node label to start graph
917            direction, str: direction to create the visualization, choose from "up", "down", "both"
918            size, float: max height and width of the graph, if one value provided it is used for
919              both.
920        Returns:
921            Sub-schema graph viz
922        """
923        if direction == "down":
924            edges = list(nx.edge_bfs(self.graph, [source]))
925            return visualize(edges, size=size)
926        if direction == "up":
927            paths = self.find_parent_classes(source)
928            edges = []
929            for _path in paths:
930                _path.append(source)
931                for i in range(0, len(_path) - 1):
932                    edges.append((_path[i], _path[i + 1]))
933            return visualize(edges, size=size)
934        if direction == "both":
935            paths = self.find_parent_classes(source)
936            edges = list(nx.edge_bfs(self.graph, [source]))
937            for _path in paths:
938                _path.append(source)
939                for i in range(0, len(_path) - 1):
940                    edges.append((_path[i], _path[i + 1]))
941            return visualize(edges, size=size)
942        return None
943
944    def get_node_column_type(
945        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
946    ) -> Optional[JSONSchemaType]:
947        """Gets the column type of the node
948
949        Args:
950            node_label: The label of the node to get the type from
951            node_display_name: The display name of the node to get the type from
952
953        Returns:
954            The column type of the node if it has one, otherwise None
955        """
956        node_label = self._get_node_label(node_label, node_display_name)
957        rel_node_label = self.dmr.get_relationship_value("columnType", "node_label")
958        return self.graph.nodes[node_label][rel_node_label]
959
960    def _get_node_label(
961        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
962    ) -> str:
963        """Returns the node label if given otherwise gets the node label from the display name
964
965        Args:
966            node_label: The label of the node to get the type from
967            node_display_name: The display name of the node to get the type from
968
969        Raises:
970            ValueError: If neither node_label or node_display_name is provided
971
972        Returns:
973            The node label
974        """
975        if node_label is not None:
976            return node_label
977        if node_display_name is not None:
978            return self.get_node_label(node_display_name)
979        raise ValueError("Either 'node_label' or 'node_display_name' must be provided.")
logger = <Logger schematic.schemas.data_model_graph (WARNING)>
tracer = <opentelemetry.sdk.trace.Tracer object>
class DataModelGraphMeta:
class DataModelGraphMeta:  # pylint: disable=too-few-public-methods
    """Instance-caching callable written in the shape of a singleton metaclass.

    NOTE(review): the class has no `type` base, so `super().__call__` resolves against
    `object` -- confirm whether a real metaclass was intended.
    """

    # Cache of created instances, keyed by the object the call was made on.
    _instances: dict = {}

    def __call__(  # pylint: disable=no-self-argument
        cls, *args: Any, **kwargs: Any
    ) -> Any:
        """
        Return the cached instance for `cls`, creating it on the first call.

        Later calls ignore their arguments: changes to the `__init__` arguments do
        not affect the returned instance.
        """
        if cls in cls._instances:
            return cls._instances[cls]
        created = super().__call__(*args, **kwargs)  # type: ignore # pylint: disable=no-member
        cls._instances[cls] = created
        return cls._instances[cls]

DataModelGraphMeta

class DataModelGraph:
 52class DataModelGraph:  # pylint: disable=too-few-public-methods
 53    """
 54    Generate graph network (networkx) from the attributes and relationships returned
 55    from the data model parser.
 56
 57    Create a singleton.
 58    """
 59
 60    __metaclass__ = DataModelGraphMeta
 61
 62    def __init__(
 63        self,
 64        attribute_relationships_dict: dict,
 65        data_model_labels: DisplayLabelType = "class_label",
 66    ) -> None:
 67        """Load parsed data model.
 68        Args:
 69            attributes_relationship_dict, dict: generated in data_model_parser
 70                {Attribute Display Name: {
 71                        Relationships: {
 72                                    CSV Header: Value}}}
 73            data_model_labels: str, display_label or class_label.
 74                display_label, use the display name as a label, if it is valid
 75                (contains no blacklisted characters) otherwise will default to schema_label.
 76                class_label, default, use standard class or property label.
 77        Raises:
 78            ValueError, attribute_relationship_dict not loaded.
 79        """
 80        self.attribute_relationships_dict = attribute_relationships_dict
 81        self.dmn = DataModelNodes(self.attribute_relationships_dict)
 82        self.dme = DataModelEdges()
 83        self.dmr = DataModelRelationships()
 84        self.data_model_labels = data_model_labels
 85
 86        if not self.attribute_relationships_dict:
 87            raise ValueError(
 88                (
 89                    "Something has gone wrong, a data model was not loaded into the DataModelGraph "
 90                    "Class. Please check that your paths are correct"
 91                )
 92            )
 93        self.graph = self.generate_data_model_graph()
 94
 95    @tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
 96    def generate_data_model_graph(self) -> nx.MultiDiGraph:
 97        """
 98        Generate NetworkX Graph from the Relationships/attributes dictionary, the graph is built
 99          by first adding all nodes to the graph, then connecting nodes by the relationships defined
100          in the attributes_relationship dictionary.
101        Returns:
102            G: nx.MultiDiGraph, networkx graph representation of the data model
103        """
104        # Get all relationships with edges
105        edge_relationships = self.dmr.retrieve_rel_headers_dict(edge=True)
106
107        # Find all nodes
108        all_nodes = self.dmn.gather_all_nodes_in_model(
109            attr_rel_dict=self.attribute_relationships_dict
110        )
111
112        # Instantiate NetworkX MultiDigraph
113        graph: nx.MultiDiGraph = nx.MultiDiGraph()
114
115        all_node_dict = {}
116
117        ## Fill in MultiDigraph with nodes
118        for node in all_nodes:
119            # Gather information for each node
120            node_dict = self.dmn.generate_node_dict(
121                node_display_name=node,
122                attr_rel_dict=self.attribute_relationships_dict,
123                data_model_labels=self.data_model_labels,
124            )
125
126            # Add each node to the all_node_dict to be used for generating edges
127            all_node_dict[node] = node_dict
128
129            # Generate node and attach information (attributes) to each node
130            graph = self.dmn.generate_node(graph, node_dict)
131
132        edge_list: list[tuple[str, str, dict[str, Union[str, int]]]] = []
133        ## Connect nodes via edges
134        for node in all_nodes:
135            # Generate edges
136            edge_list_2 = self.dme.generate_edge(
137                node,
138                all_node_dict,
139                self.attribute_relationships_dict,
140                edge_relationships,
141                edge_list,
142            )
143            edge_list = edge_list_2.copy()
144
145        # Add edges to the Graph
146        for node_1, node_2, edge_dict in edge_list:
147            graph.add_edge(
148                node_1, node_2, key=edge_dict["key"], weight=edge_dict["weight"]
149            )
150        return graph

Generate graph network (networkx) from the attributes and relationships returned from the data model parser.

Create a singleton.

DataModelGraph( attribute_relationships_dict: dict, data_model_labels: Literal['class_label', 'display_label'] = 'class_label')
62    def __init__(
63        self,
64        attribute_relationships_dict: dict,
65        data_model_labels: DisplayLabelType = "class_label",
66    ) -> None:
67        """Load parsed data model.
68        Args:
69            attributes_relationship_dict, dict: generated in data_model_parser
70                {Attribute Display Name: {
71                        Relationships: {
72                                    CSV Header: Value}}}
73            data_model_labels: str, display_label or class_label.
74                display_label, use the display name as a label, if it is valid
75                (contains no blacklisted characters) otherwise will default to schema_label.
76                class_label, default, use standard class or property label.
77        Raises:
78            ValueError, attribute_relationship_dict not loaded.
79        """
80        self.attribute_relationships_dict = attribute_relationships_dict
81        self.dmn = DataModelNodes(self.attribute_relationships_dict)
82        self.dme = DataModelEdges()
83        self.dmr = DataModelRelationships()
84        self.data_model_labels = data_model_labels
85
86        if not self.attribute_relationships_dict:
87            raise ValueError(
88                (
89                    "Something has gone wrong, a data model was not loaded into the DataModelGraph "
90                    "Class. Please check that your paths are correct"
91                )
92            )
93        self.graph = self.generate_data_model_graph()

Load parsed data model.

Arguments:
  • attribute_relationships_dict, dict: generated in data_model_parser {Attribute Display Name: { Relationships: { CSV Header: Value}}}
  • data_model_labels: str, display_label or class_label. display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to schema_label. class_label, default, use standard class or property label.
Raises:
  • ValueError: if attribute_relationships_dict is empty or was not loaded.
attribute_relationships_dict
dmn
dme
dmr
data_model_labels
graph
@tracer.start_as_current_span('DataModelGraph::generate_data_model_graph')
def generate_data_model_graph(self) -> networkx.classes.multidigraph.MultiDiGraph:
 95    @tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
 96    def generate_data_model_graph(self) -> nx.MultiDiGraph:
 97        """
 98        Generate NetworkX Graph from the Relationships/attributes dictionary, the graph is built
 99          by first adding all nodes to the graph, then connecting nodes by the relationships defined
100          in the attributes_relationship dictionary.
101        Returns:
102            G: nx.MultiDiGraph, networkx graph representation of the data model
103        """
104        # Get all relationships with edges
105        edge_relationships = self.dmr.retrieve_rel_headers_dict(edge=True)
106
107        # Find all nodes
108        all_nodes = self.dmn.gather_all_nodes_in_model(
109            attr_rel_dict=self.attribute_relationships_dict
110        )
111
112        # Instantiate NetworkX MultiDigraph
113        graph: nx.MultiDiGraph = nx.MultiDiGraph()
114
115        all_node_dict = {}
116
117        ## Fill in MultiDigraph with nodes
118        for node in all_nodes:
119            # Gather information for each node
120            node_dict = self.dmn.generate_node_dict(
121                node_display_name=node,
122                attr_rel_dict=self.attribute_relationships_dict,
123                data_model_labels=self.data_model_labels,
124            )
125
126            # Add each node to the all_node_dict to be used for generating edges
127            all_node_dict[node] = node_dict
128
129            # Generate node and attach information (attributes) to each node
130            graph = self.dmn.generate_node(graph, node_dict)
131
132        edge_list: list[tuple[str, str, dict[str, Union[str, int]]]] = []
133        ## Connect nodes via edges
134        for node in all_nodes:
135            # Generate edges
136            edge_list_2 = self.dme.generate_edge(
137                node,
138                all_node_dict,
139                self.attribute_relationships_dict,
140                edge_relationships,
141                edge_list,
142            )
143            edge_list = edge_list_2.copy()
144
145        # Add edges to the Graph
146        for node_1, node_2, edge_dict in edge_list:
147            graph.add_edge(
148                node_1, node_2, key=edge_dict["key"], weight=edge_dict["weight"]
149            )
150        return graph

Generate NetworkX Graph from the Relationships/attributes dictionary, the graph is built by first adding all nodes to the graph, then connecting nodes by the relationships defined in the attributes_relationship dictionary.

Returns:

G: nx.MultiDiGraph, networkx graph representation of the data model

class DataModelGraphExplorer:
153class DataModelGraphExplorer:  # pylint: disable=too-many-public-methods
154    """DataModelGraphExplorer"""
155
156    def __init__(
157        self,
158        graph: nx.MultiDiGraph,
159    ):
160        """Load data model graph as a singleton.
161        Args:
162            G: nx.MultiDiGraph, networkx graph representation of the data model
163        """
164        self.graph = graph  # At this point the graph is expected to be fully formed.
165        self.dmr = DataModelRelationships()
166
167    def find_properties(self) -> set[str]:
168        """
169        Identify all properties, as defined by the first node in a pair, connected with
170        'domainIncludes' edge type
171
172        Returns:
173            properties, set: All properties defined in the data model, each property name
174              is defined by its label.
175        """
176        properties_list: list[str] = []
177        for node_1, _, rel in self.graph.edges:
178            if rel == self.dmr.get_relationship_value("domainIncludes", "edge_key"):
179                properties_list.append(node_1)
180        properties_set = set(properties_list)
181        return properties_set
182
183    def find_classes(self) -> AbstractSet[str]:
184        """
185        Identify all classes, as defined but all nodes, minus all properties
186        (which are explicitly defined)
187        Returns:
188            classes, set:  All classes defined in the data model, each class
189              name is defined by its label.
190        """
191        nodes = self.graph.nodes
192        properties = self.find_properties()
193        classes = nodes - properties
194        return classes
195
196    def find_node_range(
197        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
198    ) -> list:
199        """Get valid values for the given node (attribute)
200        Args:
201            node_label, str, Optional[str]: label of the node for which to retrieve valid values
202            node_display_name, str, Optional[str]: Display Name of the node for which to
203              retrieve valid values
204        Returns:
205            valid_values, list: List of valid values associated with the provided node.
206        """
207        node_label = self._get_node_label(node_label, node_display_name)
208
209        valid_values = []
210        for node_1, node_2, rel in self.graph.edges:
211            if node_1 == node_label and rel == self.dmr.get_relationship_value(
212                "rangeIncludes", "edge_key"
213            ):
214                valid_values.append(node_2)
215        valid_values = list(set(valid_values))
216        return valid_values
217
218    def get_adjacent_nodes_by_relationship(
219        self, node_label: str, relationship: str
220    ) -> list[str]:
221        """Get a list of nodes that is / are adjacent to a given node, based on a relationship type.
222
223        Args:
224            node_label: label of the the node whose edges we need to look at.
225            relationship: the type of link(s) that the above node and its immediate neighbors share.
226
227        Returns:
228            List of nodes that are adjacent to the given node.
229        #checked
230        """
231        nodes = set()
232        for _, node_2, key, _ in self.graph.out_edges(node_label, data=True, keys=True):
233            if key == relationship:
234                nodes.add(node_2)
235
236        return list(nodes)
237
238    def get_component_node_required(
239        self,
240        manifest_component: str,
241        node_validation_rules: Optional[list[str]] = None,
242        node_label: Optional[str] = None,
243        node_display_name: Optional[str] = None,
244    ) -> bool:
245        """Check if a node is required taking into account the manifest component it is defined in
246        (requirements can be set in validation rule as well as required column)
247        Args:
248            manifest_component: str, manifest component display name that the node belongs to.
249            node_validation_rules: list[str], validation rules for a given node and component.
250            node_label: str, Label of the node you would want to get the comment for.
251            node_display_name: str, node display name for the node being queried.
252        Returns:
253            True, if node is required, False if not
254        """
255        node_required = False
256
257        if not node_validation_rules:
258            # Get node validation rules for a given component
259            node_validation_rules = self.get_component_node_validation_rules(
260                manifest_component=manifest_component,
261                node_label=node_label,
262                node_display_name=node_display_name,
263            )
264
265        # Check if the validation rule specifies that the node is required for this particular
266        # component.
267        if rule_in_rule_list("required", node_validation_rules):
268            node_required = True
269            # To prevent any unintended errors, ensure the Required field for this node is False
270            if self.get_node_required(
271                node_label=node_label, node_display_name=node_display_name
272            ):
273                if not node_display_name:
274                    assert node_label is not None
275                    node_display_name = self.graph.nodes[node_label][
276                        self.dmr.get_relationship_value("displayName", "node_label")
277                    ]
278                error_str = " ".join(
279                    [
280                        f"For component: {manifest_component} and attribute: {node_display_name}",
281                        "requirements are being specified in both the Required field and in the",
282                        "Validation Rules. If you desire to use validation rules to set component",
283                        "specific requirements for this attribute",
284                        "then the Required field needs to be set to False, or the validation may",
285                        "not work as intended, for other components where the attribute",
286                        "that should not be required.",
287                    ]
288                )
289
290                logger.error(error_str)
291        else:
292            # If requirements are not being set in the validation rule, then just pull the
293            # standard node requirements from the model
294            node_required = self.get_node_required(
295                node_label=node_label, node_display_name=node_display_name
296            )
297        return node_required
298
299    def get_component_node_validation_rules(
300        self,
301        manifest_component: str,
302        node_label: Optional[str] = None,
303        node_display_name: Optional[str] = None,
304    ) -> list:
305        """Get validation rules for a given node and component.
306        Args:
307            manifest_component: str, manifest component display name that the node belongs to.
308            node_label: str, Label of the node you would want to get the comment for.
309            node_display_name: str, node display name for the node being queried.
310        Returns:
311            validation_rules: list, validation rules list for a given node and component.
312        """
313        # get any additional validation rules associated with this node (e.g. can this node
314        # be mapped to a list of other nodes)
315        node_validation_rules = self.get_node_validation_rules(
316            node_label=node_label, node_display_name=node_display_name
317        )
318
319        # Parse the validation rules per component if applicable
320        if node_validation_rules and isinstance(node_validation_rules, dict):
321            node_validation_rules_list = extract_component_validation_rules(
322                manifest_component=manifest_component,
323                validation_rules_dict=node_validation_rules,  # type: ignore
324            )
325        else:
326            assert isinstance(node_validation_rules, list)
327            node_validation_rules_list = node_validation_rules
328        return node_validation_rules_list
329
330    def get_component_requirements(
331        self,
332        source_component: str,
333    ) -> list[str]:
334        """
335        Get all components that are associated with a given source component and are
336          required by it.
337
338        Args:
339            source_component: source component for which we need to find all required downstream
340              components.
341
342        Returns:
343            List of nodes that are descendants from the source component are are related to the
344              source through a specific component relationship.
345        """
346
347        req_components = list(
348            reversed(
349                self.get_descendants_by_edge_type(
350                    source_component,
351                    self.dmr.get_relationship_value("requiresComponent", "edge_key"),
352                    ordered=True,
353                )
354            )
355        )
356
357        return req_components
358
359    def get_component_requirements_graph(
360        self,
361        source_component: str,
362    ) -> nx.Graph:
363        """
364        Get all components that are associated with a given source component and are required by it;
365          return the components as a dependency graph (i.e. a DAG).
366
367        Args:
368            source_component, str: source component for which we need to find all required
369              downstream components.
370
371        Returns:
372            A subgraph of the schema graph induced on nodes that are descendants from the source
373              component and are related to the source through a specific component relationship.
374        """
375
376        # get a list of required component nodes
377        req_components = self.get_component_requirements(source_component)
378
379        # get the subgraph induced on required component nodes
380        req_components_graph = self.get_subgraph_by_edge_type(
381            self.dmr.get_relationship_value("requiresComponent", "edge_key"),
382        ).subgraph(req_components)
383
384        return req_components_graph
385
386    def get_descendants_by_edge_type(
387        self,
388        source_node: str,
389        relationship: str,
390        connected: bool = True,
391        ordered: bool = False,
392    ) -> list[str]:
393        """
394        Get all nodes that are descendants of a given source node, based on a specific
395          type of edge / relationship type.
396
397        Args:
398            source_node: The node whose descendants need to be retrieved.
399            relationship: Edge / link relationship type with possible values same as in above docs.
400            connected:
401              If True, we need to ensure that all descendant nodes are reachable from the source
402                node, i.e., they are part of the same connected component.
403              If False, the descendants could be in multiple connected components.
404              Default value is True.
405            ordered:
406              If True, the list of descendants will be topologically ordered.
407              If False, the list has no particular order (depends on the order in which the
408                descendants were traversed in the subgraph).
409
410        Returns:
411            List of nodes that are descendants from a particular node (sorted / unsorted)
412        """
413
414        root_descendants = nx.descendants(self.graph, source_node)
415
416        subgraph_nodes = list(root_descendants)
417        subgraph_nodes.append(source_node)
418        descendants_subgraph = self.graph.subgraph(subgraph_nodes)
419
420        # prune the descendants subgraph so as to include only those edges that match
421        # the relationship type
422        rel_edges = []
423        for node_1, node_2, key, _ in descendants_subgraph.edges(data=True, keys=True):
424            if key == relationship:
425                rel_edges.append((node_1, node_2))
426
427        relationship_subgraph: nx.DiGraph = nx.DiGraph()
428        relationship_subgraph.add_edges_from(rel_edges)
429
430        descendants = relationship_subgraph.nodes()
431
432        if not descendants:
433            # return empty list if there are no nodes that are reachable from the
434            # source node based on this relationship type
435            return []
436
437        if connected and ordered:
438            # get the set of reachable nodes from the source node
439            descendants = nx.descendants(relationship_subgraph, source_node)
440            descendants.add(source_node)
441
442            # normally, the descendants from a node are unordered (peculiarity
443            # of nx descendants call)
444            # form the subgraph on descendants and order it topologically
445            # this assumes an acyclic subgraph
446            descendants = nx.topological_sort(
447                relationship_subgraph.subgraph(descendants)
448            )
449        elif connected:
450            # get the nodes that are reachable from a given source node
451            # after the pruning process above some nodes in the
452            # root_descendants subgraph might have become disconnected and
453            # will be omitted
454            descendants = nx.descendants(relationship_subgraph, source_node)
455            descendants.add(source_node)
456        elif ordered:
457            # sort the nodes topologically
458            # this requires the graph to be an acyclic graph
459            descendants = nx.topological_sort(relationship_subgraph)
460
461        return list(descendants)
462
463    def get_digraph_by_edge_type(self, edge_type: str) -> nx.DiGraph:
464        """Get a networkx digraph of the nodes connected via a given edge_type.
465        Args:
466            edge_type:
467                Edge type to search for, possible types are defined by 'edge_key'
468                  in relationship class
469        Returns:
470        """
471
472        digraph: nx.DiGraph = nx.DiGraph()
473        for node_1, node_2, key, _ in self.graph.edges(data=True, keys=True):
474            if key == edge_type:
475                digraph.add_edge(node_1, node_2)
476        return digraph
477
478    def get_edges_by_relationship(
479        self,
480        node: str,
481        relationship: str,
482    ) -> list[tuple[str, str]]:
483        """Get a list of out-edges of a node where the edges match a specific type of relationship.
484
485        i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf"
486          (set of edges to children / sub-class nodes).
487
488        Args:
489            node: the node whose edges we need to look at.
490            relationship: the type of link(s) that the above node and its immediate neighbors share.
491
492        Returns:
493            List of edges that are connected to the node.
494        """
495        edges: list[tuple[str, str]] = []
496
497        for node_1, node_2, key, _ in self.graph.out_edges(node, data=True, keys=True):
498            if key == relationship:
499                edges.append((node_1, node_2))
500
501        return edges
502
503    def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
504        """
505        Order the values associated with a particular node and edge_key to
506          match original ordering in schema.
507
508        Args:
509            key (str): a key representing and edge relationship in
510              DataModelRelationships.relationships_dictionary
511            source_node_label (str): node to look for edges of and order
512
513        Raises:
514            KeyError: cannot find source node in graph
515
516        Returns:
517            list[str]:
518              list of sorted nodes, that share the specified relationship with the source node
519              For the example data model, for key='rangeIncludes', source_node_label='CancerType'
520                the return would be ['Breast, 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that
521                exact order.
522        """
523        # Check if node is in the graph, if not throw an error.
524        if not self.is_class_in_schema(node_label=source_node_label):
525            raise KeyError(
526                f"Cannot find node: {source_node_label} in the graph, please check entry."
527            )
528
529        edge_key = self.dmr.get_relationship_value(key, "edge_key")
530
531        # Handle out edges
532        if self.dmr.get_relationship_value(key, "jsonld_direction") == "out":
533            # use out edges
534
535            original_edge_weights_dict = {
536                attached_node: self.graph[source_node][attached_node][edge_key][
537                    "weight"
538                ]
539                for source_node, attached_node in self.graph.out_edges(
540                    source_node_label
541                )
542                if edge_key in self.graph[source_node][attached_node]
543            }
544        # Handle in edges
545        else:
546            # use in edges
547            original_edge_weights_dict = {
548                attached_node: self.graph[attached_node][source_node][edge_key][
549                    "weight"
550                ]
551                for attached_node, source_node in self.graph.in_edges(source_node_label)
552                if edge_key in self.graph[attached_node][source_node]
553            }
554
555        sorted_nodes = list(
556            dict(
557                sorted(original_edge_weights_dict.items(), key=lambda item: item[1])
558            ).keys()
559        )
560
561        return sorted_nodes
562
563    # Get values associated with a node
564    def get_nodes_ancestors(self, subgraph: nx.DiGraph, node_label: str) -> list[str]:
565        """Get a list of nodes reachable from source component in graph
566
567        Args:
568            subgraph (nx.DiGraph): networkx graph object
569            node_label (str): label of node to find ancestors for
570
571        Returns:
572            list[str]: nodes reachable from source in graph
573        """
574        all_ancestors = list(nx.ancestors(subgraph, node_label))
575
576        return all_ancestors
577
578    def get_node_comment(
579        self, node_display_name: Optional[str] = None, node_label: Optional[str] = None
580    ) -> str:
581        """Get the node definition, i.e., the "comment" associated with a given node display name.
582
583        Args:
584            node_display_name, str: Display name of the node which you want to get the comment for.
585            node_label, str: Label of the node you would want to get the comment for.
586        Returns:
587            Comment associated with node, as a string.
588        """
589        node_label = self._get_node_label(node_label, node_display_name)
590
591        if not node_label:
592            return ""
593
594        node_definition = self.graph.nodes[node_label][
595            self.dmr.get_relationship_value("comment", "node_label")
596        ]
597        return node_definition
598
599    def get_node_dependencies(
600        self,
601        source_node: str,
602        display_names: bool = True,
603        schema_ordered: bool = True,
604    ) -> list[str]:
605        """Get the immediate dependencies that are related to a given source node.
606
607        Args:
608            source_node: The node whose dependencies we need to compute.
609            display_names: if True, return list of display names of each of the dependencies.
610                           if False, return list of node labels of each of the dependencies.
611            schema_ordered:
612              if True, return the dependencies of the node following the order of the schema
613                (slower).
614              if False, return dependencies from graph without guaranteeing schema order (faster)
615
616        Returns:
617            List of nodes that are dependent on the source node.
618        """
619
620        if schema_ordered:
621            # get dependencies in the same order in which they are defined in the schema
622            required_dependencies = self.get_ordered_entry(
623                key=self.dmr.get_relationship_value("requiresDependency", "edge_key"),
624                source_node_label=source_node,
625            )
626        else:
627            required_dependencies = self.get_adjacent_nodes_by_relationship(
628                node_label=source_node,
629                relationship=self.dmr.get_relationship_value(
630                    "requiresDependency", "edge_key"
631                ),
632            )
633
634        if display_names:
635            # get display names of dependencies
636            dependencies_display_names = []
637
638            for req in required_dependencies:
639                dependencies_display_names.append(
640                    self.graph.nodes[req][
641                        self.dmr.get_relationship_value("displayName", "node_label")
642                    ]
643                )
644
645            return dependencies_display_names
646
647        return required_dependencies
648
649    def get_nodes_descendants(self, node_label: str) -> list[str]:
650        """Return a list of nodes reachable from source in graph
651        Args:
652            node_label, str: any given node
653        Return:
654            all_descendants, list: nodes reachable from source in graph
655        """
656        all_descendants = list(nx.descendants(self.graph, node_label))
657
658        return all_descendants
659
660    def get_nodes_display_names(
661        self,
662        node_list: list[str],
663    ) -> list[str]:
664        """Get display names associated with the given list of nodes.
665
666        Args:
667            node_list: List of nodes whose display names we need to retrieve.
668
669        Returns:
670            List of display names.
671        """
672        node_list_display_names = [
673            self.graph.nodes[node][
674                self.dmr.get_relationship_value("displayName", "node_label")
675            ]
676            for node in node_list
677        ]
678
679        return node_list_display_names
680
681    def get_node_label(self, node_display_name: str) -> str:
682        """Get the node label for a given display name.
683
684        Args:
685            node_display_name: Display name of the node which you want to get the label for.
686        Returns:
687            Node label associated with given node.
688            If display name not part of schema, return an empty string.
689        """
690
691        node_class_label = get_class_label_from_display_name(
692            display_name=node_display_name
693        )
694        node_property_label = get_property_label_from_display_name(
695            display_name=node_display_name
696        )
697
698        if node_class_label in self.graph.nodes:
699            node_label = node_class_label
700        elif node_property_label in self.graph.nodes:
701            node_label = node_property_label
702        else:
703            node_label = ""
704
705        return node_label
706
707    def get_node_range(
708        self,
709        node_label: Optional[str] = None,
710        node_display_name: Optional[str] = None,
711        display_names: bool = False,
712    ) -> list[str]:
713        """
714        Get the range, i.e., all the valid values that are associated with a node label.
715
716
717        Args:
718            node_label (Optional[str], optional): Node for which you need to retrieve the range.
719              Defaults to None.
720            node_display_name (Optional[str], optional): _description_. Defaults to None.
721            display_names (bool, optional): _description_. Defaults to False.
722
723        Raises:
724            ValueError: If the node cannot be found in the graph.
725
726        Returns:
727            list[str]:
728              If display_names=False, a list of valid values (labels) associated with a given node.
729              If display_names=True, a list of valid values (display names) associated
730                with a given node
731        """
732        node_label = self._get_node_label(node_label, node_display_name)
733        try:
734            # get node range in the order defined in schema for given node
735            required_range = self.find_node_range(node_label=node_label)
736        except KeyError as exc:
737            raise ValueError(
738                f"The source node {node_label} does not exist in the graph. "
739                "Please use a different node."
740            ) from exc
741
742        if display_names:
743            # get the display name(s) of all dependencies
744            dependencies_display_names = []
745
746            for req in required_range:
747                dependencies_display_names.append(self.graph.nodes[req]["displayName"])
748
749            return dependencies_display_names
750
751        return required_range
752
753    def get_node_required(
754        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
755    ) -> bool:
756        """Check if a given node is required or not.
757
758        Note: The possible options that a node can be associated with -- "required" / "optional".
759
760        Args:
761            node_label: Label of the node for which you need to look up.
762            node_display_name: Display name of the node for which you want look up.
763        Returns:
764            True: If the given node is a "required" node.
765            False: If the given node is not a "required" (i.e., an "optional") node.
766        """
767        node_label = self._get_node_label(node_label, node_display_name)
768        rel_node_label = self.dmr.get_relationship_value("required", "node_label")
769        node_required = self.graph.nodes[node_label][rel_node_label]
770        return node_required
771
772    def get_node_validation_rules(
773        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
774    ) -> Union[list, dict[str, str]]:
775        """Get validation rules associated with a node,
776
777        Args:
778            node_label: Label of the node for which you need to look up.
779            node_display_name: Display name of the node which you want to get the label for.
780        Returns:
781            A set of validation rules associated with node, as a list or a dictionary.
782        """
783        node_label = self._get_node_label(node_label, node_display_name)
784
785        if not node_label:
786            return []
787
788        try:
789            node_validation_rules = self.graph.nodes[node_label]["validationRules"]
790        except KeyError as key_error:
791            raise ValueError(
792                f"{node_label} is not in the graph, please provide a proper node label"
793            ) from key_error
794
795        return node_validation_rules
796
797    def get_subgraph_by_edge_type(self, relationship: str) -> nx.DiGraph:
798        """Get a subgraph containing all edges of a given type (aka relationship).
799
800        Args:
801            relationship: edge / link relationship type with possible values same as in above docs.
802
803        Returns:
804            Directed graph on edges of a particular type (aka relationship)
805        """
806
807        # prune the metadata model graph so as to include only those edges that
808        # match the relationship type
809        rel_edges = []
810        for node_1, node_2, key, _ in self.graph.out_edges(data=True, keys=True):
811            if key == relationship:
812                rel_edges.append((node_1, node_2))
813
814        relationship_subgraph: nx.DiGraph = nx.DiGraph()
815        relationship_subgraph.add_edges_from(rel_edges)
816
817        return relationship_subgraph
818
819    def find_adjacent_child_classes(
820        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
821    ) -> list[str]:
822        """Find child classes of a given node.
823        Args:
824            node_display_name: Display name of the node to look up.
825            node_label: Label of the node to look up.
826        Returns:
827            List of nodes that are adjacent to the given node, by SubclassOf relationship.
828        """
829        node_label = self._get_node_label(node_label, node_display_name)
830        return self.get_adjacent_nodes_by_relationship(
831            node_label=node_label,
832            relationship=self.dmr.get_relationship_value("subClassOf", "edge_key"),
833        )
834
835    def find_child_classes(self, schema_class: str) -> list:
836        """Find schema classes that inherit from the given class
837        Args:
838            schema_class: node label for the class to from which to look for children.
839        Returns:
840            list of children to the schema_class.
841        """
842        child_classes = unlist(list(self.graph.successors(schema_class)))
843        assert isinstance(child_classes, list)
844        return child_classes
845
846    def find_class_specific_properties(self, schema_class: str) -> list[str]:
847        """Find properties specifically associated with a given class
848        Args:
849            schema_class, str: node/class label, to identify properties for.
850        Returns:
851            properties, list: List of properties associate with a given schema class.
852        Raises:
853            KeyError: Key error is raised if the provided schema_class is not in the graph
854        """
855
856        if not self.is_class_in_schema(schema_class):
857            raise KeyError(
858                (
859                    f"Schema_class provided: {schema_class} is not in the data model, please check "
860                    "that you are providing the proper class/node label"
861                )
862            )
863
864        properties = []
865        for node1, node2 in self.graph.edges():
866            if (
867                node2 == schema_class
868                and "domainValue" in self.graph[node1][schema_class]
869            ):
870                properties.append(node1)
871        return properties
872
873    def find_parent_classes(self, node_label: str) -> list[list[str]]:
874        """Find all parents of the provided node
875        Args:
876            node_label: label of the node to find parents of
877        Returns:
878            List of list of Parents to the given node.
879        """
880        # Get digraph of nodes with parents
881        digraph = self.get_digraph_by_edge_type("parentOf")
882
883        # Get root node
884        root_node = list(nx.topological_sort(digraph))[0]
885
886        # Get paths between root_node and the target node.
887        paths = nx.all_simple_paths(self.graph, source=root_node, target=node_label)
888
889        return [_path[:-1] for _path in paths]
890
891    def full_schema_graph(self, size: Optional[int] = None) -> graphviz.Digraph:
892        """Create a graph of the data model.
893        Args:
894            size, float: max height and width of the graph, if one value provided
895               it is used for both.
896        Returns:
897            schema graph viz
898        """
899        edges = self.graph.edges()
900        return visualize(edges, size=size)
901
902    def is_class_in_schema(self, node_label: str) -> bool:
903        """Determine if provided node_label is in the schema graph/data model.
904        Args:
905            node_label: label of node to search for in the
906        Returns:
907            True, if node is in the graph schema
908            False, if node is not in graph schema
909        """
910        return node_label in self.graph.nodes()
911
912    def sub_schema_graph(
913        self, source: str, direction: str, size: Optional[float] = None
914    ) -> Optional[graphviz.Digraph]:
915        """Create a sub-schema graph
916        Args:
917            source, str: source node label to start graph
918            direction, str: direction to create the visualization, choose from "up", "down", "both"
919            size, float: max height and width of the graph, if one value provided it is used for
920              both.
921        Returns:
922            Sub-schema graph viz
923        """
924        if direction == "down":
925            edges = list(nx.edge_bfs(self.graph, [source]))
926            return visualize(edges, size=size)
927        if direction == "up":
928            paths = self.find_parent_classes(source)
929            edges = []
930            for _path in paths:
931                _path.append(source)
932                for i in range(0, len(_path) - 1):
933                    edges.append((_path[i], _path[i + 1]))
934            return visualize(edges, size=size)
935        if direction == "both":
936            paths = self.find_parent_classes(source)
937            edges = list(nx.edge_bfs(self.graph, [source]))
938            for _path in paths:
939                _path.append(source)
940                for i in range(0, len(_path) - 1):
941                    edges.append((_path[i], _path[i + 1]))
942            return visualize(edges, size=size)
943        return None
944
945    def get_node_column_type(
946        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
947    ) -> Optional[JSONSchemaType]:
948        """Gets the column type of the node
949
950        Args:
951            node_label: The label of the node to get the type from
952            node_display_name: The display name of the node to get the type from
953
954        Returns:
955            The column type of the node if it has one, otherwise None
956        """
957        node_label = self._get_node_label(node_label, node_display_name)
958        rel_node_label = self.dmr.get_relationship_value("columnType", "node_label")
959        return self.graph.nodes[node_label][rel_node_label]
960
961    def _get_node_label(
962        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
963    ) -> str:
964        """Returns the node label if given otherwise gets the node label from the display name
965
966        Args:
967            node_label: The label of the node to get the type from
968            node_display_name: The display name of the node to get the type from
969
970        Raises:
971            ValueError: If neither node_label or node_display_name is provided
972
973        Returns:
974            The node label
975        """
976        if node_label is not None:
977            return node_label
978        if node_display_name is not None:
979            return self.get_node_label(node_display_name)
980        raise ValueError("Either 'node_label' or 'node_display_name' must be provided.")

DataModelGraphExplorer

DataModelGraphExplorer(graph: networkx.classes.multidigraph.MultiDiGraph)
156    def __init__(
157        self,
158        graph: nx.MultiDiGraph,
159    ):
160        """Load data model graph as a singleton.
161        Args:
162            G: nx.MultiDiGraph, networkx graph representation of the data model
163        """
164        self.graph = graph  # At this point the graph is expected to be fully formed.
165        self.dmr = DataModelRelationships()

Load data model graph as a singleton.

Arguments:
  • graph: nx.MultiDiGraph, networkx graph representation of the data model
graph
dmr
def find_properties(self) -> set[str]:
167    def find_properties(self) -> set[str]:
168        """
169        Identify all properties, as defined by the first node in a pair, connected with
170        'domainIncludes' edge type
171
172        Returns:
173            properties, set: All properties defined in the data model, each property name
174              is defined by its label.
175        """
176        properties_list: list[str] = []
177        for node_1, _, rel in self.graph.edges:
178            if rel == self.dmr.get_relationship_value("domainIncludes", "edge_key"):
179                properties_list.append(node_1)
180        properties_set = set(properties_list)
181        return properties_set

Identify all properties, as defined by the first node in a pair, connected with 'domainIncludes' edge type

Returns:

properties, set: All properties defined in the data model, each property name is defined by its label.

def find_classes(self) -> AbstractSet[str]:
183    def find_classes(self) -> AbstractSet[str]:
184        """
185        Identify all classes, as defined but all nodes, minus all properties
186        (which are explicitly defined)
187        Returns:
188            classes, set:  All classes defined in the data model, each class
189              name is defined by its label.
190        """
191        nodes = self.graph.nodes
192        properties = self.find_properties()
193        classes = nodes - properties
194        return classes

Identify all classes, as defined by all nodes, minus all properties (which are explicitly defined)

Returns:

classes, set: All classes defined in the data model, each class name is defined by its label.

def find_node_range( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None) -> list:
196    def find_node_range(
197        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
198    ) -> list:
199        """Get valid values for the given node (attribute)
200        Args:
201            node_label, str, Optional[str]: label of the node for which to retrieve valid values
202            node_display_name, str, Optional[str]: Display Name of the node for which to
203              retrieve valid values
204        Returns:
205            valid_values, list: List of valid values associated with the provided node.
206        """
207        node_label = self._get_node_label(node_label, node_display_name)
208
209        valid_values = []
210        for node_1, node_2, rel in self.graph.edges:
211            if node_1 == node_label and rel == self.dmr.get_relationship_value(
212                "rangeIncludes", "edge_key"
213            ):
214                valid_values.append(node_2)
215        valid_values = list(set(valid_values))
216        return valid_values

Get valid values for the given node (attribute)

Arguments:
  • node_label, str, Optional[str]: label of the node for which to retrieve valid values
  • node_display_name, str, Optional[str]: Display Name of the node for which to retrieve valid values
Returns:

valid_values, list: List of valid values associated with the provided node.

def get_adjacent_nodes_by_relationship(self, node_label: str, relationship: str) -> list[str]:
218    def get_adjacent_nodes_by_relationship(
219        self, node_label: str, relationship: str
220    ) -> list[str]:
221        """Get a list of nodes that is / are adjacent to a given node, based on a relationship type.
222
223        Args:
224            node_label: label of the the node whose edges we need to look at.
225            relationship: the type of link(s) that the above node and its immediate neighbors share.
226
227        Returns:
228            List of nodes that are adjacent to the given node.
229        #checked
230        """
231        nodes = set()
232        for _, node_2, key, _ in self.graph.out_edges(node_label, data=True, keys=True):
233            if key == relationship:
234                nodes.add(node_2)
235
236        return list(nodes)

Get a list of nodes that is / are adjacent to a given node, based on a relationship type.

Arguments:
  • node_label: label of the node whose edges we need to look at.
  • relationship: the type of link(s) that the above node and its immediate neighbors share.
Returns:

List of nodes that are adjacent to the given node.

checked

def get_component_node_required( self, manifest_component: str, node_validation_rules: Optional[list[str]] = None, node_label: Optional[str] = None, node_display_name: Optional[str] = None) -> bool:
238    def get_component_node_required(
239        self,
240        manifest_component: str,
241        node_validation_rules: Optional[list[str]] = None,
242        node_label: Optional[str] = None,
243        node_display_name: Optional[str] = None,
244    ) -> bool:
245        """Check if a node is required taking into account the manifest component it is defined in
246        (requirements can be set in validation rule as well as required column)
247        Args:
248            manifest_component: str, manifest component display name that the node belongs to.
249            node_validation_rules: list[str], validation rules for a given node and component.
250            node_label: str, Label of the node you would want to get the comment for.
251            node_display_name: str, node display name for the node being queried.
252        Returns:
253            True, if node is required, False if not
254        """
255        node_required = False
256
257        if not node_validation_rules:
258            # Get node validation rules for a given component
259            node_validation_rules = self.get_component_node_validation_rules(
260                manifest_component=manifest_component,
261                node_label=node_label,
262                node_display_name=node_display_name,
263            )
264
265        # Check if the validation rule specifies that the node is required for this particular
266        # component.
267        if rule_in_rule_list("required", node_validation_rules):
268            node_required = True
269            # To prevent any unintended errors, ensure the Required field for this node is False
270            if self.get_node_required(
271                node_label=node_label, node_display_name=node_display_name
272            ):
273                if not node_display_name:
274                    assert node_label is not None
275                    node_display_name = self.graph.nodes[node_label][
276                        self.dmr.get_relationship_value("displayName", "node_label")
277                    ]
278                error_str = " ".join(
279                    [
280                        f"For component: {manifest_component} and attribute: {node_display_name}",
281                        "requirements are being specified in both the Required field and in the",
282                        "Validation Rules. If you desire to use validation rules to set component",
283                        "specific requirements for this attribute",
284                        "then the Required field needs to be set to False, or the validation may",
285                        "not work as intended, for other components where the attribute",
286                        "that should not be required.",
287                    ]
288                )
289
290                logger.error(error_str)
291        else:
292            # If requirements are not being set in the validation rule, then just pull the
293            # standard node requirements from the model
294            node_required = self.get_node_required(
295                node_label=node_label, node_display_name=node_display_name
296            )
297        return node_required

Check if a node is required taking into account the manifest component it is defined in (requirements can be set in validation rule as well as required column)

Arguments:
  • manifest_component: str, manifest component display name that the node belongs to.
  • node_validation_rules: list[str], validation rules for a given node and component.
  • node_label: str, Label of the node being queried.
  • node_display_name: str, node display name for the node being queried.
Returns:

True, if node is required, False if not

def get_component_node_validation_rules( self, manifest_component: str, node_label: Optional[str] = None, node_display_name: Optional[str] = None) -> list:
299    def get_component_node_validation_rules(
300        self,
301        manifest_component: str,
302        node_label: Optional[str] = None,
303        node_display_name: Optional[str] = None,
304    ) -> list:
305        """Get validation rules for a given node and component.
306        Args:
307            manifest_component: str, manifest component display name that the node belongs to.
308            node_label: str, Label of the node you would want to get the comment for.
309            node_display_name: str, node display name for the node being queried.
310        Returns:
311            validation_rules: list, validation rules list for a given node and component.
312        """
313        # get any additional validation rules associated with this node (e.g. can this node
314        # be mapped to a list of other nodes)
315        node_validation_rules = self.get_node_validation_rules(
316            node_label=node_label, node_display_name=node_display_name
317        )
318
319        # Parse the validation rules per component if applicable
320        if node_validation_rules and isinstance(node_validation_rules, dict):
321            node_validation_rules_list = extract_component_validation_rules(
322                manifest_component=manifest_component,
323                validation_rules_dict=node_validation_rules,  # type: ignore
324            )
325        else:
326            assert isinstance(node_validation_rules, list)
327            node_validation_rules_list = node_validation_rules
328        return node_validation_rules_list

Get validation rules for a given node and component.

Arguments:
  • manifest_component: str, manifest component display name that the node belongs to.
  • node_label: str, Label of the node whose validation rules are requested.
  • node_display_name: str, node display name for the node being queried.
Returns:

validation_rules: list, validation rules list for a given node and component.

def get_component_requirements(self, source_component: str) -> list[str]:
330    def get_component_requirements(
331        self,
332        source_component: str,
333    ) -> list[str]:
334        """
335        Get all components that are associated with a given source component and are
336          required by it.
337
338        Args:
339            source_component: source component for which we need to find all required downstream
340              components.
341
342        Returns:
343            List of nodes that are descendants from the source component are are related to the
344              source through a specific component relationship.
345        """
346
347        req_components = list(
348            reversed(
349                self.get_descendants_by_edge_type(
350                    source_component,
351                    self.dmr.get_relationship_value("requiresComponent", "edge_key"),
352                    ordered=True,
353                )
354            )
355        )
356
357        return req_components

Get all components that are associated with a given source component and are required by it.

Arguments:
  • source_component: source component for which we need to find all required downstream components.
Returns:

List of nodes that are descendants of the source component and are related to the source through a specific component relationship.

def get_component_requirements_graph(self, source_component: str) -> networkx.classes.graph.Graph:
359    def get_component_requirements_graph(
360        self,
361        source_component: str,
362    ) -> nx.Graph:
363        """
364        Get all components that are associated with a given source component and are required by it;
365          return the components as a dependency graph (i.e. a DAG).
366
367        Args:
368            source_component, str: source component for which we need to find all required
369              downstream components.
370
371        Returns:
372            A subgraph of the schema graph induced on nodes that are descendants from the source
373              component and are related to the source through a specific component relationship.
374        """
375
376        # get a list of required component nodes
377        req_components = self.get_component_requirements(source_component)
378
379        # get the subgraph induced on required component nodes
380        req_components_graph = self.get_subgraph_by_edge_type(
381            self.dmr.get_relationship_value("requiresComponent", "edge_key"),
382        ).subgraph(req_components)
383
384        return req_components_graph

Get all components that are associated with a given source component and are required by it; return the components as a dependency graph (i.e. a DAG).

Arguments:
  • source_component, str: source component for which we need to find all required downstream components.
Returns:

A subgraph of the schema graph induced on nodes that are descendants from the source component and are related to the source through a specific component relationship.

def get_descendants_by_edge_type( self, source_node: str, relationship: str, connected: bool = True, ordered: bool = False) -> list[str]:
386    def get_descendants_by_edge_type(
387        self,
388        source_node: str,
389        relationship: str,
390        connected: bool = True,
391        ordered: bool = False,
392    ) -> list[str]:
393        """
394        Get all nodes that are descendants of a given source node, based on a specific
395          type of edge / relationship type.
396
397        Args:
398            source_node: The node whose descendants need to be retrieved.
399            relationship: Edge / link relationship type with possible values same as in above docs.
400            connected:
401              If True, we need to ensure that all descendant nodes are reachable from the source
402                node, i.e., they are part of the same connected component.
403              If False, the descendants could be in multiple connected components.
404              Default value is True.
405            ordered:
406              If True, the list of descendants will be topologically ordered.
407              If False, the list has no particular order (depends on the order in which the
408                descendants were traversed in the subgraph).
409
410        Returns:
411            List of nodes that are descendants from a particular node (sorted / unsorted)
412        """
413
414        root_descendants = nx.descendants(self.graph, source_node)
415
416        subgraph_nodes = list(root_descendants)
417        subgraph_nodes.append(source_node)
418        descendants_subgraph = self.graph.subgraph(subgraph_nodes)
419
420        # prune the descendants subgraph so as to include only those edges that match
421        # the relationship type
422        rel_edges = []
423        for node_1, node_2, key, _ in descendants_subgraph.edges(data=True, keys=True):
424            if key == relationship:
425                rel_edges.append((node_1, node_2))
426
427        relationship_subgraph: nx.DiGraph = nx.DiGraph()
428        relationship_subgraph.add_edges_from(rel_edges)
429
430        descendants = relationship_subgraph.nodes()
431
432        if not descendants:
433            # return empty list if there are no nodes that are reachable from the
434            # source node based on this relationship type
435            return []
436
437        if connected and ordered:
438            # get the set of reachable nodes from the source node
439            descendants = nx.descendants(relationship_subgraph, source_node)
440            descendants.add(source_node)
441
442            # normally, the descendants from a node are unordered (peculiarity
443            # of nx descendants call)
444            # form the subgraph on descendants and order it topologically
445            # this assumes an acyclic subgraph
446            descendants = nx.topological_sort(
447                relationship_subgraph.subgraph(descendants)
448            )
449        elif connected:
450            # get the nodes that are reachable from a given source node
451            # after the pruning process above some nodes in the
452            # root_descendants subgraph might have become disconnected and
453            # will be omitted
454            descendants = nx.descendants(relationship_subgraph, source_node)
455            descendants.add(source_node)
456        elif ordered:
457            # sort the nodes topologically
458            # this requires the graph to be an acyclic graph
459            descendants = nx.topological_sort(relationship_subgraph)
460
461        return list(descendants)

Get all nodes that are descendants of a given source node, based on a specific type of edge / relationship type.

Arguments:
  • source_node: The node whose descendants need to be retrieved.
  • relationship: Edge / link relationship type with possible values same as in above docs.
  • connected: If True, we need to ensure that all descendant nodes are reachable from the source node, i.e., they are part of the same connected component. If False, the descendants could be in multiple connected components. Default value is True.
  • ordered: If True, the list of descendants will be topologically ordered. If False, the list has no particular order (depends on the order in which the descendants were traversed in the subgraph).
Returns:

List of nodes that are descendants from a particular node (sorted / unsorted)

def get_digraph_by_edge_type(self, edge_type: str) -> networkx.classes.digraph.DiGraph:
463    def get_digraph_by_edge_type(self, edge_type: str) -> nx.DiGraph:
464        """Get a networkx digraph of the nodes connected via a given edge_type.
465        Args:
466            edge_type:
467                Edge type to search for, possible types are defined by 'edge_key'
468                  in relationship class
469        Returns:
470        """
471
472        digraph: nx.DiGraph = nx.DiGraph()
473        for node_1, node_2, key, _ in self.graph.edges(data=True, keys=True):
474            if key == edge_type:
475                digraph.add_edge(node_1, node_2)
476        return digraph

Get a networkx digraph of the nodes connected via a given edge_type.

Arguments:
  • edge_type: Edge type to search for, possible types are defined by 'edge_key' in relationship class

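Returns:

A networkx DiGraph containing one edge for every edge of the schema graph whose key equals edge_type.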

def get_edges_by_relationship(self, node: str, relationship: str) -> list[tuple[str, str]]:
478    def get_edges_by_relationship(
479        self,
480        node: str,
481        relationship: str,
482    ) -> list[tuple[str, str]]:
483        """Get a list of out-edges of a node where the edges match a specific type of relationship.
484
485        i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf"
486          (set of edges to children / sub-class nodes).
487
488        Args:
489            node: the node whose edges we need to look at.
490            relationship: the type of link(s) that the above node and its immediate neighbors share.
491
492        Returns:
493            List of edges that are connected to the node.
494        """
495        edges: list[tuple[str, str]] = []
496
497        for node_1, node_2, key, _ in self.graph.out_edges(node, data=True, keys=True):
498            if key == relationship:
499                edges.append((node_1, node_2))
500
501        return edges

Get a list of out-edges of a node where the edges match a specific type of relationship.

i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf" (set of edges to children / sub-class nodes).

Arguments:
  • node: the node whose edges we need to look at.
  • relationship: the type of link(s) that the above node and its immediate neighbors share.
Returns:

List of edges that are connected to the node.

def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
503    def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
504        """
505        Order the values associated with a particular node and edge_key to
506          match original ordering in schema.
507
508        Args:
509            key (str): a key representing and edge relationship in
510              DataModelRelationships.relationships_dictionary
511            source_node_label (str): node to look for edges of and order
512
513        Raises:
514            KeyError: cannot find source node in graph
515
516        Returns:
517            list[str]:
518              list of sorted nodes, that share the specified relationship with the source node
519              For the example data model, for key='rangeIncludes', source_node_label='CancerType'
520                the return would be ['Breast, 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that
521                exact order.
522        """
523        # Check if node is in the graph, if not throw an error.
524        if not self.is_class_in_schema(node_label=source_node_label):
525            raise KeyError(
526                f"Cannot find node: {source_node_label} in the graph, please check entry."
527            )
528
529        edge_key = self.dmr.get_relationship_value(key, "edge_key")
530
531        # Handle out edges
532        if self.dmr.get_relationship_value(key, "jsonld_direction") == "out":
533            # use out edges
534
535            original_edge_weights_dict = {
536                attached_node: self.graph[source_node][attached_node][edge_key][
537                    "weight"
538                ]
539                for source_node, attached_node in self.graph.out_edges(
540                    source_node_label
541                )
542                if edge_key in self.graph[source_node][attached_node]
543            }
544        # Handle in edges
545        else:
546            # use in edges
547            original_edge_weights_dict = {
548                attached_node: self.graph[attached_node][source_node][edge_key][
549                    "weight"
550                ]
551                for attached_node, source_node in self.graph.in_edges(source_node_label)
552                if edge_key in self.graph[attached_node][source_node]
553            }
554
555        sorted_nodes = list(
556            dict(
557                sorted(original_edge_weights_dict.items(), key=lambda item: item[1])
558            ).keys()
559        )
560
561        return sorted_nodes

Order the values associated with a particular node and edge_key to match original ordering in schema.

Arguments:
  • key (str): a key representing an edge relationship in DataModelRelationships.relationships_dictionary
  • source_node_label (str): node to look for edges of and order
Raises:
  • KeyError: cannot find source node in graph
Returns:

list[str]: list of sorted nodes that share the specified relationship with the source node. For the example data model, for key='rangeIncludes', source_node_label='CancerType' the return would be ['Breast', 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that exact order.

def get_nodes_ancestors( self, subgraph: networkx.classes.digraph.DiGraph, node_label: str) -> list[str]:
564    def get_nodes_ancestors(self, subgraph: nx.DiGraph, node_label: str) -> list[str]:
565        """Get a list of nodes reachable from source component in graph
566
567        Args:
568            subgraph (nx.DiGraph): networkx graph object
569            node_label (str): label of node to find ancestors for
570
571        Returns:
572            list[str]: nodes reachable from source in graph
573        """
574        all_ancestors = list(nx.ancestors(subgraph, node_label))
575
576        return all_ancestors

Get a list of the ancestors of a node, i.e. the nodes from which the given node can be reached in the graph

Arguments:
  • subgraph (nx.DiGraph): networkx graph object
  • node_label (str): label of node to find ancestors for
Returns:

list[str]: nodes that have a path to the given node in the graph

def get_node_comment( self, node_display_name: Optional[str] = None, node_label: Optional[str] = None) -> str:
578    def get_node_comment(
579        self, node_display_name: Optional[str] = None, node_label: Optional[str] = None
580    ) -> str:
581        """Get the node definition, i.e., the "comment" associated with a given node display name.
582
583        Args:
584            node_display_name, str: Display name of the node which you want to get the comment for.
585            node_label, str: Label of the node you would want to get the comment for.
586        Returns:
587            Comment associated with node, as a string.
588        """
589        node_label = self._get_node_label(node_label, node_display_name)
590
591        if not node_label:
592            return ""
593
594        node_definition = self.graph.nodes[node_label][
595            self.dmr.get_relationship_value("comment", "node_label")
596        ]
597        return node_definition

Get the node definition, i.e., the "comment" associated with a given node display name.

Arguments:
  • node_display_name, str: Display name of the node which you want to get the comment for.
  • node_label, str: Label of the node you would want to get the comment for.
Returns:

Comment associated with node, as a string.

def get_node_dependencies( self, source_node: str, display_names: bool = True, schema_ordered: bool = True) -> list[str]:
599    def get_node_dependencies(
600        self,
601        source_node: str,
602        display_names: bool = True,
603        schema_ordered: bool = True,
604    ) -> list[str]:
605        """Get the immediate dependencies that are related to a given source node.
606
607        Args:
608            source_node: The node whose dependencies we need to compute.
609            display_names: if True, return list of display names of each of the dependencies.
610                           if False, return list of node labels of each of the dependencies.
611            schema_ordered:
612              if True, return the dependencies of the node following the order of the schema
613                (slower).
614              if False, return dependencies from graph without guaranteeing schema order (faster)
615
616        Returns:
617            List of nodes that are dependent on the source node.
618        """
619
620        if schema_ordered:
621            # get dependencies in the same order in which they are defined in the schema
622            required_dependencies = self.get_ordered_entry(
623                key=self.dmr.get_relationship_value("requiresDependency", "edge_key"),
624                source_node_label=source_node,
625            )
626        else:
627            required_dependencies = self.get_adjacent_nodes_by_relationship(
628                node_label=source_node,
629                relationship=self.dmr.get_relationship_value(
630                    "requiresDependency", "edge_key"
631                ),
632            )
633
634        if display_names:
635            # get display names of dependencies
636            dependencies_display_names = []
637
638            for req in required_dependencies:
639                dependencies_display_names.append(
640                    self.graph.nodes[req][
641                        self.dmr.get_relationship_value("displayName", "node_label")
642                    ]
643                )
644
645            return dependencies_display_names
646
647        return required_dependencies

Get the immediate dependencies that are related to a given source node.

Arguments:
  • source_node: The node whose dependencies we need to compute.
  • display_names: if True, return list of display names of each of the dependencies. if False, return list of node labels of each of the dependencies.
  • schema_ordered: if True, return the dependencies of the node following the order of the schema (slower). if False, return dependencies from graph without guaranteeing schema order (faster)
Returns:

List of nodes that are dependent on the source node.

def get_nodes_descendants(self, node_label: str) -> list[str]:
649    def get_nodes_descendants(self, node_label: str) -> list[str]:
650        """Return a list of nodes reachable from source in graph
651        Args:
652            node_label, str: any given node
653        Return:
654            all_descendants, list: nodes reachable from source in graph
655        """
656        all_descendants = list(nx.descendants(self.graph, node_label))
657
658        return all_descendants

Return a list of nodes reachable from source in graph

Arguments:
  • node_label, str: any given node
Return:

all_descendants, list: nodes reachable from source in graph

def get_nodes_display_names(self, node_list: list[str]) -> list[str]:
660    def get_nodes_display_names(
661        self,
662        node_list: list[str],
663    ) -> list[str]:
664        """Get display names associated with the given list of nodes.
665
666        Args:
667            node_list: List of nodes whose display names we need to retrieve.
668
669        Returns:
670            List of display names.
671        """
672        node_list_display_names = [
673            self.graph.nodes[node][
674                self.dmr.get_relationship_value("displayName", "node_label")
675            ]
676            for node in node_list
677        ]
678
679        return node_list_display_names

Get display names associated with the given list of nodes.

Arguments:
  • node_list: List of nodes whose display names we need to retrieve.
Returns:

List of display names.

def get_node_label(self, node_display_name: str) -> str:
681    def get_node_label(self, node_display_name: str) -> str:
682        """Get the node label for a given display name.
683
684        Args:
685            node_display_name: Display name of the node which you want to get the label for.
686        Returns:
687            Node label associated with given node.
688            If display name not part of schema, return an empty string.
689        """
690
691        node_class_label = get_class_label_from_display_name(
692            display_name=node_display_name
693        )
694        node_property_label = get_property_label_from_display_name(
695            display_name=node_display_name
696        )
697
698        if node_class_label in self.graph.nodes:
699            node_label = node_class_label
700        elif node_property_label in self.graph.nodes:
701            node_label = node_property_label
702        else:
703            node_label = ""
704
705        return node_label

Get the node label for a given display name.

Arguments:
  • node_display_name: Display name of the node which you want to get the label for.
Returns:

Node label associated with given node. If display name not part of schema, return an empty string.

def get_node_range( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None, display_names: bool = False) -> list[str]:
707    def get_node_range(
708        self,
709        node_label: Optional[str] = None,
710        node_display_name: Optional[str] = None,
711        display_names: bool = False,
712    ) -> list[str]:
713        """
714        Get the range, i.e., all the valid values that are associated with a node label.
715
716
717        Args:
718            node_label (Optional[str], optional): Node for which you need to retrieve the range.
719              Defaults to None.
720            node_display_name (Optional[str], optional): _description_. Defaults to None.
721            display_names (bool, optional): _description_. Defaults to False.
722
723        Raises:
724            ValueError: If the node cannot be found in the graph.
725
726        Returns:
727            list[str]:
728              If display_names=False, a list of valid values (labels) associated with a given node.
729              If display_names=True, a list of valid values (display names) associated
730                with a given node
731        """
732        node_label = self._get_node_label(node_label, node_display_name)
733        try:
734            # get node range in the order defined in schema for given node
735            required_range = self.find_node_range(node_label=node_label)
736        except KeyError as exc:
737            raise ValueError(
738                f"The source node {node_label} does not exist in the graph. "
739                "Please use a different node."
740            ) from exc
741
742        if display_names:
743            # get the display name(s) of all dependencies
744            dependencies_display_names = []
745
746            for req in required_range:
747                dependencies_display_names.append(self.graph.nodes[req]["displayName"])
748
749            return dependencies_display_names
750
751        return required_range

Get the range, i.e., all the valid values that are associated with a node label.

Arguments:
  • node_label (Optional[str], optional): Node for which you need to retrieve the range. Defaults to None.
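  • node_display_name (Optional[str], optional): Display name of the node, used to resolve the node label when node_label is not given. Defaults to None.
  • display_names (bool, optional): If True, return the display names of the valid values instead of their labels. Defaults to False.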
Raises:
  • ValueError: If the node cannot be found in the graph.
Returns:

list[str]: If display_names=False, a list of valid values (labels) associated with a given node. If display_names=True, a list of valid values (display names) associated with a given node

def get_node_required( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None) -> bool:
753    def get_node_required(
754        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
755    ) -> bool:
756        """Check if a given node is required or not.
757
758        Note: The possible options that a node can be associated with -- "required" / "optional".
759
760        Args:
761            node_label: Label of the node for which you need to look up.
762            node_display_name: Display name of the node for which you want look up.
763        Returns:
764            True: If the given node is a "required" node.
765            False: If the given node is not a "required" (i.e., an "optional") node.
766        """
767        node_label = self._get_node_label(node_label, node_display_name)
768        rel_node_label = self.dmr.get_relationship_value("required", "node_label")
769        node_required = self.graph.nodes[node_label][rel_node_label]
770        return node_required

Check if a given node is required or not.

Note: The possible options that a node can be associated with -- "required" / "optional".

Arguments:
  • node_label: Label of the node for which you need to look up.
  • node_display_name: Display name of the node for which you want to look up.
Returns:

True: If the given node is a "required" node. False: If the given node is not a "required" (i.e., an "optional") node.

def get_node_validation_rules( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None) -> Union[list, dict[str, str]]:
772    def get_node_validation_rules(
773        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
774    ) -> Union[list, dict[str, str]]:
775        """Get validation rules associated with a node,
776
777        Args:
778            node_label: Label of the node for which you need to look up.
779            node_display_name: Display name of the node which you want to get the label for.
780        Returns:
781            A set of validation rules associated with node, as a list or a dictionary.
782        """
783        node_label = self._get_node_label(node_label, node_display_name)
784
785        if not node_label:
786            return []
787
788        try:
789            node_validation_rules = self.graph.nodes[node_label]["validationRules"]
790        except KeyError as key_error:
791            raise ValueError(
792                f"{node_label} is not in the graph, please provide a proper node label"
793            ) from key_error
794
795        return node_validation_rules

Get validation rules associated with a node,

Arguments:
  • node_label: Label of the node for which you need to look up.
  • node_display_name: Display name of the node which you want to get the label for.
Returns:

A set of validation rules associated with node, as a list or a dictionary.

def get_subgraph_by_edge_type(self, relationship: str) -> networkx.classes.digraph.DiGraph:
797    def get_subgraph_by_edge_type(self, relationship: str) -> nx.DiGraph:
798        """Get a subgraph containing all edges of a given type (aka relationship).
799
800        Args:
801            relationship: edge / link relationship type with possible values same as in above docs.
802
803        Returns:
804            Directed graph on edges of a particular type (aka relationship)
805        """
806
807        # prune the metadata model graph so as to include only those edges that
808        # match the relationship type
809        rel_edges = []
810        for node_1, node_2, key, _ in self.graph.out_edges(data=True, keys=True):
811            if key == relationship:
812                rel_edges.append((node_1, node_2))
813
814        relationship_subgraph: nx.DiGraph = nx.DiGraph()
815        relationship_subgraph.add_edges_from(rel_edges)
816
817        return relationship_subgraph

Get a subgraph containing all edges of a given type (aka relationship).

Arguments:
  • relationship: edge / link relationship type with possible values same as in above docs.
Returns:

Directed graph on edges of a particular type (aka relationship)

def find_adjacent_child_classes( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None) -> list[str]:
819    def find_adjacent_child_classes(
820        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
821    ) -> list[str]:
822        """Find child classes of a given node.
823        Args:
824            node_display_name: Display name of the node to look up.
825            node_label: Label of the node to look up.
826        Returns:
827            List of nodes that are adjacent to the given node, by SubclassOf relationship.
828        """
829        node_label = self._get_node_label(node_label, node_display_name)
830        return self.get_adjacent_nodes_by_relationship(
831            node_label=node_label,
832            relationship=self.dmr.get_relationship_value("subClassOf", "edge_key"),
833        )

Find child classes of a given node.

Arguments:
  • node_display_name: Display name of the node to look up.
  • node_label: Label of the node to look up.
Returns:

List of nodes that are adjacent to the given node, by SubclassOf relationship.

def find_child_classes(self, schema_class: str) -> list:
835    def find_child_classes(self, schema_class: str) -> list:
836        """Find schema classes that inherit from the given class
837        Args:
838            schema_class: node label for the class to from which to look for children.
839        Returns:
840            list of children to the schema_class.
841        """
842        child_classes = unlist(list(self.graph.successors(schema_class)))
843        assert isinstance(child_classes, list)
844        return child_classes

Find schema classes that inherit from the given class

Arguments:
  • schema_class: node label for the class to from which to look for children.
Returns:

list of children to the schema_class.

def find_class_specific_properties(self, schema_class: str) -> list[str]:
846    def find_class_specific_properties(self, schema_class: str) -> list[str]:
847        """Find properties specifically associated with a given class
848        Args:
849            schema_class, str: node/class label, to identify properties for.
850        Returns:
851            properties, list: List of properties associate with a given schema class.
852        Raises:
853            KeyError: Key error is raised if the provided schema_class is not in the graph
854        """
855
856        if not self.is_class_in_schema(schema_class):
857            raise KeyError(
858                (
859                    f"Schema_class provided: {schema_class} is not in the data model, please check "
860                    "that you are providing the proper class/node label"
861                )
862            )
863
864        properties = []
865        for node1, node2 in self.graph.edges():
866            if (
867                node2 == schema_class
868                and "domainValue" in self.graph[node1][schema_class]
869            ):
870                properties.append(node1)
871        return properties

Find properties specifically associated with a given class

Arguments:
  • schema_class, str: node/class label, to identify properties for.
Returns:

properties, list: List of properties associated with a given schema class.

Raises:
  • KeyError: Key error is raised if the provided schema_class is not in the graph
def find_parent_classes(self, node_label: str) -> list[list[str]]:
873    def find_parent_classes(self, node_label: str) -> list[list[str]]:
874        """Find all parents of the provided node
875        Args:
876            node_label: label of the node to find parents of
877        Returns:
878            List of list of Parents to the given node.
879        """
880        # Get digraph of nodes with parents
881        digraph = self.get_digraph_by_edge_type("parentOf")
882
883        # Get root node
884        root_node = list(nx.topological_sort(digraph))[0]
885
886        # Get paths between root_node and the target node.
887        paths = nx.all_simple_paths(self.graph, source=root_node, target=node_label)
888
889        return [_path[:-1] for _path in paths]

Find all parents of the provided node

Arguments:
  • node_label: label of the node to find parents of
Returns:

List of lists of parents of the given node.

def full_schema_graph(self, size: Optional[int] = None) -> graphviz.graphs.Digraph:
891    def full_schema_graph(self, size: Optional[int] = None) -> graphviz.Digraph:
892        """Create a graph of the data model.
893        Args:
894            size, float: max height and width of the graph, if one value provided
895               it is used for both.
896        Returns:
897            schema graph viz
898        """
899        edges = self.graph.edges()
900        return visualize(edges, size=size)

Create a graph of the data model.

Arguments:
  • size, float: max height and width of the graph, if one value provided it is used for both.
Returns:

schema graph viz

def is_class_in_schema(self, node_label: str) -> bool:
902    def is_class_in_schema(self, node_label: str) -> bool:
903        """Determine if provided node_label is in the schema graph/data model.
904        Args:
905            node_label: label of node to search for in the
906        Returns:
907            True, if node is in the graph schema
908            False, if node is not in graph schema
909        """
910        return node_label in self.graph.nodes()

Determine if provided node_label is in the schema graph/data model.

Arguments:
  • node_label: label of node to search for in the schema graph.
Returns:

True if the node is in the graph schema; False if it is not.

def sub_schema_graph( self, source: str, direction: str, size: Optional[float] = None) -> Optional[graphviz.graphs.Digraph]:
912    def sub_schema_graph(
913        self, source: str, direction: str, size: Optional[float] = None
914    ) -> Optional[graphviz.Digraph]:
915        """Create a sub-schema graph
916        Args:
917            source, str: source node label to start graph
918            direction, str: direction to create the visualization, choose from "up", "down", "both"
919            size, float: max height and width of the graph, if one value provided it is used for
920              both.
921        Returns:
922            Sub-schema graph viz
923        """
924        if direction == "down":
925            edges = list(nx.edge_bfs(self.graph, [source]))
926            return visualize(edges, size=size)
927        if direction == "up":
928            paths = self.find_parent_classes(source)
929            edges = []
930            for _path in paths:
931                _path.append(source)
932                for i in range(0, len(_path) - 1):
933                    edges.append((_path[i], _path[i + 1]))
934            return visualize(edges, size=size)
935        if direction == "both":
936            paths = self.find_parent_classes(source)
937            edges = list(nx.edge_bfs(self.graph, [source]))
938            for _path in paths:
939                _path.append(source)
940                for i in range(0, len(_path) - 1):
941                    edges.append((_path[i], _path[i + 1]))
942            return visualize(edges, size=size)
943        return None

Create a sub-schema graph

Arguments:
  • source, str: source node label to start graph
  • direction, str: direction to create the visualization, choose from "up", "down", "both"
  • size, float: max height and width of the graph, if one value provided it is used for both.
Returns:

Sub-schema graph viz

def get_node_column_type( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None) -> Optional[schematic.schemas.constants.JSONSchemaType]:
945    def get_node_column_type(
946        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
947    ) -> Optional[JSONSchemaType]:
948        """Gets the column type of the node
949
950        Args:
951            node_label: The label of the node to get the type from
952            node_display_name: The display name of the node to get the type from
953
954        Returns:
955            The column type of the node if it has one, otherwise None
956        """
957        node_label = self._get_node_label(node_label, node_display_name)
958        rel_node_label = self.dmr.get_relationship_value("columnType", "node_label")
959        return self.graph.nodes[node_label][rel_node_label]

Gets the column type of the node

Arguments:
  • node_label: The label of the node to get the type from
  • node_display_name: The display name of the node to get the type from
Returns:

The column type of the node if it has one, otherwise None