schematic.schemas.data_model_graph
DataModel Graph
"""DataModel Graph

Build a networkx graph from a parsed data model (``DataModelGraph``) and
query that graph (``DataModelGraphExplorer``).
"""

import logging
from typing import Any, Optional, Union, AbstractSet

import graphviz  # type: ignore
import networkx as nx  # type: ignore
from opentelemetry import trace

from schematic.schemas.data_model_edges import DataModelEdges
from schematic.schemas.data_model_nodes import DataModelNodes
from schematic.schemas.data_model_relationships import (
    DataModelRelationships,
    JSONSchemaType,
)
from schematic.utils.general import unlist
from schematic.utils.schema_utils import (
    DisplayLabelType,
    extract_component_validation_rules,
    get_class_label_from_display_name,
    get_property_label_from_display_name,
)
from schematic.utils.validate_utils import rule_in_rule_list
from schematic.utils.viz_utils import visualize

logger = logging.getLogger(__name__)
tracer = trace.get_tracer("Schematic")


class DataModelGraphMeta(type):  # pylint: disable=too-few-public-methods
    """Metaclass that hands back one cached instance per class.

    A metaclass has to derive from ``type``; otherwise ``super().__call__``
    resolves to ``object``, which has no ``__call__``.
    """

    # Registry of already-created instances, keyed by the class they belong to.
    # It is deliberately shared by every class that uses this metaclass.
    _instances: dict = {}

    def __call__(  # pylint: disable=no-self-argument
        cls, *args: Any, **kwargs: Any
    ) -> Any:
        """
        Create the instance on the first call and return the cached one afterwards.

        Possible changes to the value of the `__init__` argument do not affect
        the returned instance.
        """
        if cls not in cls._instances:
            instance = super().__call__(*args, **kwargs)  # type: ignore # pylint: disable=no-member
            cls._instances[cls] = instance
        return cls._instances[cls]


class DataModelGraph:  # pylint: disable=too-few-public-methods
    """
    Generate graph network (networkx) from the attributes and relationships returned
    from the data model parser.

    The class was written with a singleton in mind, see the note on ``__metaclass__``.
    """

    # NOTE(review): Python 3 ignores a ``__metaclass__`` class attribute (the Python 2
    # spelling), so this line has no effect and every call builds a new DataModelGraph.
    # It is kept as-is because switching to ``metaclass=DataModelGraphMeta`` would make
    # later calls return the first graph regardless of their arguments.
    __metaclass__ = DataModelGraphMeta

    def __init__(
        self,
        attribute_relationships_dict: dict,
        data_model_labels: DisplayLabelType = "class_label",
    ) -> None:
        """Load parsed data model.

        Args:
            attributes_relationship_dict, dict: generated in data_model_parser
                {Attribute Display Name: {
                    Relationships: {
                        CSV Header: Value}}}
            data_model_labels: str, display_label or class_label.
                display_label, use the display name as a label, if it is valid
                (contains no blacklisted characters) otherwise will default to schema_label.
                class_label, default, use standard class or property label.
        Raises:
            ValueError, attribute_relationship_dict not loaded.
        """
        # Validate before building any helper from the dictionary, so that an empty
        # model always surfaces as the documented ValueError.
        if not attribute_relationships_dict:
            raise ValueError(
                (
                    "Something has gone wrong, a data model was not loaded into the DataModelGraph "
                    "Class. Please check that your paths are correct"
                )
            )

        self.attribute_relationships_dict = attribute_relationships_dict
        self.dmn = DataModelNodes(self.attribute_relationships_dict)
        self.dme = DataModelEdges()
        self.dmr = DataModelRelationships()
        self.data_model_labels = data_model_labels
        self.graph = self.generate_data_model_graph()

    @tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
    def generate_data_model_graph(self) -> nx.MultiDiGraph:
        """
        Generate NetworkX Graph from the Relationships/attributes dictionary, the graph is built
        by first adding all nodes to the graph, then connecting nodes by the relationships defined
        in the attributes_relationship dictionary.

        Returns:
            G: nx.MultiDiGraph, networkx graph representation of the data model
        """
        # Get all relationships with edges
        edge_relationships = self.dmr.retrieve_rel_headers_dict(edge=True)

        # Find all nodes
        all_nodes = self.dmn.gather_all_nodes_in_model(
            attr_rel_dict=self.attribute_relationships_dict
        )

        # Instantiate NetworkX MultiDigraph
        graph: nx.MultiDiGraph = nx.MultiDiGraph()

        all_node_dict = {}

        ## Fill in MultiDigraph with nodes
        for node in all_nodes:
            # Gather information for each node
            node_dict = self.dmn.generate_node_dict(
                node_display_name=node,
                attr_rel_dict=self.attribute_relationships_dict,
                data_model_labels=self.data_model_labels,
            )

            # Add each node to the all_node_dict to be used for generating edges
            all_node_dict[node] = node_dict

            # Generate node and attach information (attributes) to each node
            graph = self.dmn.generate_node(graph, node_dict)

        edge_list: list[tuple[str, str, dict[str, Union[str, int]]]] = []
        ## Connect nodes via edges
        for node in all_nodes:
            # Generate edges; the list accumulated so far is passed in and the
            # returned list is copied before the next iteration.
            edge_list_2 = self.dme.generate_edge(
                node,
                all_node_dict,
                self.attribute_relationships_dict,
                edge_relationships,
                edge_list,
            )
            edge_list = edge_list_2.copy()

        # Add edges to the Graph
        for node_1, node_2, edge_dict in edge_list:
            graph.add_edge(
                node_1, node_2, key=edge_dict["key"], weight=edge_dict["weight"]
            )
        return graph


class DataModelGraphExplorer:  # pylint: disable=too-many-public-methods
    """Query helpers over an already-built data model graph."""

    def __init__(
        self,
        graph: nx.MultiDiGraph,
    ):
        """Store the data model graph to explore.

        Args:
            graph: nx.MultiDiGraph, networkx graph representation of the data model
        """
        self.graph = graph  # At this point the graph is expected to be fully formed.
        self.dmr = DataModelRelationships()

    def find_properties(self) -> set[str]:
        """
        Identify all properties, as defined by the first node in a pair, connected with
        'domainIncludes' edge type

        Returns:
            properties, set: All properties defined in the data model, each property name
                is defined by its label.
        """
        # The edge key does not depend on the edge being inspected, so look it up once.
        domain_edge_key = self.dmr.get_relationship_value("domainIncludes", "edge_key")
        return {
            node_1 for node_1, _, rel in self.graph.edges if rel == domain_edge_key
        }

    def find_classes(self) -> AbstractSet[str]:
        """
        Identify all classes, as defined by all nodes, minus all properties
        (which are explicitly defined)

        Returns:
            classes, set: All classes defined in the data model, each class
                name is defined by its label.
        """
        nodes = self.graph.nodes
        properties = self.find_properties()
        classes = nodes - properties
        return classes

    def find_node_range(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> list:
        """Get valid values for the given node (attribute)

        Args:
            node_label, str, Optional[str]: label of the node for which to retrieve valid values
            node_display_name, str, Optional[str]: Display Name of the node for which to
                retrieve valid values
        Returns:
            valid_values, list: List of valid values associated with the provided node.
                Duplicates are removed through a set, so the order is not guaranteed.
        Raises:
            ValueError: If neither node_label nor node_display_name is provided.
        """
        node_label = self._get_node_label(node_label, node_display_name)
        range_edge_key = self.dmr.get_relationship_value("rangeIncludes", "edge_key")

        valid_values = {
            node_2
            for node_1, node_2, rel in self.graph.edges
            if node_1 == node_label and rel == range_edge_key
        }
        return list(valid_values)

    def get_adjacent_nodes_by_relationship(
        self, node_label: str, relationship: str
    ) -> list[str]:
        """Get a list of nodes that is / are adjacent to a given node, based on a relationship type.

        Args:
            node_label: label of the node whose edges we need to look at.
            relationship: the type of link(s) that the above node and its immediate neighbors share.

        Returns:
            List of nodes that are adjacent to the given node (deduplicated, unordered).
        """
        nodes = {
            node_2
            for _, node_2, key, _ in self.graph.out_edges(
                node_label, data=True, keys=True
            )
            if key == relationship
        }
        return list(nodes)

    def get_component_node_required(
        self,
        manifest_component: str,
        node_validation_rules: Optional[list[str]] = None,
        node_label: Optional[str] = None,
        node_display_name: Optional[str] = None,
    ) -> bool:
        """Check if a node is required taking into account the manifest component it is defined in
        (requirements can be set in validation rule as well as required column)

        Args:
            manifest_component: str, manifest component display name that the node belongs to.
            node_validation_rules: list[str], validation rules for a given node and component.
                Looked up from the graph when not provided (or empty).
            node_label: str, Label of the node being queried.
            node_display_name: str, node display name for the node being queried.
        Returns:
            True, if node is required, False if not
        """
        node_required = False

        if not node_validation_rules:
            # Get node validation rules for a given component
            node_validation_rules = self.get_component_node_validation_rules(
                manifest_component=manifest_component,
                node_label=node_label,
                node_display_name=node_display_name,
            )

        # Check if the validation rule specifies that the node is required for this particular
        # component.
        if rule_in_rule_list("required", node_validation_rules):
            node_required = True
            # To prevent any unintended errors, ensure the Required field for this node is False
            if self.get_node_required(
                node_label=node_label, node_display_name=node_display_name
            ):
                if not node_display_name:
                    # get_node_required above would already have raised if both were missing
                    assert node_label is not None
                    node_display_name = self.graph.nodes[node_label][
                        self.dmr.get_relationship_value("displayName", "node_label")
                    ]
                error_str = " ".join(
                    [
                        f"For component: {manifest_component} and attribute: {node_display_name}",
                        "requirements are being specified in both the Required field and in the",
                        "Validation Rules. If you desire to use validation rules to set component",
                        "specific requirements for this attribute",
                        "then the Required field needs to be set to False, or the validation may",
                        "not work as intended, for other components where the attribute",
                        "that should not be required.",
                    ]
                )

                # The conflict is only reported; the node is still treated as required.
                logger.error(error_str)
        else:
            # If requirements are not being set in the validation rule, then just pull the
            # standard node requirements from the model
            node_required = self.get_node_required(
                node_label=node_label, node_display_name=node_display_name
            )
        return node_required

    def get_component_node_validation_rules(
        self,
        manifest_component: str,
        node_label: Optional[str] = None,
        node_display_name: Optional[str] = None,
    ) -> list:
        """Get validation rules for a given node and component.

        Args:
            manifest_component: str, manifest component display name that the node belongs to.
            node_label: str, Label of the node being queried.
            node_display_name: str, node display name for the node being queried.
        Returns:
            validation_rules: list, validation rules list for a given node and component.
        """
        # get any additional validation rules associated with this node (e.g. can this node
        # be mapped to a list of other nodes)
        node_validation_rules = self.get_node_validation_rules(
            node_label=node_label, node_display_name=node_display_name
        )

        # Parse the validation rules per component if applicable
        if isinstance(node_validation_rules, dict):
            if not node_validation_rules:
                # An empty dict carries no rules; previously this fell through to the
                # list assertion below and failed.
                return []
            return extract_component_validation_rules(
                manifest_component=manifest_component,
                validation_rules_dict=node_validation_rules,  # type: ignore
            )

        assert isinstance(node_validation_rules, list)
        return node_validation_rules

    def get_component_requirements(
        self,
        source_component: str,
    ) -> list[str]:
        """
        Get all components that are associated with a given source component and are
        required by it.

        Args:
            source_component: source component for which we need to find all required downstream
                components.

        Returns:
            List of nodes that are descendants from the source component and are related to the
                source through the 'requiresComponent' relationship, in reverse topological
                order (the source component comes last).
        """
        ordered_descendants = self.get_descendants_by_edge_type(
            source_component,
            self.dmr.get_relationship_value("requiresComponent", "edge_key"),
            ordered=True,
        )
        return list(reversed(ordered_descendants))

    def get_component_requirements_graph(
        self,
        source_component: str,
    ) -> nx.Graph:
        """
        Get all components that are associated with a given source component and are required by it;
        return the components as a dependency graph (i.e. a DAG).

        Args:
            source_component, str: source component for which we need to find all required
                downstream components.

        Returns:
            A subgraph of the schema graph induced on nodes that are descendants from the source
                component and are related to the source through a specific component relationship.
        """
        # get a list of required component nodes
        req_components = self.get_component_requirements(source_component)

        # get the subgraph induced on required component nodes
        req_components_graph = self.get_subgraph_by_edge_type(
            self.dmr.get_relationship_value("requiresComponent", "edge_key"),
        ).subgraph(req_components)

        return req_components_graph

    def get_descendants_by_edge_type(
        self,
        source_node: str,
        relationship: str,
        connected: bool = True,
        ordered: bool = False,
    ) -> list[str]:
        """
        Get all nodes that are descendants of a given source node, based on a specific
        type of edge / relationship type.

        Args:
            source_node: The node whose descendants need to be retrieved.
            relationship: Edge / link relationship type (compared against the edge key).
            connected:
                If True, we need to ensure that all descendant nodes are reachable from the source
                    node, i.e., they are part of the same connected component.
                If False, the descendants could be in multiple connected components.
                Default value is True.
            ordered:
                If True, the list of descendants will be topologically ordered.
                If False, the list has no particular order (depends on the order in which the
                    descendants were traversed in the subgraph).

        Returns:
            List of nodes that are descendants from a particular node (sorted / unsorted).
            When `connected` is True and the result is non-empty, the source node itself
            is part of the returned list.
        """
        root_descendants = nx.descendants(self.graph, source_node)

        subgraph_nodes = list(root_descendants)
        subgraph_nodes.append(source_node)
        descendants_subgraph = self.graph.subgraph(subgraph_nodes)

        # prune the descendants subgraph so as to include only those edges that match
        # the relationship type
        rel_edges = [
            (node_1, node_2)
            for node_1, node_2, key, _ in descendants_subgraph.edges(
                data=True, keys=True
            )
            if key == relationship
        ]

        relationship_subgraph: nx.DiGraph = nx.DiGraph()
        relationship_subgraph.add_edges_from(rel_edges)

        descendants = relationship_subgraph.nodes()

        if not descendants:
            # return empty list if there are no nodes that are reachable from the
            # source node based on this relationship type
            return []

        if connected and source_node not in relationship_subgraph:
            # Edges of this type exist further down, but none touches the source node,
            # so nothing is reachable from it through this relationship. Without this
            # guard nx.descendants below raises because the node is not in the subgraph.
            return []

        if connected and ordered:
            # get the set of reachable nodes from the source node
            descendants = nx.descendants(relationship_subgraph, source_node)
            descendants.add(source_node)

            # normally, the descendants from a node are unordered (peculiarity
            # of nx descendants call)
            # form the subgraph on descendants and order it topologically
            # this assumes an acyclic subgraph
            descendants = nx.topological_sort(
                relationship_subgraph.subgraph(descendants)
            )
        elif connected:
            # get the nodes that are reachable from a given source node
            # after the pruning process above some nodes in the
            # root_descendants subgraph might have become disconnected and
            # will be omitted
            descendants = nx.descendants(relationship_subgraph, source_node)
            descendants.add(source_node)
        elif ordered:
            # sort the nodes topologically
            # this requires the graph to be an acyclic graph
            descendants = nx.topological_sort(relationship_subgraph)

        return list(descendants)

    def get_digraph_by_edge_type(self, edge_type: str) -> nx.DiGraph:
        """Get a networkx digraph of the nodes connected via a given edge_type.

        Args:
            edge_type:
                Edge type to search for, possible types are defined by 'edge_key'
                    in relationship class
        Returns:
            Directed graph holding one edge per pair of nodes connected by `edge_type`.
        """
        digraph: nx.DiGraph = nx.DiGraph()
        for node_1, node_2, key, _ in self.graph.edges(data=True, keys=True):
            if key == edge_type:
                digraph.add_edge(node_1, node_2)
        return digraph

    def get_edges_by_relationship(
        self,
        node: str,
        relationship: str,
    ) -> list[tuple[str, str]]:
        """Get a list of out-edges of a node where the edges match a specific type of relationship.

        i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf"
            (set of edges to children / sub-class nodes).

        Args:
            node: the node whose edges we need to look at.
            relationship: the type of link(s) that the above node and its immediate neighbors share.

        Returns:
            List of (source, target) edges leaving the node with the given relationship key.
        """
        return [
            (node_1, node_2)
            for node_1, node_2, key, _ in self.graph.out_edges(
                node, data=True, keys=True
            )
            if key == relationship
        ]

    def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
        """
        Order the values associated with a particular node and edge_key to
            match original ordering in schema.

        Args:
            key (str): a key representing an edge relationship in
                DataModelRelationships.relationships_dictionary
            source_node_label (str): node to look for edges of and order

        Raises:
            KeyError: cannot find source node in graph

        Returns:
            list[str]:
                list of sorted nodes, that share the specified relationship with the source node
                For the example data model, for key='rangeIncludes', source_node_label='CancerType'
                  the return would be ['Breast', 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that
                  exact order.
        """
        # Check if node is in the graph, if not throw an error.
        if not self.is_class_in_schema(node_label=source_node_label):
            raise KeyError(
                f"Cannot find node: {source_node_label} in the graph, please check entry."
            )

        edge_key = self.dmr.get_relationship_value(key, "edge_key")

        if self.dmr.get_relationship_value(key, "jsonld_direction") == "out":
            # use out edges
            original_edge_weights_dict = {
                attached_node: self.graph[source_node][attached_node][edge_key][
                    "weight"
                ]
                for source_node, attached_node in self.graph.out_edges(
                    source_node_label
                )
                if edge_key in self.graph[source_node][attached_node]
            }
        else:
            # use in edges
            original_edge_weights_dict = {
                attached_node: self.graph[attached_node][source_node][edge_key][
                    "weight"
                ]
                for attached_node, source_node in self.graph.in_edges(source_node_label)
                if edge_key in self.graph[attached_node][source_node]
            }

        # Order the attached nodes by their edge weight.
        return [
            node
            for node, _ in sorted(
                original_edge_weights_dict.items(), key=lambda item: item[1]
            )
        ]

    # Get values associated with a node
    def get_nodes_ancestors(self, subgraph: nx.DiGraph, node_label: str) -> list[str]:
        """Get a list of all ancestors of a node in the given graph

        Args:
            subgraph (nx.DiGraph): networkx graph object
            node_label (str): label of node to find ancestors for

        Returns:
            list[str]: nodes from which node_label can be reached in subgraph
        """
        return list(nx.ancestors(subgraph, node_label))

    def get_node_comment(
        self, node_display_name: Optional[str] = None, node_label: Optional[str] = None
    ) -> str:
        """Get the node definition, i.e., the "comment" associated with a given node display name.

        Args:
            node_display_name, str: Display name of the node which you want to get the comment for.
            node_label, str: Label of the node you would want to get the comment for.
        Returns:
            Comment associated with node, as a string. An empty string is returned when the
            label resolves to an empty string.
        """
        node_label = self._get_node_label(node_label, node_display_name)

        if not node_label:
            return ""

        comment_key = self.dmr.get_relationship_value("comment", "node_label")
        return self.graph.nodes[node_label][comment_key]

    def get_node_dependencies(
        self,
        source_node: str,
        display_names: bool = True,
        schema_ordered: bool = True,
    ) -> list[str]:
        """Get the immediate dependencies that are related to a given source node.

        Args:
            source_node: The node whose dependencies we need to compute.
            display_names: if True, return list of display names of each of the dependencies.
                           if False, return list of node labels of each of the dependencies.
            schema_ordered:
                if True, return the dependencies of the node following the order of the schema
                    (slower).
                if False, return dependencies from graph without guaranteeing schema order (faster)

        Returns:
            List of nodes that are dependent on the source node.
        """
        dependency_edge_key = self.dmr.get_relationship_value(
            "requiresDependency", "edge_key"
        )

        if schema_ordered:
            # get dependencies in the same order in which they are defined in the schema
            required_dependencies = self.get_ordered_entry(
                key=dependency_edge_key,
                source_node_label=source_node,
            )
        else:
            required_dependencies = self.get_adjacent_nodes_by_relationship(
                node_label=source_node,
                relationship=dependency_edge_key,
            )

        if display_names:
            # get display names of dependencies
            return self.get_nodes_display_names(required_dependencies)

        return required_dependencies

    def get_nodes_descendants(self, node_label: str) -> list[str]:
        """Return a list of nodes reachable from source in graph

        Args:
            node_label, str: any given node
        Return:
            all_descendants, list: nodes reachable from source in graph
        """
        return list(nx.descendants(self.graph, node_label))

    def get_nodes_display_names(
        self,
        node_list: list[str],
    ) -> list[str]:
        """Get display names associated with the given list of nodes.

        Args:
            node_list: List of nodes whose display names we need to retrieve.

        Returns:
            List of display names, in the same order as node_list.
        """
        return [
            self.graph.nodes[node][
                self.dmr.get_relationship_value("displayName", "node_label")
            ]
            for node in node_list
        ]

    def get_node_label(self, node_display_name: str) -> str:
        """Get the node label for a given display name.

        Args:
            node_display_name: Display name of the node which you want to get the label for.
        Returns:
            Node label associated with given node. The class label is preferred over the
            property label when both are in the graph.
            If display name not part of schema, return an empty string.
        """
        node_class_label = get_class_label_from_display_name(
            display_name=node_display_name
        )
        node_property_label = get_property_label_from_display_name(
            display_name=node_display_name
        )

        if node_class_label in self.graph.nodes:
            return node_class_label
        if node_property_label in self.graph.nodes:
            return node_property_label
        return ""

    def get_node_range(
        self,
        node_label: Optional[str] = None,
        node_display_name: Optional[str] = None,
        display_names: bool = False,
    ) -> list[str]:
        """
        Get the range, i.e., all the valid values that are associated with a node label.

        Args:
            node_label (Optional[str], optional): Node for which you need to retrieve the range.
                Defaults to None.
            node_display_name (Optional[str], optional): Display name of the node, used to
                look up the label when node_label is not given. Defaults to None.
            display_names (bool, optional): Return display names instead of labels.
                Defaults to False.

        Raises:
            ValueError: If neither node_label nor node_display_name is provided, or if the
                range lookup fails with a KeyError.

        Returns:
            list[str]:
                If display_names=False, a list of valid values (labels) associated with a given node.
                If display_names=True, a list of valid values (display names) associated
                    with a given node
        """
        node_label = self._get_node_label(node_label, node_display_name)
        try:
            # NOTE: find_node_range deduplicates through a set, so the values are not
            # returned in schema order.
            required_range = self.find_node_range(node_label=node_label)
        except KeyError as exc:
            raise ValueError(
                f"The source node {node_label} does not exist in the graph. "
                "Please use a different node."
            ) from exc

        if display_names:
            # get the display name(s) of all valid values
            return [self.graph.nodes[req]["displayName"] for req in required_range]

        return required_range

    def get_node_required(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> bool:
        """Check if a given node is required or not.

        Note: The possible options that a node can be associated with -- "required" / "optional".

        Args:
            node_label: Label of the node for which you need to look up.
            node_display_name: Display name of the node for which you want look up.
        Returns:
            True: If the given node is a "required" node.
            False: If the given node is not a "required" (i.e., an "optional") node.
        """
        node_label = self._get_node_label(node_label, node_display_name)
        rel_node_label = self.dmr.get_relationship_value("required", "node_label")
        return self.graph.nodes[node_label][rel_node_label]

    def get_node_validation_rules(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> Union[list, dict[str, str]]:
        """Get validation rules associated with a node,

        Args:
            node_label: Label of the node for which you need to look up.
            node_display_name: Display name of the node which you want to get the label for.
        Returns:
            A set of validation rules associated with node, as a list or a dictionary.
            An empty list is returned when the label resolves to an empty string.
        Raises:
            ValueError: If the node (or its "validationRules" attribute) is not in the graph.
        """
        node_label = self._get_node_label(node_label, node_display_name)

        if not node_label:
            return []

        try:
            node_validation_rules = self.graph.nodes[node_label]["validationRules"]
        except KeyError as key_error:
            raise ValueError(
                f"{node_label} is not in the graph, please provide a proper node label"
            ) from key_error

        return node_validation_rules

    def get_subgraph_by_edge_type(self, relationship: str) -> nx.DiGraph:
        """Get a subgraph containing all edges of a given type (aka relationship).

        Args:
            relationship: edge / link relationship type (compared against the edge key).

        Returns:
            Directed graph on edges of a particular type (aka relationship)
        """
        # prune the metadata model graph so as to include only those edges that
        # match the relationship type
        rel_edges = [
            (node_1, node_2)
            for node_1, node_2, key, _ in self.graph.out_edges(data=True, keys=True)
            if key == relationship
        ]

        relationship_subgraph: nx.DiGraph = nx.DiGraph()
        relationship_subgraph.add_edges_from(rel_edges)

        return relationship_subgraph

    def find_adjacent_child_classes(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> list[str]:
        """Find child classes of a given node.

        Args:
            node_display_name: Display name of the node to look up.
            node_label: Label of the node to look up.
        Returns:
            List of nodes that are adjacent to the given node, by SubclassOf relationship.
        """
        node_label = self._get_node_label(node_label, node_display_name)
        return self.get_adjacent_nodes_by_relationship(
            node_label=node_label,
            relationship=self.dmr.get_relationship_value("subClassOf", "edge_key"),
        )

    def find_child_classes(self, schema_class: str) -> list:
        """Find schema classes that inherit from the given class

        Args:
            schema_class: node label for the class from which to look for children.
        Returns:
            list of children to the schema_class.
        """
        # NOTE(review): the assertion relies on `unlist` returning a list for every
        # input, including a single successor -- confirm against schematic.utils.general.
        child_classes = unlist(list(self.graph.successors(schema_class)))
        assert isinstance(child_classes, list)
        return child_classes

    def find_class_specific_properties(self, schema_class: str) -> list[str]:
        """Find properties specifically associated with a given class

        Args:
            schema_class, str: node/class label, to identify properties for.
        Returns:
            properties, list: List of properties associated with a given schema class.
        Raises:
            KeyError: Key error is raised if the provided schema_class is not in the graph
        """
        if not self.is_class_in_schema(schema_class):
            raise KeyError(
                (
                    f"Schema_class provided: {schema_class} is not in the data model, please check "
                    "that you are providing the proper class/node label"
                )
            )

        # Keep every source node of an edge into schema_class whose edge keys
        # include "domainValue".
        return [
            node1
            for node1, node2 in self.graph.edges()
            if node2 == schema_class
            and "domainValue" in self.graph[node1][schema_class]
        ]

    def find_parent_classes(self, node_label: str) -> list[list[str]]:
        """Find all parents of the provided node

        Args:
            node_label: label of the node to find parents of
        Returns:
            List of list of Parents to the given node. Each inner list is one path from
            the root node down to (but excluding) the given node.
        """
        # Get digraph of nodes with parents
        digraph = self.get_digraph_by_edge_type("parentOf")

        # Get root node: the first node in topological order of the parentOf digraph
        root_node = list(nx.topological_sort(digraph))[0]

        # Get paths between root_node and the target node.
        # NOTE: the paths are searched in the full graph, not only along parentOf edges.
        paths = nx.all_simple_paths(self.graph, source=root_node, target=node_label)

        return [_path[:-1] for _path in paths]

    def full_schema_graph(self, size: Optional[int] = None) -> graphviz.Digraph:
        """Create a graph of the data model.

        Args:
            size, int: max height and width of the graph, if one value provided
               it is used for both.
        Returns:
            schema graph viz
        """
        edges = self.graph.edges()
        return visualize(edges, size=size)

    def is_class_in_schema(self, node_label: str) -> bool:
        """Determine if provided node_label is in the schema graph/data model.

        Args:
            node_label: label of node to search for in the graph
        Returns:
            True, if node is in the graph schema
            False, if node is not in graph schema
        """
        return node_label in self.graph.nodes()

    def _get_parent_path_edges(self, source: str) -> list:
        """Return the consecutive (parent, child) edges along every parent path of source."""
        edges: list = []
        for parent_path in self.find_parent_classes(source):
            # find_parent_classes drops the target node, so add it back to close the path
            full_path = [*parent_path, source]
            edges.extend(zip(full_path, full_path[1:]))
        return edges

    def sub_schema_graph(
        self, source: str, direction: str, size: Optional[float] = None
    ) -> Optional[graphviz.Digraph]:
        """Create a sub-schema graph

        Args:
            source, str: source node label to start graph
            direction, str: direction to create the visualization, choose from "up", "down", "both"
            size, float: max height and width of the graph, if one value provided it is used for
                both.
        Returns:
            Sub-schema graph viz, or None when direction is not one of the supported values.
        """
        if direction == "down":
            edges = list(nx.edge_bfs(self.graph, [source]))
            return visualize(edges, size=size)
        if direction == "up":
            edges = self._get_parent_path_edges(source)
            return visualize(edges, size=size)
        if direction == "both":
            # downward edges first, followed by the edges leading up to the root
            edges = list(nx.edge_bfs(self.graph, [source]))
            edges.extend(self._get_parent_path_edges(source))
            return visualize(edges, size=size)
        return None

    def get_node_column_type(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> Optional[JSONSchemaType]:
        """Gets the column type of the node

        Args:
            node_label: The label of the node to get the type from
            node_display_name: The display name of the node to get the type from

        Returns:
            The column type of the node if it has one, otherwise None
        """
        node_label = self._get_node_label(node_label, node_display_name)
        rel_node_label = self.dmr.get_relationship_value("columnType", "node_label")
        return self.graph.nodes[node_label][rel_node_label]

    def _get_node_label(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> str:
        """Returns the node label if given otherwise gets the node label from the display name

        Args:
            node_label: The label of the node; returned unchanged when not None
            node_display_name: The display name used to look up the label otherwise

        Raises:
            ValueError: If neither node_label or node_display_name is provided

        Returns:
            The node label
        """
        if node_label is not None:
            return node_label
        if node_display_name is not None:
            return self.get_node_label(node_display_name)
        raise ValueError("Either 'node_label' or 'node_display_name' must be provided.")
class DataModelGraphMeta(type):  # pylint: disable=too-few-public-methods
    """Metaclass that keeps a single cached instance per class using it.

    A class opts in with ``class Foo(metaclass=DataModelGraphMeta)``. The first
    call to ``Foo(...)`` constructs the instance; every later call returns that
    same object, whatever arguments are passed.
    """

    # Maps each class created with this metaclass to its one cached instance.
    _instances: dict = {}

    def __call__(cls, *args: Any, **kwargs: Any) -> Any:
        """Return the cached instance of ``cls``, constructing it on first use.

        Possible changes to the value of the `__init__` argument do not affect
        the returned instance.

        Args:
            *args: positional arguments forwarded to the class on first construction.
            **kwargs: keyword arguments forwarded to the class on first construction.

        Returns:
            The single instance associated with ``cls``.
        """
        if cls not in cls._instances:
            # Deriving from ``type`` is what lets ``super().__call__`` reach the
            # regular construction path (``__new__`` followed by ``__init__``).
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]
DataModelGraphMeta
class DataModelGraph:  # pylint: disable=too-few-public-methods
    """
    Generate graph network (networkx) from the attributes and relationships returned
    from the data model parser.

    NOTE(review): the ``__metaclass__`` attribute below is Python 2 syntax. Python 3
    treats it as an ordinary class attribute, so every call to ``DataModelGraph(...)``
    constructs a new instance. It is left untouched because switching on real
    singleton behaviour would change what existing callers get back.
    """

    __metaclass__ = DataModelGraphMeta

    def __init__(
        self,
        attribute_relationships_dict: dict,
        data_model_labels: DisplayLabelType = "class_label",
    ) -> None:
        """Load parsed data model and build its graph.

        Args:
            attribute_relationships_dict, dict: generated in data_model_parser
                {Attribute Display Name: {
                    Relationships: {
                        CSV Header: Value}}}
            data_model_labels: str, display_label or class_label.
                display_label, use the display name as a label, if it is valid
                (contains no blacklisted characters) otherwise will default to schema_label.
                class_label, default, use standard class or property label.
        Raises:
            ValueError, attribute_relationships_dict is empty / not loaded.
        """
        # Validate first so that a missing model is always reported with this
        # ValueError rather than whatever the helper classes do with empty input.
        if not attribute_relationships_dict:
            raise ValueError(
                (
                    "Something has gone wrong, a data model was not loaded into the DataModelGraph "
                    "Class. Please check that your paths are correct"
                )
            )

        self.attribute_relationships_dict = attribute_relationships_dict
        self.dmn = DataModelNodes(self.attribute_relationships_dict)
        self.dme = DataModelEdges()
        self.dmr = DataModelRelationships()
        self.data_model_labels = data_model_labels
        self.graph = self.generate_data_model_graph()

    @tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
    def generate_data_model_graph(self) -> nx.MultiDiGraph:
        """
        Generate a NetworkX graph from the attribute/relationship dictionary.

        The graph is built by first adding all nodes to the graph, then connecting
        nodes by the relationships defined in ``attribute_relationships_dict``.

        Returns:
            graph: nx.MultiDiGraph, networkx graph representation of the data model
        """
        # Get all relationships with edges
        edge_relationships = self.dmr.retrieve_rel_headers_dict(edge=True)

        # Find all nodes
        all_nodes = self.dmn.gather_all_nodes_in_model(
            attr_rel_dict=self.attribute_relationships_dict
        )

        # Instantiate NetworkX MultiDigraph
        graph: nx.MultiDiGraph = nx.MultiDiGraph()

        # Node display name -> node dict, reused below when generating edges
        all_node_dict = {}

        ## Fill in MultiDigraph with nodes
        for node in all_nodes:
            # Gather information for each node
            node_dict = self.dmn.generate_node_dict(
                node_display_name=node,
                attr_rel_dict=self.attribute_relationships_dict,
                data_model_labels=self.data_model_labels,
            )
            all_node_dict[node] = node_dict

            # Generate node and attach information (attributes) to each node
            graph = self.dmn.generate_node(graph, node_dict)

        edge_list: list[tuple[str, str, dict[str, Union[str, int]]]] = []
        ## Connect nodes via edges
        for node in all_nodes:
            # The accumulated list is handed to each call and replaced by the
            # call's return value; no per-iteration copy of the list is needed.
            edge_list = self.dme.generate_edge(
                node,
                all_node_dict,
                self.attribute_relationships_dict,
                edge_relationships,
                edge_list,
            )

        # Add edges to the Graph
        for node_1, node_2, edge_dict in edge_list:
            graph.add_edge(
                node_1, node_2, key=edge_dict["key"], weight=edge_dict["weight"]
            )
        return graph
Generate graph network (networkx) from the attributes and relationships returned from the data model parser.
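Intended as a singleton. Note that the `__metaclass__` class attribute is Python 2 syntax and is ignored by Python 3, so each call currently constructs a new instance.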
def __init__(
    self,
    attribute_relationships_dict: dict,
    data_model_labels: DisplayLabelType = "class_label",
) -> None:
    """Load parsed data model and build its graph.

    Args:
        attribute_relationships_dict, dict: generated in data_model_parser
            {Attribute Display Name: {
                Relationships: {
                    CSV Header: Value}}}
        data_model_labels: str, display_label or class_label.
            display_label, use the display name as a label, if it is valid
            (contains no blacklisted characters) otherwise will default to schema_label.
            class_label, default, use standard class or property label.
    Raises:
        ValueError, attribute_relationships_dict is empty / not loaded.
    """
    # Validate first so that a missing model is always reported with this
    # ValueError rather than whatever the helper classes do with empty input.
    if not attribute_relationships_dict:
        raise ValueError(
            (
                "Something has gone wrong, a data model was not loaded into the DataModelGraph "
                "Class. Please check that your paths are correct"
            )
        )

    self.attribute_relationships_dict = attribute_relationships_dict
    self.dmn = DataModelNodes(self.attribute_relationships_dict)
    self.dme = DataModelEdges()
    self.dmr = DataModelRelationships()
    self.data_model_labels = data_model_labels
    self.graph = self.generate_data_model_graph()
Load parsed data model.
Arguments:
- attribute_relationships_dict, dict: generated in data_model_parser {Attribute Display Name: { Relationships: { CSV Header: Value}}}
- data_model_labels: str, display_label or class_label. display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to schema_label. class_label, default, use standard class or property label.
Raises:
- ValueError: raised when attribute_relationships_dict is empty, i.e. no data model was loaded.
@tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
def generate_data_model_graph(self) -> nx.MultiDiGraph:
    """
    Generate a NetworkX graph from the attribute/relationship dictionary.

    The graph is built by first adding all nodes to the graph, then connecting
    nodes by the relationships defined in ``attribute_relationships_dict``.

    Returns:
        graph: nx.MultiDiGraph, networkx graph representation of the data model
    """
    # Get all relationships with edges
    edge_relationships = self.dmr.retrieve_rel_headers_dict(edge=True)

    # Find all nodes
    all_nodes = self.dmn.gather_all_nodes_in_model(
        attr_rel_dict=self.attribute_relationships_dict
    )

    # Instantiate NetworkX MultiDigraph
    graph: nx.MultiDiGraph = nx.MultiDiGraph()

    # Node display name -> node dict, reused below when generating edges
    all_node_dict = {}

    ## Fill in MultiDigraph with nodes
    for node in all_nodes:
        # Gather information for each node
        node_dict = self.dmn.generate_node_dict(
            node_display_name=node,
            attr_rel_dict=self.attribute_relationships_dict,
            data_model_labels=self.data_model_labels,
        )
        all_node_dict[node] = node_dict

        # Generate node and attach information (attributes) to each node
        graph = self.dmn.generate_node(graph, node_dict)

    edge_list: list[tuple[str, str, dict[str, Union[str, int]]]] = []
    ## Connect nodes via edges
    for node in all_nodes:
        # The accumulated list is handed to each call and replaced by the
        # call's return value; no per-iteration copy of the list is needed.
        edge_list = self.dme.generate_edge(
            node,
            all_node_dict,
            self.attribute_relationships_dict,
            edge_relationships,
            edge_list,
        )

    # Add edges to the Graph
    for node_1, node_2, edge_dict in edge_list:
        graph.add_edge(
            node_1, node_2, key=edge_dict["key"], weight=edge_dict["weight"]
        )
    return graph
Generate a NetworkX graph from the attribute/relationship dictionary. The graph is built by first adding all nodes to the graph, then connecting the nodes according to the relationships defined in attribute_relationships_dict.
Returns:
graph: nx.MultiDiGraph, networkx graph representation of the data model
153class DataModelGraphExplorer: # pylint: disable=too-many-public-methods 154 """DataModelGraphExplorer""" 155 156 def __init__( 157 self, 158 graph: nx.MultiDiGraph, 159 ): 160 """Load data model graph as a singleton. 161 Args: 162 G: nx.MultiDiGraph, networkx graph representation of the data model 163 """ 164 self.graph = graph # At this point the graph is expected to be fully formed. 165 self.dmr = DataModelRelationships() 166 167 def find_properties(self) -> set[str]: 168 """ 169 Identify all properties, as defined by the first node in a pair, connected with 170 'domainIncludes' edge type 171 172 Returns: 173 properties, set: All properties defined in the data model, each property name 174 is defined by its label. 175 """ 176 properties_list: list[str] = [] 177 for node_1, _, rel in self.graph.edges: 178 if rel == self.dmr.get_relationship_value("domainIncludes", "edge_key"): 179 properties_list.append(node_1) 180 properties_set = set(properties_list) 181 return properties_set 182 183 def find_classes(self) -> AbstractSet[str]: 184 """ 185 Identify all classes, as defined but all nodes, minus all properties 186 (which are explicitly defined) 187 Returns: 188 classes, set: All classes defined in the data model, each class 189 name is defined by its label. 190 """ 191 nodes = self.graph.nodes 192 properties = self.find_properties() 193 classes = nodes - properties 194 return classes 195 196 def find_node_range( 197 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 198 ) -> list: 199 """Get valid values for the given node (attribute) 200 Args: 201 node_label, str, Optional[str]: label of the node for which to retrieve valid values 202 node_display_name, str, Optional[str]: Display Name of the node for which to 203 retrieve valid values 204 Returns: 205 valid_values, list: List of valid values associated with the provided node. 
206 """ 207 node_label = self._get_node_label(node_label, node_display_name) 208 209 valid_values = [] 210 for node_1, node_2, rel in self.graph.edges: 211 if node_1 == node_label and rel == self.dmr.get_relationship_value( 212 "rangeIncludes", "edge_key" 213 ): 214 valid_values.append(node_2) 215 valid_values = list(set(valid_values)) 216 return valid_values 217 218 def get_adjacent_nodes_by_relationship( 219 self, node_label: str, relationship: str 220 ) -> list[str]: 221 """Get a list of nodes that is / are adjacent to a given node, based on a relationship type. 222 223 Args: 224 node_label: label of the the node whose edges we need to look at. 225 relationship: the type of link(s) that the above node and its immediate neighbors share. 226 227 Returns: 228 List of nodes that are adjacent to the given node. 229 #checked 230 """ 231 nodes = set() 232 for _, node_2, key, _ in self.graph.out_edges(node_label, data=True, keys=True): 233 if key == relationship: 234 nodes.add(node_2) 235 236 return list(nodes) 237 238 def get_component_node_required( 239 self, 240 manifest_component: str, 241 node_validation_rules: Optional[list[str]] = None, 242 node_label: Optional[str] = None, 243 node_display_name: Optional[str] = None, 244 ) -> bool: 245 """Check if a node is required taking into account the manifest component it is defined in 246 (requirements can be set in validation rule as well as required column) 247 Args: 248 manifest_component: str, manifest component display name that the node belongs to. 249 node_validation_rules: list[str], validation rules for a given node and component. 250 node_label: str, Label of the node you would want to get the comment for. 251 node_display_name: str, node display name for the node being queried. 
252 Returns: 253 True, if node is required, False if not 254 """ 255 node_required = False 256 257 if not node_validation_rules: 258 # Get node validation rules for a given component 259 node_validation_rules = self.get_component_node_validation_rules( 260 manifest_component=manifest_component, 261 node_label=node_label, 262 node_display_name=node_display_name, 263 ) 264 265 # Check if the validation rule specifies that the node is required for this particular 266 # component. 267 if rule_in_rule_list("required", node_validation_rules): 268 node_required = True 269 # To prevent any unintended errors, ensure the Required field for this node is False 270 if self.get_node_required( 271 node_label=node_label, node_display_name=node_display_name 272 ): 273 if not node_display_name: 274 assert node_label is not None 275 node_display_name = self.graph.nodes[node_label][ 276 self.dmr.get_relationship_value("displayName", "node_label") 277 ] 278 error_str = " ".join( 279 [ 280 f"For component: {manifest_component} and attribute: {node_display_name}", 281 "requirements are being specified in both the Required field and in the", 282 "Validation Rules. 
If you desire to use validation rules to set component", 283 "specific requirements for this attribute", 284 "then the Required field needs to be set to False, or the validation may", 285 "not work as intended, for other components where the attribute", 286 "that should not be required.", 287 ] 288 ) 289 290 logger.error(error_str) 291 else: 292 # If requirements are not being set in the validation rule, then just pull the 293 # standard node requirements from the model 294 node_required = self.get_node_required( 295 node_label=node_label, node_display_name=node_display_name 296 ) 297 return node_required 298 299 def get_component_node_validation_rules( 300 self, 301 manifest_component: str, 302 node_label: Optional[str] = None, 303 node_display_name: Optional[str] = None, 304 ) -> list: 305 """Get validation rules for a given node and component. 306 Args: 307 manifest_component: str, manifest component display name that the node belongs to. 308 node_label: str, Label of the node you would want to get the comment for. 309 node_display_name: str, node display name for the node being queried. 310 Returns: 311 validation_rules: list, validation rules list for a given node and component. 312 """ 313 # get any additional validation rules associated with this node (e.g. 
can this node 314 # be mapped to a list of other nodes) 315 node_validation_rules = self.get_node_validation_rules( 316 node_label=node_label, node_display_name=node_display_name 317 ) 318 319 # Parse the validation rules per component if applicable 320 if node_validation_rules and isinstance(node_validation_rules, dict): 321 node_validation_rules_list = extract_component_validation_rules( 322 manifest_component=manifest_component, 323 validation_rules_dict=node_validation_rules, # type: ignore 324 ) 325 else: 326 assert isinstance(node_validation_rules, list) 327 node_validation_rules_list = node_validation_rules 328 return node_validation_rules_list 329 330 def get_component_requirements( 331 self, 332 source_component: str, 333 ) -> list[str]: 334 """ 335 Get all components that are associated with a given source component and are 336 required by it. 337 338 Args: 339 source_component: source component for which we need to find all required downstream 340 components. 341 342 Returns: 343 List of nodes that are descendants from the source component are are related to the 344 source through a specific component relationship. 345 """ 346 347 req_components = list( 348 reversed( 349 self.get_descendants_by_edge_type( 350 source_component, 351 self.dmr.get_relationship_value("requiresComponent", "edge_key"), 352 ordered=True, 353 ) 354 ) 355 ) 356 357 return req_components 358 359 def get_component_requirements_graph( 360 self, 361 source_component: str, 362 ) -> nx.Graph: 363 """ 364 Get all components that are associated with a given source component and are required by it; 365 return the components as a dependency graph (i.e. a DAG). 366 367 Args: 368 source_component, str: source component for which we need to find all required 369 downstream components. 370 371 Returns: 372 A subgraph of the schema graph induced on nodes that are descendants from the source 373 component and are related to the source through a specific component relationship. 
374 """ 375 376 # get a list of required component nodes 377 req_components = self.get_component_requirements(source_component) 378 379 # get the subgraph induced on required component nodes 380 req_components_graph = self.get_subgraph_by_edge_type( 381 self.dmr.get_relationship_value("requiresComponent", "edge_key"), 382 ).subgraph(req_components) 383 384 return req_components_graph 385 386 def get_descendants_by_edge_type( 387 self, 388 source_node: str, 389 relationship: str, 390 connected: bool = True, 391 ordered: bool = False, 392 ) -> list[str]: 393 """ 394 Get all nodes that are descendants of a given source node, based on a specific 395 type of edge / relationship type. 396 397 Args: 398 source_node: The node whose descendants need to be retrieved. 399 relationship: Edge / link relationship type with possible values same as in above docs. 400 connected: 401 If True, we need to ensure that all descendant nodes are reachable from the source 402 node, i.e., they are part of the same connected component. 403 If False, the descendants could be in multiple connected components. 404 Default value is True. 405 ordered: 406 If True, the list of descendants will be topologically ordered. 407 If False, the list has no particular order (depends on the order in which the 408 descendants were traversed in the subgraph). 
409 410 Returns: 411 List of nodes that are descendants from a particular node (sorted / unsorted) 412 """ 413 414 root_descendants = nx.descendants(self.graph, source_node) 415 416 subgraph_nodes = list(root_descendants) 417 subgraph_nodes.append(source_node) 418 descendants_subgraph = self.graph.subgraph(subgraph_nodes) 419 420 # prune the descendants subgraph so as to include only those edges that match 421 # the relationship type 422 rel_edges = [] 423 for node_1, node_2, key, _ in descendants_subgraph.edges(data=True, keys=True): 424 if key == relationship: 425 rel_edges.append((node_1, node_2)) 426 427 relationship_subgraph: nx.DiGraph = nx.DiGraph() 428 relationship_subgraph.add_edges_from(rel_edges) 429 430 descendants = relationship_subgraph.nodes() 431 432 if not descendants: 433 # return empty list if there are no nodes that are reachable from the 434 # source node based on this relationship type 435 return [] 436 437 if connected and ordered: 438 # get the set of reachable nodes from the source node 439 descendants = nx.descendants(relationship_subgraph, source_node) 440 descendants.add(source_node) 441 442 # normally, the descendants from a node are unordered (peculiarity 443 # of nx descendants call) 444 # form the subgraph on descendants and order it topologically 445 # this assumes an acyclic subgraph 446 descendants = nx.topological_sort( 447 relationship_subgraph.subgraph(descendants) 448 ) 449 elif connected: 450 # get the nodes that are reachable from a given source node 451 # after the pruning process above some nodes in the 452 # root_descendants subgraph might have become disconnected and 453 # will be omitted 454 descendants = nx.descendants(relationship_subgraph, source_node) 455 descendants.add(source_node) 456 elif ordered: 457 # sort the nodes topologically 458 # this requires the graph to be an acyclic graph 459 descendants = nx.topological_sort(relationship_subgraph) 460 461 return list(descendants) 462 463 def 
get_digraph_by_edge_type(self, edge_type: str) -> nx.DiGraph: 464 """Get a networkx digraph of the nodes connected via a given edge_type. 465 Args: 466 edge_type: 467 Edge type to search for, possible types are defined by 'edge_key' 468 in relationship class 469 Returns: 470 """ 471 472 digraph: nx.DiGraph = nx.DiGraph() 473 for node_1, node_2, key, _ in self.graph.edges(data=True, keys=True): 474 if key == edge_type: 475 digraph.add_edge(node_1, node_2) 476 return digraph 477 478 def get_edges_by_relationship( 479 self, 480 node: str, 481 relationship: str, 482 ) -> list[tuple[str, str]]: 483 """Get a list of out-edges of a node where the edges match a specific type of relationship. 484 485 i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf" 486 (set of edges to children / sub-class nodes). 487 488 Args: 489 node: the node whose edges we need to look at. 490 relationship: the type of link(s) that the above node and its immediate neighbors share. 491 492 Returns: 493 List of edges that are connected to the node. 494 """ 495 edges: list[tuple[str, str]] = [] 496 497 for node_1, node_2, key, _ in self.graph.out_edges(node, data=True, keys=True): 498 if key == relationship: 499 edges.append((node_1, node_2)) 500 501 return edges 502 503 def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]: 504 """ 505 Order the values associated with a particular node and edge_key to 506 match original ordering in schema. 
507 508 Args: 509 key (str): a key representing and edge relationship in 510 DataModelRelationships.relationships_dictionary 511 source_node_label (str): node to look for edges of and order 512 513 Raises: 514 KeyError: cannot find source node in graph 515 516 Returns: 517 list[str]: 518 list of sorted nodes, that share the specified relationship with the source node 519 For the example data model, for key='rangeIncludes', source_node_label='CancerType' 520 the return would be ['Breast, 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that 521 exact order. 522 """ 523 # Check if node is in the graph, if not throw an error. 524 if not self.is_class_in_schema(node_label=source_node_label): 525 raise KeyError( 526 f"Cannot find node: {source_node_label} in the graph, please check entry." 527 ) 528 529 edge_key = self.dmr.get_relationship_value(key, "edge_key") 530 531 # Handle out edges 532 if self.dmr.get_relationship_value(key, "jsonld_direction") == "out": 533 # use out edges 534 535 original_edge_weights_dict = { 536 attached_node: self.graph[source_node][attached_node][edge_key][ 537 "weight" 538 ] 539 for source_node, attached_node in self.graph.out_edges( 540 source_node_label 541 ) 542 if edge_key in self.graph[source_node][attached_node] 543 } 544 # Handle in edges 545 else: 546 # use in edges 547 original_edge_weights_dict = { 548 attached_node: self.graph[attached_node][source_node][edge_key][ 549 "weight" 550 ] 551 for attached_node, source_node in self.graph.in_edges(source_node_label) 552 if edge_key in self.graph[attached_node][source_node] 553 } 554 555 sorted_nodes = list( 556 dict( 557 sorted(original_edge_weights_dict.items(), key=lambda item: item[1]) 558 ).keys() 559 ) 560 561 return sorted_nodes 562 563 # Get values associated with a node 564 def get_nodes_ancestors(self, subgraph: nx.DiGraph, node_label: str) -> list[str]: 565 """Get a list of nodes reachable from source component in graph 566 567 Args: 568 subgraph (nx.DiGraph): networkx graph 
object 569 node_label (str): label of node to find ancestors for 570 571 Returns: 572 list[str]: nodes reachable from source in graph 573 """ 574 all_ancestors = list(nx.ancestors(subgraph, node_label)) 575 576 return all_ancestors 577 578 def get_node_comment( 579 self, node_display_name: Optional[str] = None, node_label: Optional[str] = None 580 ) -> str: 581 """Get the node definition, i.e., the "comment" associated with a given node display name. 582 583 Args: 584 node_display_name, str: Display name of the node which you want to get the comment for. 585 node_label, str: Label of the node you would want to get the comment for. 586 Returns: 587 Comment associated with node, as a string. 588 """ 589 node_label = self._get_node_label(node_label, node_display_name) 590 591 if not node_label: 592 return "" 593 594 node_definition = self.graph.nodes[node_label][ 595 self.dmr.get_relationship_value("comment", "node_label") 596 ] 597 return node_definition 598 599 def get_node_dependencies( 600 self, 601 source_node: str, 602 display_names: bool = True, 603 schema_ordered: bool = True, 604 ) -> list[str]: 605 """Get the immediate dependencies that are related to a given source node. 606 607 Args: 608 source_node: The node whose dependencies we need to compute. 609 display_names: if True, return list of display names of each of the dependencies. 610 if False, return list of node labels of each of the dependencies. 611 schema_ordered: 612 if True, return the dependencies of the node following the order of the schema 613 (slower). 614 if False, return dependencies from graph without guaranteeing schema order (faster) 615 616 Returns: 617 List of nodes that are dependent on the source node. 
618 """ 619 620 if schema_ordered: 621 # get dependencies in the same order in which they are defined in the schema 622 required_dependencies = self.get_ordered_entry( 623 key=self.dmr.get_relationship_value("requiresDependency", "edge_key"), 624 source_node_label=source_node, 625 ) 626 else: 627 required_dependencies = self.get_adjacent_nodes_by_relationship( 628 node_label=source_node, 629 relationship=self.dmr.get_relationship_value( 630 "requiresDependency", "edge_key" 631 ), 632 ) 633 634 if display_names: 635 # get display names of dependencies 636 dependencies_display_names = [] 637 638 for req in required_dependencies: 639 dependencies_display_names.append( 640 self.graph.nodes[req][ 641 self.dmr.get_relationship_value("displayName", "node_label") 642 ] 643 ) 644 645 return dependencies_display_names 646 647 return required_dependencies 648 649 def get_nodes_descendants(self, node_label: str) -> list[str]: 650 """Return a list of nodes reachable from source in graph 651 Args: 652 node_label, str: any given node 653 Return: 654 all_descendants, list: nodes reachable from source in graph 655 """ 656 all_descendants = list(nx.descendants(self.graph, node_label)) 657 658 return all_descendants 659 660 def get_nodes_display_names( 661 self, 662 node_list: list[str], 663 ) -> list[str]: 664 """Get display names associated with the given list of nodes. 665 666 Args: 667 node_list: List of nodes whose display names we need to retrieve. 668 669 Returns: 670 List of display names. 671 """ 672 node_list_display_names = [ 673 self.graph.nodes[node][ 674 self.dmr.get_relationship_value("displayName", "node_label") 675 ] 676 for node in node_list 677 ] 678 679 return node_list_display_names 680 681 def get_node_label(self, node_display_name: str) -> str: 682 """Get the node label for a given display name. 683 684 Args: 685 node_display_name: Display name of the node which you want to get the label for. 686 Returns: 687 Node label associated with given node. 
688 If display name not part of schema, return an empty string. 689 """ 690 691 node_class_label = get_class_label_from_display_name( 692 display_name=node_display_name 693 ) 694 node_property_label = get_property_label_from_display_name( 695 display_name=node_display_name 696 ) 697 698 if node_class_label in self.graph.nodes: 699 node_label = node_class_label 700 elif node_property_label in self.graph.nodes: 701 node_label = node_property_label 702 else: 703 node_label = "" 704 705 return node_label 706 707 def get_node_range( 708 self, 709 node_label: Optional[str] = None, 710 node_display_name: Optional[str] = None, 711 display_names: bool = False, 712 ) -> list[str]: 713 """ 714 Get the range, i.e., all the valid values that are associated with a node label. 715 716 717 Args: 718 node_label (Optional[str], optional): Node for which you need to retrieve the range. 719 Defaults to None. 720 node_display_name (Optional[str], optional): _description_. Defaults to None. 721 display_names (bool, optional): _description_. Defaults to False. 722 723 Raises: 724 ValueError: If the node cannot be found in the graph. 725 726 Returns: 727 list[str]: 728 If display_names=False, a list of valid values (labels) associated with a given node. 729 If display_names=True, a list of valid values (display names) associated 730 with a given node 731 """ 732 node_label = self._get_node_label(node_label, node_display_name) 733 try: 734 # get node range in the order defined in schema for given node 735 required_range = self.find_node_range(node_label=node_label) 736 except KeyError as exc: 737 raise ValueError( 738 f"The source node {node_label} does not exist in the graph. " 739 "Please use a different node." 
740 ) from exc 741 742 if display_names: 743 # get the display name(s) of all dependencies 744 dependencies_display_names = [] 745 746 for req in required_range: 747 dependencies_display_names.append(self.graph.nodes[req]["displayName"]) 748 749 return dependencies_display_names 750 751 return required_range 752 753 def get_node_required( 754 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 755 ) -> bool: 756 """Check if a given node is required or not. 757 758 Note: The possible options that a node can be associated with -- "required" / "optional". 759 760 Args: 761 node_label: Label of the node for which you need to look up. 762 node_display_name: Display name of the node for which you want look up. 763 Returns: 764 True: If the given node is a "required" node. 765 False: If the given node is not a "required" (i.e., an "optional") node. 766 """ 767 node_label = self._get_node_label(node_label, node_display_name) 768 rel_node_label = self.dmr.get_relationship_value("required", "node_label") 769 node_required = self.graph.nodes[node_label][rel_node_label] 770 return node_required 771 772 def get_node_validation_rules( 773 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 774 ) -> Union[list, dict[str, str]]: 775 """Get validation rules associated with a node, 776 777 Args: 778 node_label: Label of the node for which you need to look up. 779 node_display_name: Display name of the node which you want to get the label for. 780 Returns: 781 A set of validation rules associated with node, as a list or a dictionary. 
782 """ 783 node_label = self._get_node_label(node_label, node_display_name) 784 785 if not node_label: 786 return [] 787 788 try: 789 node_validation_rules = self.graph.nodes[node_label]["validationRules"] 790 except KeyError as key_error: 791 raise ValueError( 792 f"{node_label} is not in the graph, please provide a proper node label" 793 ) from key_error 794 795 return node_validation_rules 796 797 def get_subgraph_by_edge_type(self, relationship: str) -> nx.DiGraph: 798 """Get a subgraph containing all edges of a given type (aka relationship). 799 800 Args: 801 relationship: edge / link relationship type with possible values same as in above docs. 802 803 Returns: 804 Directed graph on edges of a particular type (aka relationship) 805 """ 806 807 # prune the metadata model graph so as to include only those edges that 808 # match the relationship type 809 rel_edges = [] 810 for node_1, node_2, key, _ in self.graph.out_edges(data=True, keys=True): 811 if key == relationship: 812 rel_edges.append((node_1, node_2)) 813 814 relationship_subgraph: nx.DiGraph = nx.DiGraph() 815 relationship_subgraph.add_edges_from(rel_edges) 816 817 return relationship_subgraph 818 819 def find_adjacent_child_classes( 820 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 821 ) -> list[str]: 822 """Find child classes of a given node. 823 Args: 824 node_display_name: Display name of the node to look up. 825 node_label: Label of the node to look up. 826 Returns: 827 List of nodes that are adjacent to the given node, by SubclassOf relationship. 
828 """ 829 node_label = self._get_node_label(node_label, node_display_name) 830 return self.get_adjacent_nodes_by_relationship( 831 node_label=node_label, 832 relationship=self.dmr.get_relationship_value("subClassOf", "edge_key"), 833 ) 834 835 def find_child_classes(self, schema_class: str) -> list: 836 """Find schema classes that inherit from the given class 837 Args: 838 schema_class: node label for the class to from which to look for children. 839 Returns: 840 list of children to the schema_class. 841 """ 842 child_classes = unlist(list(self.graph.successors(schema_class))) 843 assert isinstance(child_classes, list) 844 return child_classes 845 846 def find_class_specific_properties(self, schema_class: str) -> list[str]: 847 """Find properties specifically associated with a given class 848 Args: 849 schema_class, str: node/class label, to identify properties for. 850 Returns: 851 properties, list: List of properties associate with a given schema class. 852 Raises: 853 KeyError: Key error is raised if the provided schema_class is not in the graph 854 """ 855 856 if not self.is_class_in_schema(schema_class): 857 raise KeyError( 858 ( 859 f"Schema_class provided: {schema_class} is not in the data model, please check " 860 "that you are providing the proper class/node label" 861 ) 862 ) 863 864 properties = [] 865 for node1, node2 in self.graph.edges(): 866 if ( 867 node2 == schema_class 868 and "domainValue" in self.graph[node1][schema_class] 869 ): 870 properties.append(node1) 871 return properties 872 873 def find_parent_classes(self, node_label: str) -> list[list[str]]: 874 """Find all parents of the provided node 875 Args: 876 node_label: label of the node to find parents of 877 Returns: 878 List of list of Parents to the given node. 
879 """ 880 # Get digraph of nodes with parents 881 digraph = self.get_digraph_by_edge_type("parentOf") 882 883 # Get root node 884 root_node = list(nx.topological_sort(digraph))[0] 885 886 # Get paths between root_node and the target node. 887 paths = nx.all_simple_paths(self.graph, source=root_node, target=node_label) 888 889 return [_path[:-1] for _path in paths] 890 891 def full_schema_graph(self, size: Optional[int] = None) -> graphviz.Digraph: 892 """Create a graph of the data model. 893 Args: 894 size, float: max height and width of the graph, if one value provided 895 it is used for both. 896 Returns: 897 schema graph viz 898 """ 899 edges = self.graph.edges() 900 return visualize(edges, size=size) 901 902 def is_class_in_schema(self, node_label: str) -> bool: 903 """Determine if provided node_label is in the schema graph/data model. 904 Args: 905 node_label: label of node to search for in the 906 Returns: 907 True, if node is in the graph schema 908 False, if node is not in graph schema 909 """ 910 return node_label in self.graph.nodes() 911 912 def sub_schema_graph( 913 self, source: str, direction: str, size: Optional[float] = None 914 ) -> Optional[graphviz.Digraph]: 915 """Create a sub-schema graph 916 Args: 917 source, str: source node label to start graph 918 direction, str: direction to create the visualization, choose from "up", "down", "both" 919 size, float: max height and width of the graph, if one value provided it is used for 920 both. 
921 Returns: 922 Sub-schema graph viz 923 """ 924 if direction == "down": 925 edges = list(nx.edge_bfs(self.graph, [source])) 926 return visualize(edges, size=size) 927 if direction == "up": 928 paths = self.find_parent_classes(source) 929 edges = [] 930 for _path in paths: 931 _path.append(source) 932 for i in range(0, len(_path) - 1): 933 edges.append((_path[i], _path[i + 1])) 934 return visualize(edges, size=size) 935 if direction == "both": 936 paths = self.find_parent_classes(source) 937 edges = list(nx.edge_bfs(self.graph, [source])) 938 for _path in paths: 939 _path.append(source) 940 for i in range(0, len(_path) - 1): 941 edges.append((_path[i], _path[i + 1])) 942 return visualize(edges, size=size) 943 return None 944 945 def get_node_column_type( 946 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 947 ) -> Optional[JSONSchemaType]: 948 """Gets the column type of the node 949 950 Args: 951 node_label: The label of the node to get the type from 952 node_display_name: The display name of the node to get the type from 953 954 Returns: 955 The column type of the node if it has one, otherwise None 956 """ 957 node_label = self._get_node_label(node_label, node_display_name) 958 rel_node_label = self.dmr.get_relationship_value("columnType", "node_label") 959 return self.graph.nodes[node_label][rel_node_label] 960 961 def _get_node_label( 962 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 963 ) -> str: 964 """Returns the node label if given otherwise gets the node label from the display name 965 966 Args: 967 node_label: The label of the node to get the type from 968 node_display_name: The display name of the node to get the type from 969 970 Raises: 971 ValueError: If neither node_label or node_display_name is provided 972 973 Returns: 974 The node label 975 """ 976 if node_label is not None: 977 return node_label 978 if node_display_name is not None: 979 return self.get_node_label(node_display_name) 
980 raise ValueError("Either 'node_label' or 'node_display_name' must be provided.")
DataModelGraphExplorer
def __init__(
    self,
    graph: nx.MultiDiGraph,
):
    """Wrap an already-built data model graph so it can be queried.

    Args:
        graph: networkx MultiDiGraph representation of the data model. It is stored
            as-is and is expected to be fully formed at this point.
    """
    # Relationship metadata; the query methods use it to resolve relationship
    # names into edge keys and node attribute labels.
    self.dmr = DataModelRelationships()
    self.graph = graph
Load data model graph as a singleton.
Arguments:
- graph: nx.MultiDiGraph, networkx graph representation of the data model
def find_properties(self) -> set[str]:
    """Collect every property defined in the data model.

    A property is the first (source) node of any edge whose key equals the
    'domainIncludes' edge key.

    Returns:
        Set of property node labels; empty if no such edge exists.
    """
    # The edge key is identical for every edge, so resolve it once rather than
    # once per edge.
    domain_edge_key = self.dmr.get_relationship_value("domainIncludes", "edge_key")
    return {
        source
        for source, _, edge_key in self.graph.edges
        if edge_key == domain_edge_key
    }
Identify all properties, as defined by the first node in a pair, connected with 'domainIncludes' edge type
Returns:
properties, set: All properties defined in the data model, each property name is defined by its label.
def find_classes(self) -> AbstractSet[str]:
    """Identify all classes in the data model.

    A class is any node of the graph that is not a property, where the set of
    properties is whatever find_properties() reports.

    Returns:
        Set of class node labels.
    """
    property_labels = self.find_properties()
    # Set difference between the graph's node view and the property labels.
    return self.graph.nodes - property_labels
Identify all classes, as defined by all nodes, minus all properties (which are explicitly defined)
Returns:
classes, set: All classes defined in the data model, each class name is defined by its label.
def find_node_range(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> list:
    """Get the valid values for the given node (attribute).

    Valid values are the target nodes of every edge that starts at the node and
    whose key equals the 'rangeIncludes' edge key.

    Args:
        node_label: Label of the node for which to retrieve valid values.
        node_display_name: Display name of the node; used to resolve the label
            when node_label is not given.

    Returns:
        De-duplicated list of valid values, in no particular order. The list is
        empty when no matching edge starts at the node.
    """
    node_label = self._get_node_label(node_label, node_display_name)

    # Resolve the edge key once; it does not depend on the edge being inspected.
    range_edge_key = self.dmr.get_relationship_value("rangeIncludes", "edge_key")
    valid_values = {
        target
        for source, target, edge_key in self.graph.edges
        if source == node_label and edge_key == range_edge_key
    }
    return list(valid_values)
Get valid values for the given node (attribute)
Arguments:
- node_label, str, Optional[str]: label of the node for which to retrieve valid values
- node_display_name, str, Optional[str]: Display Name of the node for which to retrieve valid values
Returns:
valid_values, list: List of valid values associated with the provided node.
def get_adjacent_nodes_by_relationship(
    self, node_label: str, relationship: str
) -> list[str]:
    """Get the nodes adjacent to a given node through a given relationship type.

    Args:
        node_label: Label of the node whose out-edges are inspected.
        relationship: Edge key that an out-edge must carry to be included.

    Returns:
        De-duplicated list of target nodes of the matching out-edges, in no
        particular order.
    """
    outgoing = self.graph.out_edges(node_label, data=True, keys=True)
    neighbors = {
        target for _, target, edge_key, _ in outgoing if edge_key == relationship
    }
    return list(neighbors)
Get a list of nodes that is / are adjacent to a given node, based on a relationship type.
Arguments:
- node_label: label of the node whose edges we need to look at.
- relationship: the type of link(s) that the above node and its immediate neighbors share.
Returns:
List of nodes that are adjacent to the given node.
checked
def get_component_node_required(
    self,
    manifest_component: str,
    node_validation_rules: Optional[list[str]] = None,
    node_label: Optional[str] = None,
    node_display_name: Optional[str] = None,
) -> bool:
    """Check whether a node is required within a given manifest component.

    Requirements can be set through a "required" validation rule as well as through
    the node's Required field.

    Args:
        manifest_component: Manifest component display name that the node belongs to.
        node_validation_rules: Validation rules for the node and component. When
            empty or None they are looked up for the component.
        node_label: Label of the node being queried.
        node_display_name: Display name of the node being queried.

    Returns:
        True if the node is required, False if not.
    """
    if not node_validation_rules:
        # No rules supplied: fetch the rules that apply to this component.
        node_validation_rules = self.get_component_node_validation_rules(
            manifest_component=manifest_component,
            node_label=node_label,
            node_display_name=node_display_name,
        )

    if not rule_in_rule_list("required", node_validation_rules):
        # Requirements are not set through the validation rules, so fall back to
        # the standard node requirement from the model.
        return self.get_node_required(
            node_label=node_label, node_display_name=node_display_name
        )

    # The validation rules mark the node as required for this component. If the
    # Required field is set as well, the two sources conflict: log it.
    if self.get_node_required(
        node_label=node_label, node_display_name=node_display_name
    ):
        if not node_display_name:
            assert node_label is not None
            display_name_key = self.dmr.get_relationship_value(
                "displayName", "node_label"
            )
            node_display_name = self.graph.nodes[node_label][display_name_key]
        message_parts = (
            f"For component: {manifest_component} and attribute: {node_display_name}",
            "requirements are being specified in both the Required field and in the",
            "Validation Rules. If you desire to use validation rules to set component",
            "specific requirements for this attribute",
            "then the Required field needs to be set to False, or the validation may",
            "not work as intended, for other components where the attribute",
            "that should not be required.",
        )
        logger.error(" ".join(message_parts))

    return True
Check if a node is required taking into account the manifest component it is defined in (requirements can be set in validation rule as well as required column)
Arguments:
- manifest_component: str, manifest component display name that the node belongs to.
- node_validation_rules: list[str], validation rules for a given node and component.
- node_label: str, Label of the node you would want to get the comment for.
- node_display_name: str, node display name for the node being queried.
Returns:
True, if node is required, False if not
def get_component_node_validation_rules(
    self,
    manifest_component: str,
    node_label: Optional[str] = None,
    node_display_name: Optional[str] = None,
) -> list:
    """Get the validation rules for a given node and component.

    Args:
        manifest_component: Manifest component display name that the node belongs to.
        node_label: Label of the node being queried.
        node_display_name: Display name of the node being queried.

    Returns:
        Validation rules list for the node and component. An empty rule
        container (list or dict) yields an empty list.
    """
    node_validation_rules = self.get_node_validation_rules(
        node_label=node_label, node_display_name=node_display_name
    )

    if isinstance(node_validation_rules, dict):
        if not node_validation_rules:
            # An empty dict holds no rules. Previously it fell through to the
            # list branch and tripped the assert below.
            return []
        # Rules are keyed per component: extract the ones for this component.
        return extract_component_validation_rules(
            manifest_component=manifest_component,
            validation_rules_dict=node_validation_rules,  # type: ignore
        )

    assert isinstance(node_validation_rules, list)
    return node_validation_rules
Get validation rules for a given node and component.
Arguments:
- manifest_component: str, manifest component display name that the node belongs to.
- node_label: str, Label of the node you would want to get the comment for.
- node_display_name: str, node display name for the node being queried.
Returns:
validation_rules: list, validation rules list for a given node and component.
def get_component_requirements(
    self,
    source_component: str,
) -> list[str]:
    """Get all components that a given source component requires.

    Args:
        source_component: Source component for which to find all required
            downstream components.

    Returns:
        The topologically ordered descendants of the source component along the
        'requiresComponent' edge key, returned in reverse order.
    """
    requires_component_key = self.dmr.get_relationship_value(
        "requiresComponent", "edge_key"
    )
    ordered_descendants = self.get_descendants_by_edge_type(
        source_component,
        requires_component_key,
        ordered=True,
    )
    return ordered_descendants[::-1]
Get all components that are associated with a given source component and are required by it.
Arguments:
- source_component: source component for which we need to find all required downstream components.
Returns:
List of nodes that are descendants from the source component and are related to the source through a specific component relationship.
def get_component_requirements_graph(
    self,
    source_component: str,
) -> nx.Graph:
    """Get the components required by a source component as a dependency graph.

    Args:
        source_component: Source component for which to find all required
            downstream components.

    Returns:
        The subgraph of the 'requiresComponent' relationship graph induced on the
        components returned by get_component_requirements().
    """
    required_components = self.get_component_requirements(source_component)

    # Graph holding only the edges of the component-requirement relationship.
    requires_component_graph = self.get_subgraph_by_edge_type(
        self.dmr.get_relationship_value("requiresComponent", "edge_key"),
    )

    return requires_component_graph.subgraph(required_components)
Get all components that are associated with a given source component and are required by it; return the components as a dependency graph (i.e. a DAG).
Arguments:
- source_component, str: source component for which we need to find all required downstream components.
Returns:
A subgraph of the schema graph induced on nodes that are descendants from the source component and are related to the source through a specific component relationship.
def get_descendants_by_edge_type(
    self,
    source_node: str,
    relationship: str,
    connected: bool = True,
    ordered: bool = False,
) -> list[str]:
    """Get the descendants of a source node along a specific relationship type.

    Args:
        source_node: The node whose descendants need to be retrieved.
        relationship: Edge key that an edge must carry to be followed.
        connected:
            If True, only nodes reachable from the source node through edges of
            the given relationship are returned (together with the source node).
            If False, every node touched by such an edge among the source node's
            descendants is returned, even when not reachable from the source.
        ordered:
            If True, the result is topologically ordered (requires the
            relationship subgraph to be acyclic).
            If False, the result has no particular order.

    Returns:
        List of descendant nodes. Empty when no edge of the given relationship
        exists among the source node's descendants, or when `connected` is True
        and the source node itself has no edge of that relationship.
    """
    # Restrict the graph to the source node plus everything reachable from it
    # through edges of any type.
    candidate_nodes = list(nx.descendants(self.graph, source_node))
    candidate_nodes.append(source_node)
    descendants_subgraph = self.graph.subgraph(candidate_nodes)

    # Keep only the edges that carry the requested relationship key.
    relationship_subgraph: nx.DiGraph = nx.DiGraph()
    relationship_subgraph.add_edges_from(
        (node_1, node_2)
        for node_1, node_2, key, _ in descendants_subgraph.edges(
            data=True, keys=True
        )
        if key == relationship
    )

    if not relationship_subgraph.nodes():
        # No edge of this relationship type among the descendants.
        return []

    if connected:
        if source_node not in relationship_subgraph:
            # The source has no edge of this relationship type, although deeper
            # descendants do. nx.descendants would raise NetworkXError for a
            # node missing from the graph; nothing is reachable, so return the
            # same empty result as the case above.
            return []

        # Pruning may have disconnected some nodes; keep only those still
        # reachable from the source.
        reachable = nx.descendants(relationship_subgraph, source_node)
        reachable.add(source_node)
        if ordered:
            return list(
                nx.topological_sort(relationship_subgraph.subgraph(reachable))
            )
        return list(reachable)

    if ordered:
        return list(nx.topological_sort(relationship_subgraph))

    return list(relationship_subgraph.nodes())
Get all nodes that are descendants of a given source node, based on a specific type of edge / relationship type.
Arguments:
- source_node: The node whose descendants need to be retrieved.
- relationship: Edge / link relationship type with possible values same as in above docs.
- connected: If True, we need to ensure that all descendant nodes are reachable from the source node, i.e., they are part of the same connected component. If False, the descendants could be in multiple connected components. Default value is True.
- ordered: If True, the list of descendants will be topologically ordered. If False, the list has no particular order (depends on the order in which the descendants were traversed in the subgraph).
Returns:
List of nodes that are descendants from a particular node (sorted / unsorted)
def get_digraph_by_edge_type(self, edge_type: str) -> nx.DiGraph:
    """Get a networkx digraph of the nodes connected via a given edge type.

    Args:
        edge_type: Edge key to search for; possible values are defined by
            'edge_key' in the relationship class.

    Returns:
        Directed graph containing one edge for every graph edge whose key
        equals edge_type.
    """
    matching_edges = [
        (source, target)
        for source, target, key, _ in self.graph.edges(data=True, keys=True)
        if key == edge_type
    ]
    digraph: nx.DiGraph = nx.DiGraph()
    digraph.add_edges_from(matching_edges)
    return digraph
Get a networkx digraph of the nodes connected via a given edge_type.
Arguments:
- edge_type: Edge type to search for, possible types are defined by 'edge_key' in relationship class
Returns:
def get_edges_by_relationship(
    self,
    node: str,
    relationship: str,
) -> list[tuple[str, str]]:
    """Get the out-edges of a node that match a specific relationship type.

    For example, with relationship "parentOf" this yields the edges from the node
    to its children / sub-class nodes.

    Args:
        node: The node whose out-edges are inspected.
        relationship: Edge key that an out-edge must carry to be included.

    Returns:
        List of (source, target) pairs, one per matching out-edge.
    """
    return [
        (source, target)
        for source, target, edge_key, _ in self.graph.out_edges(
            node, data=True, keys=True
        )
        if edge_key == relationship
    ]
Get a list of out-edges of a node where the edges match a specific type of relationship.
i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf" (set of edges to children / sub-class nodes).
Arguments:
- node: the node whose edges we need to look at.
- relationship: the type of link(s) that the above node and its immediate neighbors share.
Returns:
List of edges that are connected to the node.
def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
    """Order the nodes attached to a node through a relationship by edge weight.

    The ordering is intended to match the original ordering in the schema.

    Args:
        key: A key representing an edge relationship in
            DataModelRelationships.relationships_dictionary.
        source_node_label: Node whose edges are looked up and ordered.

    Raises:
        KeyError: The source node cannot be found in the graph.

    Returns:
        Labels of the nodes that share the specified relationship with the source
        node, sorted ascending by the "weight" attribute of the connecting edge.
    """
    if not self.is_class_in_schema(node_label=source_node_label):
        raise KeyError(
            f"Cannot find node: {source_node_label} in the graph, please check entry."
        )

    edge_key = self.dmr.get_relationship_value(key, "edge_key")

    # Each entry is (edge source, edge target, node attached to the source node).
    if self.dmr.get_relationship_value(key, "jsonld_direction") == "out":
        # Out-edges: the attached node is the edge target.
        oriented_edges = [
            (tail, head, head)
            for tail, head in self.graph.out_edges(source_node_label)
        ]
    else:
        # In-edges: the attached node is the edge source.
        oriented_edges = [
            (tail, head, tail)
            for tail, head in self.graph.in_edges(source_node_label)
        ]

    weight_by_node = {
        attached: self.graph[tail][head][edge_key]["weight"]
        for tail, head, attached in oriented_edges
        if edge_key in self.graph[tail][head]
    }

    ranked = sorted(weight_by_node.items(), key=lambda item: item[1])
    return [attached for attached, _ in ranked]
Order the values associated with a particular node and edge_key to match original ordering in schema.
Arguments:
- key (str): a key representing an edge relationship in DataModelRelationships.relationships_dictionary
- source_node_label (str): node to look for edges of and order
Raises:
- KeyError: cannot find source node in graph
Returns:
list[str]: list of sorted nodes that share the specified relationship with the source node. For the example data model, for key='rangeIncludes', source_node_label='CancerType' the return would be ['Breast', 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that exact order.
def get_nodes_ancestors(self, subgraph: nx.DiGraph, node_label: str) -> list[str]:
    """Get the ancestors of a node in the given graph.

    Args:
        subgraph: networkx graph object to search in.
        node_label: Label of the node to find ancestors for.

    Returns:
        List of the nodes reported by nx.ancestors() for node_label, in no
        particular order.
    """
    ancestor_nodes = nx.ancestors(subgraph, node_label)
    return list(ancestor_nodes)
Get a list of the ancestors of a node in the graph, i.e. the nodes from which the given node is reachable
Arguments:
- subgraph (nx.DiGraph): networkx graph object
- node_label (str): label of node to find ancestors for
Returns:
list[str]: ancestors of the node, i.e. nodes from which the given node is reachable in the graph
def get_node_comment(
    self, node_display_name: Optional[str] = None, node_label: Optional[str] = None
) -> str:
    """Get the node definition, i.e. the "comment" associated with a node.

    Args:
        node_display_name: Display name of the node to get the comment for.
        node_label: Label of the node to get the comment for.

    Returns:
        Comment associated with the node, or an empty string when the resolved
        node label is empty.
    """
    node_label = self._get_node_label(node_label, node_display_name)

    if node_label:
        comment_key = self.dmr.get_relationship_value("comment", "node_label")
        return self.graph.nodes[node_label][comment_key]

    return ""
Get the node definition, i.e., the "comment" associated with a given node display name.
Arguments:
- node_display_name, str: Display name of the node which you want to get the comment for.
- node_label, str: Label of the node you would want to get the comment for.
Returns:
Comment associated with node, as a string.
def get_node_dependencies(
    self,
    source_node: str,
    display_names: bool = True,
    schema_ordered: bool = True,
) -> list[str]:
    """Get the immediate dependencies of a given source node.

    Args:
        source_node: The node whose dependencies are computed.
        display_names: If True, return the display names of the dependencies.
            If False, return their node labels.
        schema_ordered:
            If True, return the dependencies in the order given by
            get_ordered_entry() (slower).
            If False, return them without guaranteeing schema order (faster).

    Returns:
        List of dependencies of the source node, as display names or labels.
    """
    dependency_edge_key = self.dmr.get_relationship_value(
        "requiresDependency", "edge_key"
    )

    # NOTE(review): the resolved edge key is passed on as the relationship `key`
    # of get_ordered_entry, exactly as before; confirm that is intended.
    if schema_ordered:
        required_dependencies = self.get_ordered_entry(
            key=dependency_edge_key,
            source_node_label=source_node,
        )
    else:
        required_dependencies = self.get_adjacent_nodes_by_relationship(
            node_label=source_node,
            relationship=dependency_edge_key,
        )

    if not display_names:
        return required_dependencies

    # Translate each dependency label into its display name.
    return [
        self.graph.nodes[dependency][
            self.dmr.get_relationship_value("displayName", "node_label")
        ]
        for dependency in required_dependencies
    ]
Get the immediate dependencies that are related to a given source node.
Arguments:
- source_node: The node whose dependencies we need to compute.
- display_names: if True, return list of display names of each of the dependencies. if False, return list of node labels of each of the dependencies.
- schema_ordered: if True, return the dependencies of the node following the order of the schema (slower). if False, return dependencies from graph without guaranteeing schema order (faster)
Returns:
List of nodes that are dependent on the source node.
def get_nodes_descendants(self, node_label: str) -> list[str]:
    """Return the nodes reachable from the given node in the graph.

    Args:
        node_label: Label of the node to start from.

    Returns:
        List of the nodes reported by nx.descendants() for node_label, in no
        particular order.
    """
    reachable_nodes = nx.descendants(self.graph, node_label)
    return list(reachable_nodes)
Return a list of nodes reachable from source in graph
Arguments:
- node_label, str: any given node
Return:
all_descendants, list: nodes reachable from source in graph
def get_nodes_display_names(
    self,
    node_list: list[str],
) -> list[str]:
    """Get the display names associated with the given list of nodes.

    Args:
        node_list: Labels of the nodes whose display names are retrieved.

    Returns:
        List of display names, in the same order as node_list.
    """
    # The attribute key is the same for every node, so resolve it once rather
    # than once per node.
    display_name_key = self.dmr.get_relationship_value("displayName", "node_label")
    return [self.graph.nodes[node][display_name_key] for node in node_list]
Get display names associated with the given list of nodes.
Arguments:
- node_list: List of nodes whose display names we need to retrieve.
Returns:
List of display names.
def get_node_label(self, node_display_name: str) -> str:
    """Get the node label for a given display name.

    Args:
        node_display_name: Display name of the node to get the label for.

    Returns:
        The class label derived from the display name if it is a node of the
        graph, otherwise the derived property label if that is a node of the
        graph, otherwise an empty string.
    """
    # Both candidate labels are derived up front; the class label takes priority.
    candidate_labels = (
        get_class_label_from_display_name(display_name=node_display_name),
        get_property_label_from_display_name(display_name=node_display_name),
    )

    for candidate in candidate_labels:
        if candidate in self.graph.nodes:
            return candidate

    return ""
Get the node label for a given display name.
Arguments:
- node_display_name: Display name of the node which you want to get the label for.
Returns:
Node label associated with given node. If display name not part of schema, return an empty string.
def get_node_range(
    self,
    node_label: Optional[str] = None,
    node_display_name: Optional[str] = None,
    display_names: bool = False,
) -> list[str]:
    """Get the range, i.e. all the valid values associated with a node.

    Args:
        node_label: Label of the node whose range is retrieved. Defaults to None.
        node_display_name: Display name of the node; used to resolve the label
            when node_label is not given. Defaults to None.
        display_names: If True, return display names instead of labels.
            Defaults to False.

    Raises:
        ValueError: If looking up the range raises a KeyError.

    Returns:
        If display_names=False, the valid values (labels) of the node.
        If display_names=True, the display names of those valid values.
    """
    node_label = self._get_node_label(node_label, node_display_name)

    try:
        valid_values = self.find_node_range(node_label=node_label)
    except KeyError as exc:
        raise ValueError(
            f"The source node {node_label} does not exist in the graph. "
            "Please use a different node."
        ) from exc

    if not display_names:
        return valid_values

    # Translate each valid value label into its display name.
    return [self.graph.nodes[value]["displayName"] for value in valid_values]
Get the range, i.e., all the valid values that are associated with a node label.
Arguments:
- node_label (Optional[str], optional): Node for which you need to retrieve the range. Defaults to None.
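- node_display_name (Optional[str], optional): Display name of the node, used to look up the node label when node_label is not given. Defaults to None.
- display_names (bool, optional): If True, return the display names of the valid values instead of their labels. Defaults to False.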
Raises:
- ValueError: If the node cannot be found in the graph.
Returns:
list[str]: If display_names=False, a list of valid values (labels) associated with a given node. If display_names=True, a list of valid values (display names) associated with a given node
def get_node_required(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> bool:
    """Check whether a given node is required.

    Args:
        node_label: Label of the node to look up.
        node_display_name: Display name of the node to look up; used to resolve
            the label when node_label is not given.

    Returns:
        The value stored on the node under the 'required' node attribute:
        True for a "required" node, False for an "optional" one.
    """
    resolved_label = self._get_node_label(node_label, node_display_name)
    required_key = self.dmr.get_relationship_value("required", "node_label")
    return self.graph.nodes[resolved_label][required_key]
Check if a given node is required or not.
Note: The possible options that a node can be associated with -- "required" / "optional".
Arguments:
- node_label: Label of the node for which you need to look up.
- node_display_name: Display name of the node that you want to look up.
Returns:
True: If the given node is a "required" node. False: If the given node is not a "required" (i.e., an "optional") node.
def get_node_validation_rules(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> Union[list, dict[str, str]]:
    """Get the validation rules associated with a node.

    Args:
        node_label: Label of the node to look up.
        node_display_name: Display name of the node to get the label for.

    Returns:
        The node's "validationRules" entry, as a list or a dictionary. An empty list
        is returned when no node label could be resolved.

    Raises:
        ValueError: If looking up the node, or its "validationRules" entry, raises a
            ``KeyError``.
    """
    node_label = self._get_node_label(node_label, node_display_name)

    if node_label:
        try:
            return self.graph.nodes[node_label]["validationRules"]
        except KeyError as key_error:
            raise ValueError(
                f"{node_label} is not in the graph, please provide a proper node label"
            ) from key_error

    # No label could be resolved, so there are no rules to report.
    return []
Get validation rules associated with a node.
Arguments:
- node_label: Label of the node for which you need to look up.
- node_display_name: Display name of the node which you want to get the label for.
Returns:
A set of validation rules associated with node, as a list or a dictionary.
def get_subgraph_by_edge_type(self, relationship: str) -> nx.DiGraph:
    """Build a directed graph from all edges of one type (aka relationship).

    Args:
        relationship: Edge key identifying the relationship type to keep.

    Returns:
        A new directed graph containing a (source, target) edge for every edge of the
        data model graph whose key equals ``relationship``. Only the endpoints are
        carried over; edge data is not copied.
    """
    # Prune the data model graph down to the edges matching the relationship type.
    matching_edges = [
        (source, target)
        for source, target, edge_key, _ in self.graph.out_edges(data=True, keys=True)
        if edge_key == relationship
    ]

    relationship_subgraph: nx.DiGraph = nx.DiGraph()
    relationship_subgraph.add_edges_from(matching_edges)
    return relationship_subgraph
Get a subgraph containing all edges of a given type (aka relationship).
Arguments:
- relationship: edge / link relationship type with possible values same as in above docs.
Returns:
Directed graph on edges of a particular type (aka relationship)
def find_adjacent_child_classes(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> list[str]:
    """Find the child classes directly adjacent to a node.

    Args:
        node_label: Label of the node to look up.
        node_display_name: Display name of the node to look up.

    Returns:
        The nodes adjacent to the given node through the "subClassOf" relationship,
        as returned by ``get_adjacent_nodes_by_relationship``.
    """
    resolved_label = self._get_node_label(node_label, node_display_name)
    subclass_edge_key = self.dmr.get_relationship_value("subClassOf", "edge_key")
    return self.get_adjacent_nodes_by_relationship(
        node_label=resolved_label, relationship=subclass_edge_key
    )
Find child classes of a given node.
Arguments:
- node_display_name: Display name of the node to look up.
- node_label: Label of the node to look up.
Returns:
List of nodes that are adjacent to the given node, by SubclassOf relationship.
def find_child_classes(self, schema_class: str) -> list:
    """Find the schema classes that inherit from the given class.

    Args:
        schema_class: Node label of the class whose children are looked up.

    Returns:
        The successors of ``schema_class`` in the graph, after being passed through
        ``unlist``.
    """
    successors = list(self.graph.successors(schema_class))
    # NOTE(review): ``unlist`` is defined outside this view. If it collapses a
    # one-element list to its single item, the assertion below would fail for a
    # class with exactly one child -- confirm against schematic.utils.general.
    child_classes = unlist(successors)
    assert isinstance(child_classes, list)
    return child_classes
Find schema classes that inherit from the given class
Arguments:
- schema_class: node label for the class from which to look for children.
Returns:
list of children to the schema_class.
def find_class_specific_properties(self, schema_class: str) -> list[str]:
    """Find the properties specifically associated with a given class.

    Args:
        schema_class: Node/class label to identify properties for.

    Returns:
        The source node of every graph edge that points at ``schema_class`` and whose
        node pair carries a "domainValue" entry, in edge iteration order.

    Raises:
        KeyError: If the provided schema_class is not in the graph.
    """
    if not self.is_class_in_schema(schema_class):
        raise KeyError(
            (
                f"Schema_class provided: {schema_class} is not in the data model, please check "
                "that you are providing the proper class/node label"
            )
        )

    return [
        source
        for source, target in self.graph.edges()
        if target == schema_class and "domainValue" in self.graph[source][schema_class]
    ]
Find properties specifically associated with a given class
Arguments:
- schema_class, str: node/class label, to identify properties for.
Returns:
properties, list: List of properties associated with a given schema class.
Raises:
- KeyError: Key error is raised if the provided schema_class is not in the graph
def find_parent_classes(self, node_label: str) -> list[list[str]]:
    """Find all parents of the provided node.

    Args:
        node_label: Label of the node to find parents of.

    Returns:
        One list per simple path from the root node to ``node_label``, each holding
        the nodes on that path with the final element (the target) dropped.
    """
    # The root is taken to be the first node in topological order of the
    # "parentOf" digraph.
    parent_digraph = self.get_digraph_by_edge_type("parentOf")
    ordered_nodes = list(nx.topological_sort(parent_digraph))
    root_node = ordered_nodes[0]

    # Walk every simple path from the root to the target in the full graph.
    parent_paths = []
    for path in nx.all_simple_paths(self.graph, source=root_node, target=node_label):
        parent_paths.append(path[:-1])
    return parent_paths
Find all parents of the provided node
Arguments:
- node_label: label of the node to find parents of
Returns:
List of list of Parents to the given node.
def full_schema_graph(self, size: Optional[int] = None) -> graphviz.Digraph:
    """Create a visualization of the whole data model graph.

    Args:
        size: Maximum height and width of the graph, forwarded to ``visualize``.
            NOTE(review): annotated as int here but documented as float in the
            original docstring -- confirm which ``visualize`` expects.

    Returns:
        The schema graph visualization produced by ``visualize`` from all edges of
        the graph.
    """
    return visualize(self.graph.edges(), size=size)
Create a graph of the data model.
Arguments:
- size, float: max height and width of the graph, if one value provided it is used for both.
Returns:
schema graph viz
def is_class_in_schema(self, node_label: str) -> bool:
    """Determine whether the provided node_label is in the schema graph/data model.

    Args:
        node_label: Label of the node to search for in the graph.

    Returns:
        True if the node is in the schema graph, False otherwise.
    """
    schema_nodes = self.graph.nodes()
    return node_label in schema_nodes
Determine if provided node_label is in the schema graph/data model.
Arguments:
- node_label: label of node to search for in the schema graph.
Returns:
True, if node is in the graph schema False, if node is not in graph schema
def sub_schema_graph(
    self, source: str, direction: str, size: Optional[float] = None
) -> Optional[graphviz.Digraph]:
    """Create a visualization of the part of the schema around one node.

    Args:
        source: Label of the node the sub-graph is built around.
        direction: Which way to walk from ``source``: "down" follows the edges found
            by a breadth-first edge search starting at ``source``, "up" follows the
            paths returned by ``find_parent_classes``, "both" combines the two.
        size: Maximum height and width of the graph, forwarded to ``visualize``.

    Returns:
        The sub-schema visualization produced by ``visualize``, or None when
        ``direction`` is not one of "up", "down" or "both".
    """
    include_parents = direction in ("up", "both")
    include_children = direction in ("down", "both")
    if not (include_parents or include_children):
        return None

    # Parents are resolved before the downward search, matching the call order used
    # when both directions are requested.
    parent_paths = self.find_parent_classes(source) if include_parents else []
    edges = list(nx.edge_bfs(self.graph, [source])) if include_children else []

    for parent_path in parent_paths:
        # Close each parent path with the source node, then link consecutive nodes.
        parent_path.append(source)
        edges.extend(zip(parent_path, parent_path[1:]))

    return visualize(edges, size=size)
Create a sub-schema graph
Arguments:
- source, str: source node label to start graph
- direction, str: direction to create the visualization, choose from "up", "down", "both"
- size, float: max height and width of the graph, if one value provided it is used for both.
Returns:
Sub-schema graph viz
def get_node_column_type(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> Optional[JSONSchemaType]:
    """Get the column type of a node.

    Args:
        node_label: The label of the node to get the type from.
        node_display_name: The display name of the node to get the type from.

    Returns:
        The value stored on the node under the node label of the "columnType"
        relationship; None when that stored value is None.
    """
    resolved_label = self._get_node_label(node_label, node_display_name)
    column_type_key = self.dmr.get_relationship_value("columnType", "node_label")
    node_attributes = self.graph.nodes[resolved_label]
    return node_attributes[column_type_key]
Gets the column type of the node
Arguments:
- node_label: The label of the node to get the type from
- node_display_name: The display name of the node to get the type from
Returns:
The column type of the node if it has one, otherwise None