schematic.schemas.data_model_graph
DataModel Graph
"""DataModel Graph"""

import logging
from typing import Any, Optional, Union, AbstractSet

import graphviz  # type: ignore
import networkx as nx  # type: ignore
from opentelemetry import trace

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_edges import DataModelEdges
from schematic.schemas.data_model_nodes import DataModelNodes
from schematic.schemas.data_model_relationships import DataModelRelationships
from schematic.schemas.constants import JSONSchemaType
from schematic.utils.general import unlist
from schematic.utils.schema_utils import (
    DisplayLabelType,
    extract_component_validation_rules,
    get_class_label_from_display_name,
    get_property_label_from_display_name,
)
from schematic.utils.validate_utils import rule_in_rule_list
from schematic.utils.viz_utils import visualize

logger = logging.getLogger(__name__)
tracer = trace.get_tracer("Schematic")


class DataModelGraphMeta(type):  # pylint: disable=too-few-public-methods
    """Metaclass that hands out a single cached instance per class.

    It derives from ``type`` so that it can actually be used as
    ``metaclass=DataModelGraphMeta`` and so that ``super().__call__`` resolves to
    the normal instance-creation machinery.
    """

    # Maps each class created with this metaclass to its one cached instance.
    _instances: dict = {}

    def __call__(cls, *args: Any, **kwargs: Any) -> Any:
        """
        Possible changes to the value of the `__init__` argument do not affect
        the returned instance.
        """
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]


class DataModelGraph:  # pylint: disable=too-few-public-methods
    """
    Generate graph network (networkx) from the attributes and relationships returned
    from the data model parser.

    NOTE: the ``__metaclass__`` attribute below is the Python 2 spelling and is
    ignored by Python 3, so this class is *not* a singleton in practice: every call
    builds a new graph. It is left as is because switching to
    ``metaclass=DataModelGraphMeta`` would make later constructions with a different
    data model silently return the first graph.
    """

    __metaclass__ = DataModelGraphMeta

    def __init__(
        self,
        attribute_relationships_dict: dict,
        data_model_labels: DisplayLabelType = "class_label",
    ) -> None:
        """Load parsed data model.
        Args:
            attributes_relationship_dict, dict: generated in data_model_parser
                {Attribute Display Name: {
                    Relationships: {
                        CSV Header: Value}}}
            data_model_labels: str, display_label or class_label.
                display_label, use the display name as a label, if it is valid
                (contains no blacklisted characters) otherwise will default to schema_label.
                class_label, default, use standard class or property label.
        Raises:
            ValueError, attribute_relationship_dict not loaded.
        """
        self.attribute_relationships_dict = attribute_relationships_dict
        self.dmn = DataModelNodes(self.attribute_relationships_dict)
        self.dme = DataModelEdges()
        self.dmr = DataModelRelationships()
        self.data_model_labels = data_model_labels

        if not self.attribute_relationships_dict:
            raise ValueError(
                (
                    "Something has gone wrong, a data model was not loaded into the DataModelGraph "
                    "Class. Please check that your paths are correct"
                )
            )
        self.graph = self.generate_data_model_graph()

    @tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
    def generate_data_model_graph(self) -> nx.MultiDiGraph:
        """
        Generate NetworkX Graph from the Relationships/attributes dictionary, the graph is built
        by first adding all nodes to the graph, then connecting nodes by the relationships defined
        in the attributes_relationship dictionary.
        Returns:
            G: nx.MultiDiGraph, networkx graph representation of the data model
        """
        # Get all relationships with edges
        edge_relationships = self.dmr.retrieve_rel_headers_dict(edge=True)

        # Find all nodes
        all_nodes = self.dmn.gather_all_nodes_in_model(
            attr_rel_dict=self.attribute_relationships_dict
        )

        # Instantiate NetworkX MultiDigraph
        graph: nx.MultiDiGraph = nx.MultiDiGraph()

        all_node_dict = {}

        ## Fill in MultiDigraph with nodes
        for node in all_nodes:
            # Gather information for each node
            node_dict = self.dmn.generate_node_dict(
                node_display_name=node,
                attr_rel_dict=self.attribute_relationships_dict,
                data_model_labels=self.data_model_labels,
            )

            # Add each node to the all_node_dict to be used for generating edges
            all_node_dict[node] = node_dict

            # Generate node and attach information (attributes) to each node
            graph = self.dmn.generate_node(graph, node_dict)

        edge_list: list[tuple[str, str, dict[str, Union[str, int]]]] = []
        ## Connect nodes via edges
        for node in all_nodes:
            # Generate edges; the running edge list is passed in and a new one comes back
            edge_list_2 = self.dme.generate_edge(
                node,
                all_node_dict,
                self.attribute_relationships_dict,
                edge_relationships,
                edge_list,
            )
            edge_list = edge_list_2.copy()

        # Add edges to the Graph
        for node_1, node_2, edge_dict in edge_list:
            graph.add_edge(
                node_1, node_2, key=edge_dict["key"], weight=edge_dict["weight"]
            )
        return graph


class DataModelGraphExplorer:  # pylint: disable=too-many-public-methods
    """Query helpers over an already-built data model graph."""

    def __init__(
        self,
        graph: nx.MultiDiGraph,
    ):
        """Store the data model graph and a relationships helper.
        Args:
            G: nx.MultiDiGraph, networkx graph representation of the data model
        """
        self.graph = graph  # At this point the graph is expected to be fully formed.
        self.dmr = DataModelRelationships()

    def find_properties(self) -> set[str]:
        """
        Identify all properties, as defined by the first node in a pair, connected with
        'domainIncludes' edge type

        Returns:
            properties, set: All properties defined in the data model, each property name
                is defined by its label.
        """
        # Look the edge key up once instead of once per edge.
        domain_includes_key = self.dmr.get_relationship_value(
            "domainIncludes", "edge_key"
        )
        # Iterating a MultiDiGraph edge view yields (source, target, key) triples.
        return {
            node_1 for node_1, _, rel in self.graph.edges if rel == domain_includes_key
        }

    def find_classes(self) -> AbstractSet[str]:
        """
        Identify all classes, as defined but all nodes, minus all properties
        (which are explicitly defined)
        Returns:
            classes, set: All classes defined in the data model, each class
                name is defined by its label.
        """
        nodes = self.graph.nodes
        properties = self.find_properties()
        classes = nodes - properties
        return classes

    def find_node_range(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> list:
        """Get valid values for the given node (attribute)
        Args:
            node_label, str, Optional[str]: label of the node for which to retrieve valid values
            node_display_name, str, Optional[str]: Display Name of the node for which to
                retrieve valid values
        Returns:
            valid_values, list: List of valid values associated with the provided node.
                Values are de-duplicated through a set, so the order is not defined.
        """
        node_label = self._get_node_label(node_label, node_display_name)

        # Look the edge key up once instead of once per edge.
        range_includes_key = self.dmr.get_relationship_value(
            "rangeIncludes", "edge_key"
        )
        valid_values = {
            node_2
            for node_1, node_2, rel in self.graph.edges
            if node_1 == node_label and rel == range_includes_key
        }
        return list(valid_values)

    def get_adjacent_nodes_by_relationship(
        self, node_label: str, relationship: str
    ) -> list[str]:
        """Get a list of nodes that is / are adjacent to a given node, based on a relationship type.

        Args:
            node_label: label of the the node whose edges we need to look at.
            relationship: the type of link(s) that the above node and its immediate neighbors share.

        Returns:
            List of nodes that are adjacent to the given node.
        """
        nodes = set()
        for _, node_2, key, _ in self.graph.out_edges(node_label, data=True, keys=True):
            if key == relationship:
                nodes.add(node_2)

        return list(nodes)

    def get_component_node_required(
        self,
        manifest_component: str,
        node_validation_rules: Optional[list[str]] = None,
        node_label: Optional[str] = None,
        node_display_name: Optional[str] = None,
    ) -> bool:
        """Check if a node is required taking into account the manifest component it is defined in
        (requirements can be set in validation rule as well as required column)
        Args:
            manifest_component: str, manifest component display name that the node belongs to.
            node_validation_rules: list[str], validation rules for a given node and component.
                Looked up from the graph when empty or not given.
            node_label: str, Label of the node you would want to get the comment for.
            node_display_name: str, node display name for the node being queried.
        Returns:
            True, if node is required, False if not
        """
        node_required = False

        if not node_validation_rules:
            # Get node validation rules for a given component
            node_validation_rules = self.get_component_node_validation_rules(
                manifest_component=manifest_component,
                node_label=node_label,
                node_display_name=node_display_name,
            )

        # Check if the validation rule specifies that the node is required for this particular
        # component.
        if rule_in_rule_list("required", node_validation_rules):
            node_required = True
            # To prevent any unintended errors, ensure the Required field for this node is False
            if self.get_node_required(
                node_label=node_label, node_display_name=node_display_name
            ):
                if not node_display_name:
                    assert node_label is not None
                    node_display_name = self.graph.nodes[node_label][
                        self.dmr.get_relationship_value("displayName", "node_label")
                    ]
                error_str = " ".join(
                    [
                        f"For component: {manifest_component} and attribute: {node_display_name}",
                        "requirements are being specified in both the Required field and in the",
                        "Validation Rules. If you desire to use validation rules to set component",
                        "specific requirements for this attribute",
                        "then the Required field needs to be set to False, or the validation may",
                        "not work as intended, for other components where the attribute",
                        "that should not be required.",
                    ]
                )

                # The conflict is only logged; the node is still reported as required.
                logger.error(error_str)
        else:
            # If requirements are not being set in the validation rule, then just pull the
            # standard node requirements from the model
            node_required = self.get_node_required(
                node_label=node_label, node_display_name=node_display_name
            )
        return node_required

    def get_component_node_validation_rules(
        self,
        manifest_component: str,
        node_label: Optional[str] = None,
        node_display_name: Optional[str] = None,
    ) -> list:
        """Get validation rules for a given node and component.
        Args:
            manifest_component: str, manifest component display name that the node belongs to.
            node_label: str, Label of the node you would want to get the comment for.
            node_display_name: str, node display name for the node being queried.
        Returns:
            validation_rules: list, validation rules list for a given node and component.
        """
        # get any additional validation rules associated with this node (e.g. can this node
        # be mapped to a list of other nodes)
        node_validation_rules = self.get_node_validation_rules(
            node_label=node_label, node_display_name=node_display_name
        )

        # Parse the validation rules per component if applicable
        if node_validation_rules and isinstance(node_validation_rules, dict):
            node_validation_rules_list = extract_component_validation_rules(
                manifest_component=manifest_component,
                validation_rules_dict=node_validation_rules,  # type: ignore
            )
        else:
            assert isinstance(node_validation_rules, list)
            node_validation_rules_list = node_validation_rules
        return node_validation_rules_list

    def get_component_requirements(
        self,
        source_component: str,
    ) -> list[str]:
        """
        Get all components that are associated with a given source component and are
        required by it.

        Args:
            source_component: source component for which we need to find all required downstream
                components.

        Returns:
            List of nodes that are descendants from the source component are are related to the
                source through a specific component relationship. The list is the reverse of
                the topological order returned by get_descendants_by_edge_type.
        """

        req_components = list(
            reversed(
                self.get_descendants_by_edge_type(
                    source_component,
                    self.dmr.get_relationship_value("requiresComponent", "edge_key"),
                    ordered=True,
                )
            )
        )

        return req_components

    def get_component_requirements_graph(
        self,
        source_component: str,
    ) -> nx.Graph:
        """
        Get all components that are associated with a given source component and are required by it;
        return the components as a dependency graph (i.e. a DAG).

        Args:
            source_component, str: source component for which we need to find all required
                downstream components.

        Returns:
            A subgraph of the schema graph induced on nodes that are descendants from the source
                component and are related to the source through a specific component relationship.
        """

        # get a list of required component nodes
        req_components = self.get_component_requirements(source_component)

        # get the subgraph induced on required component nodes
        req_components_graph = self.get_subgraph_by_edge_type(
            self.dmr.get_relationship_value("requiresComponent", "edge_key"),
        ).subgraph(req_components)

        return req_components_graph

    def get_descendants_by_edge_type(
        self,
        source_node: str,
        relationship: str,
        connected: bool = True,
        ordered: bool = False,
    ) -> list[str]:
        """
        Get all nodes that are descendants of a given source node, based on a specific
        type of edge / relationship type.

        Args:
            source_node: The node whose descendants need to be retrieved.
            relationship: Edge / link relationship type with possible values same as in above docs.
            connected:
                If True, we need to ensure that all descendant nodes are reachable from the source
                    node, i.e., they are part of the same connected component.
                If False, the descendants could be in multiple connected components.
                Default value is True.
            ordered:
                If True, the list of descendants will be topologically ordered.
                If False, the list has no particular order (depends on the order in which the
                    descendants were traversed in the subgraph).

        Returns:
            List of nodes that are descendants from a particular node (sorted / unsorted).
            When anything is reachable the source node itself is included. An empty list is
            returned when no edge of the given type exists below the source node, or when
            ``connected`` is True and the source node has no edge of that type itself.
        """

        root_descendants = nx.descendants(self.graph, source_node)

        subgraph_nodes = list(root_descendants)
        subgraph_nodes.append(source_node)
        descendants_subgraph = self.graph.subgraph(subgraph_nodes)

        # prune the descendants subgraph so as to include only those edges that match
        # the relationship type
        rel_edges = [
            (node_1, node_2)
            for node_1, node_2, key in descendants_subgraph.edges(keys=True)
            if key == relationship
        ]

        relationship_subgraph: nx.DiGraph = nx.DiGraph()
        relationship_subgraph.add_edges_from(rel_edges)

        if not relationship_subgraph.nodes():
            # return empty list if there are no nodes that are reachable from the
            # source node based on this relationship type
            return []

        if connected:
            if source_node not in relationship_subgraph:
                # Edges of this type exist further down, but none touches the source node,
                # so nothing is reachable through this relationship. Without this guard
                # nx.descendants raises NetworkXError for the missing node.
                return []

            # get the nodes that are reachable from a given source node
            # after the pruning process above some nodes in the
            # root_descendants subgraph might have become disconnected and
            # will be omitted
            reachable = nx.descendants(relationship_subgraph, source_node)
            reachable.add(source_node)

            if ordered:
                # normally, the descendants from a node are unordered (peculiarity
                # of nx descendants call)
                # form the subgraph on descendants and order it topologically
                # this assumes an acyclic subgraph
                return list(
                    nx.topological_sort(relationship_subgraph.subgraph(reachable))
                )
            return list(reachable)

        if ordered:
            # sort the nodes topologically
            # this requires the graph to be an acyclic graph
            return list(nx.topological_sort(relationship_subgraph))

        return list(relationship_subgraph.nodes())

    def get_digraph_by_edge_type(self, edge_type: str) -> nx.DiGraph:
        """Get a networkx digraph of the nodes connected via a given edge_type.
        Args:
            edge_type:
                Edge type to search for, possible types are defined by 'edge_key'
                    in relationship class
        Returns:
            Directed graph containing only the edges of the data model graph whose key
            equals edge_type.
        """

        digraph: nx.DiGraph = nx.DiGraph()
        for node_1, node_2, key, _ in self.graph.edges(data=True, keys=True):
            if key == edge_type:
                digraph.add_edge(node_1, node_2)
        return digraph

    def get_edges_by_relationship(
        self,
        node: str,
        relationship: str,
    ) -> list[tuple[str, str]]:
        """Get a list of out-edges of a node where the edges match a specific type of relationship.

        i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf"
            (set of edges to children / sub-class nodes).

        Args:
            node: the node whose edges we need to look at.
            relationship: the type of link(s) that the above node and its immediate neighbors share.

        Returns:
            List of edges that are connected to the node.
        """
        edges: list[tuple[str, str]] = []

        for node_1, node_2, key, _ in self.graph.out_edges(node, data=True, keys=True):
            if key == relationship:
                edges.append((node_1, node_2))

        return edges

    def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
        """
        Order the values associated with a particular node and edge_key to
            match original ordering in schema.

        Args:
            key (str): a key representing and edge relationship in
                DataModelRelationships.relationships_dictionary
            source_node_label (str): node to look for edges of and order

        Raises:
            KeyError: cannot find source node in graph

        Returns:
            list[str]:
                list of sorted nodes, that share the specified relationship with the source node
                For the example data model, for key='rangeIncludes', source_node_label='CancerType'
                the return would be ['Breast, 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that
                exact order.
        """
        # Check if node is in the graph, if not throw an error.
        if not self.is_class_in_schema(node_label=source_node_label):
            raise KeyError(
                f"Cannot find node: {source_node_label} in the graph, please check entry."
            )

        edge_key = self.dmr.get_relationship_value(key, "edge_key")

        # Handle out edges
        if self.dmr.get_relationship_value(key, "jsonld_direction") == "out":
            # use out edges
            original_edge_weights_dict = {
                attached_node: self.graph[source_node][attached_node][edge_key][
                    "weight"
                ]
                for source_node, attached_node in self.graph.out_edges(
                    source_node_label
                )
                if edge_key in self.graph[source_node][attached_node]
            }
        # Handle in edges
        else:
            # use in edges
            original_edge_weights_dict = {
                attached_node: self.graph[attached_node][source_node][edge_key][
                    "weight"
                ]
                for attached_node, source_node in self.graph.in_edges(source_node_label)
                if edge_key in self.graph[attached_node][source_node]
            }

        # Order the attached nodes by the edge weight stored on the graph.
        sorted_nodes = [
            attached_node
            for attached_node, _ in sorted(
                original_edge_weights_dict.items(), key=lambda item: item[1]
            )
        ]

        return sorted_nodes

    # Get values associated with a node
    def get_nodes_ancestors(self, subgraph: nx.DiGraph, node_label: str) -> list[str]:
        """Get a list of the ancestors of a node in the given graph

        Args:
            subgraph (nx.DiGraph): networkx graph object
            node_label (str): label of node to find ancestors for

        Returns:
            list[str]: ancestors of node_label in subgraph, as returned by nx.ancestors
        """
        all_ancestors = list(nx.ancestors(subgraph, node_label))

        return all_ancestors

    def get_node_comment(
        self, node_display_name: Optional[str] = None, node_label: Optional[str] = None
    ) -> str:
        """Get the node definition, i.e., the "comment" associated with a given node display name.

        Args:
            node_display_name, str: Display name of the node which you want to get the comment for.
            node_label, str: Label of the node you would want to get the comment for.
        Returns:
            Comment associated with node, as a string. An empty string is returned when the
            label resolves to an empty value.
        """
        node_label = self._get_node_label(node_label, node_display_name)

        if not node_label:
            return ""

        node_definition = self.graph.nodes[node_label][
            self.dmr.get_relationship_value("comment", "node_label")
        ]
        return node_definition

    def get_node_dependencies(
        self,
        source_node: str,
        display_names: bool = True,
        schema_ordered: bool = True,
    ) -> list[str]:
        """Get the immediate dependencies that are related to a given source node.

        Args:
            source_node: The node whose dependencies we need to compute.
            display_names: if True, return list of display names of each of the dependencies.
                           if False, return list of node labels of each of the dependencies.
            schema_ordered:
                if True, return the dependencies of the node following the order of the schema
                    (slower).
                if False, return dependencies from graph without guaranteeing schema order (faster)

        Returns:
            List of nodes that are dependent on the source node.
        """

        if schema_ordered:
            # get dependencies in the same order in which they are defined in the schema
            # NOTE(review): get_ordered_entry resolves `key` to an edge key again, so this
            # relies on the edge key of "requiresDependency" also being a valid
            # relationship key -- confirm against DataModelRelationships.
            required_dependencies = self.get_ordered_entry(
                key=self.dmr.get_relationship_value("requiresDependency", "edge_key"),
                source_node_label=source_node,
            )
        else:
            required_dependencies = self.get_adjacent_nodes_by_relationship(
                node_label=source_node,
                relationship=self.dmr.get_relationship_value(
                    "requiresDependency", "edge_key"
                ),
            )

        if display_names:
            # get display names of dependencies
            dependencies_display_names = []

            for req in required_dependencies:
                dependencies_display_names.append(
                    self.graph.nodes[req][
                        self.dmr.get_relationship_value("displayName", "node_label")
                    ]
                )

            return dependencies_display_names

        return required_dependencies

    def get_nodes_descendants(self, node_label: str) -> list[str]:
        """Return a list of nodes reachable from source in graph
        Args:
            node_label, str: any given node
        Return:
            all_descendants, list: nodes reachable from source in graph
        """
        all_descendants = list(nx.descendants(self.graph, node_label))

        return all_descendants

    def get_nodes_display_names(
        self,
        node_list: list[str],
    ) -> list[str]:
        """Get display names associated with the given list of nodes.

        Args:
            node_list: List of nodes whose display names we need to retrieve.

        Returns:
            List of display names.
        """
        node_list_display_names = [
            self.graph.nodes[node][
                self.dmr.get_relationship_value("displayName", "node_label")
            ]
            for node in node_list
        ]

        return node_list_display_names

    def get_node_label(self, node_display_name: str) -> str:
        """Get the node label for a given display name.

        Args:
            node_display_name: Display name of the node which you want to get the label for.
        Returns:
            Node label associated with given node. The class label is preferred over the
            property label when both are present in the graph.
            If display name not part of schema, return an empty string.
        """

        node_class_label = get_class_label_from_display_name(
            display_name=node_display_name
        )
        node_property_label = get_property_label_from_display_name(
            display_name=node_display_name
        )

        if node_class_label in self.graph.nodes:
            node_label = node_class_label
        elif node_property_label in self.graph.nodes:
            node_label = node_property_label
        else:
            node_label = ""

        return node_label

    def get_node_range(
        self,
        node_label: Optional[str] = None,
        node_display_name: Optional[str] = None,
        display_names: bool = False,
    ) -> list[str]:
        """
        Get the range, i.e., all the valid values that are associated with a node label.


        Args:
            node_label (Optional[str], optional): Node for which you need to retrieve the range.
                Defaults to None.
            node_display_name (Optional[str], optional): Display name used to resolve the node
                label when node_label is not given. Defaults to None.
            display_names (bool, optional): If True, return the display names of the valid
                values instead of their labels. Defaults to False.

        Raises:
            ValueError: If looking up the range raises a KeyError.

        Returns:
            list[str]:
                If display_names=False, a list of valid values (labels) associated with a given node.
                If display_names=True, a list of valid values (display names) associated
                    with a given node
        """
        node_label = self._get_node_label(node_label, node_display_name)
        try:
            # get the node range for the given node
            required_range = self.find_node_range(node_label=node_label)
        except KeyError as exc:
            raise ValueError(
                f"The source node {node_label} does not exist in the graph. "
                "Please use a different node."
            ) from exc

        if display_names:
            # get the display name(s) of all dependencies
            dependencies_display_names = []

            for req in required_range:
                dependencies_display_names.append(self.graph.nodes[req]["displayName"])

            return dependencies_display_names

        return required_range

    def get_node_required(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> bool:
        """Check if a given node is required or not.

        Note: The possible options that a node can be associated with -- "required" / "optional".

        Args:
            node_label: Label of the node for which you need to look up.
            node_display_name: Display name of the node for which you want look up.
        Returns:
            True: If the given node is a "required" node.
            False: If the given node is not a "required" (i.e., an "optional") node.
        """
        node_label = self._get_node_label(node_label, node_display_name)
        rel_node_label = self.dmr.get_relationship_value("required", "node_label")
        node_required = self.graph.nodes[node_label][rel_node_label]
        return node_required

    def get_node_validation_rules(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> Union[list, dict[str, str]]:
        """Get validation rules associated with a node,

        Args:
            node_label: Label of the node for which you need to look up.
            node_display_name: Display name of the node which you want to get the label for.
        Returns:
            A set of validation rules associated with node, as a list or a dictionary.
            An empty list is returned when the label resolves to an empty value.
        Raises:
            ValueError: If the validation rules cannot be looked up for the node label.
        """
        node_label = self._get_node_label(node_label, node_display_name)

        if not node_label:
            return []

        try:
            node_validation_rules = self.graph.nodes[node_label]["validationRules"]
        except KeyError as key_error:
            raise ValueError(
                f"{node_label} is not in the graph, please provide a proper node label"
            ) from key_error

        return node_validation_rules

    def get_subgraph_by_edge_type(self, relationship: str) -> nx.DiGraph:
        """Get a subgraph containing all edges of a given type (aka relationship).

        Args:
            relationship: edge / link relationship type with possible values same as in above docs.

        Returns:
            Directed graph on edges of a particular type (aka relationship)
        """

        # prune the metadata model graph so as to include only those edges that
        # match the relationship type
        rel_edges = []
        for node_1, node_2, key, _ in self.graph.out_edges(data=True, keys=True):
            if key == relationship:
                rel_edges.append((node_1, node_2))

        relationship_subgraph: nx.DiGraph = nx.DiGraph()
        relationship_subgraph.add_edges_from(rel_edges)

        return relationship_subgraph

    def find_adjacent_child_classes(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> list[str]:
        """Find child classes of a given node.
        Args:
            node_display_name: Display name of the node to look up.
            node_label: Label of the node to look up.
        Returns:
            List of nodes that are adjacent to the given node, by SubclassOf relationship.
        """
        node_label = self._get_node_label(node_label, node_display_name)
        return self.get_adjacent_nodes_by_relationship(
            node_label=node_label,
            relationship=self.dmr.get_relationship_value("subClassOf", "edge_key"),
        )

    def find_child_classes(self, schema_class: str) -> list:
        """Find schema classes that inherit from the given class
        Args:
            schema_class: node label for the class to from which to look for children.
        Returns:
            list of children to the schema_class.
        """
        child_classes = unlist(list(self.graph.successors(schema_class)))
        assert isinstance(child_classes, list)
        return child_classes

    def find_class_specific_properties(self, schema_class: str) -> list[str]:
        """Find properties specifically associated with a given class
        Args:
            schema_class, str: node/class label, to identify properties for.
        Returns:
            properties, list: List of properties associate with a given schema class.
        Raises:
            KeyError: Key error is raised if the provided schema_class is not in the graph
        """

        if not self.is_class_in_schema(schema_class):
            raise KeyError(
                (
                    f"Schema_class provided: {schema_class} is not in the data model, please check "
                    "that you are providing the proper class/node label"
                )
            )

        properties = []
        for node1, node2 in self.graph.edges():
            if (
                node2 == schema_class
                and "domainValue" in self.graph[node1][schema_class]
            ):
                properties.append(node1)
        return properties

    def find_parent_classes(self, node_label: str) -> list[list[str]]:
        """Find all parents of the provided node
        Args:
            node_label: label of the node to find parents of
        Returns:
            List of list of Parents to the given node. Each inner list is one path from the
            root node down to (but excluding) node_label.
        """
        # Get digraph of nodes with parents
        digraph = self.get_digraph_by_edge_type("parentOf")

        # Get root node: the first node in topological order of the "parentOf" digraph
        root_node = list(nx.topological_sort(digraph))[0]

        # Get paths between root_node and the target node.
        paths = nx.all_simple_paths(self.graph, source=root_node, target=node_label)

        return [_path[:-1] for _path in paths]

    def full_schema_graph(self, size: Optional[int] = None) -> graphviz.Digraph:
        """Create a graph of the data model.
        Args:
            size, float: max height and width of the graph, if one value provided
                it is used for both.
        Returns:
            schema graph viz
        """
        edges = self.graph.edges()
        return visualize(edges, size=size)

    def is_class_in_schema(self, node_label: str) -> bool:
        """Determine if provided node_label is in the schema graph/data model.
        Args:
            node_label: label of node to search for in the graph
        Returns:
            True, if node is in the graph schema
            False, if node is not in graph schema
        """
        return node_label in self.graph.nodes()

    def sub_schema_graph(
        self, source: str, direction: str, size: Optional[float] = None
    ) -> Optional[graphviz.Digraph]:
        """Create a sub-schema graph
        Args:
            source, str: source node label to start graph
            direction, str: direction to create the visualization, choose from "up", "down", "both"
            size, float: max height and width of the graph, if one value provided it is used for
                both.
        Returns:
            Sub-schema graph viz, or None when direction is not one of the values above.
        """
        if direction not in ("down", "up", "both"):
            return None

        # Parent paths are looked up before the downward traversal, as in the "both" case
        # of the earlier implementation.
        parent_paths = (
            self.find_parent_classes(source) if direction in ("up", "both") else []
        )
        edges = (
            list(nx.edge_bfs(self.graph, [source]))
            if direction in ("down", "both")
            else []
        )

        # Turn every root-to-source path into its consecutive (parent, child) pairs.
        for _path in parent_paths:
            full_path = [*_path, source]
            edges.extend(zip(full_path, full_path[1:]))

        return visualize(edges, size=size)

    def get_node_column_type(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> Optional[JSONSchemaType]:
        """Gets the column type of the node

        Args:
            node_label: The label of the node to get the type from
            node_display_name: The display name of the node to get the type from

        Returns:
            The column type of the node if it has one, otherwise None
        """
        node_label = self._get_node_label(node_label, node_display_name)
        rel_node_label = self.dmr.get_relationship_value("columnType", "node_label")
        type_string = self.graph.nodes[node_label][rel_node_label]
        if type_string is None:
            return type_string
        return JSONSchemaType(type_string)

    def _get_node_label(
        self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
    ) -> str:
        """Returns the node label if given otherwise gets the node label from the display name

        Args:
            node_label: The label of the node to get the type from
            node_display_name: The display name of the node to get the type from

        Raises:
            ValueError: If neither node_label or node_display_name is provided

        Returns:
            The node label
        """
        if node_label is not None:
            return node_label
        if node_display_name is not None:
            return self.get_node_label(node_display_name)
        raise ValueError("Either 'node_label' or 'node_display_name' must be provided.")


def create_data_model_graph_explorer(data_model_path: str) -> DataModelGraphExplorer:
    """Creates a DataModelGraphExplore using a data model

    Args:
        data_model_path: The path to a data model to create the dmge

    Returns:
        DataModelGraphExplorer: A dmge created using the input data model
    """
    data_model_parser = DataModelParser(path_to_data_model=data_model_path)
    parsed_data_model = data_model_parser.parse_model()
    data_model_grapher = DataModelGraph(parsed_data_model)
    graph_data_model = data_model_grapher.graph
    return DataModelGraphExplorer(graph_data_model)
class DataModelGraphMeta(type):  # pylint: disable=too-few-public-methods
    """Metaclass that hands out a single cached instance per class.

    The first call to a class using this metaclass builds the instance in the
    normal way; every later call returns that same object, regardless of the
    arguments passed.

    Note: the metaclass only takes effect when declared with the Python 3
    syntax ``class Foo(metaclass=DataModelGraphMeta)``. A ``__metaclass__``
    class attribute is an ordinary attribute in Python 3 and does not
    activate it.
    """

    # Cache of already-built instances, keyed by the class that was called.
    _instances: dict = {}

    def __call__(cls, *args: Any, **kwargs: Any) -> Any:
        """Return the cached instance of ``cls``, creating it on first use.

        Possible changes to the value of the `__init__` argument do not affect
        the returned instance.

        Args:
            *args: positional arguments forwarded to the class on first call.
            **kwargs: keyword arguments forwarded to the class on first call.

        Returns:
            The one instance of ``cls`` held in the cache.
        """
        if cls not in cls._instances:
            # Only the first call actually constructs (and initialises) the object.
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]
DataModelGraphMeta
class DataModelGraph:  # pylint: disable=too-few-public-methods
    """
    Build a networkx graph from the attributes and relationships returned
    by the data model parser.

    The finished graph is stored on the instance as ``self.graph``.
    """

    # NOTE(review): in Python 3 a ``__metaclass__`` attribute is a plain class
    # attribute and is not used as the metaclass, so this line on its own does
    # not make the class a singleton -- confirm whether that is intended.
    __metaclass__ = DataModelGraphMeta

    def __init__(
        self,
        attribute_relationships_dict: dict,
        data_model_labels: DisplayLabelType = "class_label",
    ) -> None:
        """Store the parsed data model and build its graph.

        Args:
            attribute_relationships_dict, dict: generated in data_model_parser
                {Attribute Display Name: {
                    Relationships: {
                        CSV Header: Value}}}
            data_model_labels: str, display_label or class_label.
                display_label, use the display name as a label, if it is valid
                (contains no blacklisted characters) otherwise will default to schema_label.
                class_label, default, use standard class or property label.
        Raises:
            ValueError: attribute_relationships_dict is empty, i.e. no data model was loaded.
        """
        self.attribute_relationships_dict = attribute_relationships_dict
        self.dmn = DataModelNodes(attribute_relationships_dict)
        self.dme = DataModelEdges()
        self.dmr = DataModelRelationships()
        self.data_model_labels = data_model_labels

        if not attribute_relationships_dict:
            message = (
                "Something has gone wrong, a data model was not loaded into the DataModelGraph "
                "Class. Please check that your paths are correct"
            )
            raise ValueError(message)

        self.graph = self.generate_data_model_graph()

    @tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
    def generate_data_model_graph(self) -> nx.MultiDiGraph:
        """
        Build the NetworkX graph for the loaded data model.

        All nodes are added to the graph first; the nodes are then connected using
        the edge list produced from the relationships in the
        attribute_relationships_dict.

        Returns:
            graph: nx.MultiDiGraph, networkx graph representation of the data model
        """
        model = self.attribute_relationships_dict

        # Relationships that are represented as edges
        edge_relationships = self.dmr.retrieve_rel_headers_dict(edge=True)

        # Every node named in the model
        all_nodes = self.dmn.gather_all_nodes_in_model(attr_rel_dict=model)

        graph: nx.MultiDiGraph = nx.MultiDiGraph()

        # First pass: create each node and remember its attribute dict, which
        # is needed again when the edges are generated.
        all_node_dict = {}
        for display_name in all_nodes:
            node_info = self.dmn.generate_node_dict(
                node_display_name=display_name,
                attr_rel_dict=model,
                data_model_labels=self.data_model_labels,
            )
            all_node_dict[display_name] = node_info
            graph = self.dmn.generate_node(graph, node_info)

        # Second pass: thread the edge list through generate_edge once per
        # node, keeping a copy of whatever it returns.
        edge_list: list[tuple[str, str, dict[str, Union[str, int]]]] = []
        for display_name in all_nodes:
            updated_edges = self.dme.generate_edge(
                display_name,
                all_node_dict,
                model,
                edge_relationships,
                edge_list,
            )
            edge_list = updated_edges.copy()

        # Finally attach every collected edge to the graph
        for start, end, attrs in edge_list:
            graph.add_edge(start, end, key=attrs["key"], weight=attrs["weight"])
        return graph
Generate graph network (networkx) from the attributes and relationships returned from the data model parser.
Create a singleton.
def __init__(
    self,
    attribute_relationships_dict: dict,
    data_model_labels: DisplayLabelType = "class_label",
) -> None:
    """Store the parsed data model and build its graph.

    Args:
        attribute_relationships_dict, dict: generated in data_model_parser
            {Attribute Display Name: {
                Relationships: {
                    CSV Header: Value}}}
        data_model_labels: str, display_label or class_label.
            display_label, use the display name as a label, if it is valid
            (contains no blacklisted characters) otherwise will default to schema_label.
            class_label, default, use standard class or property label.
    Raises:
        ValueError: attribute_relationships_dict is empty, i.e. no data model was loaded.
    """
    self.attribute_relationships_dict = attribute_relationships_dict
    self.dmn = DataModelNodes(attribute_relationships_dict)
    self.dme = DataModelEdges()
    self.dmr = DataModelRelationships()
    self.data_model_labels = data_model_labels

    # The emptiness check runs after the helper objects are created, and
    # before any graph is generated.
    if not attribute_relationships_dict:
        message = (
            "Something has gone wrong, a data model was not loaded into the DataModelGraph "
            "Class. Please check that your paths are correct"
        )
        raise ValueError(message)

    self.graph = self.generate_data_model_graph()
Load parsed data model.
Arguments:
- attribute_relationships_dict, dict: generated in data_model_parser, with the structure {Attribute Display Name: {Relationships: {CSV Header: Value}}}
- data_model_labels: str, either display_label or class_label. display_label: use the display name as the label if it is valid (contains no blacklisted characters); otherwise default to schema_label. class_label (default): use the standard class or property label.
Raises:
- ValueError: raised when attribute_relationships_dict is empty, i.e. no data model was loaded.
@tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
def generate_data_model_graph(self) -> nx.MultiDiGraph:
    """
    Build the NetworkX graph for the loaded data model.

    All nodes are added to the graph first; the nodes are then connected using
    the edge list produced from the relationships in the
    attribute_relationships_dict.

    Returns:
        graph: nx.MultiDiGraph, networkx graph representation of the data model
    """
    model = self.attribute_relationships_dict

    # Relationships that are represented as edges
    edge_relationships = self.dmr.retrieve_rel_headers_dict(edge=True)

    # Every node named in the model
    all_nodes = self.dmn.gather_all_nodes_in_model(attr_rel_dict=model)

    graph: nx.MultiDiGraph = nx.MultiDiGraph()

    # First pass: create each node and remember its attribute dict, which is
    # needed again when the edges are generated.
    all_node_dict = {}
    for display_name in all_nodes:
        node_info = self.dmn.generate_node_dict(
            node_display_name=display_name,
            attr_rel_dict=model,
            data_model_labels=self.data_model_labels,
        )
        all_node_dict[display_name] = node_info
        graph = self.dmn.generate_node(graph, node_info)

    # Second pass: thread the edge list through generate_edge once per node,
    # keeping a copy of whatever it returns.
    edge_list: list[tuple[str, str, dict[str, Union[str, int]]]] = []
    for display_name in all_nodes:
        updated_edges = self.dme.generate_edge(
            display_name,
            all_node_dict,
            model,
            edge_relationships,
            edge_list,
        )
        edge_list = updated_edges.copy()

    # Finally attach every collected edge to the graph
    for start, end, attrs in edge_list:
        graph.add_edge(start, end, key=attrs["key"], weight=attrs["weight"])
    return graph
Generate a NetworkX graph from the attribute/relationships dictionary. The graph is built by first adding all nodes to the graph, then connecting the nodes according to the relationships defined in attribute_relationships_dict.
Returns:
graph: nx.MultiDiGraph, networkx graph representation of the data model
152class DataModelGraphExplorer: # pylint: disable=too-many-public-methods 153 """DataModelGraphExplorer""" 154 155 def __init__( 156 self, 157 graph: nx.MultiDiGraph, 158 ): 159 """Load data model graph as a singleton. 160 Args: 161 G: nx.MultiDiGraph, networkx graph representation of the data model 162 """ 163 self.graph = graph # At this point the graph is expected to be fully formed. 164 self.dmr = DataModelRelationships() 165 166 def find_properties(self) -> set[str]: 167 """ 168 Identify all properties, as defined by the first node in a pair, connected with 169 'domainIncludes' edge type 170 171 Returns: 172 properties, set: All properties defined in the data model, each property name 173 is defined by its label. 174 """ 175 properties_list: list[str] = [] 176 for node_1, _, rel in self.graph.edges: 177 if rel == self.dmr.get_relationship_value("domainIncludes", "edge_key"): 178 properties_list.append(node_1) 179 properties_set = set(properties_list) 180 return properties_set 181 182 def find_classes(self) -> AbstractSet[str]: 183 """ 184 Identify all classes, as defined but all nodes, minus all properties 185 (which are explicitly defined) 186 Returns: 187 classes, set: All classes defined in the data model, each class 188 name is defined by its label. 189 """ 190 nodes = self.graph.nodes 191 properties = self.find_properties() 192 classes = nodes - properties 193 return classes 194 195 def find_node_range( 196 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 197 ) -> list: 198 """Get valid values for the given node (attribute) 199 Args: 200 node_label, str, Optional[str]: label of the node for which to retrieve valid values 201 node_display_name, str, Optional[str]: Display Name of the node for which to 202 retrieve valid values 203 Returns: 204 valid_values, list: List of valid values associated with the provided node. 
205 """ 206 node_label = self._get_node_label(node_label, node_display_name) 207 208 valid_values = [] 209 for node_1, node_2, rel in self.graph.edges: 210 if node_1 == node_label and rel == self.dmr.get_relationship_value( 211 "rangeIncludes", "edge_key" 212 ): 213 valid_values.append(node_2) 214 valid_values = list(set(valid_values)) 215 return valid_values 216 217 def get_adjacent_nodes_by_relationship( 218 self, node_label: str, relationship: str 219 ) -> list[str]: 220 """Get a list of nodes that is / are adjacent to a given node, based on a relationship type. 221 222 Args: 223 node_label: label of the the node whose edges we need to look at. 224 relationship: the type of link(s) that the above node and its immediate neighbors share. 225 226 Returns: 227 List of nodes that are adjacent to the given node. 228 #checked 229 """ 230 nodes = set() 231 for _, node_2, key, _ in self.graph.out_edges(node_label, data=True, keys=True): 232 if key == relationship: 233 nodes.add(node_2) 234 235 return list(nodes) 236 237 def get_component_node_required( 238 self, 239 manifest_component: str, 240 node_validation_rules: Optional[list[str]] = None, 241 node_label: Optional[str] = None, 242 node_display_name: Optional[str] = None, 243 ) -> bool: 244 """Check if a node is required taking into account the manifest component it is defined in 245 (requirements can be set in validation rule as well as required column) 246 Args: 247 manifest_component: str, manifest component display name that the node belongs to. 248 node_validation_rules: list[str], validation rules for a given node and component. 249 node_label: str, Label of the node you would want to get the comment for. 250 node_display_name: str, node display name for the node being queried. 
251 Returns: 252 True, if node is required, False if not 253 """ 254 node_required = False 255 256 if not node_validation_rules: 257 # Get node validation rules for a given component 258 node_validation_rules = self.get_component_node_validation_rules( 259 manifest_component=manifest_component, 260 node_label=node_label, 261 node_display_name=node_display_name, 262 ) 263 264 # Check if the validation rule specifies that the node is required for this particular 265 # component. 266 if rule_in_rule_list("required", node_validation_rules): 267 node_required = True 268 # To prevent any unintended errors, ensure the Required field for this node is False 269 if self.get_node_required( 270 node_label=node_label, node_display_name=node_display_name 271 ): 272 if not node_display_name: 273 assert node_label is not None 274 node_display_name = self.graph.nodes[node_label][ 275 self.dmr.get_relationship_value("displayName", "node_label") 276 ] 277 error_str = " ".join( 278 [ 279 f"For component: {manifest_component} and attribute: {node_display_name}", 280 "requirements are being specified in both the Required field and in the", 281 "Validation Rules. 
If you desire to use validation rules to set component", 282 "specific requirements for this attribute", 283 "then the Required field needs to be set to False, or the validation may", 284 "not work as intended, for other components where the attribute", 285 "that should not be required.", 286 ] 287 ) 288 289 logger.error(error_str) 290 else: 291 # If requirements are not being set in the validation rule, then just pull the 292 # standard node requirements from the model 293 node_required = self.get_node_required( 294 node_label=node_label, node_display_name=node_display_name 295 ) 296 return node_required 297 298 def get_component_node_validation_rules( 299 self, 300 manifest_component: str, 301 node_label: Optional[str] = None, 302 node_display_name: Optional[str] = None, 303 ) -> list: 304 """Get validation rules for a given node and component. 305 Args: 306 manifest_component: str, manifest component display name that the node belongs to. 307 node_label: str, Label of the node you would want to get the comment for. 308 node_display_name: str, node display name for the node being queried. 309 Returns: 310 validation_rules: list, validation rules list for a given node and component. 311 """ 312 # get any additional validation rules associated with this node (e.g. 
can this node 313 # be mapped to a list of other nodes) 314 node_validation_rules = self.get_node_validation_rules( 315 node_label=node_label, node_display_name=node_display_name 316 ) 317 318 # Parse the validation rules per component if applicable 319 if node_validation_rules and isinstance(node_validation_rules, dict): 320 node_validation_rules_list = extract_component_validation_rules( 321 manifest_component=manifest_component, 322 validation_rules_dict=node_validation_rules, # type: ignore 323 ) 324 else: 325 assert isinstance(node_validation_rules, list) 326 node_validation_rules_list = node_validation_rules 327 return node_validation_rules_list 328 329 def get_component_requirements( 330 self, 331 source_component: str, 332 ) -> list[str]: 333 """ 334 Get all components that are associated with a given source component and are 335 required by it. 336 337 Args: 338 source_component: source component for which we need to find all required downstream 339 components. 340 341 Returns: 342 List of nodes that are descendants from the source component are are related to the 343 source through a specific component relationship. 344 """ 345 346 req_components = list( 347 reversed( 348 self.get_descendants_by_edge_type( 349 source_component, 350 self.dmr.get_relationship_value("requiresComponent", "edge_key"), 351 ordered=True, 352 ) 353 ) 354 ) 355 356 return req_components 357 358 def get_component_requirements_graph( 359 self, 360 source_component: str, 361 ) -> nx.Graph: 362 """ 363 Get all components that are associated with a given source component and are required by it; 364 return the components as a dependency graph (i.e. a DAG). 365 366 Args: 367 source_component, str: source component for which we need to find all required 368 downstream components. 369 370 Returns: 371 A subgraph of the schema graph induced on nodes that are descendants from the source 372 component and are related to the source through a specific component relationship. 
373 """ 374 375 # get a list of required component nodes 376 req_components = self.get_component_requirements(source_component) 377 378 # get the subgraph induced on required component nodes 379 req_components_graph = self.get_subgraph_by_edge_type( 380 self.dmr.get_relationship_value("requiresComponent", "edge_key"), 381 ).subgraph(req_components) 382 383 return req_components_graph 384 385 def get_descendants_by_edge_type( 386 self, 387 source_node: str, 388 relationship: str, 389 connected: bool = True, 390 ordered: bool = False, 391 ) -> list[str]: 392 """ 393 Get all nodes that are descendants of a given source node, based on a specific 394 type of edge / relationship type. 395 396 Args: 397 source_node: The node whose descendants need to be retrieved. 398 relationship: Edge / link relationship type with possible values same as in above docs. 399 connected: 400 If True, we need to ensure that all descendant nodes are reachable from the source 401 node, i.e., they are part of the same connected component. 402 If False, the descendants could be in multiple connected components. 403 Default value is True. 404 ordered: 405 If True, the list of descendants will be topologically ordered. 406 If False, the list has no particular order (depends on the order in which the 407 descendants were traversed in the subgraph). 
408 409 Returns: 410 List of nodes that are descendants from a particular node (sorted / unsorted) 411 """ 412 413 root_descendants = nx.descendants(self.graph, source_node) 414 415 subgraph_nodes = list(root_descendants) 416 subgraph_nodes.append(source_node) 417 descendants_subgraph = self.graph.subgraph(subgraph_nodes) 418 419 # prune the descendants subgraph so as to include only those edges that match 420 # the relationship type 421 rel_edges = [] 422 for node_1, node_2, key, _ in descendants_subgraph.edges(data=True, keys=True): 423 if key == relationship: 424 rel_edges.append((node_1, node_2)) 425 426 relationship_subgraph: nx.DiGraph = nx.DiGraph() 427 relationship_subgraph.add_edges_from(rel_edges) 428 429 descendants = relationship_subgraph.nodes() 430 431 if not descendants: 432 # return empty list if there are no nodes that are reachable from the 433 # source node based on this relationship type 434 return [] 435 436 if connected and ordered: 437 # get the set of reachable nodes from the source node 438 descendants = nx.descendants(relationship_subgraph, source_node) 439 descendants.add(source_node) 440 441 # normally, the descendants from a node are unordered (peculiarity 442 # of nx descendants call) 443 # form the subgraph on descendants and order it topologically 444 # this assumes an acyclic subgraph 445 descendants = nx.topological_sort( 446 relationship_subgraph.subgraph(descendants) 447 ) 448 elif connected: 449 # get the nodes that are reachable from a given source node 450 # after the pruning process above some nodes in the 451 # root_descendants subgraph might have become disconnected and 452 # will be omitted 453 descendants = nx.descendants(relationship_subgraph, source_node) 454 descendants.add(source_node) 455 elif ordered: 456 # sort the nodes topologically 457 # this requires the graph to be an acyclic graph 458 descendants = nx.topological_sort(relationship_subgraph) 459 460 return list(descendants) 461 462 def 
get_digraph_by_edge_type(self, edge_type: str) -> nx.DiGraph: 463 """Get a networkx digraph of the nodes connected via a given edge_type. 464 Args: 465 edge_type: 466 Edge type to search for, possible types are defined by 'edge_key' 467 in relationship class 468 Returns: 469 """ 470 471 digraph: nx.DiGraph = nx.DiGraph() 472 for node_1, node_2, key, _ in self.graph.edges(data=True, keys=True): 473 if key == edge_type: 474 digraph.add_edge(node_1, node_2) 475 return digraph 476 477 def get_edges_by_relationship( 478 self, 479 node: str, 480 relationship: str, 481 ) -> list[tuple[str, str]]: 482 """Get a list of out-edges of a node where the edges match a specific type of relationship. 483 484 i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf" 485 (set of edges to children / sub-class nodes). 486 487 Args: 488 node: the node whose edges we need to look at. 489 relationship: the type of link(s) that the above node and its immediate neighbors share. 490 491 Returns: 492 List of edges that are connected to the node. 493 """ 494 edges: list[tuple[str, str]] = [] 495 496 for node_1, node_2, key, _ in self.graph.out_edges(node, data=True, keys=True): 497 if key == relationship: 498 edges.append((node_1, node_2)) 499 500 return edges 501 502 def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]: 503 """ 504 Order the values associated with a particular node and edge_key to 505 match original ordering in schema. 
506 507 Args: 508 key (str): a key representing and edge relationship in 509 DataModelRelationships.relationships_dictionary 510 source_node_label (str): node to look for edges of and order 511 512 Raises: 513 KeyError: cannot find source node in graph 514 515 Returns: 516 list[str]: 517 list of sorted nodes, that share the specified relationship with the source node 518 For the example data model, for key='rangeIncludes', source_node_label='CancerType' 519 the return would be ['Breast, 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that 520 exact order. 521 """ 522 # Check if node is in the graph, if not throw an error. 523 if not self.is_class_in_schema(node_label=source_node_label): 524 raise KeyError( 525 f"Cannot find node: {source_node_label} in the graph, please check entry." 526 ) 527 528 edge_key = self.dmr.get_relationship_value(key, "edge_key") 529 530 # Handle out edges 531 if self.dmr.get_relationship_value(key, "jsonld_direction") == "out": 532 # use out edges 533 534 original_edge_weights_dict = { 535 attached_node: self.graph[source_node][attached_node][edge_key][ 536 "weight" 537 ] 538 for source_node, attached_node in self.graph.out_edges( 539 source_node_label 540 ) 541 if edge_key in self.graph[source_node][attached_node] 542 } 543 # Handle in edges 544 else: 545 # use in edges 546 original_edge_weights_dict = { 547 attached_node: self.graph[attached_node][source_node][edge_key][ 548 "weight" 549 ] 550 for attached_node, source_node in self.graph.in_edges(source_node_label) 551 if edge_key in self.graph[attached_node][source_node] 552 } 553 554 sorted_nodes = list( 555 dict( 556 sorted(original_edge_weights_dict.items(), key=lambda item: item[1]) 557 ).keys() 558 ) 559 560 return sorted_nodes 561 562 # Get values associated with a node 563 def get_nodes_ancestors(self, subgraph: nx.DiGraph, node_label: str) -> list[str]: 564 """Get a list of nodes reachable from source component in graph 565 566 Args: 567 subgraph (nx.DiGraph): networkx graph 
object 568 node_label (str): label of node to find ancestors for 569 570 Returns: 571 list[str]: nodes reachable from source in graph 572 """ 573 all_ancestors = list(nx.ancestors(subgraph, node_label)) 574 575 return all_ancestors 576 577 def get_node_comment( 578 self, node_display_name: Optional[str] = None, node_label: Optional[str] = None 579 ) -> str: 580 """Get the node definition, i.e., the "comment" associated with a given node display name. 581 582 Args: 583 node_display_name, str: Display name of the node which you want to get the comment for. 584 node_label, str: Label of the node you would want to get the comment for. 585 Returns: 586 Comment associated with node, as a string. 587 """ 588 node_label = self._get_node_label(node_label, node_display_name) 589 590 if not node_label: 591 return "" 592 593 node_definition = self.graph.nodes[node_label][ 594 self.dmr.get_relationship_value("comment", "node_label") 595 ] 596 return node_definition 597 598 def get_node_dependencies( 599 self, 600 source_node: str, 601 display_names: bool = True, 602 schema_ordered: bool = True, 603 ) -> list[str]: 604 """Get the immediate dependencies that are related to a given source node. 605 606 Args: 607 source_node: The node whose dependencies we need to compute. 608 display_names: if True, return list of display names of each of the dependencies. 609 if False, return list of node labels of each of the dependencies. 610 schema_ordered: 611 if True, return the dependencies of the node following the order of the schema 612 (slower). 613 if False, return dependencies from graph without guaranteeing schema order (faster) 614 615 Returns: 616 List of nodes that are dependent on the source node. 
617 """ 618 619 if schema_ordered: 620 # get dependencies in the same order in which they are defined in the schema 621 required_dependencies = self.get_ordered_entry( 622 key=self.dmr.get_relationship_value("requiresDependency", "edge_key"), 623 source_node_label=source_node, 624 ) 625 else: 626 required_dependencies = self.get_adjacent_nodes_by_relationship( 627 node_label=source_node, 628 relationship=self.dmr.get_relationship_value( 629 "requiresDependency", "edge_key" 630 ), 631 ) 632 633 if display_names: 634 # get display names of dependencies 635 dependencies_display_names = [] 636 637 for req in required_dependencies: 638 dependencies_display_names.append( 639 self.graph.nodes[req][ 640 self.dmr.get_relationship_value("displayName", "node_label") 641 ] 642 ) 643 644 return dependencies_display_names 645 646 return required_dependencies 647 648 def get_nodes_descendants(self, node_label: str) -> list[str]: 649 """Return a list of nodes reachable from source in graph 650 Args: 651 node_label, str: any given node 652 Return: 653 all_descendants, list: nodes reachable from source in graph 654 """ 655 all_descendants = list(nx.descendants(self.graph, node_label)) 656 657 return all_descendants 658 659 def get_nodes_display_names( 660 self, 661 node_list: list[str], 662 ) -> list[str]: 663 """Get display names associated with the given list of nodes. 664 665 Args: 666 node_list: List of nodes whose display names we need to retrieve. 667 668 Returns: 669 List of display names. 670 """ 671 node_list_display_names = [ 672 self.graph.nodes[node][ 673 self.dmr.get_relationship_value("displayName", "node_label") 674 ] 675 for node in node_list 676 ] 677 678 return node_list_display_names 679 680 def get_node_label(self, node_display_name: str) -> str: 681 """Get the node label for a given display name. 682 683 Args: 684 node_display_name: Display name of the node which you want to get the label for. 685 Returns: 686 Node label associated with given node. 
687 If display name not part of schema, return an empty string. 688 """ 689 690 node_class_label = get_class_label_from_display_name( 691 display_name=node_display_name 692 ) 693 node_property_label = get_property_label_from_display_name( 694 display_name=node_display_name 695 ) 696 697 if node_class_label in self.graph.nodes: 698 node_label = node_class_label 699 elif node_property_label in self.graph.nodes: 700 node_label = node_property_label 701 else: 702 node_label = "" 703 704 return node_label 705 706 def get_node_range( 707 self, 708 node_label: Optional[str] = None, 709 node_display_name: Optional[str] = None, 710 display_names: bool = False, 711 ) -> list[str]: 712 """ 713 Get the range, i.e., all the valid values that are associated with a node label. 714 715 716 Args: 717 node_label (Optional[str], optional): Node for which you need to retrieve the range. 718 Defaults to None. 719 node_display_name (Optional[str], optional): _description_. Defaults to None. 720 display_names (bool, optional): _description_. Defaults to False. 721 722 Raises: 723 ValueError: If the node cannot be found in the graph. 724 725 Returns: 726 list[str]: 727 If display_names=False, a list of valid values (labels) associated with a given node. 728 If display_names=True, a list of valid values (display names) associated 729 with a given node 730 """ 731 node_label = self._get_node_label(node_label, node_display_name) 732 try: 733 # get node range in the order defined in schema for given node 734 required_range = self.find_node_range(node_label=node_label) 735 except KeyError as exc: 736 raise ValueError( 737 f"The source node {node_label} does not exist in the graph. " 738 "Please use a different node." 
739 ) from exc 740 741 if display_names: 742 # get the display name(s) of all dependencies 743 dependencies_display_names = [] 744 745 for req in required_range: 746 dependencies_display_names.append(self.graph.nodes[req]["displayName"]) 747 748 return dependencies_display_names 749 750 return required_range 751 752 def get_node_required( 753 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 754 ) -> bool: 755 """Check if a given node is required or not. 756 757 Note: The possible options that a node can be associated with -- "required" / "optional". 758 759 Args: 760 node_label: Label of the node for which you need to look up. 761 node_display_name: Display name of the node for which you want look up. 762 Returns: 763 True: If the given node is a "required" node. 764 False: If the given node is not a "required" (i.e., an "optional") node. 765 """ 766 node_label = self._get_node_label(node_label, node_display_name) 767 rel_node_label = self.dmr.get_relationship_value("required", "node_label") 768 node_required = self.graph.nodes[node_label][rel_node_label] 769 return node_required 770 771 def get_node_validation_rules( 772 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 773 ) -> Union[list, dict[str, str]]: 774 """Get validation rules associated with a node, 775 776 Args: 777 node_label: Label of the node for which you need to look up. 778 node_display_name: Display name of the node which you want to get the label for. 779 Returns: 780 A set of validation rules associated with node, as a list or a dictionary. 
781 """ 782 node_label = self._get_node_label(node_label, node_display_name) 783 784 if not node_label: 785 return [] 786 787 try: 788 node_validation_rules = self.graph.nodes[node_label]["validationRules"] 789 except KeyError as key_error: 790 raise ValueError( 791 f"{node_label} is not in the graph, please provide a proper node label" 792 ) from key_error 793 794 return node_validation_rules 795 796 def get_subgraph_by_edge_type(self, relationship: str) -> nx.DiGraph: 797 """Get a subgraph containing all edges of a given type (aka relationship). 798 799 Args: 800 relationship: edge / link relationship type with possible values same as in above docs. 801 802 Returns: 803 Directed graph on edges of a particular type (aka relationship) 804 """ 805 806 # prune the metadata model graph so as to include only those edges that 807 # match the relationship type 808 rel_edges = [] 809 for node_1, node_2, key, _ in self.graph.out_edges(data=True, keys=True): 810 if key == relationship: 811 rel_edges.append((node_1, node_2)) 812 813 relationship_subgraph: nx.DiGraph = nx.DiGraph() 814 relationship_subgraph.add_edges_from(rel_edges) 815 816 return relationship_subgraph 817 818 def find_adjacent_child_classes( 819 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 820 ) -> list[str]: 821 """Find child classes of a given node. 822 Args: 823 node_display_name: Display name of the node to look up. 824 node_label: Label of the node to look up. 825 Returns: 826 List of nodes that are adjacent to the given node, by SubclassOf relationship. 
827 """ 828 node_label = self._get_node_label(node_label, node_display_name) 829 return self.get_adjacent_nodes_by_relationship( 830 node_label=node_label, 831 relationship=self.dmr.get_relationship_value("subClassOf", "edge_key"), 832 ) 833 834 def find_child_classes(self, schema_class: str) -> list: 835 """Find schema classes that inherit from the given class 836 Args: 837 schema_class: node label for the class to from which to look for children. 838 Returns: 839 list of children to the schema_class. 840 """ 841 child_classes = unlist(list(self.graph.successors(schema_class))) 842 assert isinstance(child_classes, list) 843 return child_classes 844 845 def find_class_specific_properties(self, schema_class: str) -> list[str]: 846 """Find properties specifically associated with a given class 847 Args: 848 schema_class, str: node/class label, to identify properties for. 849 Returns: 850 properties, list: List of properties associate with a given schema class. 851 Raises: 852 KeyError: Key error is raised if the provided schema_class is not in the graph 853 """ 854 855 if not self.is_class_in_schema(schema_class): 856 raise KeyError( 857 ( 858 f"Schema_class provided: {schema_class} is not in the data model, please check " 859 "that you are providing the proper class/node label" 860 ) 861 ) 862 863 properties = [] 864 for node1, node2 in self.graph.edges(): 865 if ( 866 node2 == schema_class 867 and "domainValue" in self.graph[node1][schema_class] 868 ): 869 properties.append(node1) 870 return properties 871 872 def find_parent_classes(self, node_label: str) -> list[list[str]]: 873 """Find all parents of the provided node 874 Args: 875 node_label: label of the node to find parents of 876 Returns: 877 List of list of Parents to the given node. 
878 """ 879 # Get digraph of nodes with parents 880 digraph = self.get_digraph_by_edge_type("parentOf") 881 882 # Get root node 883 root_node = list(nx.topological_sort(digraph))[0] 884 885 # Get paths between root_node and the target node. 886 paths = nx.all_simple_paths(self.graph, source=root_node, target=node_label) 887 888 return [_path[:-1] for _path in paths] 889 890 def full_schema_graph(self, size: Optional[int] = None) -> graphviz.Digraph: 891 """Create a graph of the data model. 892 Args: 893 size, float: max height and width of the graph, if one value provided 894 it is used for both. 895 Returns: 896 schema graph viz 897 """ 898 edges = self.graph.edges() 899 return visualize(edges, size=size) 900 901 def is_class_in_schema(self, node_label: str) -> bool: 902 """Determine if provided node_label is in the schema graph/data model. 903 Args: 904 node_label: label of node to search for in the 905 Returns: 906 True, if node is in the graph schema 907 False, if node is not in graph schema 908 """ 909 return node_label in self.graph.nodes() 910 911 def sub_schema_graph( 912 self, source: str, direction: str, size: Optional[float] = None 913 ) -> Optional[graphviz.Digraph]: 914 """Create a sub-schema graph 915 Args: 916 source, str: source node label to start graph 917 direction, str: direction to create the visualization, choose from "up", "down", "both" 918 size, float: max height and width of the graph, if one value provided it is used for 919 both. 
920 Returns: 921 Sub-schema graph viz 922 """ 923 if direction == "down": 924 edges = list(nx.edge_bfs(self.graph, [source])) 925 return visualize(edges, size=size) 926 if direction == "up": 927 paths = self.find_parent_classes(source) 928 edges = [] 929 for _path in paths: 930 _path.append(source) 931 for i in range(0, len(_path) - 1): 932 edges.append((_path[i], _path[i + 1])) 933 return visualize(edges, size=size) 934 if direction == "both": 935 paths = self.find_parent_classes(source) 936 edges = list(nx.edge_bfs(self.graph, [source])) 937 for _path in paths: 938 _path.append(source) 939 for i in range(0, len(_path) - 1): 940 edges.append((_path[i], _path[i + 1])) 941 return visualize(edges, size=size) 942 return None 943 944 def get_node_column_type( 945 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 946 ) -> Optional[JSONSchemaType]: 947 """Gets the column type of the node 948 949 Args: 950 node_label: The label of the node to get the type from 951 node_display_name: The display name of the node to get the type from 952 953 Returns: 954 The column type of the node if it has one, otherwise None 955 """ 956 node_label = self._get_node_label(node_label, node_display_name) 957 rel_node_label = self.dmr.get_relationship_value("columnType", "node_label") 958 type_string = self.graph.nodes[node_label][rel_node_label] 959 if type_string is None: 960 return type_string 961 return JSONSchemaType(type_string) 962 963 def _get_node_label( 964 self, node_label: Optional[str] = None, node_display_name: Optional[str] = None 965 ) -> str: 966 """Returns the node label if given otherwise gets the node label from the display name 967 968 Args: 969 node_label: The label of the node to get the type from 970 node_display_name: The display name of the node to get the type from 971 972 Raises: 973 ValueError: If neither node_label or node_display_name is provided 974 975 Returns: 976 The node label 977 """ 978 if node_label is not None: 979 return 
node_label 980 if node_display_name is not None: 981 return self.get_node_label(node_display_name) 982 raise ValueError("Either 'node_label' or 'node_display_name' must be provided.")
DataModelGraphExplorer
def __init__(
    self,
    graph: nx.MultiDiGraph,
) -> None:
    """Store the data model graph and create the relationships helper.

    Args:
        graph: networkx MultiDiGraph representation of the data model. The graph
            is expected to be fully formed by the time it is passed in.
    """
    self.graph = graph
    # Used throughout the explorer to translate relationship names into the
    # edge keys / node attribute names stored in the graph.
    self.dmr = DataModelRelationships()
Load data model graph as a singleton.
Arguments:
- graph: nx.MultiDiGraph, networkx graph representation of the data model
def find_properties(self) -> set[str]:
    """Collect the labels of all property nodes in the data model.

    A node counts as a property when it is the first node of at least one edge
    whose key is the 'domainIncludes' edge key.

    Returns:
        Set of node labels, one entry per property found.
    """
    # Resolve the edge key once instead of once per edge visited.
    domain_edge_key = self.dmr.get_relationship_value("domainIncludes", "edge_key")
    return {
        node_1 for node_1, _, rel in self.graph.edges if rel == domain_edge_key
    }
Identify all properties, as defined by the first node in a pair, connected with 'domainIncludes' edge type
Returns:
properties, set: All properties defined in the data model, each property name is defined by its label.
def find_classes(self) -> AbstractSet[str]:
    """Collect the labels of all class nodes in the data model.

    Classes are every node in the graph that is not reported as a property
    by ``find_properties``.

    Returns:
        Set of node labels that are classes.
    """
    all_nodes = self.graph.nodes
    return all_nodes - self.find_properties()
Identify all classes, defined as all nodes minus all properties (which are explicitly defined)
Returns:
classes, set: All classes defined in the data model, each class name is defined by its label.
def find_node_range(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> list:
    """Get the valid values for the given node (attribute).

    Valid values are the target nodes of every edge that starts at the node and
    whose key is the 'rangeIncludes' edge key.

    Args:
        node_label: Label of the node for which to retrieve valid values.
        node_display_name: Display name of the node for which to retrieve valid
            values; used to resolve the label when ``node_label`` is not given.

    Returns:
        List of unique valid values for the node, in no particular order.
    """
    node_label = self._get_node_label(node_label, node_display_name)

    # Resolve the edge key once instead of once per matching edge.
    range_edge_key = self.dmr.get_relationship_value("rangeIncludes", "edge_key")
    valid_values = {
        node_2
        for node_1, node_2, rel in self.graph.edges
        if node_1 == node_label and rel == range_edge_key
    }
    return list(valid_values)
Get valid values for the given node (attribute)
Arguments:
- node_label, str, Optional[str]: label of the node for which to retrieve valid values
- node_display_name, str, Optional[str]: Display Name of the node for which to retrieve valid values
Returns:
valid_values, list: List of valid values associated with the provided node.
def get_adjacent_nodes_by_relationship(
    self, node_label: str, relationship: str
) -> list[str]:
    """List the nodes adjacent to a node through a given relationship type.

    Args:
        node_label: Label of the node whose outgoing edges are inspected.
        relationship: Edge key that an outgoing edge must carry for its target
            node to be included.

    Returns:
        List of unique target nodes, in no particular order.
    """
    outgoing = self.graph.out_edges(node_label, data=True, keys=True)
    neighbours = {
        target for _, target, edge_key, _ in outgoing if edge_key == relationship
    }
    return list(neighbours)
Get a list of nodes that is / are adjacent to a given node, based on a relationship type.
Arguments:
- node_label: label of the node whose edges we need to look at.
- relationship: the type of link(s) that the above node and its immediate neighbors share.
Returns:
List of nodes that are adjacent to the given node.
checked
def get_component_node_required(
    self,
    manifest_component: str,
    node_validation_rules: Optional[list[str]] = None,
    node_label: Optional[str] = None,
    node_display_name: Optional[str] = None,
) -> bool:
    """Decide whether a node is required within a given manifest component.

    A requirement can come from a "required" validation rule for the component
    or from the node's own required attribute in the model.

    Args:
        manifest_component: Display name of the manifest component the node
            belongs to.
        node_validation_rules: Validation rules for the node and component. When
            empty or None they are looked up for the component.
        node_label: Label of the node being queried.
        node_display_name: Display name of the node being queried.

    Returns:
        True if the node is required, False otherwise.
    """
    if not node_validation_rules:
        # No rules supplied: fetch the ones that apply to this component.
        node_validation_rules = self.get_component_node_validation_rules(
            manifest_component=manifest_component,
            node_label=node_label,
            node_display_name=node_display_name,
        )

    if not rule_in_rule_list("required", node_validation_rules):
        # The rules do not set a requirement, so fall back to the model's value.
        return self.get_node_required(
            node_label=node_label, node_display_name=node_display_name
        )

    # The rules make the node required for this component. If the model also
    # marks it as required, log the conflicting configuration.
    if self.get_node_required(
        node_label=node_label, node_display_name=node_display_name
    ):
        if not node_display_name:
            assert node_label is not None
            node_attributes = self.graph.nodes[node_label]
            display_name_key = self.dmr.get_relationship_value(
                "displayName", "node_label"
            )
            node_display_name = node_attributes[display_name_key]
        message_parts = [
            f"For component: {manifest_component} and attribute: {node_display_name}",
            "requirements are being specified in both the Required field and in the",
            "Validation Rules. If you desire to use validation rules to set component",
            "specific requirements for this attribute",
            "then the Required field needs to be set to False, or the validation may",
            "not work as intended, for other components where the attribute",
            "that should not be required.",
        ]
        logger.error(" ".join(message_parts))

    return True
Check if a node is required taking into account the manifest component it is defined in (requirements can be set in validation rule as well as required column)
Arguments:
- manifest_component: str, manifest component display name that the node belongs to.
- node_validation_rules: list[str], validation rules for a given node and component.
- node_label: str, label of the node being queried.
- node_display_name: str, node display name for the node being queried.
Returns:
True, if node is required, False if not
def get_component_node_validation_rules(
    self,
    manifest_component: str,
    node_label: Optional[str] = None,
    node_display_name: Optional[str] = None,
) -> list:
    """Get the validation rules for a given node and component.

    Args:
        manifest_component: Display name of the manifest component the node
            belongs to.
        node_label: Label of the node being queried.
        node_display_name: Display name of the node being queried.

    Returns:
        List of validation rules for the node and component. An empty list is
        returned when the node has no rules.
    """
    node_validation_rules = self.get_node_validation_rules(
        node_label=node_label, node_display_name=node_display_name
    )

    if isinstance(node_validation_rules, dict):
        if not node_validation_rules:
            # An empty mapping holds no rules. Previously this case fell through
            # to the list assertion below and raised AssertionError.
            return []
        # Rules are keyed per component: pick the ones for this component.
        return extract_component_validation_rules(
            manifest_component=manifest_component,
            validation_rules_dict=node_validation_rules,  # type: ignore
        )

    assert isinstance(node_validation_rules, list)
    return node_validation_rules
Get validation rules for a given node and component.
Arguments:
- manifest_component: str, manifest component display name that the node belongs to.
- node_label: str, label of the node being queried.
- node_display_name: str, node display name for the node being queried.
Returns:
validation_rules: list, validation rules list for a given node and component.
def get_component_requirements(
    self,
    source_component: str,
) -> list[str]:
    """List the components that a source component requires.

    Args:
        source_component: Component whose required downstream components are
            looked up.

    Returns:
        Nodes connected to the source through 'requiresComponent' edges, in the
        reverse of the ordered result from ``get_descendants_by_edge_type``.
    """
    component_edge_key = self.dmr.get_relationship_value(
        "requiresComponent", "edge_key"
    )
    ordered_descendants = self.get_descendants_by_edge_type(
        source_component,
        component_edge_key,
        ordered=True,
    )
    return list(reversed(ordered_descendants))
Get all components that are associated with a given source component and are required by it.
Arguments:
- source_component: source component for which we need to find all required downstream components.
Returns:
List of nodes that are descendants from the source component and are related to the source through a specific component relationship.
def get_component_requirements_graph(
    self,
    source_component: str,
) -> nx.Graph:
    """Return the components required by a source component as a graph.

    Args:
        source_component: Component whose required downstream components are
            looked up.

    Returns:
        The subgraph of the 'requiresComponent' edge-type graph induced on the
        nodes returned by ``get_component_requirements``.
    """
    # Nodes to keep: the components required by the source.
    required_nodes = self.get_component_requirements(source_component)

    # Graph holding only 'requiresComponent' edges, restricted to those nodes.
    component_edge_key = self.dmr.get_relationship_value(
        "requiresComponent", "edge_key"
    )
    requires_graph = self.get_subgraph_by_edge_type(component_edge_key)
    return requires_graph.subgraph(required_nodes)
Get all components that are associated with a given source component and are required by it; return the components as a dependency graph (i.e. a DAG).
Arguments:
- source_component, str: source component for which we need to find all required downstream components.
Returns:
A subgraph of the schema graph induced on nodes that are descendants from the source component and are related to the source through a specific component relationship.
def get_descendants_by_edge_type(
    self,
    source_node: str,
    relationship: str,
    connected: bool = True,
    ordered: bool = False,
) -> list[str]:
    """Get the descendants of a node along edges of one relationship type.

    Args:
        source_node: The node whose descendants need to be retrieved.
        relationship: Edge key that an edge must carry to be followed.
        connected:
            If True, only nodes reachable from the source node through edges of
            the given relationship are returned (plus the source node itself).
            If False, every node touched by an edge of the given relationship
            within the source node's descendant subgraph is returned.
        ordered:
            If True, the result is topologically ordered; this requires the
            relationship subgraph to be acyclic.
            If False, the result has no particular order.

    Returns:
        List of nodes. Empty when no edge of the given relationship exists among
        the source node and its descendants, or when ``connected`` is True and
        the source node itself has no edge of that relationship.
    """
    # Restrict the graph to the source node and everything reachable from it
    # (through edges of any type).
    subgraph_nodes = list(nx.descendants(self.graph, source_node))
    subgraph_nodes.append(source_node)
    descendants_subgraph = self.graph.subgraph(subgraph_nodes)

    # Keep only the edges that carry the requested relationship key.
    rel_edges = [
        (node_1, node_2)
        for node_1, node_2, key in descendants_subgraph.edges(keys=True)
        if key == relationship
    ]
    if not rel_edges:
        return []

    relationship_subgraph: nx.DiGraph = nx.DiGraph()
    relationship_subgraph.add_edges_from(rel_edges)

    if connected:
        # After pruning, the source node may not take part in any edge of this
        # relationship even though other descendants do. nx.descendants raises
        # for a node that is not in the graph, so report "nothing reachable".
        if source_node not in relationship_subgraph:
            return []

        # Pruning may also have disconnected some nodes from the source; keep
        # only those that are still reachable.
        reachable = nx.descendants(relationship_subgraph, source_node)
        reachable.add(source_node)
        if ordered:
            return list(
                nx.topological_sort(relationship_subgraph.subgraph(reachable))
            )
        return list(reachable)

    if ordered:
        return list(nx.topological_sort(relationship_subgraph))

    return list(relationship_subgraph.nodes())
Get all nodes that are descendants of a given source node, based on a specific type of edge / relationship type.
Arguments:
- source_node: The node whose descendants need to be retrieved.
- relationship: Edge / link relationship type with possible values same as in above docs.
- connected: If True, we need to ensure that all descendant nodes are reachable from the source node, i.e., they are part of the same connected component. If False, the descendants could be in multiple connected components. Default value is True.
- ordered: If True, the list of descendants will be topologically ordered. If False, the list has no particular order (depends on the order in which the descendants were traversed in the subgraph).
Returns:
List of nodes that are descendants from a particular node (sorted / unsorted)
def get_digraph_by_edge_type(self, edge_type: str) -> nx.DiGraph:
    """Build a directed graph from the edges of a given type.

    Args:
        edge_type: Edge key to filter on.

    Returns:
        A new networkx DiGraph holding every (source, target) pair of the data
        model graph whose edge key equals ``edge_type``.
    """
    matching_edges = [
        (source, target)
        for source, target, key, _ in self.graph.edges(data=True, keys=True)
        if key == edge_type
    ]
    digraph: nx.DiGraph = nx.DiGraph()
    digraph.add_edges_from(matching_edges)
    return digraph
Get a networkx digraph of the nodes connected via a given edge_type.
Arguments:
- edge_type: Edge type to search for, possible types are defined by 'edge_key' in relationship class
Returns:
def get_edges_by_relationship(
    self,
    node: str,
    relationship: str,
) -> list[tuple[str, str]]:
    """List the out-edges of a node that carry a given relationship type.

    Args:
        node: The node whose outgoing edges are inspected.
        relationship: Edge key that an outgoing edge must carry to be included.

    Returns:
        List of (source, target) pairs, one per matching out-edge.
    """
    outgoing = self.graph.out_edges(node, data=True, keys=True)
    return [
        (source, target)
        for source, target, edge_key, _ in outgoing
        if edge_key == relationship
    ]
Get a list of out-edges of a node where the edges match a specific type of relationship.
i.e., the edges connecting a node to its neighbors are of relationship type -- "parentOf" (set of edges to children / sub-class nodes).
Arguments:
- node: the node whose edges we need to look at.
- relationship: the type of link(s) that the above node and its immediate neighbors share.
Returns:
List of edges that are connected to the node.
def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
    """Order the nodes attached to a node by the weight of the connecting edge.

    Args:
        key: Relationship key, passed to ``get_relationship_value`` to look up
            its 'edge_key' and 'jsonld_direction'.
        source_node_label: Node whose edges are inspected and ordered.

    Raises:
        KeyError: If ``source_node_label`` is not in the graph.

    Returns:
        Nodes that share the relationship with the source node, sorted by
        ascending edge "weight". Out-edges are used when the relationship's
        direction is "out", in-edges otherwise.
    """
    if not self.is_class_in_schema(node_label=source_node_label):
        raise KeyError(
            f"Cannot find node: {source_node_label} in the graph, please check entry."
        )

    edge_key = self.dmr.get_relationship_value(key, "edge_key")
    direction = self.dmr.get_relationship_value(key, "jsonld_direction")

    # Map each attached node to the weight stored on the connecting edge.
    weights: dict = {}
    if direction == "out":
        for source_node, attached_node in self.graph.out_edges(source_node_label):
            edge_attrs = self.graph[source_node][attached_node]
            if edge_key in edge_attrs:
                weights[attached_node] = edge_attrs[edge_key]["weight"]
    else:
        for attached_node, source_node in self.graph.in_edges(source_node_label):
            edge_attrs = self.graph[attached_node][source_node]
            if edge_key in edge_attrs:
                weights[attached_node] = edge_attrs[edge_key]["weight"]

    # The sort is stable, so nodes with equal weight keep insertion order.
    return sorted(weights, key=weights.__getitem__)
Order the values associated with a particular node and edge_key to match original ordering in schema.
Arguments:
- key (str): a key representing an edge relationship in DataModelRelationships.relationships_dictionary
- source_node_label (str): node to look for edges of and order
Raises:
- KeyError: cannot find source node in graph
Returns:
list[str]: list of sorted nodes that share the specified relationship with the source node. For the example data model, for key='rangeIncludes', source_node_label='CancerType' the return would be ['Breast', 'Colorectal', 'Lung', 'Prostate', 'Skin'] in that exact order.
def get_nodes_ancestors(self, subgraph: nx.DiGraph, node_label: str) -> list[str]:
    """List all ancestors of a node in the given graph.

    Args:
        subgraph: networkx graph in which to search.
        node_label: Label of the node to find ancestors for.

    Returns:
        The result of ``nx.ancestors`` for the node, as a list in no
        particular order.
    """
    ancestor_nodes = nx.ancestors(subgraph, node_label)
    return list(ancestor_nodes)
Get a list of all ancestors of the given node in the graph
Arguments:
- subgraph (nx.DiGraph): networkx graph object
- node_label (str): label of node to find ancestors for
Returns:
list[str]: all ancestors of node_label in the graph
def get_node_comment(
    self, node_display_name: Optional[str] = None, node_label: Optional[str] = None
) -> str:
    """Get the definition ("comment") stored on a node.

    Args:
        node_display_name: Display name of the node; used to resolve the label
            when ``node_label`` is not given.
        node_label: Label of the node to get the comment for.

    Returns:
        The comment stored on the node, or an empty string when the resolved
        label is empty.
    """
    resolved_label = self._get_node_label(node_label, node_display_name)
    if resolved_label:
        node_attributes = self.graph.nodes[resolved_label]
        comment_key = self.dmr.get_relationship_value("comment", "node_label")
        return node_attributes[comment_key]
    return ""
Get the node definition, i.e., the "comment" associated with a given node display name.
Arguments:
- node_display_name, str: Display name of the node which you want to get the comment for.
- node_label, str: Label of the node you would want to get the comment for.
Returns:
Comment associated with node, as a string.
def get_node_dependencies(
    self,
    source_node: str,
    display_names: bool = True,
    schema_ordered: bool = True,
) -> list[str]:
    """Get the immediate dependencies of a given source node.

    Args:
        source_node: The node whose dependencies we need to compute.
        display_names: If True, return the display name of each dependency.
            If False, return the node label of each dependency.
        schema_ordered:
            If True, order the dependencies with ``get_ordered_entry``.
            If False, return them from ``get_adjacent_nodes_by_relationship``
            with no guaranteed order.

    Returns:
        List of dependencies of the source node, as display names or labels.
    """
    dependency_edge_key = self.dmr.get_relationship_value(
        "requiresDependency", "edge_key"
    )

    if schema_ordered:
        # NOTE(review): get_ordered_entry looks its `key` up again as a
        # relationship name, so this presumably relies on the edge key being
        # equal to the relationship name - confirm.
        required_dependencies = self.get_ordered_entry(
            key=dependency_edge_key,
            source_node_label=source_node,
        )
    else:
        required_dependencies = self.get_adjacent_nodes_by_relationship(
            node_label=source_node,
            relationship=dependency_edge_key,
        )

    if display_names:
        # Reuse the shared helper rather than repeating the per-node lookup.
        return self.get_nodes_display_names(node_list=required_dependencies)

    return required_dependencies
Get the immediate dependencies that are related to a given source node.
Arguments:
- source_node: The node whose dependencies we need to compute.
- display_names: if True, return list of display names of each of the dependencies. if False, return list of node labels of each of the dependencies.
- schema_ordered: if True, return the dependencies of the node following the order of the schema (slower). if False, return dependencies from graph without guaranteeing schema order (faster)
Returns:
List of nodes that are dependent on the source node.
def get_nodes_descendants(self, node_label: str) -> list[str]:
    """List all nodes reachable from a node in the data model graph.

    Args:
        node_label: Label of the node to start from.

    Returns:
        The result of ``nx.descendants`` for the node, as a list in no
        particular order.
    """
    reachable_nodes = nx.descendants(self.graph, node_label)
    return list(reachable_nodes)
Return a list of nodes reachable from source in graph
Arguments:
- node_label, str: any given node
Return:
all_descendants, list: nodes reachable from source in graph
def get_nodes_display_names(
    self,
    node_list: list[str],
) -> list[str]:
    """Get the display names of the given nodes.

    Args:
        node_list: Labels of the nodes whose display names are needed.

    Returns:
        List of display names, in the same order as ``node_list``.
    """
    # Resolve the attribute name once instead of once per node.
    display_name_key = self.dmr.get_relationship_value("displayName", "node_label")
    return [self.graph.nodes[node][display_name_key] for node in node_list]
Get display names associated with the given list of nodes.
Arguments:
- node_list: List of nodes whose display names we need to retrieve.
Returns:
List of display names.
def get_node_label(self, node_display_name: str) -> str:
    """Get the node label for a given display name.

    The class-style label derived from the display name is tried first, then
    the property-style label.

    Args:
        node_display_name: Display name of the node to get the label for.

    Returns:
        The first derived label that is a node in the graph, or an empty string
        if neither is.
    """
    class_candidate = get_class_label_from_display_name(
        display_name=node_display_name
    )
    property_candidate = get_property_label_from_display_name(
        display_name=node_display_name
    )

    if class_candidate in self.graph.nodes:
        return class_candidate
    if property_candidate in self.graph.nodes:
        return property_candidate
    return ""
Get the node label for a given display name.
Arguments:
- node_display_name: Display name of the node which you want to get the label for.
Returns:
Node label associated with given node. If display name not part of schema, return an empty string.
def get_node_range(
    self,
    node_label: Optional[str] = None,
    node_display_name: Optional[str] = None,
    display_names: bool = False,
) -> list[str]:
    """Get the range, i.e. the valid values, associated with a node.

    Args:
        node_label: Label of the node for which to retrieve the range.
            Defaults to None.
        node_display_name: Display name of the node; used to resolve the label
            when ``node_label`` is not given. Defaults to None.
        display_names: If True, return the display names of the valid values
            instead of their labels. Defaults to False.

    Raises:
        ValueError: If looking up the range raises a KeyError.

    Returns:
        If display_names=False, the valid values as node labels.
        If display_names=True, the "displayName" of each valid value.
    """
    node_label = self._get_node_label(node_label, node_display_name)
    try:
        range_labels = self.find_node_range(node_label=node_label)
    except KeyError as exc:
        raise ValueError(
            f"The source node {node_label} does not exist in the graph. "
            "Please use a different node."
        ) from exc

    if not display_names:
        return range_labels

    # Translate each valid value's label into its display name.
    return [self.graph.nodes[label]["displayName"] for label in range_labels]
Get the range, i.e., all the valid values that are associated with a node label.
Arguments:
- node_label (Optional[str], optional): Node for which you need to retrieve the range. Defaults to None.
- node_display_name (Optional[str], optional): Display name of the node for which you need to retrieve the range. Defaults to None.
- display_names (bool, optional): If True, return the display names of the valid values instead of their labels. Defaults to False.
Raises:
- ValueError: If the node cannot be found in the graph.
Returns:
list[str]: If display_names=False, a list of valid values (labels) associated with a given node. If display_names=True, a list of valid values (display names) associated with a given node
def get_node_required(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> bool:
    """Report whether a node is marked as required.

    A node is either "required" or "optional".

    Args:
        node_label: Label of the node to look up.
        node_display_name: Display name of the node to look up.

    Returns:
        The value stored on the node under the "required" relationship's
        node-label key: True for a required node, False for an optional one.
    """
    resolved_label = self._get_node_label(node_label, node_display_name)
    # The attribute name under which "required" is stored comes from the
    # relationships definition rather than being hard-coded here.
    required_key = self.dmr.get_relationship_value("required", "node_label")
    return self.graph.nodes[resolved_label][required_key]
Check if a given node is required or not.
Note: The possible options that a node can be associated with -- "required" / "optional".
Arguments:
- node_label: Label of the node for which you need to look up.
- node_display_name: Display name of the node which you want to look up.
Returns:
True: If the given node is a "required" node. False: If the given node is not a "required" (i.e., an "optional") node.
def get_node_validation_rules(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> Union[list, dict[str, str]]:
    """Get the validation rules associated with a node.

    Args:
        node_label: Label of the node to look up.
        node_display_name: Display name of the node to look up.

    Returns:
        The node's "validationRules" attribute, as a list or a dictionary.
        An empty list is returned when no node label could be resolved.

    Raises:
        ValueError: If the lookup of the node or of its "validationRules"
            attribute raises ``KeyError``.
    """
    node_label = self._get_node_label(node_label, node_display_name)

    # An empty or missing label means there is nothing to look up.
    if not node_label:
        return []

    try:
        return self.graph.nodes[node_label]["validationRules"]
    except KeyError as key_error:
        raise ValueError(
            f"{node_label} is not in the graph, please provide a proper node label"
        ) from key_error
Get validation rules associated with a node.
Arguments:
- node_label: Label of the node for which you need to look up.
- node_display_name: Display name of the node which you want to get the label for.
Returns:
A set of validation rules associated with node, as a list or a dictionary.
def get_subgraph_by_edge_type(self, relationship: str) -> nx.DiGraph:
    """Build a directed graph from all edges of one relationship type.

    Args:
        relationship: Edge key identifying the relationship type to keep.

    Returns:
        A new directed graph containing the endpoints of every edge in the
        data model graph whose key equals ``relationship``.
    """
    # Keep only the (source, target) pair of edges whose key matches; edge
    # data is not carried over into the subgraph.
    matching_edges = [
        (source, target)
        for source, target, edge_key, _ in self.graph.out_edges(data=True, keys=True)
        if edge_key == relationship
    ]

    subgraph: nx.DiGraph = nx.DiGraph()
    subgraph.add_edges_from(matching_edges)
    return subgraph
Get a subgraph containing all edges of a given type (aka relationship).
Arguments:
- relationship: edge / link relationship type with possible values same as in above docs.
Returns:
Directed graph on edges of a particular type (aka relationship)
def find_adjacent_child_classes(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> list[str]:
    """Find the child classes of a given node.

    Args:
        node_label: Label of the node to look up.
        node_display_name: Display name of the node to look up.

    Returns:
        List of nodes adjacent to the given node through the "subClassOf"
        relationship.
    """
    resolved_label = self._get_node_label(node_label, node_display_name)
    # The edge key for the subclass relationship is taken from the
    # relationships definition.
    subclass_edge_key = self.dmr.get_relationship_value("subClassOf", "edge_key")
    return self.get_adjacent_nodes_by_relationship(
        node_label=resolved_label,
        relationship=subclass_edge_key,
    )
Find child classes of a given node.
Arguments:
- node_display_name: Display name of the node to look up.
- node_label: Label of the node to look up.
Returns:
List of nodes that are adjacent to the given node, by SubclassOf relationship.
def find_child_classes(self, schema_class: str) -> list:
    """Find the schema classes that inherit from the given class.

    Args:
        schema_class: Node label of the class whose children are wanted.

    Returns:
        List of the direct successors of ``schema_class`` in the graph. The
        list is empty when there are none and has one element when there is
        exactly one child.
    """
    # The successors were previously passed through `unlist` and then checked
    # with `assert isinstance(..., list)`. Assuming `unlist` collapses
    # one-element sequences to their single item (verify in
    # schematic.utils.general), a class with exactly one child tripped the
    # assert, and under `python -O` a non-list was returned. Returning the
    # list directly always honours the declared return type.
    return list(self.graph.successors(schema_class))
Find schema classes that inherit from the given class
Arguments:
- schema_class: node label for the class to from which to look for children.
Returns:
list of children to the schema_class.
def find_class_specific_properties(self, schema_class: str) -> list[str]:
    """Find the properties specifically associated with a given class.

    Args:
        schema_class: Node/class label to identify properties for.

    Returns:
        Source nodes of every edge that points at ``schema_class`` and whose
        connection to it includes a "domainValue" entry.

    Raises:
        KeyError: If ``schema_class`` is not in the graph.
    """
    if not self.is_class_in_schema(schema_class):
        raise KeyError(
            (
                f"Schema_class provided: {schema_class} is not in the data model, please check "
                "that you are providing the proper class/node label"
            )
        )

    # Scan every edge and keep the source of those that end at the class and
    # carry a "domainValue" entry.
    return [
        source
        for source, target in self.graph.edges()
        if target == schema_class
        and "domainValue" in self.graph[source][schema_class]
    ]
Find properties specifically associated with a given class
Arguments:
- schema_class, str: node/class label, to identify properties for.
Returns:
properties, list: List of properties associated with a given schema class.
Raises:
- KeyError: Key error is raised if the provided schema_class is not in the graph
def find_parent_classes(self, node_label: str) -> list[list[str]]:
    """Find all parents of the provided node.

    Args:
        node_label: Label of the node to find the parents of.

    Returns:
        One list per simple path from the root node to ``node_label``, each
        holding the nodes on that path with ``node_label`` itself left off.
    """
    # Directed graph restricted to "parentOf" edges; used only to pick a root.
    parent_digraph = self.get_digraph_by_edge_type("parentOf")

    # The first node in topological order is treated as the root.
    ordered_nodes = list(nx.topological_sort(parent_digraph))
    root_node = ordered_nodes[0]

    # Paths are searched in the full graph, not in the "parentOf" digraph.
    parent_paths = []
    for path in nx.all_simple_paths(self.graph, source=root_node, target=node_label):
        # Drop the last element, which is the target node itself.
        parent_paths.append(path[:-1])
    return parent_paths
Find all parents of the provided node
Arguments:
- node_label: label of the node to find parents of
Returns:
List of lists of parents of the given node.
def full_schema_graph(self, size: Optional[float] = None) -> graphviz.Digraph:
    """Create a graph visualization of the whole data model.

    Args:
        size: Max height and width of the graph; if one value is provided it
            is used for both. Annotated as ``float`` to match the docstring
            and ``sub_schema_graph``, which passes the same argument to
            ``visualize``; ``int`` values remain acceptable.

    Returns:
        Graphviz visualization built from every edge in the schema graph.
    """
    edges = self.graph.edges()
    return visualize(edges, size=size)
Create a graph of the data model.
Arguments:
- size, float: max height and width of the graph, if one value provided it is used for both.
Returns:
schema graph viz
def is_class_in_schema(self, node_label: str) -> bool:
    """Determine whether the given node label is in the schema graph.

    Args:
        node_label: Label of the node to search for in the graph.

    Returns:
        True if the node is in the graph, False otherwise.
    """
    known_nodes = self.graph.nodes()
    return node_label in known_nodes
Determine if provided node_label is in the schema graph/data model.
Arguments:
- node_label: label of node to search for in the schema graph.
Returns:
True if the node is in the graph schema; False if the node is not in the graph schema.
def sub_schema_graph(
    self, source: str, direction: str, size: Optional[float] = None
) -> Optional[graphviz.Digraph]:
    """Create a visualization of the part of the schema around one node.

    Args:
        source: Label of the node the sub-graph is built around.
        direction: Which edges to include. "down" uses the edges found by a
            breadth-first edge traversal from ``source``, "up" uses the
            edges along every parent path leading to ``source``, and "both"
            combines the two.
        size: Max height and width of the graph; if one value is provided it
            is used for both.

    Returns:
        Sub-schema graph visualization, or None when ``direction`` is not
        one of "up", "down" or "both".
    """
    if direction not in ("down", "up", "both"):
        return None

    wants_parents = direction in ("up", "both")
    wants_children = direction in ("down", "both")

    # Parent paths are resolved before the downward traversal, matching the
    # order of work for "both".
    parent_paths = self.find_parent_classes(source) if wants_parents else []
    edges = list(nx.edge_bfs(self.graph, [source])) if wants_children else []

    for parent_path in parent_paths:
        # Each parent path stops just short of `source`; put it back and add
        # an edge for every consecutive pair of nodes along the path.
        full_path = [*parent_path, source]
        edges.extend(zip(full_path, full_path[1:]))

    return visualize(edges, size=size)
Create a sub-schema graph
Arguments:
- source, str: source node label to start graph
- direction, str: direction to create the visualization, choose from "up", "down", "both"
- size, float: max height and width of the graph, if one value provided it is used for both.
Returns:
Sub-schema graph viz
def get_node_column_type(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> Optional[JSONSchemaType]:
    """Get the column type of a node.

    Args:
        node_label: The label of the node to get the type from.
        node_display_name: The display name of the node to get the type from.

    Returns:
        The node's column type as a ``JSONSchemaType``, or None when the
        node's stored column type is None.
    """
    resolved_label = self._get_node_label(node_label, node_display_name)
    # The attribute name under which the column type is stored comes from
    # the relationships definition.
    column_type_key = self.dmr.get_relationship_value("columnType", "node_label")
    raw_type = self.graph.nodes[resolved_label][column_type_key]
    return None if raw_type is None else JSONSchemaType(raw_type)
Gets the column type of the node
Arguments:
- node_label: The label of the node to get the type from
- node_display_name: The display name of the node to get the type from
Returns:
The column type of the node if it has one, otherwise None
def create_data_model_graph_explorer(data_model_path: str) -> DataModelGraphExplorer:
    """Create a DataModelGraphExplorer from a data model file.

    Args:
        data_model_path: The path to the data model used to create the
            explorer.

    Returns:
        DataModelGraphExplorer: An explorer wrapping the graph generated
        from the parsed data model.
    """
    # Parse the model, build its graph, then wrap the graph in an explorer.
    parsed_model = DataModelParser(path_to_data_model=data_model_path).parse_model()
    model_graph = DataModelGraph(parsed_model).graph
    return DataModelGraphExplorer(model_graph)
Creates a DataModelGraphExplorer using a data model
Arguments:
- data_model_path: The path to a data model to create the dmge
Returns:
DataModelGraphExplorer: A dmge created using the input data model