Commit a3b6f955 authored by Tim Bleimehl's avatar Tim Bleimehl 🤸🏼
Browse files

add "ignore_zero_changes" feat

parent d94eab11
Pipeline #1670 passed with stage
in 49 seconds
......@@ -21,8 +21,8 @@ class CapturePoint:
self.parent_logger = parent_logger
self.name = name
self.timestamp: float = time.time()
self.labels = labels_count
self.relations = relations_count
self.labels_count = labels_count
self.relations_count = relations_count
self.schema: GraphSchema = GraphSchema.from_neo4j_schema_vis_data(
neo4j_schema_vis_data,
parent_capture_point=self,
......
......@@ -48,14 +48,27 @@ class GraphSchema(py2neo.Subgraph):
# copy all nodes to track whether they are involved in a relation
nodes_without_relation = list(neo4j_schema_vis_data["nodes"])
for rel in neo4j_schema_vis_data["relationships"]:
if not parent_capture_point.parent_logger.rel_filter_func(rel):
if parent_capture_point.parent_logger.rel_filter_func(rel) is None:
continue
rel_nodes = [None, None]
for node in neo4j_schema_vis_data["nodes"]:
if not parent_capture_point.parent_logger.node_filter_func(node):
continue
# we already found the start and end node for our relation; no need to search further
if rel_nodes[0] and rel_nodes[1]:
break
# Check if schema-node is "unwanted"/filtered-out
if parent_capture_point.parent_logger.node_filter_func(node) is None:
if node in nodes_without_relation:
nodes_without_relation.remove(node)
continue
# check if schema-node is a "zero change" (e.g. just an index on the label was created) and if zero changes are unwanted
if (
list(node.labels)[0] not in parent_capture_point.labels_count
and parent_capture_point.parent_logger.ignore_zero_changes
):
if node in nodes_without_relation:
nodes_without_relation.remove(node)
continue
if node.identity in [rel.start_node.identity, rel.end_node.identity]:
if node in nodes_without_relation:
nodes_without_relation.remove(node)
......@@ -90,9 +103,16 @@ class GraphSchema(py2neo.Subgraph):
# so far we have ignored all nodes without any relation.
# let's make up that leeway
single_nodes_cleaned = []
for node in nodes_without_relation:
if not parent_capture_point.parent_logger.node_filter_func(node):
continue
if (
list(node.labels)[0] not in parent_capture_point.labels_count
and parent_capture_point.parent_logger.ignore_zero_changes
):
continue
clean_node = cls._get_or_create_unbound_schema_node(
node=node,
parent_capture_point=parent_capture_point,
......@@ -127,7 +147,7 @@ class GraphSchema(py2neo.Subgraph):
parent_capture_point.parent_logger.all_schema_nodes[tuple(node.labels)][
"__node_count"
]
!= parent_capture_point.labels[tuple(node.labels)[0]]
!= parent_capture_point.labels_count[tuple(node.labels)[0]]
):
clean_node = py2neo.Node(
*list(node.labels),
......@@ -135,7 +155,7 @@ class GraphSchema(py2neo.Subgraph):
extra_props
| {
"__label_name": ":".join(list(node.labels)),
"__node_count": parent_capture_point.labels[
"__node_count": parent_capture_point.labels_count[
tuple(node.labels)[0]
],
}
......
......@@ -14,6 +14,9 @@ class NeoMetaTracker:
self.all_schema_rels: Dict[str, py2neo.Relationship] = None
self.ignore_labels: List[str] = []
self.ignore_reliations_types: List[str] = []
# "zero changes" are schema changes where the node count did not change, e.g. creating an index for label "myLabel" is also detected as a schema change for label "myLabel".
# this can be undesirable in some situations. to ignore such changes, set `ignore_zero_changes` to True
self.ignore_zero_changes: bool = False
if isinstance(connection, dict):
self.graph: py2neo.Graph = py2neo.Graph(**connection)
elif isinstance(connection, py2neo.Graph):
......@@ -111,7 +114,9 @@ class NeoMetaTracker:
MATCH (n:{label})
RETURN count(n) AS res
"""
labels_count[label] = self.graph.run(query_label_count).data()[0]["res"]
res = self.graph.run(query_label_count).data()[0]["res"]
if (self.ignore_zero_changes and res != 0) or not self.ignore_zero_changes:
labels_count[label] = res
return labels_count
def _get_relations_count(self) -> Dict[str, int]:
......@@ -126,7 +131,9 @@ class NeoMetaTracker:
MATCH ()-[:{rel}]->()
return count(*) AS res
"""
rels_count[rel] = self.graph.run(query_rel_count).data()[0]["res"]
res = self.graph.run(query_rel_count).data()[0]["res"]
if (self.ignore_zero_changes and res != 0) or not self.ignore_zero_changes:
rels_count[rel] = res
return rels_count
def get_capture_point_by(
......@@ -197,41 +204,52 @@ class NeoMetaTracker:
from_capture=capture_point, to_capture=current_capture_point
)
def get_numeric_changes(self, from_capture: CapturePoint, to_capture: CapturePoint):
def get_numeric_changes(
self,
from_capture: CapturePoint,
to_capture: CapturePoint,
ignore_zeros: bool = None,
):
if ignore_zeros is None:
ignore_zeros = self.ignore_zero_changes
if len(self.capture_points) <= 1:
raise ValueError(
f"You need at least 2 capture points to compare any changes. Got only {len(self.capture_points)} points"
)
# Compare labels
result: Dict = {"labels": {}, "relations": {}}
if to_capture.labels or from_capture.labels:
diff_labels = set(to_capture.labels.items()) ^ set(
from_capture.labels.items()
if to_capture.labels_count or from_capture.labels_count:
diff_labels = set(to_capture.labels_count.items()) ^ set(
from_capture.labels_count.items()
)
for diff_label in set([lbl[0] for lbl in diff_labels]):
result["labels"][diff_label] = (
to_capture.labels[diff_label]
if diff_label in to_capture.labels
res = (
to_capture.labels_count[diff_label]
if diff_label in to_capture.labels_count
else 0
) - (
from_capture.labels[diff_label]
if diff_label in from_capture.labels
from_capture.labels_count[diff_label]
if diff_label in from_capture.labels_count
else 0
)
if (res != 0 and ignore_zeros) or not ignore_zeros:
result["labels"][diff_label] = res
# Compare relations
if not to_capture.relations and not from_capture.relations:
if not to_capture.relations_count and not from_capture.relations_count:
return result
diff_relations = set(to_capture.relations.items()) ^ set(
from_capture.relations.items()
diff_relations = set(to_capture.relations_count.items()) ^ set(
from_capture.relations_count.items()
)
for diff_rel in set([lbl[0] for lbl in diff_relations]):
result["relations"][diff_rel] = (
to_capture.relations[diff_rel]
if diff_rel in to_capture.relations
res = (
to_capture.relations_count[diff_rel]
if diff_rel in to_capture.relations_count
else 0
) - (
from_capture.relations[diff_rel]
if diff_rel in from_capture.relations
from_capture.relations_count[diff_rel]
if diff_rel in from_capture.relations_count
else 0
)
if (res != 0 and ignore_zeros) or not ignore_zeros:
result["relations"][diff_rel] = res
return result
......@@ -26,17 +26,15 @@ graph.run("CREATE OR REPLACE DATABASE test")
test_graph = py2neo.Graph(**(NEO4J | {"name": "test"}))
# test_graph = py2neo.Graph(**(NEO4J))
mlog = NeoMetaTracker(test_graph)
mlog.ignore_zero_changes = True
def node_filter(node: py2neo.Node):
    """Filter out schema nodes whose first label starts with an underscore.

    Args:
        node: The node whose labels are inspected.

    Returns:
        ``None`` when the node's first label starts with ``"_"`` (the
        tracker treats ``None`` as "filtered out"), otherwise the node
        itself, unchanged.
    """
    # A node without labels has nothing to match against: keep it instead of
    # raising IndexError on an empty label collection.
    first_label = next(iter(node.labels), None)
    if first_label is not None and first_label.startswith("_"):
        return None
    return node
mlog.node_filter_func = node_filter
mlog.ignore_labels = ["_SystemNode"]
mlog.capture(name="Init")
test_graph.run(
'CALL db.index.fulltext.createNodeIndex("Alien_index",["Alien"],["name"])'
)
mlog.capture("indexCreation")
test_graph.run("CREATE (:Alien {name: 'E.T.'})")
mlog.capture("AlienCluster")
test_graph.run("CREATE (:Human{name:'Amina Okujewa'})")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment