Commit 1fa94983 authored by Tim Bleimehl

add de/serializer 🚀

parent a3b6f955
@@ -7,4 +7,9 @@ https://github.com/jexp/neo4j-graphviz
# todo
output schema in https://linkml.io/linkml/
\ No newline at end of file
output schema in https://linkml.io/linkml/
# render dot
`dot -Tpng -Goverlap=prism -Kfdp -v g.dot >| g_sftp.png`
import jsonpickle
import json
import py2neo
# TODO: dirty workaround with a global node store to prevent node cloning, because I don't understand jsonpickle with nested objects and custom handlers yet :/
# Maybe it's not me but a bug: https://github.com/jsonpickle/jsonpickle/issues/248 ?
# (A usage sketch of these handlers follows the handler definitions below.)
nodestore = {}
relationstore = {}
@jsonpickle.handlers.register(py2neo.Node, base=True)
class CustomJsonpickleHandler_Node(jsonpickle.handlers.BaseHandler):
def flatten(self, obj, data):
data["label"] = list(obj.labels)[0]
data["attr"] = dict(obj)
return data
def restore(self, obj):
global nodestore
if obj["label"] not in nodestore:
nodestore[obj["label"]] = py2neo.Node(obj["label"], **obj["attr"])
return nodestore[obj["label"]]
@jsonpickle.handlers.register(py2neo.Relationship, base=True)
class CustomJsonpickleHandler_Relationship(jsonpickle.handlers.BaseHandler):
def flatten(self, obj: py2neo.Relationship, data):
data["py/object"] = "py2neo.Relationship"
data["type"] = type(obj).__name__
data["start"] = jsonpickle.encode(obj.start_node)
data["end"] = jsonpickle.encode(obj.end_node)
data["props"] = dict(obj)
return data
def restore(self, obj):
global relationstore
start_node: py2neo.Node = jsonpickle.decode(obj["start"])
end_node: py2neo.Node = jsonpickle.decode(obj["end"])
rel_type = obj["type"]
rel_ident = (
f"{list(start_node.labels)[0]}_{rel_type}_{list(end_node.labels)[0]}"
)
if rel_ident not in relationstore:
relationstore[rel_ident] = py2neo.Relationship(
start_node, rel_type, end_node, **obj["props"]
)
return relationstore[rel_ident]
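For orientation, here is a minimal round-trip sketch of how these handlers are intended to be used. The node and relationship objects are hypothetical; importing `NeoMetaTracker.de_serializer_helper` is assumed to register the handlers as a side effect of the decorators above.

```python
import jsonpickle
import py2neo

import NeoMetaTracker.de_serializer_helper  # noqa: F401  (registers the handlers above)

# hypothetical example data
owner = py2neo.Node("Person", name="Alice")
pet = py2neo.Node("Animal", name="Rex")
owns = py2neo.Relationship(owner, "OWNS", pet, since=2020)

encoded = jsonpickle.encode(owns)   # flattened via the relationship handler
restored = jsonpickle.decode(encoded)

# Properties and endpoints survive the round trip; nodes are cached per label
# in the global nodestore, so repeated decodes reuse the same node objects.
assert dict(restored) == {"since": 2020}
assert set(restored.start_node.labels) == {"Person"}
```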
from typing import Union, Dict, List, Generic
import time
import py2neo
from pathlib import Path
import jsonpickle
from NeoMetaTracker.capture_point import CapturePoint
from NeoMetaTracker.graph_scheme import GraphSchema
from NeoMetaTracker.visualizer._base_visualizer import BaseVisualizer
from NeoMetaTracker.de_serializer_helper import (
CustomJsonpickleHandler_frozenset,
CustomJsonpickleHandler_Node,
)
class NeoMetaTracker:
@@ -17,14 +21,7 @@ class NeoMetaTracker:
# By "zero changes" we mean changes where the node count did not change, e.g. creating an index for label "myLabel" also registers a schema change for label "myLabel".
# This can be undesirable in some situations; to ignore such changes, set `ignore_zero_changes` to True (see the usage sketch below).
self.ignore_zero_changes: bool = False
if isinstance(connection, dict):
self.graph: py2neo.Graph = py2neo.Graph(**connection)
elif isinstance(connection, py2neo.Graph):
self.graph = connection
else:
raise TypeError(
f"Expected 'py2neo.Graph' or 'dict'. Got '{type(connection)}'"
)
self._set_graph_connection(connection=connection)
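A hedged usage sketch of the `ignore_zero_changes` flag described above (the connection details, the package-level import, and the exact constructor call are assumptions):

```python
import py2neo
from NeoMetaTracker import NeoMetaTracker  # assumed package-level export

# hypothetical connection
graph = py2neo.Graph("bolt://localhost:7687", auth=("neo4j", "password"))

tracker = NeoMetaTracker(connection=graph)
tracker.ignore_zero_changes = True  # skip labels/relation types whose count is 0

tracker.capture("before_index")
# Per the comment above, creating an index can register a label even though no
# nodes with that label exist; with ignore_zero_changes=True such zero-count
# entries are left out of the capture.
graph.run("CREATE INDEX person_name IF NOT EXISTS FOR (n:Person) ON (n.name)")
tracker.capture("after_index")
```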
def capture(self, name: str = None):
labels_count = self._get_labels_count()
@@ -40,6 +37,25 @@
)
)
def visualize(
self, visualizer_class: BaseVisualizer, to_file: Union[str, Path] = None
):
schemas_changes = []
for index, to_cp in enumerate(self.capture_points):
if index < 1:
continue
from_cp = self.capture_points[index - 1]
changes = self.get_schemagraph_changes(
from_capture=from_cp, to_capture=to_cp
)
changes.parent_capture_point = to_cp
schemas_changes.append(changes)
visualizer: BaseVisualizer = visualizer_class(schemas_changes)
if to_file:
visualizer.generate_file(to_file)
else:
return visualizer.generate_object()
def node_filter_func(self, node: py2neo.Node):
"""This function can be overiden to filter specifics nodes out of the schema tracker
@@ -77,65 +93,6 @@ class NeoMetaTracker:
return None
return rel
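A hedged sketch of such a `node_filter_func` override (the subclass, the label name, and the keep/drop return convention are assumptions inferred from the docstring above and the `return None` / `return rel` pattern just shown):

```python
import py2neo
from NeoMetaTracker import NeoMetaTracker  # assumed package-level export

class FilteredTracker(NeoMetaTracker):
    def node_filter_func(self, node: py2neo.Node):
        # Hypothetical label: skip internal bookkeeping nodes entirely.
        if "_InternalMeta" in node.labels:
            return None  # assumed: returning None drops the node from the schema
        return node      # assumed: returning the node keeps it
```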
def visualize(
self, visualizer_class: BaseVisualizer, to_file: Union[str, Path] = None
):
schemas_changes = []
for index, to_cp in enumerate(self.capture_points):
if index < 1:
continue
from_cp = self.capture_points[index - 1]
changes = self.get_schemagraph_changes(
from_capture=from_cp, to_capture=to_cp
)
changes.parent_capture_point = to_cp
schemas_changes.append(changes)
visualizer: BaseVisualizer = visualizer_class(schemas_changes)
if to_file:
visualizer.generate_file(to_file)
else:
return visualizer.generate_object()
def _get_neo4j_schema(self):
# call db.schema.visualization
return self.graph.run(
"call db.schema.visualization yield nodes, relationships"
).data()[0]
def _get_labels_count(self) -> Dict[str, int]:
all_labels: List[str] = self.graph.run(
"CALL db.labels() yield label return collect(label) as res"
).data()[0]["res"]
labels_count: Dict[str, int] = {}
for label in all_labels:
if label in self.ignore_labels:
continue
query_label_count = f"""
MATCH (n:{label})
RETURN count(n) AS res
"""
res = self.graph.run(query_label_count).data()[0]["res"]
if (self.ignore_zero_changes and res != 0) or not self.ignore_zero_changes:
labels_count[label] = res
return labels_count
def _get_relations_count(self) -> Dict[str, int]:
all_rels: List[str] = self.graph.run(
"CALL db.relationshipTypes() yield relationshipType return collect(relationshipType) as res"
).data()[0]["res"]
rels_count: Dict[str, int] = {}
for rel in all_rels:
if rel in self.ignore_reliations_types:
continue
query_rel_count = f"""
MATCH ()-[:{rel}]->()
return count(*) AS res
"""
res = self.graph.run(query_rel_count).data()[0]["res"]
if (self.ignore_zero_changes and res != 0) or not self.ignore_zero_changes:
rels_count[rel] = res
return rels_count
def get_capture_point_by(
self,
capture_point_index: int = None,
@@ -253,3 +210,68 @@
if (res != 0 and ignore_zeros) or not ignore_zeros:
result["relations"][diff_rel] = res
return result
def _get_neo4j_schema(self):
# call db.schema.visualization
return self.graph.run(
"call db.schema.visualization yield nodes, relationships"
).data()[0]
def _get_labels_count(self) -> Dict[str, int]:
all_labels: List[str] = self.graph.run(
"CALL db.labels() yield label return collect(label) as res"
).data()[0]["res"]
labels_count: Dict[str, int] = {}
for label in all_labels:
if label in self.ignore_labels:
continue
query_label_count = f"""
MATCH (n:{label})
RETURN count(n) AS res
"""
res = self.graph.run(query_label_count).data()[0]["res"]
if (self.ignore_zero_changes and res != 0) or not self.ignore_zero_changes:
labels_count[label] = res
return labels_count
def _set_graph_connection(self, connection: Union[py2neo.Graph, Dict]):
if isinstance(connection, dict):
self.graph = py2neo.Graph(**connection)
elif isinstance(connection, py2neo.Graph):
self.graph = connection
else:
raise TypeError(
f"Expected 'py2neo.Graph' or 'dict'. Got '{type(connection)}'"
)
def _get_relations_count(self) -> Dict[str, int]:
all_rels: List[str] = self.graph.run(
"CALL db.relationshipTypes() yield relationshipType return collect(relationshipType) as res"
).data()[0]["res"]
rels_count: Dict[str, int] = {}
for rel in all_rels:
if rel in self.ignore_reliations_types:
continue
query_rel_count = f"""
MATCH ()-[:{rel}]->()
return count(*) AS res
"""
res = self.graph.run(query_rel_count).data()[0]["res"]
if (self.ignore_zero_changes and res != 0) or not self.ignore_zero_changes:
rels_count[rel] = res
return rels_count
def serialize(self) -> str:
g = self.graph
self.graph = None
obj = jsonpickle.encode(self, keys=True)
self.graph = g
return obj
@classmethod
def deserialize(
cls, json_object: str, connection: Union[py2neo.Graph, Dict]
) -> "NeoMetaTracker":
obj: "NeoMetaTracker" = jsonpickle.decode(json_object, keys=True)
obj._set_graph_connection(connection=connection)
return obj
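A hedged sketch of persisting and restoring a tracker with the new serialize/deserialize methods (the file path, connection, and package-level import are hypothetical; the deserialize call mirrors the test script further below):

```python
from pathlib import Path

import py2neo
from NeoMetaTracker import NeoMetaTracker  # assumed package-level export

graph = py2neo.Graph("bolt://localhost:7687", auth=("neo4j", "password"))  # hypothetical
tracker = NeoMetaTracker(connection=graph)
tracker.capture("initial")

# serialize() temporarily drops the live py2neo.Graph before encoding,
# so the JSON string contains only the captured state.
state_file = Path("tracker_state.json")
state_file.write_text(tracker.serialize())

# Later, possibly in another process: a connection must be supplied again.
restored = NeoMetaTracker.deserialize(
    json_object=state_file.read_text(), connection=graph
)
```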
@@ -14,7 +14,7 @@ setup(
author_email="tim.bleimehl@helmholtz-muenchen.de",
license="MIT",
packages=["NeoMetaTracker"],
install_requires=["py2neo", "DZDUtils"],
install_requires=["py2neo", "DZDUtils", "jsonpickle"],
python_requires=">=3.9",
zip_safe=False,
include_package_data=True,
@@ -81,6 +81,7 @@ print(
"###schema.visualize(GraphvizVisualizer)###\n",
mlog.capture_points[-1].schema.visualize(GraphvizVisualizer),
)
print(
mlog.get_schemagraph_last_changes().visualize(
GraphvizVisualizer, to_file=os.path.join(current_dir, "last_change.dot")
@@ -89,3 +90,27 @@ print(
print(mlog.visualize(GraphvizVisualizer, to_file=os.path.join(current_dir, "all.dot")))
schema_graph = py2neo.Graph(**(NEO4J | {"name": "schematest"}))
schema_graph.merge(mlog.get_schemagraph_last_changes())
json_obj = mlog.serialize()
print(type(json_obj))
print(json_obj)
mlog2 = NeoMetaTracker.deserialize(json_object=json_obj, connection=test_graph)
assert mlog2.get_numeric_changes_since(
mlog2.get_capture_point_by(capture_point_name="AnimalCluster")
) == mlog.get_numeric_changes_since(
mlog.get_capture_point_by(capture_point_name="AnimalCluster")
)
a = mlog.get_schemagraph_changes_since(
mlog.get_capture_point_by(capture_point_name="AnimalCluster")
)
b = mlog2.get_schemagraph_changes_since(
mlog2.get_capture_point_by(capture_point_name="AnimalCluster")
)
assert len(a.nodes) == len(b.nodes)
assert len(a.relationships) == len(b.relationships)