Commit 26af619c authored by Tim Bleimehl's avatar Tim Bleimehl 🤸🏼
Browse files

ALPHA01

parent dc901803
......@@ -158,3 +158,4 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*.dot
image: python:3
stages:
- deploy
cache:
paths:
- .cache/pip
- venv/
before_script:
- python -V # Print out python version for debugging
- pip install virtualenv
- virtualenv venv
- source venv/bin/activate
deploy-job:
stage: deploy
only:
- tags
script:
- pip install .
- python setup.py sdist bdist_wheel
- pip install twine
#- TWINE_PASSWORD=${CI_JOB_TOKEN} TWINE_USERNAME=gitlab-ci-token python -m twine upload --repository-url ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi dist/*
- TWINE_PASSWORD=${PYPI_TOKEN} TWINE_USERNAME=__token__ python -m twine upload dist/*
Copyright 2019 German Center for Diabetes Research (DZD)
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
from NeoMetaLogger.neo_meta_logger import NeoMetaLogger
from NeoMetaLogger.visualizer import GraphvizVisualizer
from __future__ import annotations
from typing import Dict, TYPE_CHECKING
from NeoMetaLogger.graph_scheme import GraphSchema
import time
if TYPE_CHECKING:
from NeoMetaLogger.neo_meta_logger import NeoMetaLogger
class CapturePoint:
    """Snapshot of a Neo4j database taken at one moment in time.

    A capture point records how many nodes exist per label, how many
    relationships exist per type, and the schema graph derived from Neo4j's
    schema visualization data.  Two capture points are considered equal when
    they are of the same class and were taken at the same timestamp.
    """

    def __init__(
        self,
        parent_logger: NeoMetaLogger,
        name: str,
        labels_count: Dict[str, int],
        relations_count: Dict[str, int],
        neo4j_schema_vis_data: Dict,
    ):
        """Store the counts and build the schema graph for this snapshot.

        Args:
            parent_logger: The logger that owns this capture point.
            name: Optional human readable name; when truthy it is attached as
                a ``name`` property to the generated schema nodes/relations.
            labels_count: Node count per label.
            relations_count: Relationship count per relationship type.
            neo4j_schema_vis_data: Raw schema data, handed unchanged to
                ``GraphSchema.from_neo4j_schema_vis_data``.
        """
        self.parent_logger = parent_logger
        self.name = name
        # Creation time doubles as the identity of the capture point
        # (see __hash__ / __eq__).
        self.timestamp: float = time.time()
        self.labels = labels_count
        self.relations = relations_count

        if name:
            schema_props = {"name": name}
        else:
            schema_props = {}
        # Must run last: the schema builder reads ``parent_logger`` and
        # ``labels`` from this (partially initialised) instance.
        self.schema: GraphSchema = GraphSchema.from_neo4j_schema_vis_data(
            neo4j_schema_vis_data,
            parent_capture_point=self,
            extra_props=schema_props,
        )

    def __hash__(self):
        """Hash by creation timestamp, consistent with ``__eq__``."""
        return hash(self.timestamp)

    def __eq__(self, other: "CapturePoint") -> bool:
        """Return True only for an object of the same class and timestamp."""
        if type(other) is not type(self):
            return False
        return self.timestamp == other.timestamp
from __future__ import annotations
from typing import Union, List, Dict, Generic, TYPE_CHECKING
if TYPE_CHECKING:
from NeoMetaLogger.capture_point import CapturePoint
from pathlib import Path
import py2neo
from NeoMetaLogger.visualizer._base_visualizer import BaseVisualizer
class GraphSchema(py2neo.Subgraph):
    """Schema of a Neo4j database expressed as an unbound py2neo subgraph.

    The subgraph holds one node per label combination and one relationship
    per schema relationship.  Instances are normally created through
    :meth:`from_neo4j_schema_vis_data`.
    """

    def visualize(
        self,
        visualizer_class: type[BaseVisualizer],
        to_file: Union[Path, str] = None,
    ):
        """Render this schema with the given visualizer class.

        Args:
            visualizer_class: A ``BaseVisualizer`` subclass (the class itself,
                not an instance); it is instantiated with this schema.
            to_file: When truthy, the visualizer writes to this path and
                nothing is returned.

        Returns:
            The result of ``visualizer.generate_object()`` when ``to_file`` is
            not given, otherwise ``None``.
        """
        visualizer: BaseVisualizer = visualizer_class(self)
        if to_file:
            visualizer.generate_file(to_file)
        else:
            return visualizer.generate_object()

    @classmethod
    def from_neo4j_schema_vis_data(
        cls,
        neo4j_schema_vis_data: Dict,
        parent_capture_point: CapturePoint,
        extra_props: Dict = None,
    ) -> "GraphSchema":
        """Build a schema graph from ``call db.schema.visualization`` data.

        The nodes and relationships delivered by Neo4j are bound to a
        database and carry IDs that are random on every call, so they are
        worthless outside their own transaction.  They are therefore rebuilt
        as clean, unbound copies and combined into one subgraph.

        Args:
            neo4j_schema_vis_data: Mapping with the keys ``"nodes"`` and
                ``"relationships"``.
            parent_capture_point: Capture point the schema belongs to; its
                ``labels`` counts and its logger's ``all_schema_nodes`` cache
                are used while creating the clean nodes.
            extra_props: Additional properties to put on every generated
                node and relationship.  The mapping is not modified.

        Returns:
            A ``GraphSchema`` containing the cleaned nodes and relationships.
        """
        # ToDo: Break this function down. Too complex/spaghetti!
        schema_graph: GraphSchema = cls()
        parent_logger = parent_capture_point.parent_logger
        if not parent_logger.all_schema_nodes:
            parent_logger.all_schema_nodes = {}
        # Build a new dict so the caller's mapping stays untouched; the marker
        # property tags everything created by this library.
        extra_props = (extra_props or {}) | {"__neo_meta_logger_node": True}

        for rel in neo4j_schema_vis_data["relationships"]:
            start_id = rel.start_node.identity
            end_id = rel.end_node.identity
            # [start node, end node] of the cleaned relationship.
            rel_nodes = [None, None]
            for node in neo4j_schema_vis_data["nodes"]:
                # Explicit None checks: do not rely on the truthiness of a
                # py2neo.Node to decide whether an endpoint was found.
                if rel_nodes[0] is not None and rel_nodes[1] is not None:
                    break
                if node.identity not in (start_id, end_id):
                    continue
                clean_rel_node = cls._get_or_create_unbound_schema_node(
                    node=node,
                    parent_capture_point=parent_capture_point,
                    extra_props=extra_props,
                )
                if start_id == end_id:
                    # Self-referencing relationship: one node is both ends.
                    rel_nodes[0] = rel_nodes[1] = clean_rel_node
                elif node.identity == start_id:
                    rel_nodes[0] = clean_rel_node
                elif node.identity == end_id:
                    rel_nodes[1] = clean_rel_node
            clean_rel = py2neo.Relationship(
                rel_nodes[0],
                type(rel).__name__,
                rel_nodes[1],
                **extra_props,
            )
            schema_graph = schema_graph | clean_rel

        # Up to here nodes without any relationship were ignored; add every
        # node (already known ones come back from the cache unchanged).
        single_nodes_cleaned = [
            cls._get_or_create_unbound_schema_node(
                node=node,
                parent_capture_point=parent_capture_point,
                extra_props=extra_props,
            )
            for node in neo4j_schema_vis_data["nodes"]
        ]
        # hardcoded class name in subgraph. we need to workaround this. can be removed with https://github.com/py2neo-org/py2neo/issues/940 merged
        # original line: return schema_graph | cls(single_nodes_cleaned)
        # workaround:
        sb: py2neo.Subgraph = schema_graph | cls(single_nodes_cleaned)
        return GraphSchema(nodes=sb.nodes, relationships=sb.relationships)

    @classmethod
    def _get_or_create_unbound_schema_node(
        cls,
        node: py2neo.Node,
        parent_capture_point: "CapturePoint",
        extra_props: Dict = None,
    ) -> py2neo.Node:
        """Return the clean, unbound schema node matching ``node``'s labels.

        Clean nodes are cached on the parent logger in ``all_schema_nodes``,
        keyed by the tuple of labels.  A cached node is reused as long as its
        ``__node_count`` equals the count the capture point holds for the
        node's first label; otherwise a new node is created and replaces the
        cached one.

        Args:
            node: Schema node as delivered by Neo4j.
            parent_capture_point: Supplies the label counts and, through its
                logger, the node cache.
            extra_props: Additional properties for a newly created node;
                ``None`` is treated as "no extra properties".

        Returns:
            The cached or newly created unbound node.
        """
        labels_key = tuple(node.labels)
        known_nodes = parent_capture_point.parent_logger.all_schema_nodes
        # Only the first label is used to look up the node count.
        current_count = parent_capture_point.labels[labels_key[0]]

        cached_node = known_nodes.get(labels_key)
        if cached_node is not None and cached_node["__node_count"] == current_count:
            return cached_node

        clean_rel_node = py2neo.Node(
            *labels_key,
            **(
                # ``extra_props`` defaults to None; ``None | {...}`` would
                # raise TypeError, so fall back to an empty dict.
                (extra_props or {})
                | {
                    "__label_name": ":".join(labels_key),
                    "__node_count": current_count,
                }
            ),
        )
        # Primary key/label are what py2neo uses to match nodes on merge.
        clean_rel_node.__primarykey__ = "__label_name"
        clean_rel_node.__primarylabel__ = labels_key[0]
        known_nodes[labels_key] = clean_rel_node
        return clean_rel_node
from typing import Union, Dict, List
from typing import Union, Dict, List, Generic
import time
import py2neo
class CapturePoint:
    """Snapshot of a Neo4j database taken at one moment in time.

    Holds the node count per label, the relationship count per type and the
    schema graph built from Neo4j's schema visualization data.  Equality and
    hashing are based on the creation timestamp.
    """

    def __init__(
        self,
        parent_logger: "NeoMetaLogger",
        name: str,
        labels_count: Dict[str, int],
        relations_count: Dict[str, int],
        neo4j_schema_vis_data: Dict,
    ):
        """Store the counts and build the schema graph for this snapshot.

        Args:
            parent_logger: The logger that owns this capture point.
            name: Optional name; when truthy it is attached as a ``name``
                property to the generated schema nodes/relations.
            labels_count: Node count per label.
            relations_count: Relationship count per relationship type.
            neo4j_schema_vis_data: Raw schema data, handed unchanged to
                ``GraphSchema.from_neo4j_schema_vis_data``.
        """
        self.parent_logger = parent_logger
        self.name = name
        # The creation time identifies the capture point.
        self.timestamp: float = time.time()
        self.labels = labels_count
        self.relations = relations_count

        if name:
            schema_props = {"name": name}
        else:
            schema_props = {}
        # Must run last: the schema builder reads ``parent_logger`` and
        # ``labels`` from this instance.
        self.schema: GraphSchema = GraphSchema.from_neo4j_schema_vis_data(
            neo4j_schema_vis_data,
            parent_capture_point=self,
            extra_props=schema_props,
        )

    def __hash__(self):
        """Hash by creation timestamp, consistent with ``__eq__``."""
        return hash(self.timestamp)

    def __eq__(self, other: "CapturePoint") -> bool:
        """Return True only for an object of the same class and timestamp."""
        if type(other) is not type(self):
            return False
        return self.timestamp == other.timestamp
class GraphSchema(py2neo.Subgraph):
    """Schema of a Neo4j database expressed as an unbound py2neo subgraph."""

    @classmethod
    def from_neo4j_schema_vis_data(
        cls,
        neo4j_schema_vis_data: Dict,
        parent_capture_point: "CapturePoint",
        extra_props: Dict = None,
    ) -> "GraphSchema":
        """Build a schema graph from ``call db.schema.visualization`` data.

        The nodes and relationships delivered by Neo4j are bound to a
        database and carry IDs that are random on every call, so they are
        rebuilt as clean, unbound copies and combined into one subgraph.

        Args:
            neo4j_schema_vis_data: Mapping with the keys ``"nodes"`` and
                ``"relationships"``.
            parent_capture_point: Capture point the schema belongs to; its
                ``labels`` counts and its logger's ``all_schema_nodes`` cache
                are used while creating the clean nodes.
            extra_props: Additional properties to put on every generated
                node and relationship.  The mapping is not modified.

        Returns:
            A ``GraphSchema`` containing the cleaned nodes and relationships.
        """
        # ToDo: Break this function down. Too complex/spaghetti!
        schema_graph: GraphSchema = cls()
        parent_logger = parent_capture_point.parent_logger
        if not parent_logger.all_schema_nodes:
            parent_logger.all_schema_nodes = {}
        # New dict on purpose: the caller's mapping stays untouched.
        extra_props = (extra_props or {}) | {"__neo_meta_logger_node": True}

        for rel in neo4j_schema_vis_data["relationships"]:
            start_id = rel.start_node.identity
            end_id = rel.end_node.identity
            # [start node, end node] of the cleaned relationship.
            rel_nodes = [None, None]
            for node in neo4j_schema_vis_data["nodes"]:
                # Explicit None checks instead of relying on Node truthiness.
                if rel_nodes[0] is not None and rel_nodes[1] is not None:
                    break
                if node.identity not in (start_id, end_id):
                    continue
                clean_rel_node = cls._get_or_create_unbound_schema_node(
                    node=node,
                    parent_capture_point=parent_capture_point,
                    extra_props=extra_props,
                )
                if start_id == end_id:
                    # Self-referencing relationship: one node is both ends.
                    rel_nodes[0] = rel_nodes[1] = clean_rel_node
                elif node.identity == start_id:
                    rel_nodes[0] = clean_rel_node
                elif node.identity == end_id:
                    rel_nodes[1] = clean_rel_node
            clean_rel = py2neo.Relationship(
                rel_nodes[0],
                type(rel).__name__,
                rel_nodes[1],
                **extra_props,
            )
            schema_graph = schema_graph | clean_rel

        # Up to here nodes without any relationship were ignored; add every
        # node (already known ones come back from the cache unchanged).
        single_nodes_cleaned = [
            cls._get_or_create_unbound_schema_node(
                node=node,
                parent_capture_point=parent_capture_point,
                extra_props=extra_props,
            )
            for node in neo4j_schema_vis_data["nodes"]
        ]
        # py2neo's set operators hardcode the Subgraph class (see
        # https://github.com/py2neo-org/py2neo/issues/940), so the union is a
        # plain Subgraph.  Re-wrap it to honour the declared return type.
        combined: py2neo.Subgraph = schema_graph | cls(single_nodes_cleaned)
        return GraphSchema(
            nodes=combined.nodes, relationships=combined.relationships
        )

    @classmethod
    def _get_or_create_unbound_schema_node(
        cls,
        node: py2neo.Node,
        parent_capture_point: "CapturePoint",
        extra_props: Dict = None,
    ) -> py2neo.Node:
        """Return the clean, unbound schema node matching ``node``'s labels.

        Clean nodes are cached on the parent logger in ``all_schema_nodes``,
        keyed by the tuple of labels.  A cached node is reused as long as its
        ``__node_count`` equals the count the capture point holds for the
        node's first label; otherwise a new node is created and replaces the
        cached one.

        Args:
            node: Schema node as delivered by Neo4j.
            parent_capture_point: Supplies the label counts and, through its
                logger, the node cache.
            extra_props: Additional properties for a newly created node;
                ``None`` is treated as "no extra properties".

        Returns:
            The cached or newly created unbound node.
        """
        labels_key = tuple(node.labels)
        known_nodes = parent_capture_point.parent_logger.all_schema_nodes
        # Only the first label is used to look up the node count.
        current_count = parent_capture_point.labels[labels_key[0]]

        cached_node = known_nodes.get(labels_key)
        if cached_node is not None and cached_node["__node_count"] == current_count:
            return cached_node

        clean_rel_node = py2neo.Node(
            *labels_key,
            **(
                # ``extra_props`` defaults to None; ``None | {...}`` would
                # raise TypeError, so fall back to an empty dict.
                (extra_props or {})
                | {
                    "__label_name": ":".join(labels_key),
                    "__node_count": current_count,
                }
            ),
        )
        clean_rel_node.__primarykey__ = "__label_name"
        clean_rel_node.__primarylabel__ = labels_key[0]
        known_nodes[labels_key] = clean_rel_node
        return clean_rel_node
from pathlib import Path
from NeoMetaLogger.capture_point import CapturePoint
from NeoMetaLogger.graph_scheme import GraphSchema
class NeoMetaLogger:
......@@ -144,8 +20,8 @@ class NeoMetaLogger:
)
def capture(self, name: str = None):
labels_count = self._count_labels()
relations_count = self._count_relations()
labels_count = self._get_labels_count()
relations_count = self._get_relations_count()
self.capture_points.append(
CapturePoint(
......@@ -153,17 +29,17 @@ class NeoMetaLogger:
parent_logger=self,
labels_count=labels_count,
relations_count=relations_count,
neo4j_schema_vis_data=self._query_schema(),
neo4j_schema_vis_data=self._get_neo4j_schema(),
)
)
def _query_schema(self):
def _get_neo4j_schema(self):
# call db.schema.visualization
return self.graph.run(
"call db.schema.visualization yield nodes, relationships"
).data()[0]
def _count_labels(self) -> Dict[str, int]:
def _get_labels_count(self) -> Dict[str, int]:
all_labels: List[str] = self.graph.run(
"CALL db.labels() yield label return collect(label) as res"
).data()[0]["res"]
......@@ -176,7 +52,7 @@ class NeoMetaLogger:
labels_count[label] = self.graph.run(query_label_count).data()[0]["res"]
return labels_count
def _count_relations(self) -> Dict[str, int]:
def _get_relations_count(self) -> Dict[str, int]:
all_rels: List[str] = self.graph.run(
"CALL db.relationshipTypes() yield relationshipType return collect(relationshipType) as res"
).data()[0]["res"]
......@@ -207,7 +83,7 @@ class NeoMetaLogger:
cp for cp in self.capture_points if capture_point_name == cp.name
)
def get_schemagraph_last_changes(self) -> py2neo.Subgraph:
def get_schemagraph_last_changes(self) -> GraphSchema:
return self.get_schemagraph_changes_since(
self.get_capture_point_by(capture_point_index=-2)
)
......@@ -215,12 +91,7 @@ class NeoMetaLogger:
def get_schemagraph_changes_since(
self,
capture_point: CapturePoint = None,
) -> Dict[str, Dict[str, int]]:
"""Get changes, in terms of quantity, of labels and relations since the last capture compared to the current capture
Returns:
Dict[str,Dict[str,int]]: A base dictonary "{'labels':{...},'relation':{...}}" containing two dictoniries listing changes (in terms of quantity) for labels and relations.
"""
) -> GraphSchema:
current_capture_point: CapturePoint = (
self.capture_points[-1] if self.capture_points else None
)
......@@ -230,8 +101,12 @@ class NeoMetaLogger:
def get_schemagraph_changes(
self, from_capture: CapturePoint, to_capture: CapturePoint
) -> py2neo.Subgraph:
return from_capture.schema - to_capture.schema
) -> GraphSchema:
# hardcoded class name in subgraph. we need to workaround this. can be removed with https://github.com/py2neo-org/py2neo/issues/940 merged
# original line: return from_capture.schema - to_capture.schema
# workaround:
sb: py2neo.Subgraph = from_capture.schema - to_capture.schema
return GraphSchema(nodes=sb.nodes, relationships=sb.relationships)
def get_numeric_last_changes(self) -> Dict[str, Dict[str, int]]:
"""Get changes, in terms of quantity, of labels and relations since the last capture compared to the current capture
......
from NeoMetaLogger.visualizer.graphiz_visualizer import GraphvizVisualizer
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from NeoMetaLogger.graph_scheme import GraphSchema
class BaseVisualizer:
    """Base class for visualizers that render a ``GraphSchema``.

    The base implementations of :meth:`generate_file` and
    :meth:`generate_object` only raise ``NotImplementedError``; concrete
    visualizers override them.
    """

    def __init__(self, neo_meta_logger_graph_schema: GraphSchema):
        """Remember the schema graph this visualizer works on."""
        self.neo_meta_logger_graph_schema = neo_meta_logger_graph_schema

    def generate_file(self, path):
        """File output hook taking a target ``path``; not implemented here."""
        raise NotImplementedError()

    def generate_object(self) -> bytes:
        """In-memory output hook declared to return bytes; not implemented here."""
        raise NotImplementedError()
from py2neo import Subgraph
from NeoMetaLogger.visualizer._base_visualizer import BaseVisualizer
# sudo apt-get install graphviz graphviz-dev
# pip install pygraphviz
import pygraphviz as pgv
class GraphvizVisualizer(BaseVisualizer):
    """Visualizer that converts the schema graph into a pygraphviz ``AGraph``."""

    def generate_file(self, path):
        """Write the Graphviz representation of the schema to ``path``.

        Args:
            path: Target file name.  ``os.PathLike`` objects (for example
                ``pathlib.Path``) are converted with ``os.fspath`` first;
                anything else is handed to ``AGraph.write`` unchanged.
        """
        import os

        # GraphSchema.visualize() advertises Path objects, so normalise them
        # to a plain path before passing them on to pygraphviz.
        if isinstance(path, os.PathLike):
            path = os.fspath(path)
        self._py2neo_subgraph_to_graphviz(
            py2neo_subgraph=self.neo_meta_logger_graph_schema
        ).write(path)

    def generate_object(self) -> bytes:
        """Return the Graphviz source of the schema, UTF-8 encoded."""
        return (
            self._py2neo_subgraph_to_graphviz(
                py2neo_subgraph=self.neo_meta_logger_graph_schema
            )
            .string()
            .encode("utf-8")
        )

    def _py2neo_subgraph_to_graphviz(
        self,
        py2neo_subgraph: Subgraph,
    ) -> pgv.AGraph:
        """Translate a py2neo subgraph into a directed, non-strict ``AGraph``.

        Every node is added under its ``__label_name`` property with all of
        its properties as attributes.  Every relationship becomes an edge
        between the ``__label_name`` values of its start and end node, keyed
        by the relationship's class name and carrying its properties.
        """
        # https://pygraphviz.github.io/documentation/stable/tutorial.html#graphs
        graph: pgv.AGraph = pgv.AGraph(strict=False, directed=True)
        for node in py2neo_subgraph.nodes:
            graph.add_node(node["__label_name"], **dict(node))
        for rel in py2neo_subgraph.relationships:
            graph.add_edge(
                rel.start_node["__label_name"],
                rel.end_node["__label_name"],
                key=type(rel).__name__,
                **dict(rel),
            )
        return graph
from py2neo import Subgraph
import networkx
from NeoMetaLogger.visualizer._base_visualizer import BaseVisualizer
import matplotlib.pyplot as plt
# WIP: not usable atm
class NetworkxVisualizer:
    """Experimental visualizer that converts a schema graph to networkx."""

    def create_schema_png(self):
        """Draw the schema of the first capture point with ``networkx.draw``.

        NOTE(review): this reads ``self.neo_meta_logger``, which nothing in
        this class sets, and no file is written despite the method name —
        the class is still work in progress.
        """
        nx_graph = self._py2neo_subgraph_to_networkxgraph(
            py2neo_subgraph=self.neo_meta_logger.capture_points[0].schema
        )
        networkx.draw(G=nx_graph, with_labels=True)

    def _py2neo_subgraph_to_networkxgraph(
        self,
        py2neo_subgraph: Subgraph,
    ) -> networkx.MultiDiGraph:
        """Translate a py2neo subgraph into a ``networkx.MultiDiGraph``.

        Nodes are keyed by their ``__label_name`` property and carry their
        labels (as a list) and properties (as a dict).  Edges connect the
        ``__label_name`` values of start and end node, keyed by relationship
        type.
        """
        nx_graph: networkx.MultiDiGraph = networkx.MultiDiGraph()
        for node in py2neo_subgraph.nodes:
            nx_graph.add_node(
                node["__label_name"],
                # list(...) call, not list[...] subscription: the latter
                # does not produce a list of the labels.
                labels=list(node.labels),
                properties=dict(node),
            )
        for rel in py2neo_subgraph.relationships:
            nx_graph.add_edge(
                rel.start_node["__label_name"],
                rel.end_node["__label_name"],
                key=rel.type,
                type=rel.type,
                properties=dict(rel),
            )
        return nx_graph
......@@ -17,7 +17,7 @@ Let's create a sample graph with Python and Neo4j
```python
import py2neo
from neo_meta_logger import NeoMetaLogger
from NeoMetaLogger import NeoMetaLogger
g = py2neo.Graph(name="test_graph")
......
from neo_meta_logger.neo_meta_logger import NeoMetaLogger
\ No newline at end of file
py2neo
DZDUtils
\ No newline at end of file
"""Packaging script for NeoMetaLogger."""
from pathlib import Path

from setuptools import find_packages, setup

this_directory = Path(__file__).parent
# Explicit encoding so the result does not depend on the platform's locale
# default (assumes README.md is stored as UTF-8).
long_description = (this_directory / "README.md").read_text(encoding="utf-8")

setup(
    name="NeoMetaLogger",
    description="Track and visualize changes in a Neo4j database schema",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://git.connect.dzd-ev.de/dzdpythonmodules/neo4j-meta-logger",
    author="Tim Bleimehl",
    author_email="tim.bleimehl@helmholtz-muenchen.de",
    license="MIT",
    # Include sub-packages such as NeoMetaLogger.visualizer as well; listing
    # only the top-level package would leave them out of the distribution.
    packages=find_packages(include=["NeoMetaLogger", "NeoMetaLogger.*"]),
    install_requires=[
        "py2neo",
        "DZDUtils",
    ],
    python_requires=">=3.9",
    zip_safe=False,
    include_package_data=True,
    # The version is derived from the SCM metadata by setuptools_scm.
    use_scm_version={
        "root": ".",
        "relative_to": __file__,
        # "local_scheme": "node-and-timestamp"
        "local_scheme": "no-local-version",
        "write_to": "version.py",
    },
    setup_requires=["setuptools_scm"],
)
# Scratch script: connects with py2neo's default settings (no connection
# arguments are given) and runs a single Cypher query.  The query selects the
# node whose internal id is -1 and returns the ids of all relationships
# attached to it; the result is discarded.
# NOTE(review): presumably a probe for how the server answers a lookup of a
# non-existent node id — confirm the intent before relying on it.
import py2neo
g = py2neo.Graph()
g.run("match (n) where id(n)=-1 with n match (n)-[r]-(m) return id(r)")
......@@ -13,8 +13,8 @@ if __name__ == "__main__":
print(os.path.normpath(SCRIPT_DIR))
sys.path.insert(0, os.path.normpath(SCRIPT_DIR))
from neo_meta_logger import NeoMetaLogger
from NeoMetaLogger import NeoMetaLogger
from NeoMetaLogger.visualizer import GraphvizVisualizer
NEO4J: Dict = json.loads(os.getenv("NEO4J", "{}"))
wait_for_db_boot(NEO4J)
......@@ -22,7 +22,8 @@ graph = py2neo.Graph(**NEO4J)
graph.run("CREATE OR REPLACE DATABASE schematest")
graph.run("CREATE OR REPLACE DATABASE test")
test_graph = py2neo.Graph(**(NEO4J | {"name": "test"}))
# test_graph = py2neo.Graph(**(NEO4J | {"name": "test"}))
test_graph = py2neo.Graph(**(NEO4J))
mlog = NeoMetaLogger(test_graph)
test_graph.run(
"CREATE p = (:Human{name:'Amina Okujewa'})-[:LIVES_ON]->(:World {name: 'Earth'})"
......@@ -40,6 +41,14 @@ test_graph.run(
mlog.capture()
print(mlog.get_numeric_last_changes())
schema_graph = py2neo.Graph(**(NEO4J | {"name": "schematest"}))
schema_graph.merge(mlog.get_schemagraph_last_changes())
print(mlog.get_schemagraph_last_changes())
print("##########")
print(mlog.capture_points[-1].schema.visualize(GraphvizVisualizer))