Commit 0d248aac authored by Yaroslav Zdravomyslov
Browse files

dockerize session

parent b649c89f
Pipeline #1631 passed with stage
in 20 seconds
...@@ -137,3 +137,4 @@ dmypy.json ...@@ -137,3 +137,4 @@ dmypy.json
# Cython debug symbols # Cython debug symbols
cython_debug/ cython_debug/
data/ data/
FROM pypy:3.8 FROM python:3.9
RUN mkdir /app RUN mkdir /app
WORKDIR /app WORKDIR /app/
COPY reqs.txt /app COPY reqs.txt /app
RUN pip3 install --no-cache-dir -r reqs.txt RUN pip3 install --no-cache-dir -r reqs.txt
COPY main.py /app/main.py COPY MP-HP-DISTANCE /app/MP-HP-DISTANCE
WORKDIR /app/MP-HP-DISTANCE
RUN ls -la
CMD [ "python3", "main.py" ] CMD [ "python3", "main.py" ]
\ No newline at end of file
from Configs import ConfigBase
class DEFAULT(ConfigBase):
    # Default settings for the MP/HP term matching job.
    # NOTE(review): values are presumably overridable through environment
    # variables prefixed with ``CONFIGS_`` (see the docker example at the
    # bottom of this class) -- confirm against the Configs package docs.

    # --- Human Phenotype (HP) term nodes ---------------------------------
    # Node label used to match HP terms in the graph.
    HP_NODE_LABEL = "HP_Term"
    # Node attribute holding the term name that is compared between HP and
    # MP. Alternative attribute, currently disabled: 'rdfs__label'.
    HP_NODE_RDFS_ATTRIBUTE = "rdfs__label_clean"
    # Node attributes holding the term identifier (cleaned / as-is).
    HP_NODE_SID_CLEAN_ATTRIBUTE = "sid_clean"
    HP_NODE_SID_ATTRIBUTE = "sid"

    # --- Mammalian Phenotype (MP) term nodes -----------------------------
    # Node label used to match MP terms in the graph.
    MP_NODE_LABEL = "MP_Term"
    # Node attribute holding the term name that is compared between HP and
    # MP. Alternative attribute, currently disabled: 'rdfs__label'.
    MP_NODE_RDFS_ATTRIBUTE = "rdfs__label_clean"
    # Node attributes holding the term identifier (cleaned / as-is).
    MP_NODE_SID_CLEAN_ATTRIBUTE = "sid_clean"
    MP_NODE_SID_ATTRIBUTE = "sid"

    # --- Relationships ----------------------------------------------------
    # Existing relationship type; terms that already have it are excluded
    # from the term queries.
    MP_HP_EQUAL_RELATIONSHIP_LABEL = "HAS_CROSS_SPECIES_TERM"
    # Relationship type created between MP and HP terms with equal names.
    MAYBE_RELATIONSHIP_NAME = "_MAYBE_SIMILAR"
    # Relationship type created from the uPheno mapping file.
    UPHENO_RELATIONSHIP_NAME = "_UPHENO_SIMILAR"

    # Term names (lower case) that must never be used to pair an MP term
    # with an HP term, even when both names are identical.
    FORBIDDEN_WORDS = [
        "frequency",
        "left",
        "right",
        "position",
        "generalized",
        "ptosis",
        "dull",
        "lateral",
        "axial",
        "reticular",
        "center",
        "peripheral",
        "bilateral",
        "unilateral",
        "coma",
        "sarcoma",
        "focal",
        "acute",
        "ranula",
        "prolonged",
        "central",
    ]

    # Keyword arguments that are unpacked into ``py2neo.Graph(**NEO4J)``.
    # Empty by default; supply the connection settings at deploy time, e.g.
    #   docker run -e CONFIGS_NEO4J="{}" registry-gl.connect.dzd-ev.de:443/dzdtools/mp-hp-distance:latest
    # Example value (assumed py2neo setting names -- TODO confirm):
    #   {"host": "<neo4j-host>", "port": 9787, "user": "neo4j", "password": "<secret>"}
    # SECURITY: never commit real credentials to this file, not even in a
    # comment. The password that used to be written here must be rotated.
    NEO4J: dict = {}
from py2neo import Graph from py2neo import Graph
from globals import (
HP_NODE_LABEL,
MP_NODE_LABEL,
HP_NODE_RDFS_ATTRIBUTE,
MP_NODE_RDFS_ATTRIBUTE,
MP_HP_EQUAL_RELATIONSHIP_LABEL,
FORBIDDEN_WORDS,
MAYBE_RELATIONSHIP_NAME,
HP_NODE_SID_ATTRIBUTE,
HP_NODE_SID_CLEAN_ATTRIBUTE,
MP_NODE_SID_ATTRIBUTE,
MP_NODE_SID_CLEAN_ATTRIBUTE,
UPHENO_RELATIONSHIP_NAME,
)
from graphio import RelationshipSet from graphio import RelationshipSet
import csv import csv
from Configs import getConfig
from config import DEFAULT
config: DEFAULT = getConfig()
def get_mp_terms(): def get_mp_terms():
global MP_NODE_LABEL
global MP_HP_EQUAL_RELATIONSHIP_LABEL
global MP_NODE_SID_CLEAN_ATTRIBUTE
neo4j_graph = Graph( neo4j_graph = Graph(**config.NEO4J)
"bolt://neo4j01.connect.dzd-ev.de:9787", auth=("neo4j", "skjhbgf942wuef") # neo4j_graph = Graph(
) # "bolt://neo4j01.connect.dzd-ev.de:9787", auth=("neo4j", "skjhbgf942wuef")
# )
query = ( query = (
"match (n:" "match (n:"
+ MP_NODE_LABEL + config.MP_NODE_LABEL
+ ") WHERE NOT (n)-[:" + ") WHERE NOT (n)-[:"
+ MP_HP_EQUAL_RELATIONSHIP_LABEL + config.MP_HP_EQUAL_RELATIONSHIP_LABEL
+ "]-() return n." + "]-() return n."
+ MP_NODE_RDFS_ATTRIBUTE + config.MP_NODE_RDFS_ATTRIBUTE
+ ", n." + ", n."
+ MP_NODE_SID_CLEAN_ATTRIBUTE + config.MP_NODE_SID_CLEAN_ATTRIBUTE
) )
print("query:\n", query) print("query:\n", query)
...@@ -45,23 +33,21 @@ def get_mp_terms(): ...@@ -45,23 +33,21 @@ def get_mp_terms():
def get_hp_terms(): def get_hp_terms():
global HP_NODE_LABEL
global MP_HP_EQUAL_RELATIONSHIP_LABEL
global HP_NODE_SID_CLEAN_ATTRIBUTE
neo4j_graph = Graph( neo4j_graph = Graph(**config.NEO4J)
"bolt://neo4j01.connect.dzd-ev.de:9787", auth=("neo4j", "skjhbgf942wuef") # neo4j_graph = Graph(
) # "bolt://neo4j01.connect.dzd-ev.de:9787", auth=("neo4j", "skjhbgf942wuef")
# )
query = ( query = (
"match (n:" "match (n:"
+ HP_NODE_LABEL + config.HP_NODE_LABEL
+ ") WHERE NOT (n)-[:" + ") WHERE NOT (n)-[:"
+ MP_HP_EQUAL_RELATIONSHIP_LABEL + config.MP_HP_EQUAL_RELATIONSHIP_LABEL
+ "]-() return n." + "]-() return n."
+ HP_NODE_RDFS_ATTRIBUTE + config.HP_NODE_RDFS_ATTRIBUTE
+ ", n." + ", n."
+ HP_NODE_SID_CLEAN_ATTRIBUTE + config.HP_NODE_SID_CLEAN_ATTRIBUTE
) )
print("query:\n", query) print("query:\n", query)
...@@ -72,7 +58,6 @@ def get_hp_terms(): ...@@ -72,7 +58,6 @@ def get_hp_terms():
def get_equal_names(list_1, list_2): def get_equal_names(list_1, list_2):
global FORBIDDEN_WORDS
list_1 = [(ele[0].lower(), ele[1]) for ele in list_1 if ele[0]] list_1 = [(ele[0].lower(), ele[1]) for ele in list_1 if ele[0]]
list_2 = [(ele[0].lower(), ele[1]) for ele in list_2 if ele[0]] list_2 = [(ele[0].lower(), ele[1]) for ele in list_2 if ele[0]]
...@@ -80,9 +65,9 @@ def get_equal_names(list_1, list_2): ...@@ -80,9 +65,9 @@ def get_equal_names(list_1, list_2):
count = 0 count = 0
res = [] res = []
for ele1 in list_1: for ele1 in list_1:
if not ele1[0] in FORBIDDEN_WORDS: if not ele1[0] in config.FORBIDDEN_WORDS:
for ele2 in list_2: for ele2 in list_2:
if not ele2[0] in FORBIDDEN_WORDS: if not ele2[0] in config.FORBIDDEN_WORDS:
if ele1[0] == ele2[0]: if ele1[0] == ele2[0]:
# if count % 100 == 0: # if count % 100 == 0:
# print(ele1, "==", ele2) # print(ele1, "==", ele2)
...@@ -95,46 +80,38 @@ def get_equal_names(list_1, list_2): ...@@ -95,46 +80,38 @@ def get_equal_names(list_1, list_2):
def create_maybe_same_relationship(tuple_list): def create_maybe_same_relationship(tuple_list):
global HP_NODE_LABEL
global MP_NODE_LABEL
global MAYBE_RELATIONSHIP_NAME
global HP_NODE_SID_CLEAN_ATTRIBUTE
global MP_NODE_SID_CLEAN_ATTRIBUTE
maybe_relation = RelationshipSet( maybe_relation = RelationshipSet(
MAYBE_RELATIONSHIP_NAME, config.MAYBE_RELATIONSHIP_NAME,
[MP_NODE_LABEL], [config.MP_NODE_LABEL],
[HP_NODE_LABEL], [config.HP_NODE_LABEL],
[MP_NODE_SID_CLEAN_ATTRIBUTE], [config.MP_NODE_SID_CLEAN_ATTRIBUTE],
[HP_NODE_SID_CLEAN_ATTRIBUTE], [config.HP_NODE_SID_CLEAN_ATTRIBUTE],
) )
for tuple in tuple_list: for tuple in tuple_list:
maybe_relation.add_relationship( maybe_relation.add_relationship(
{MP_NODE_SID_CLEAN_ATTRIBUTE: tuple[0]}, {config.MP_NODE_SID_CLEAN_ATTRIBUTE: tuple[0]},
{HP_NODE_SID_CLEAN_ATTRIBUTE: tuple[1]}, {config.HP_NODE_SID_CLEAN_ATTRIBUTE: tuple[1]},
) )
neo4j_graph = Graph( neo4j_graph = Graph(**config.NEO4J)
"bolt://neo4j01.connect.dzd-ev.de:9787", auth=("neo4j", "skjhbgf942wuef") # neo4j_graph = Graph(
) # "bolt://neo4j01.connect.dzd-ev.de:9787", auth=("neo4j", "skjhbgf942wuef")
# )
maybe_relation.create_index(neo4j_graph) maybe_relation.create_index(neo4j_graph)
maybe_relation.merge(neo4j_graph) maybe_relation.merge(neo4j_graph)
def create_upheno_relationship(): def create_upheno_relationship():
global MP_NODE_SID_ATTRIBUTE
global HP_NODE_SID_ATTRIBUTE
global UPHENO_RELATIONSHIP_NAME
upheno_relation = RelationshipSet( upheno_relation = RelationshipSet(
UPHENO_RELATIONSHIP_NAME, config.UPHENO_RELATIONSHIP_NAME,
[MP_NODE_LABEL], [config.MP_NODE_LABEL],
[HP_NODE_LABEL], [config.HP_NODE_LABEL],
[MP_NODE_SID_ATTRIBUTE], [config.MP_NODE_SID_ATTRIBUTE],
[HP_NODE_SID_ATTRIBUTE], [config.HP_NODE_SID_ATTRIBUTE],
) )
count = 0 count = 0
...@@ -145,13 +122,14 @@ def create_upheno_relationship(): ...@@ -145,13 +122,14 @@ def create_upheno_relationship():
if line: if line:
count += 1 count += 1
upheno_relation.add_relationship( upheno_relation.add_relationship(
{MP_NODE_SID_ATTRIBUTE: line[2]}, {config.MP_NODE_SID_ATTRIBUTE: line[2]},
{HP_NODE_SID_ATTRIBUTE: line[0]}, {config.HP_NODE_SID_ATTRIBUTE: line[0]},
) )
neo4j_graph = Graph( neo4j_graph = Graph(**config.NEO4J)
"bolt://neo4j01.connect.dzd-ev.de:9787", auth=("neo4j", "skjhbgf942wuef") # neo4j_graph = Graph(
) # "bolt://neo4j01.connect.dzd-ev.de:9787", auth=("neo4j", "skjhbgf942wuef")
# )
upheno_relation.create_index(neo4j_graph) upheno_relation.create_index(neo4j_graph)
upheno_relation.merge(neo4j_graph) upheno_relation.merge(neo4j_graph)
...@@ -159,11 +137,8 @@ def create_upheno_relationship(): ...@@ -159,11 +137,8 @@ def create_upheno_relationship():
if __name__ == "__main__": if __name__ == "__main__":
print(2)
"""
mp_query = get_mp_terms() mp_query = get_mp_terms()
hp_query = get_hp_terms() hp_query = get_hp_terms()
equal_names = get_equal_names(mp_query, hp_query) equal_names = get_equal_names(mp_query, hp_query)
create_maybe_same_relationship(equal_names) create_maybe_same_relationship(equal_names)
create_upheno_relationship() create_upheno_relationship()
"""
# Graph schema names shared by the MP/HP matching script.

# --- Human Phenotype (HP) term nodes ---
HP_NODE_LABEL = "HP_Term"
# Alternative name attribute, currently disabled: 'rdfs__label'
HP_NODE_RDFS_ATTRIBUTE = "rdfs__label_clean"
HP_NODE_SID_CLEAN_ATTRIBUTE = "sid_clean"
HP_NODE_SID_ATTRIBUTE = "sid"

# --- Mammalian Phenotype (MP) term nodes ---
MP_NODE_LABEL = "MP_Term"
# Alternative name attribute, currently disabled: 'rdfs__label'
MP_NODE_RDFS_ATTRIBUTE = "rdfs__label_clean"
MP_NODE_SID_CLEAN_ATTRIBUTE = "sid_clean"
MP_NODE_SID_ATTRIBUTE = "sid"

# --- Relationship types ---
MP_HP_EQUAL_RELATIONSHIP_LABEL = "HAS_CROSS_SPECIES_TERM"
MAYBE_RELATIONSHIP_NAME = "_MAYBE_SIMILAR"
UPHENO_RELATIONSHIP_NAME = "_UPHENO_SIMILAR"

# Term names (lower case) that are excluded from name-based matching.
FORBIDDEN_WORDS = [
    "frequency",
    "left",
    "right",
    "position",
    "generalized",
    "ptosis",
    "dull",
    "lateral",
    "axial",
    "reticular",
    "center",
    "peripheral",
    "bilateral",
    "unilateral",
    "coma",
    "sarcoma",
    "focal",
    "acute",
    "ranula",
    "prolonged",
    "central",
]
# 3475
# 3440
# 2169
graphio
py2neo
DZDConfigs
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment