Commit e7ba75c0 authored by Yaroslav Zdravomyslov's avatar Yaroslav Zdravomyslov
Browse files

initial clean commit

parents
Pipeline #1648 passed with stage
in 21 seconds
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
data/
# Image the jobs below run in; their scripts call the docker CLI.
image: docker:git
# The pipeline has a single stage that builds the image and pushes it.
stages:
- local-image-build-n-push
# Runs for tags only: builds the image and pushes it as ":prod" and ":<tag>".
Push Tagged:
  stage: local-image-build-n-push
  only:
    - tags
  script:
    # Pass the password on stdin instead of "-p" so it is not exposed on the
    # command line (docker itself warns about "-p" being insecure).
    - echo "$CI_REGISTRY_PASSWORD" | docker login "$CI_REGISTRY" -u "$CI_REGISTRY_USER" --password-stdin
    - docker build --no-cache . -t "$CI_REGISTRY_IMAGE:prod" -t "$CI_REGISTRY_IMAGE:$CI_COMMIT_TAG"
    - docker push "$CI_REGISTRY_IMAGE" --all-tags
# Runs for the main branch only: builds the image and pushes it as ":latest".
Push latest:
  stage: local-image-build-n-push
  only:
    # Only "main" ("master" on older repos) should be tagged "latest"
    - main
  script:
    # Pass the password on stdin instead of "-p" so it is not exposed on the
    # command line (docker itself warns about "-p" being insecure).
    - echo "$CI_REGISTRY_PASSWORD" | docker login "$CI_REGISTRY" -u "$CI_REGISTRY_USER" --password-stdin
    - docker build --no-cache . -t "$CI_REGISTRY_IMAGE:latest"
    - docker push "$CI_REGISTRY_IMAGE" --all-tags
\ No newline at end of file
# Image that runs MP-HP-DISTANCE/main.py.
FROM python:3.9
# WORKDIR creates /app when it is missing, so no separate "mkdir" layer is needed.
WORKDIR /app/
# Install the requirements before copying the sources so this layer stays
# cached as long as reqs.txt is unchanged.
COPY reqs.txt /app/
RUN pip3 install --no-cache-dir -r reqs.txt
COPY MP-HP-DISTANCE /app/MP-HP-DISTANCE
WORKDIR /app/MP-HP-DISTANCE
# (The former "RUN ls -la" debug step was dropped; it only added a layer.)
CMD [ "python3", "main.py" ]
\ No newline at end of file
from Configs import ConfigBase
class DEFAULT(ConfigBase):
    # Settings consumed by the matching script (it annotates the object
    # returned by getConfig() as DEFAULT).

    # Label of the HP term nodes; used in the Cypher match and as the end-node
    # label of the relationship sets.
    HP_NODE_LABEL = "HP_Term"
    # HP_NODE_RDFS_ATTRIBUTE = 'rdfs__label'
    # Node property returned as the HP term name for the name comparison.
    HP_NODE_RDFS_ATTRIBUTE = "rdfs__label_clean"
    # Node property returned alongside the name; key of the "maybe similar"
    # relationships on the HP side.
    HP_NODE_SID_CLEAN_ATTRIBUTE = "sid_clean"
    # Node property used as key of the uPheno relationships on the HP side.
    HP_NODE_SID_ATTRIBUTE = "sid"

    # Label of the MP term nodes; used in the Cypher match and as the
    # start-node label of the relationship sets.
    MP_NODE_LABEL = "MP_Term"
    # MP_NODE_RDFS_ATTRIBUTE = 'rdfs__label'
    # Node property returned as the MP term name for the name comparison.
    MP_NODE_RDFS_ATTRIBUTE = "rdfs__label_clean"
    # Node property returned alongside the name; key of the "maybe similar"
    # relationships on the MP side.
    MP_NODE_SID_CLEAN_ATTRIBUTE = "sid_clean"
    # Node property used as key of the uPheno relationships on the MP side.
    MP_NODE_SID_ATTRIBUTE = "sid"

    # Relationship type that marks terms as already linked; nodes having it
    # are excluded from the term queries.
    MP_HP_EQUAL_RELATIONSHIP_LABEL = "HAS_CROSS_SPECIES_TERM"

    # Relationship type created for MP/HP terms with equal names.
    MAYBE_RELATIONSHIP_NAME = "_MAYBE_SIMILAR"
    # Relationship type created from the rows of upheno.tsv.
    UPHENO_RELATIONSHIP_NAME = "_UPHENO_SIMILAR"

    # Lower-cased term names that are skipped by the equal-name matching.
    FORBIDDEN_WORDS = [
        "frequency",
        "left",
        "right",
        "position",
        "generalized",
        "ptosis",
        "dull",
        "lateral",
        "axial",
        "reticular",
        "center",
        "peripheral",
        "bilateral",
        "unilateral",
        "coma",
        "sarcoma",
        "focal",
        "acute",
        "ranula",
        "prolonged",
        "central",
    ]

    # Keyword arguments passed to py2neo's Graph(...); empty by default.
    # NOTE(review): presumably overridden per environment — confirm.
    NEO4J: dict = {}
from py2neo import Graph
from graphio import RelationshipSet
import csv
from Configs import getConfig
from config import DEFAULT
config: DEFAULT = getConfig()
def get_mp_terms():
    """Query the MP terms that have no cross-species relationship yet.

    Connects to Neo4j with ``config.NEO4J``, prints the generated Cypher
    statement and returns the result of ``.to_table()``: one row per node,
    holding the configured name attribute followed by the configured clean
    sid attribute.
    """
    graph = Graph(**config.NEO4J)
    # Labels and property names cannot be passed as Cypher parameters, so the
    # statement is assembled from the configuration values.
    cypher = (
        f"match (n:{config.MP_NODE_LABEL}) "
        f"WHERE NOT (n)-[:{config.MP_HP_EQUAL_RELATIONSHIP_LABEL}]-() "
        f"return n.{config.MP_NODE_RDFS_ATTRIBUTE}, "
        f"n.{config.MP_NODE_SID_CLEAN_ATTRIBUTE}"
    )
    print("query:\n", cypher)
    return graph.run(cypher).to_table()
def get_hp_terms():
    """Query the HP terms that have no cross-species relationship yet.

    Connects to Neo4j with ``config.NEO4J``, prints the generated Cypher
    statement and returns the result of ``.to_table()``: one row per node,
    holding the configured name attribute followed by the configured clean
    sid attribute.
    """
    graph = Graph(**config.NEO4J)
    # Labels and property names cannot be passed as Cypher parameters, so the
    # statement is assembled from the configuration values.
    cypher = (
        f"match (n:{config.HP_NODE_LABEL}) "
        f"WHERE NOT (n)-[:{config.MP_HP_EQUAL_RELATIONSHIP_LABEL}]-() "
        f"return n.{config.HP_NODE_RDFS_ATTRIBUTE}, "
        f"n.{config.HP_NODE_SID_CLEAN_ATTRIBUTE}"
    )
    print("query:\n", cypher)
    return graph.run(cypher).to_table()
def get_equal_names(list_1, list_2, forbidden_words=None):
    """Pair up the ids of entries whose names are equal ignoring case.

    Args:
        list_1: Rows indexable as ``row[0]`` (name) and ``row[1]`` (id).
        list_2: Rows of the same shape as ``list_1``.
        forbidden_words: Lower-case names that must never be matched.
            Defaults to ``config.FORBIDDEN_WORDS``.

    Returns:
        A list of ``(id_from_list_1, id_from_list_2)`` tuples, ordered by
        ``list_1`` first and by ``list_2`` within one ``list_1`` entry. Rows
        with an empty or missing name are ignored.

    The number of pairs found is printed before returning.
    """
    if forbidden_words is None:
        forbidden_words = config.FORBIDDEN_WORDS
    forbidden = set(forbidden_words)

    # Index list_2 by lower-cased name once, so each list_1 entry needs a
    # single lookup instead of a full scan of list_2.
    ids_by_name = {}
    for row in list_2:
        if row[0]:
            ids_by_name.setdefault(row[0].lower(), []).append(row[1])

    res = []
    for row in list_1:
        if not row[0]:
            continue
        name = row[0].lower()
        # Matching names are equal, so checking one side against the
        # forbidden words covers both.
        if name in forbidden:
            continue
        res.extend((row[1], other_id) for other_id in ids_by_name.get(name, ()))
    print(len(res))
    return res
def create_maybe_same_relationship(tuple_list):
    """Merge one "maybe similar" relationship per id pair into Neo4j.

    Args:
        tuple_list: Pairs whose first item is the clean sid of an MP term and
            whose second item is the clean sid of an HP term.
    """
    mp_key = config.MP_NODE_SID_CLEAN_ATTRIBUTE
    hp_key = config.HP_NODE_SID_CLEAN_ATTRIBUTE

    relationships = RelationshipSet(
        config.MAYBE_RELATIONSHIP_NAME,
        [config.MP_NODE_LABEL],
        [config.HP_NODE_LABEL],
        [mp_key],
        [hp_key],
    )
    for pair in tuple_list:
        relationships.add_relationship({mp_key: pair[0]}, {hp_key: pair[1]})

    # The connection is opened only after the whole set has been collected.
    graph = Graph(**config.NEO4J)
    relationships.create_index(graph)
    relationships.merge(graph)
def create_upheno_relationship(tsv_path="upheno.tsv"):
    """Merge the MP/HP relationships listed in a uPheno TSV file into Neo4j.

    Every non-empty row contributes one relationship: column 2 is used as the
    sid of the MP term and column 0 as the sid of the HP term.

    Args:
        tsv_path: Path of the tab-separated input file. Defaults to
            ``"upheno.tsv"`` in the current working directory.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-empty row has fewer than three columns.
    """
    upheno_relation = RelationshipSet(
        config.UPHENO_RELATIONSHIP_NAME,
        [config.MP_NODE_LABEL],
        [config.HP_NODE_LABEL],
        [config.MP_NODE_SID_ATTRIBUTE],
        [config.HP_NODE_SID_ATTRIBUTE],
    )
    # newline="" is what the csv module asks for so that it can handle line
    # endings itself; the encoding is explicit so decoding does not depend on
    # the host locale.
    # NOTE(review): assumes the file is UTF-8 — confirm.
    with open(tsv_path, newline="", encoding="utf-8") as file:
        for row in csv.reader(file, delimiter="\t"):
            if row:
                upheno_relation.add_relationship(
                    {config.MP_NODE_SID_ATTRIBUTE: row[2]},
                    {config.HP_NODE_SID_ATTRIBUTE: row[0]},
                )
    neo4j_graph = Graph(**config.NEO4J)
    upheno_relation.create_index(neo4j_graph)
    upheno_relation.merge(neo4j_graph)
if __name__ == "__main__":
    # Step 1: link MP and HP terms that carry the same name (MP terms are
    # fetched first, then HP terms).
    equal_names = get_equal_names(get_mp_terms(), get_hp_terms())
    create_maybe_same_relationship(equal_names)
    # Step 2: add the relationships listed in upheno.tsv.
    create_upheno_relationship()
This diff is collapsed.
HP - MP distance relationship
`match (h:HP_Term)-[r:HAS_CROSS_SPECIES_TERM]-(m:MP_Term) return count(r)`
1) 1132 equal names in total (867 of them already marked in neo4j01) => 265 newly found (RDFS == RDFS), 227 of those without forbidden words
2) asdsad
- There are rdfs labels such as "left", "right", "position", "generalized", "frequency"... (╯°□°)╯︵ ┻━┻ — these are listed in FORBIDDEN_WORDS and skipped when matching names.
-
\ No newline at end of file
graphio
py2neo
DZDConfigs
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment