Pedigree 또는 multi-position network가 있을 때, 항목 간의 연관성을 tree로 확인하기 위한 프로그램이다. 이 프로그램은 서로 연관된 2개의 항목을 대상으로 만들었다.
Python anytree library를 사용했다. anytree의 단점은 multi-parent를 지원하지 않아, 한 node가 처음 설정된 parent만 따르게 된다는 점이다.
[ Anytree ]
- Homepage : https://anytree.readthedocs.io/en/latest/
- Manual, PDF : https://readthedocs.org/projects/anytree/downloads/pdf/latest/
[ Graphviz ]
- Homepage : https://graphviz.org/
- Manual : https://graphviz.org/documentation/
예를 들어, HP0009699와 HP0100659가 어떻게 연결되어있는지에 대해 아래 그림과 같이 확인이 가능하다.
출력은 각각의 tree와 HP0009699에서 HP0100659로 연결된 2종류의 tree이며, 총 4가지로 구성되어 있다.
Prerequisites:
$ pip3 install anytree
$ sudo yum install graphviz -y
$ wget http://purl.obolibrary.org/obo/hp.obo
Useful technique:
1) globals()['{}'.format(smp)]=Node(smp)
2) pair=list(OrderedDict.fromkeys(tuple(sub) for sub in pair))
1) 문자를 변수명으로 받을 수 있게 해준다.
2) 순서를 유지하면서 중복요소를 제거해준다.
Code:
from anytree import RenderTree, Node, Walker
from anytree.exporter import DotExporter, UniqueDotExporter
from collections import OrderedDict
import sys
import random
def make_table(fname):
    """Parse the ``[Term]`` stanzas of an OBO file into a lookup table.

    Only ``[Term]`` stanzas are read; a stanza ends at the first blank line
    (or at end of file). Other stanza types and the file header are ignored.

    Args:
        fname: Path to the OBO file (e.g. ``hp.obo``).

    Returns:
        A dict mapping each term id (the value of the stanza's ``id`` tag)
        to a dict of ``tag -> list of values``. Values of ``is_a`` are
        reduced to the bare parent id (the trailing `` ! name`` comment is
        dropped). Stanzas without an ``id`` tag are skipped.
    """
    dts = {}
    dt = None        # tags of the stanza being read; None outside a [Term]
    term_id = None   # id of the stanza being read, once its `id` tag is seen

    # Single pass inside a context manager: the file is read once and is
    # always closed, even if parsing fails part-way through.
    with open(fname, 'r', encoding='utf-8') as f:
        for raw in f:
            line = raw.rstrip()

            if dt is None:
                if line == '[Term]':
                    dt, term_id = {}, None
                continue

            if line == '':
                # A blank line closes the current stanza.
                if term_id is not None:
                    dts[term_id] = dt
                dt = None
                continue

            # Split on the FIRST ': ' only, so values that themselves
            # contain ': ' (names, definitions) stay in one piece.
            key, sep, value = line.partition(': ')
            if not sep or not key:
                continue  # not a "tag: value" line

            if key == 'id':
                term_id = value
            elif key == 'is_a':
                value = value.split(' ! ')[0]
            dt.setdefault(key, []).append(value)

    # The last stanza may not be followed by a blank line.
    if dt is not None and term_id is not None:
        dts[term_id] = dt
    return dts
def find_tree(dts,smp1,smp2):
    """Print and export the ``is_a`` trees that connect two terms.

    Steps, in order:
      1. Create a fresh ``Node`` for every term in ``dts``.
      2. Build the tree rooted at ``smp1`` whose descendants are ``smp1``'s
         ``is_a`` ancestors; print it as "Tree 1" and write ``tree1.png``.
      3. Hang the ``is_a`` ancestor chain of ``smp2`` under the nodes that
         already exist, so shared ancestors join the two terms.
      4. Call ``find_subtree(dts, smp2)`` (prints "Tree 2").
      5. If both terms ended up in the same tree, print the combined tree,
         write ``tree_uniq.png`` and ``tree.png`` and print the last node
         of the downward path from ``smp1`` to ``smp2``. Otherwise print
         "* No pairs.".

    Nodes are stored in the module globals under the term id with the
    colon removed (e.g. ``HP:0000118`` -> ``HP0000118``). anytree nodes
    have a single parent, so a term with several ``is_a`` parents is
    represented by several ``Node`` objects; only the most recently
    created one is reachable through the registry.

    Args:
        dts: Term table as returned by ``make_table``.
        smp1: Id of the first term (a key of ``dts``).
        smp2: Id of the second term (a key of ``dts``).

    Returns:
        None. Results are printed and written as PNG files.
    """
    smpTmp1 = smp1.replace(':', '')
    smpTmp2 = smp2.replace(':', '')
    color, fontcolor, shape = decorate()
    attrs = "style=filled,color="+color+",fontcolor="+fontcolor+",shape="+shape

    def highlight(*targets):
        # nodeattrfunc factory: style only the given nodes, leave the rest.
        return lambda x: attrs if any(x == t for t in targets) else None

    nodes = globals()
    for smp in dts:
        name = smp.replace(':', '')
        nodes[name] = Node(name)

    # Tree 1: smp1 is the root and every is_a target becomes a child node
    # of the term that points at it.
    for term, *parents in _isa_pairs(dts, smp1):
        term_name = term.replace(':', '')
        for parent in parents:
            parent_name = parent.replace(':', '')
            nodes[parent_name] = Node(parent_name, parent=nodes[term_name])
    smp1n = nodes[smpTmp1]
    print('==[ Tree 1 ]==\n',RenderTree(smp1n).by_attr())
    DotExporter(smp1n, nodeattrfunc=highlight(smp1n)).to_picture("tree1.png")

    # Attach smp2's chain in the opposite direction (term under its is_a
    # target). Reversed so that the top-most ancestors are placed first.
    for term, *parents in reversed(_isa_pairs(dts, smp2)):
        term_name = term.replace(':', '')
        for parent in parents:
            parent_name = parent.replace(':', '')
            nodes[term_name] = Node(term_name, parent=nodes[parent_name])

    # Re-read smp1's node: it has been replaced if smp1 is an ancestor of smp2.
    smp1n = nodes[smpTmp1]
    smp2n = nodes[smpTmp2]

    # Walker.walk() only fails when the two nodes do not share a root, so
    # test that directly instead of hiding every error behind a bare except.
    if smp1n.root is not smp2n.root:
        print('* No pairs.')
        find_subtree(dts,smp2)
        return

    aa = Walker().walk(smp1n, smp2n)
    # find_subtree re-creates the registered nodes; smp1n/smp2n keep
    # pointing at the combined tree built above.
    find_subtree(dts,smp2)
    print('==[ Tree ]==\n',RenderTree(smp1n).by_attr())
    UniqueDotExporter(smp1n, nodeattrfunc=highlight(smp1n, smp2n),
                      nodenamefunc=nodenamefunc).to_picture("tree_uniq.png")
    DotExporter(smp1n, nodeattrfunc=highlight(smp1n)).to_picture("tree.png")
    # aa is (upwards, common, downwards); downwards is empty when both
    # nodes are the same node.
    if aa[2]:
        print(aa[2][-1])


def _isa_pairs(dts, start):
    """Collect ``(term, is_a target, ...)`` tuples for ``start`` and its ancestors.

    The direct targets of ``start`` are emitted as individual 2-tuples;
    every ancestor is emitted once as ``(ancestor, *its is_a targets)``.
    Order is breadth-first and deterministic, duplicates are removed while
    keeping the first occurrence, and each term is expanded only once so a
    cyclic ``is_a`` chain cannot loop forever.
    """
    frontier = dts.get(start, {}).get('is_a', [])
    pairs = [(start, parent) for parent in frontier]
    expanded = set()
    while frontier:
        upcoming = []
        for term in frontier:
            if term in expanded:
                continue
            expanded.add(term)
            parents = dts.get(term, {}).get('is_a')
            if parents:
                upcoming.extend(parents)
                pairs.append((term, *parents))
        frontier = list(OrderedDict.fromkeys(upcoming))
    return list(OrderedDict.fromkeys(pairs))
def decorate():
    """Return the Graphviz styling used for highlighted nodes.

    Returns:
        A ``(color, fontcolor, shape)`` tuple of strings. ``color`` is a
        quoted value (the quotes are part of the string); a named colour
        such as 'lightgrey' is an alternative. ``shape`` may be one of
        box, circle, ellipse, record or plaintext.
    """
    highlight_style = ('\".7 .3 1.0\"', 'black', 'ellipse')
    return highlight_style
def nodenamefunc(node):
    """Return the label ``<name>:<depth>`` for ``node``.

    Reads the ``name`` and ``depth`` attributes of ``node``.
    """
    label_parts = (node.name, node.depth)
    return '%s:%s' % label_parts
def edgeattrfunc(node, child):
    """Return a Graphviz edge attribute labelling the edge ``node -> child``.

    The label has the form ``<node name>:<child name>``.
    """
    endpoints = (node.name, child.name)
    return 'label="%s:%s"' % endpoints
def find_subtree(dts,smp0):
    """Print and export the ``is_a`` ancestor tree of a single term.

    A fresh ``Node`` is created for every term in ``dts`` and stored in the
    module globals under the term id with the colon removed. The tree is
    rooted at ``smp0``; every ``is_a`` target becomes a child node of the
    term that points at it. The tree is printed as "Tree 2" and written to
    ``tree2.png`` with the root node highlighted.

    Args:
        dts: Term table as returned by ``make_table``.
        smp0: Id of the term whose ancestors are drawn (a key of ``dts``).

    Returns:
        None. Results are printed and written as a PNG file.
    """
    smpTmp = smp0.replace(':', '')
    color, fontcolor, shape = decorate()
    attrs = "style=filled,color="+color+",fontcolor="+fontcolor+",shape="+shape

    nodes = globals()
    for smp in dts:
        name = smp.replace(':', '')
        nodes[name] = Node(name)

    # Breadth-first walk up the is_a links. Direct targets of smp0 are
    # recorded as 2-tuples, each ancestor as (ancestor, *its targets).
    frontier = dts.get(smp0, {}).get('is_a', [])
    pair = [(smp0, parent) for parent in frontier]
    expanded = set()  # expand each term once; also stops on cyclic is_a
    while frontier:
        upcoming = []
        for term in frontier:
            if term in expanded:
                continue
            expanded.add(term)
            parents = dts.get(term, {}).get('is_a')
            if parents:
                upcoming.extend(parents)
                pair.append((term, *parents))
        # Order-preserving de-duplication keeps the result deterministic.
        frontier = list(OrderedDict.fromkeys(upcoming))
    pair = list(OrderedDict.fromkeys(pair))

    for term, *parents in pair:
        term_name = term.replace(':', '')
        for parent in parents:
            parent_name = parent.replace(':', '')
            nodes[parent_name] = Node(parent_name, parent=nodes[term_name])

    smpn = nodes[smpTmp]
    print('==[ Tree 2 ]==\n',RenderTree(smpn).by_attr())
    # Highlight this tree's own root node.
    DotExporter(smpn, nodeattrfunc=lambda x: attrs if x == smpn else None).to_picture("tree2.png")
def find_depth(dts, smp1, smp2):
    """Run ``find_tree`` for the two terms.

    Despite the name, no depth is computed here: the call is made for its
    printed and exported output only.

    Args:
        dts: Term table passed through to ``find_tree``.
        smp1: Id of the first term.
        smp2: Id of the second term.

    Returns:
        Always ``False``.
    """
    find_tree(dts, smp1, smp2)
    return False
if __name__ == '__main__':
    # Usage:
    #   python <script>                      -> two random terms from hp.obo
    #   python <script> TERM1 TERM2 [OBO]    -> the given terms, e.g.
    #                                           HP:0009699 HP:0100659
    # Term ids must be written as they appear in the OBO file (with colon).
    args = sys.argv[1:]
    if len(args) not in (0, 2, 3):
        sys.exit('usage: ' + sys.argv[0] + ' [TERM1 TERM2 [OBO_FILE]]')

    fname = args[2] if len(args) == 3 else 'hp.obo'
    dts = make_table(fname)

    if args:
        smp1, smp2 = args[0], args[1]
        unknown = [term for term in (smp1, smp2) if term not in dts]
        if unknown:
            sys.exit('* Unknown term id(s): ' + ', '.join(unknown))
    else:
        population = list(dts.keys())
        if len(population) < 2:
            # random.sample would raise ValueError on fewer than 2 terms.
            sys.exit('* Need at least two terms in ' + fname)
        smp1, smp2 = random.sample(population, 2)

    print()
    print('* Sample 1 :',smp1)
    print('* Sample 2 :',smp2)
    print()
    find_depth(dts,smp1,smp2)
댓글