본문 바로가기
Tools

Find connection

by wycho 2020. 11. 4.

Pedigree 또는 multi-position network가 있을 때, 항목 간의 연관성을 tree로 보기 위한 프로그램이다. 이 프로그램은 서로 연관성 있는 두 항목을 대상으로 만들었다.

 

Python anytree library를 사용했다. anytree의 단점은 multi-parent를 지원하지 않아, 처음 설정된 parent를 따르게 된다는 점이다.

 

[ Anytree ]

- Homepage : https://anytree.readthedocs.io/en/latest/

- Manual, PDF : https://readthedocs.org/projects/anytree/downloads/pdf/latest/

 

[ Graphviz ]

- Homepage : https://graphviz.org/

- Manual : https://graphviz.org/documentation/

 

예를 들어, HP0009699와 HP0100659가 어떻게 연결되어있는지에 대해 아래 그림과 같이 확인이 가능하다.

출력은 각각의 tree와 HP0009699에서 HP0100659로 연결된 2종류의 tree이며, 총 4가지로 구성되어 있다.

 

 

Prerequisites:

$ pip3 install anytree
$ sudo yum install graphviz -y
$ wget http://purl.obolibrary.org/obo/hp.obo

 

Useful technique:

1) globals()['{}'.format(smp)]=Node(smp)
2) pair=list(OrderedDict.fromkeys(tuple(sub) for sub in pair))

1) 문자열을 변수명으로 사용할 수 있게 해준다.
2) 순서를 유지하면서 중복요소를 제거해준다.

 

Code:

from anytree import RenderTree, Node, Walker
from anytree.exporter import DotExporter, UniqueDotExporter

from collections import OrderedDict

import sys
import random
def make_table(fname):
    """Parse the ``[Term]`` stanzas of an OBO file into a nested dict.

    Only stanzas introduced by a ``[Term]`` line are read; a stanza ends at
    the first blank line or at end of file.

    Args:
        fname: Path to the OBO file (e.g. ``hp.obo``).

    Returns:
        A dict mapping each term id to a dict of ``tag -> list of values``.
        For ``is_a`` tags only the parent id is kept (the trailing
        `` ! name`` part is dropped). Stanzas without an ``id`` tag are
        skipped.
    """
    dts = {}

    # Single pass with a context manager: the handle is always closed and the
    # file is no longer read twice just to count its lines.
    with open(fname, 'r', encoding='utf-8') as f:
        lines = iter(f)
        for line in lines:
            if line.rstrip() != '[Term]':
                continue

            dt = {}
            # Keep consuming the same iterator until the stanza ends.
            for line in lines:
                # partition() splits on the first ': ' only, so values that
                # themselves contain ': ' stay in one piece.
                key, sep, value = line.rstrip().partition(': ')
                if key == '':
                    break
                if key == 'is_a':
                    value = value.split(' ! ')[0]

                values = dt.setdefault(key, [])
                if sep:
                    values.append(value)

            # A stanza without an id cannot be indexed; skip it rather than
            # filing it under the id of an earlier term.
            if dt.get('id'):
                dts[dt['id'][-1]] = dt
    return dts


def _collect_is_a_pairs(dts, start):
    """Gather ``(term, parent, ...)`` tuples by following ``is_a`` links from *start*.

    The first tuples pair *start* with each of its direct parents; every
    later tuple holds a visited term followed by all of its parents. Terms
    missing from *dts* or lacking an ``is_a`` entry end their branch.
    Duplicate tuples are dropped while keeping first-seen order.
    """
    try:
        frontier = dts[start]['is_a']
    except KeyError:
        return []

    pairs = [[start, parent] for parent in frontier]
    while frontier:
        found = []
        for term in frontier:
            try:
                parents = dts[term]['is_a']
            except KeyError:
                continue
            found += parents
            pairs.append([term] + parents)
        # set() collapses parents that were reached through several children.
        frontier = list(set(found))

    return list(OrderedDict.fromkeys(tuple(sub) for sub in pairs))


def find_tree(dts,smp1,smp2):
    """Print and export the ``is_a`` tree of *smp1* and its link to *smp2*.

    Nodes are stored in module globals under the colon-less term id. The
    tree of *smp1* is printed and written to ``tree1.png``. The ancestors of
    *smp2* are then attached in the opposite direction, and if a walk from
    *smp1* to *smp2* exists the combined tree is printed and written to
    ``tree_uniq.png`` and ``tree.png``. ``find_subtree`` is called for
    *smp2* in either case.

    Args:
        dts: Term table as returned by ``make_table``.
        smp1: Id of the first term (e.g. ``'HP:0009699'``).
        smp2: Id of the second term.

    Raises:
        KeyError: if *smp1* or *smp2* is not a key of *dts*.
    """
    # Imported locally so the narrowed handler below does not depend on the
    # file-level import list.
    from anytree.walker import WalkError

    smpTmp1 = ''.join(smp1.split(':'))
    smpTmp2 = ''.join(smp2.split(':'))

    color, fontcolor, shape = decorate()
    highlight = "style=filled,color="+color+",fontcolor="+fontcolor+",shape="+shape

    # Start from one fresh, unattached node per known term.
    for term in dts:
        name = ''.join(term.split(':'))
        globals()[name] = Node(name)

    # Tree 1: smp1 is the root and every is_a parent hangs below its child.
    for smpList in _collect_is_a_pairs(dts, smp1):
        child_name = ''.join(smpList[0].split(':'))
        for parent in smpList[1:]:
            parent_name = ''.join(parent.split(':'))
            globals()[parent_name] = Node(parent_name, parent=globals()[child_name])

    tree1_root = globals()[smpTmp1]

    print('==[ Tree 1 ]==\n', RenderTree(tree1_root).by_attr())
    DotExporter(tree1_root, nodeattrfunc=lambda x: highlight if x == tree1_root else None).to_picture("tree1.png")

    # Attach the ancestors of smp2 the other way round (child below parent),
    # processing the pairs collected last first.
    for smpList in reversed(_collect_is_a_pairs(dts, smp2)):
        child_name = ''.join(smpList[0].split(':'))
        for parent in smpList[1:]:
            parent_name = ''.join(parent.split(':'))
            globals()[child_name] = Node(child_name, parent=globals()[parent_name])

    # Re-read both nodes: the loop above may have replaced them.
    smp1n = globals()[smpTmp1]
    smp2n = globals()[smpTmp2]

    # Only a failed walk means "no connection"; any other error (missing
    # Graphviz binary, a bug in a callee, ...) must surface as itself.
    try:
        aa = Walker().walk(smp1n, smp2n)
    except WalkError:
        print('* No pairs.')
        find_subtree(dts, smp2)
        return

    find_subtree(dts, smp2)
    print('==[  Tree  ]==\n', RenderTree(smp1n).by_attr())
    UniqueDotExporter(smp1n, nodeattrfunc=lambda x: highlight if x == smp1n or x == smp2n else None, nodenamefunc=nodenamefunc).to_picture("tree_uniq.png")
    DotExporter(smp1n, nodeattrfunc=lambda x: highlight if x == smp1n else None).to_picture("tree.png")
    # The downward part of the walk is empty when both ids name the same node.
    if aa[2]:
        print(aa[2][-1])

def decorate():
    """Return the ``(color, fontcolor, shape)`` strings used to highlight nodes."""
    style = {
        # The value carries its own double quotes; 'lightgrey' is an alternative.
        'color': '".7 .3 1.0"',
        'fontcolor': 'black',
        # Other shapes: box, circle, ellipse, record and plaintext.
        'shape': 'ellipse',
    }
    return style['color'], style['fontcolor'], style['shape']
    
def nodenamefunc(node):
    """Build a node identifier of the form ``<name>:<depth>``."""
    fields = (node.name, node.depth)
    return '%s:%s' % fields

def edgeattrfunc(node, child):
    """Build an edge ``label`` attribute of the form ``<node name>:<child name>``."""
    endpoints = (node.name, child.name)
    return 'label="%s:%s"' % endpoints
    
    
def find_subtree(dts,smp0):
    """Print and export the ``is_a`` ancestry tree rooted at *smp0*.

    All nodes in module globals are first replaced by fresh, unattached
    nodes (keyed by the colon-less term id). Every ``is_a`` parent is then
    hung below its child, so *smp0* is the root. The tree is printed and
    written to ``tree2.png`` with the root highlighted.

    Args:
        dts: Term table as returned by ``make_table``.
        smp0: Id of the term whose ancestry is drawn (e.g. ``'HP:0100659'``).

    Raises:
        KeyError: if *smp0* is not a key of *dts*.
    """
    smpTmp = ''.join(smp0.split(':'))

    color, fontcolor, shape = decorate()

    for term in dts:
        name = ''.join(term.split(':'))
        globals()[name] = Node(name)

    # Collect (term, parent, ...) groups level by level, starting at smp0.
    pair = []
    try:
        childList = dts[smp0]['is_a']
    except KeyError:
        childList = []
    for parent in childList:
        pair.append([smp0, parent])

    while len(childList) > 0:
        child = []
        for term in childList:
            try:
                parents = dts[term]['is_a']
            except KeyError:
                # Unknown term or one without is_a: the branch ends here.
                continue
            child += parents
            pair.append([term] + parents)

        # set() collapses parents that were reached through several children.
        childList = list(set(child))

    # Drop duplicate groups while keeping first-seen order.
    pair = list(OrderedDict.fromkeys(tuple(sub) for sub in pair))
    for smpList in pair:
        child_name = ''.join(smpList[0].split(':'))
        for parent in smpList[1:]:
            parent_name = ''.join(parent.split(':'))
            globals()[parent_name] = Node(parent_name, parent=globals()[child_name])

    smpn = globals()[smpTmp]

    print('==[ Tree 2 ]==\n',RenderTree(smpn).by_attr())
    # Highlight this tree's own root; the previous comparison against
    # `smp1n` named a variable that does not exist in this scope.
    DotExporter(smpn,nodeattrfunc=lambda x: "style=filled,color="+color+",fontcolor="+fontcolor+",shape="+shape if x == smpn else None).to_picture("tree2.png")


def find_depth(dts,smp1,smp2):
    """Run ``find_tree`` for the two terms; the return value is always ``False``."""
    result = False
    find_tree(dts, smp1, smp2)
    return result


if __name__ == '__main__':
    # Usage: script.py [TERM_ID_1 TERM_ID_2]
    # With exactly two term ids on the command line those terms are compared;
    # with any other argument count a random pair is drawn, as before.
    fname='hp.obo'
    dts=make_table(fname)

    if len(sys.argv) == 3:
        smp1,smp2=sys.argv[1:3]
        unknown=[smp for smp in (smp1,smp2) if smp not in dts]
        if unknown:
            # Fail early with a readable message instead of a KeyError later.
            sys.exit('* Unknown term id(s): '+', '.join(unknown))
    else:
        population=list(dts.keys())
        smp1,smp2=random.sample(population,2)

    print()
    print('* Sample 1 :',smp1)
    print('* Sample 2 :',smp2)
    print()

    find_depth(dts,smp1,smp2)

'Tools' 카테고리의 다른 글

docker  (0) 2021.01.18
VisiData  (0) 2020.11.20
Jobs for you  (0) 2020.10.22
tmux-cssh  (0) 2020.10.22
TabView  (0) 2020.10.08

댓글