# DataGenerationAPI.py
# Forked from SwagarikaGiri/Multi-PredGO.
#imports
import numpy as np
import pandas as pd
import pickle
import csv
import sys,os
import ForTestMultiPredModelAPI as PredictionModel
#files
root='data/'
deepgo_prefix_train='data/train-'
deepgo_prefix_test='data/test-'
pklsuffix='.pkl'
multipred_prefix_train='data/multimodaltrain-'
multipred_prefix_test='data/multimodaltest-'
multipred='data/combined-multimodal-'
accession_status_file='AccessionNumber_Structure_StatusFileWithAccessionIndex.pkl'
accession_status_file_path=root+accession_status_file
df1 = pd.read_pickle(accession_status_file_path)
accession=""
ontology=""
Accesion_No_IndexDict = dict()
global RETURN_OBJECT
RETURN_OBJECT=dict()
PAYLOAD=dict()
def generate_dictionary(index_list):
    """Record the position of every label of *index_list*.

    Each label is stored in the module-level ``Accesion_No_IndexDict`` with
    the zero-based position at which it first occurs in *index_list*.
    A label that is already present in the dictionary keeps its existing
    value.  Nothing is returned; the dictionary is updated in place.
    """
    for position, label in enumerate(index_list):
        # setdefault() only writes when the label is not there yet, so the
        # first position seen for a label always wins.
        Accesion_No_IndexDict.setdefault(label, position)
#level 2 function
def get_dataframe(baseCode, function,accession_number):
    """Return the dataset rows used to predict for one accession number.

    Parameters:
        baseCode: dataset family.  Only ``'multipred'`` is handled.
        function: ontology code; it is inserted into the pickle file name
            ``multipred + str(function) + pklsuffix``.
        accession_number: index label of the wanted row.

    Returns:
        * ``None`` when *baseCode* is not ``'multipred'``.
        * ``df.loc[...]`` for the index labels found at the accession's
          position and the position right after it (a single label when the
          accession is the last one of the index).
        * the string ``"Sorry the data was not loaded 2"`` when the file
          cannot be read or the accession number is not in its index.
    """
    if baseCode != 'multipred':
        # No other dataset family is supported; callers receive None.
        return None

    PathDataset = multipred + str(function) + pklsuffix
    try:
        df = pd.read_pickle(PathDataset)
        index = df.index.values
        # The position map is module-level state that generate_dictionary()
        # never overwrites.  Without clearing it, positions remembered from
        # a previously loaded ontology file would be reused for this file
        # and select the wrong rows.
        Accesion_No_IndexDict.clear()
        generate_dictionary(index)
        accession_index = Accesion_No_IndexDict[accession_number]
        next_acession_index = accession_index + 2
        return df.loc[index[accession_index:next_acession_index]]
    except Exception:
        # Deliberate best effort: any loading/lookup failure is reported as
        # a message.  ``Exception`` (instead of a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return "Sorry the data was not loaded 2"
#level 1 function
def load_train_test_data(accession_object,ontology):
    """Run the prediction model for one accession in one ontology.

    Parameters:
        accession_object: mapping-like row (for example the pandas row
            obtained by analyze_accession_status) providing the keys
            ``'accession'``, ``'status'`` and *ontology*.
        ontology: ``"bp"``, ``"cc"`` or ``"mf"``.

    Returns:
        * ``{ontology: prediction_list}`` when the prediction was made.
        * an empty dict when *ontology* is a key of *accession_object* but
          not one of the three ontology codes.
        * a message string when the status is not true, when the accession
          has no function in the requested ontology, or when the dataset
          could not be loaded (the loader's message is passed through
          instead of being dropped in favour of an empty dict).

    Raises:
        KeyError: if *accession_object* lacks ``'accession'``, ``'status'``
            or the *ontology* key.
    """
    missing_function_messages = {
        'bp': "Accession no does not have Biological Function",
        'cc': "Accession no does not have Cellular Component",
        'mf': "Accession no does not have Molecular Function",
    }

    accession_number = accession_object['accession']
    ontology_flag = accession_object[ontology]
    status = accession_object['status']

    # Compared with ``==`` on purpose: the value may not be the ``True`` /
    # ``False`` singletons themselves (presumably a NumPy boolean when the
    # row comes from pandas), so an identity test would be wrong.
    if not (status == True):  # noqa: E712
        if status == False:  # noqa: E712
            return "This accession no's structural information is not present in our database"
        return "This accession no's sequence, structure, PPIN information is not present in our database"

    predictions = {}
    if ontology not in missing_function_messages:
        return predictions
    if not ontology_flag:
        return missing_function_messages[ontology]

    test_data = get_dataframe('multipred', ontology, accession_number)
    if isinstance(test_data, str):
        # get_dataframe reports failures as a message string; hand it to the
        # caller so the failure is visible.
        return test_data

    predictions[ontology] = PredictionModel.main(ontology, test_data, 'cpu:0')
    return predictions
#root function
def analyze_accession_status(accession_number,ontology):
    """Look an accession number up and predict its functions in one ontology.

    Both arguments are converted with ``str()``.  The status table is read
    from ``accession_status_file_path``, the row for the accession number is
    printed, and the row is handed to load_train_test_data().

    Parameters:
        accession_number: accession number to look up in the status table.
        ontology: ontology code forwarded to load_train_test_data().

    Returns:
        Whatever load_train_test_data() returns (a dict of predictions or a
        message string), or the string ``"Sorry errorenous data"`` when
        anything fails, e.g. the accession number is not in the table.
        This message used to be wrapped in a set literal, unlike every other
        message returned by this module; it is now a plain string.
    """
    accession_number = str(accession_number)
    ontology = str(ontology)
    try:
        status_table = pd.read_pickle(accession_status_file_path)
        accession_object = status_table.loc[accession_number]
        print("Status of Accession No in following Ontologies 1 denote aceesion no has functionality in following ontology")
        print(accession_object)
        return load_train_test_data(accession_object, ontology)
    except Exception:
        # Deliberate best effort: every failure is reported with one generic
        # message.  ``Exception`` (instead of a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return "Sorry errorenous data"
if __name__ == '__main__':
    # Command-line entry point: ask for an accession number and an ontology,
    # then print the result of analyze_accession_status().
    #
    # ``raw_input`` only exists on Python 2; on Python 3 ``input`` has the
    # same behaviour.  Python 2's ``input`` is never used because it
    # evaluates whatever the user types.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    accession = read_line("Please enter the accession no \t")
    ontology = read_line("Please enter the ontology \t")
    message = analyze_accession_status(accession, ontology)
    print(message)