diff --git a/Load/drivers.py b/Load/drivers.py
index 8c0c617..ae18803 100644
--- a/Load/drivers.py
+++ b/Load/drivers.py
@@ -11,13 +11,19 @@ def add_driver_data(cursor, org_dict, feature_id, cvterm_id, dbxref_id, pub_id,
fs_sql = """ INSERT INTO feature_synonym (synonym_id, feature_id, pub_id) VALUES (%s, %s, %s) """
feat_rel_sql = """ INSERT INTO feature_relationship (subject_id, object_id, type_id) VALUES (%s, %s, %s) RETURNING feature_relationship_id """
- count = gene_count = 400000
+ # create the polypeptide feature for the DBD domain; dom_name is also reused when building the allele names below
+ dom_name = 'DBD'
+ cursor.execute(feat_sql, (None, org_dict['Dmel'], 'DBD and LBD domains', 'ss-XP_DBD and LBD domains',
+ "", 0, cvterm_id['polypeptide']))
+ feature_id[dom_name] = cursor.fetchone()[0]
+
+ count = gene_count = 500000
for start in ['Scer\\GAL4', 'Hsap\\RELA']:
# create gene
- gene_count += 1
org_id = org_dict[start[:4]]
sym_name = "{}".format(start)
- unique_name = 'FBgn:{:07d}'.format(gene_count)
+ count = count + 1
+ unique_name = 'FBgn{:07d}'.format(count)
print("Adding gene {} {} for species {} - syn {}".format(unique_name, gene_count, start[:4], sym_name))
# create dbxref, accession -> uniquename
cursor.execute(dbxref_sql, (db_id['FlyBase'], unique_name))
@@ -28,23 +34,59 @@ def add_driver_data(cursor, org_dict, feature_id, cvterm_id, dbxref_id, pub_id,
feature_id[sym_name] = gene_id = cursor.fetchone()[0]
for i in range(10):
- # create allele/driver.
+ # select f.name, cvt2.name, f.uniquename, o.abbreviation, cvt.name
+ # from feature_relationship fr, cvterm cvt2, cvterm cvt, feature f, organism o
+ # where f.type_id = cvt2.cvterm_id and fr.object_id = f.feature_id and fr.type_id = cvt.cvterm_id and
+ # o.organism_id = f.organism_id and subject_id = 23124422;
+ # name | name | uniquename | abbreviation | name
+ # ------------------+---------------------------------+-------------+--------------+---------------------------
+ # Scer\GAL4 | gene | FBgn0014445 | Scer | alleleof
+ # P{GAL4(DBD)-hb} | transgenic_transposable_element | FBtp0001259 | Ssss | associated_with
+ # pP{GAL4(DBD)-hb} | engineered_plasmid | FBmc0001249 | Ssss | gets_expression_data_from
+ # hb | gene | FBgn0001180 | Dmel | has_reg_region
+ if start == 'Scer\\GAL4':
+ gene_name = f'hb{i+1}'
+ else:
+ gene_name = f'pxn{i + 1}'
count = count + 1
- al_sym_name = "{}[{}]".format(start, count)
- unique_name = 'FBal:{:07d}'.format(count)
- # create dbxref, accession -> uniquename
- print("Adding allele {} {} for species {} - syn {}".format(unique_name, count, start[:4], sym_name))
+ al_sym_name = "{}{}.{}".format(start, dom_name, gene_name)
+ al_name = "{}[{}.{}]".format(start, dom_name, gene_name)
+ unique_name = 'FBal{:07d}'.format(count)
+ print("Adding allele {} {} for species {} - syn {}".format(unique_name, count, start[:4], al_name))
cursor.execute(dbxref_sql, (db_id['FlyBase'], unique_name))
al_dbxref_id = cursor.fetchone()[0]
- cursor.execute(feat_sql, (al_dbxref_id, org_id, al_sym_name, count, None, 0, cvterm_id['gene']))
- feature_id[al_sym_name] = allele_id = cursor.fetchone()[0]
+ cursor.execute(feat_sql, (al_dbxref_id, org_id, al_name, unique_name, None, 0, cvterm_id['allele']))
+ feature_id[al_name] = cursor.fetchone()[0]
+ # add two 'symbol' synonyms for the allele: al_sym_name, and al_sym_name without its 5-character species prefix
+ cursor.execute(syn_sql, (feature_id[al_name], cvterm_id['symbol'], al_sym_name))
+ syn_id = cursor.fetchone()[0]
+ cursor.execute(fs_sql, (syn_id, feature_id[al_name], pub_id))
+
+ cursor.execute(syn_sql, (feature_id[al_name], cvterm_id['symbol'], al_sym_name[5:])) # skip sp name
+ syn_id = cursor.fetchone()[0]
+ cursor.execute(fs_sql, (syn_id, feature_id[al_name], pub_id))
+ # Scer\GAL4 | gene | FBgn0014445 | Scer | alleleof
+ cursor.execute(feat_rel_sql, (feature_id[al_name], feature_id[start], cvterm_id['alleleof']))
+
+ # hb | gene | FBgn0001180 | Dmel | has_reg_region
+ unique_name = 'FBgn{:07d}'.format(count)
+ print("Adding gene {} for species {} - syn {}".format(unique_name, start[:4], gene_name))
+ cursor.execute(dbxref_sql, (db_id['FlyBase'], unique_name))
+ gene_dbxref_id = cursor.fetchone()[0]
+ cursor.execute(feat_sql, (gene_dbxref_id, org_id, gene_name, unique_name, None, 0, cvterm_id['gene']))
+ feature_id[gene_name] = cursor.fetchone()[0]
+ # add synonym
+ # select * from synonym where synonym_id = 6922299;
+ # synonym_id | name | type_id | synonym_sgml
+ # ------------+-------------------+---------+--------------------------
+ # 6922299 | Hsap\RELA[AD.Pxn] | 59978 | Hsap\RELAAD.Pxn
+ cursor.execute(syn_sql, (feature_id[gene_name], cvterm_id['symbol'], gene_name))
+ syn_id = cursor.fetchone()[0]
+ cursor.execute(fs_sql, (syn_id, feature_id[gene_name], pub_id))
+
+ cursor.execute(feat_rel_sql, (feature_id[al_name], feature_id[gene_name], cvterm_id['has_reg_region']))
- # add as feature relationship
- cursor.execute(feat_rel_sql, (allele_id, gene_id, cvterm_id['alleleof']))
+ # P{GAL4(DBD)-hb} | transgenic_transposable_element | FBtp0001259 | Ssss | associated_with
- # add synonym for allele
- cursor.execute(syn_sql, (al_sym_name, cvterm_id['symbol'], al_sym_name))
- symbol_id = cursor.fetchone()[0]
- # add feature_synonym for allele
- cursor.execute(fs_sql, (symbol_id, allele_id, pub_id))
+ # pP{GAL4(DBD)-hb} | engineered_plasmid | FBmc0001249 | Ssss | gets_expression_data_from
diff --git a/add-test_data.py b/add-test_data.py
index 9043dee..3c7b226 100755
--- a/add-test_data.py
+++ b/add-test_data.py
@@ -21,6 +21,7 @@
from Load.grp import add_grp_data
from Load.cell_line import add_cell_line_data
from Load.aberration import add_aberration_data
+from Load.drivers import add_driver_data
conn = psycopg2.connect(database="fb_test")
cursor = conn.cursor()
@@ -522,6 +523,8 @@ def load_pub_author_pubprop(parsed_yaml):
# Disease Implicated Variants (DIV)
add_div_data(cursor, organism_id, cv_cvterm_id, feature_id, pub_id, db_dbxref)
+# add drivers
+add_driver_data(cursor, organism_id, feature_id, cvterm_id, dbxref_id, pub_id, db_id)
# add chromosome_structure_variation
print("Adding chromosome_structure_variation data.")
diff --git a/data/cv_cvterm.yaml b/data/cv_cvterm.yaml
index 892ee3b..d4601c2 100644
--- a/data/cv_cvterm.yaml
+++ b/data/cv_cvterm.yaml
@@ -8,10 +8,13 @@ SO: ['chromosome_arm', 'chromosome', 'gene', 'mRNA', 'DNA', 'golden_path', 'ncRN
'natural population', 'cloned_region', 'engineered_region', 'transgenic_transposable_element',
'transposable_element_insertion_site', 'chromosome_band', 'allele', 'transposable_element',
'natural_transposable_element', 'gene_group', 'polypeptide', 'chromosome_breakpoint', 'engineered_plasmid', 'sgRNA',
- 'oligo', 'engineered_foreign_gene', 'point_mutation', 'cDNA_clone', 'TSS', 'rescue_region', 'insertion_site']
+ 'oligo', 'engineered_foreign_gene', 'point_mutation', 'cDNA_clone', 'TSS', 'rescue_region', 'insertion_site', 'synthetic_sequence']
molecular_function: ['mRNA binding']
-cellular_component: ['nucleolus', 'something' ,'extracellular space']
+cellular_component: ['nucleolus', 'something' ,'extracellular space', 'endoplasmic reticulum']
biological_process: ['activation of immune response', 'defense response to other organism', 'rRNA processing']
+FlyBase anatomy CV: ['embryo','dopaminergic PAM neuron 1', 'dopaminergic PAM neuron 5', 'dissociated larval fat cell',
+ 'embryonic/larval hemolymph',
+ 'anatomy 1', 'anatomy 2', 'anatomy 3', 'mesoderm']
####### End of order matters cv/cvterms
cell_line_cvtermprop type: ['basis']
@@ -24,8 +27,8 @@ CHEBI: []
DOID: []
disease_ontology: ['hh-1']
-experimental assays: ['distribution deduced from reporter (Gal4 UAS)']
-expression slots: ['stage', 'anatomy', 'assay']
+experimental assays: ['distribution deduced from reporter (Gal4 UAS)', 'in situ']
+expression slots: ['stage', 'anatomy', 'assay', 'cellular']
feature_cvtermprop type: [
'wt_class', 'aberr_class', 'tool_uses', 'transgene_uses property',
@@ -36,9 +39,7 @@ feature_relationshipprop type: ['fly_disease-implication_change', 'comment', 're
FlyBase: ['FlyBase analysis']
FlyBase_internal: ['pubprop type:curated_by']
-FlyBase anatomy CV: ['embryo','dopaminergic PAM neuron 1', 'dopaminergic PAM neuron 5',
- 'embryonic/larval hemolymph',
- 'anatomy 1', 'anatomy 2', 'anatomy 3']
+
FlyBase development CV: ['late embryonic stage', 'embryonic stage', 'adult stage', 'development 1', 'development 2', 'development 3',
'wandering third instar larval stage']
FlyBase miscellaneous CV: [
@@ -52,6 +53,7 @@ FlyBase miscellaneous CV: [
'pheno1', 'pheno2', 'pheno3', 'pheno4', 'pheno5',
'photoactivatable fluorescent protein', 'protein detection tool', 'project',
'qualifier', 'reagent collection', 'RNA detection tool', 'single balancer', 'spontaneous',
+ 'split system combination',
'transcriptome', 'umbrella project', 'unspecified']
GenBank feature qualifier: [
@@ -83,7 +85,7 @@ property type: [
'GO_internal_notes', 'GO_review_date', 'HDM_comment', 'OMIM_pheno_table', 'PCR_template',
'aberr_relationships',
'additional_disease-implication_change', 'allele_report_comment',
- 'aminoacid_rep', 'availability','balancer_status', 'bodypart_expression_marker', 'bound_moiety_comment', 'breeding_comment', 'category', 'cellular_description',
+ 'aminoacid_rep', 'availability','balancer_status', 'bodypart_expression_text', 'bodypart_expression_marker', 'bound_moiety_comment', 'breeding_comment', 'category', 'cellular_description',
'comment', 'complementation', 'cyto_change_comment', 'cyto_loc_comment', 'curated_phenotype', 'data_analysis', 'data_type', 'data_link', 'data_link_bdsc', 'deliberate_omission',
'deleted_segment',
'derived_cyto_location', 'description', 'diopt_ortholog', 'discoverer', 'disease_associated', 'div_comment',
@@ -131,7 +133,7 @@ relationship type: [
'identified_with', 'maps_to_clone', 'member_gene_of', 'molec_deletes', 'molec_dups',
'molec_nondeletes', 'molec_nondups', 'molec_partdeletes', 'molec_partdups',
'nomaps_to_clone', 'nondeletes', 'nonduplicates', 'originates_from',
- 'overlap_inferred', 'parent_grp', 'part_deletes', 'part_duplicates', 'partof',
+ 'overlap_inferred', 'partially_produced_by', 'parent_grp', 'part_deletes', 'part_duplicates', 'partof',
'primer_progenitor_of', 'producedby', 'progenitor', 'replacement_descendant_of', 'representative_isoform',
'recom_right_end', 'recom_left_end', 'related_tool', 'segregant_of', 'tagged_with', 'undefined_grp']
diff --git a/triggers/multiple_seqs.sql b/triggers/multiple_seqs.sql
index 406a2ee..68a3acc 100644
--- a/triggers/multiple_seqs.sql
+++ b/triggers/multiple_seqs.sql
@@ -16,7 +16,7 @@ $bob$
LANGUAGE plpgsql;
SELECT make_fb_seqs(ARRAY['al', 'ti', 'tp', 'te', 'mc', 'ms', 'ba', 'ab', 'gn', 'tr', 'pp', 'og',
- 'cl', 'gg', 'hh', 'ig', 'lc', 'rf', 'sf', 'sn', 'st', 'tc', 'to', 'ch']);
+ 'cl', 'gg', 'hh', 'ig', 'lc', 'rf', 'sf', 'sn', 'st', 'tc', 'to', 'ch', 'co']);
CREATE OR REPLACE FUNCTION public.feature_assignname_fn_i()
RETURNS trigger