Skip to content

Commit

Permalink
changes needed for expression tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
ianlongden committed Aug 8, 2023
1 parent ed3426f commit 9e4f018
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 27 deletions.
76 changes: 59 additions & 17 deletions Load/drivers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,19 @@ def add_driver_data(cursor, org_dict, feature_id, cvterm_id, dbxref_id, pub_id,
fs_sql = """ INSERT INTO feature_synonym (synonym_id, feature_id, pub_id) VALUES (%s, %s, %s) """
feat_rel_sql = """ INSERT INTO feature_relationship (subject_id, object_id, type_id) VALUES (%s, %s, %s) RETURNING feature_relationship_id """

count = gene_count = 400000
# create domain
dom_name = 'DBD'
cursor.execute(feat_sql, (None, org_dict['Dmel'], 'DBD and LBD domains', 'ss-XP_DBD and LBD domains',
"", 0, cvterm_id['polypeptide']))
feature_id[dom_name] = cursor.fetchone()[0]

count = gene_count = 500000
for start in ['Scer\\GAL4', 'Hsap\\RELA']:
# create gene
gene_count += 1
org_id = org_dict[start[:4]]
sym_name = "{}".format(start)
unique_name = 'FBgn:{:07d}'.format(gene_count)
count = count + 1
unique_name = 'FBgn{:07d}'.format(count)
print("Adding gene {} {} for species {} - syn {}".format(unique_name, gene_count, start[:4], sym_name))
# create dbxref, accession -> uniquename
cursor.execute(dbxref_sql, (db_id['FlyBase'], unique_name))
Expand All @@ -28,23 +34,59 @@ def add_driver_data(cursor, org_dict, feature_id, cvterm_id, dbxref_id, pub_id,
feature_id[sym_name] = gene_id = cursor.fetchone()[0]

for i in range(10):
# create allele/driver.
# select f.name, cvt2.name, f.uniquename, o.abbreviation, cvt.name
# from feature_relationship fr, cvterm cvt2, cvterm cvt, feature f, organism o
# where f.type_id = cvt2.cvterm_id and fr.object_id = f.feature_id and fr.type_id = cvt.cvterm_id and
# o.organism_id = f.organism_id and subject_id = 23124422;
# name | name | uniquename | abbreviation | name
# ------------------+---------------------------------+-------------+--------------+---------------------------
# Scer\GAL4 | gene | FBgn0014445 | Scer | alleleof
# P{GAL4(DBD)-hb} | transgenic_transposable_element | FBtp0001259 | Ssss | associated_with
# pP{GAL4(DBD)-hb} | engineered_plasmid | FBmc0001249 | Ssss | gets_expression_data_from
# hb | gene | FBgn0001180 | Dmel | has_reg_region
if start == 'Scer\\GAL4':
gene_name = f'hb{i+1}'
else:
gene_name = f'pxn{i + 1}'
count = count + 1
al_sym_name = "{}[{}]".format(start, count)
unique_name = 'FBal:{:07d}'.format(count)
# create dbxref, accession -> uniquename
print("Adding allele {} {} for species {} - syn {}".format(unique_name, count, start[:4], sym_name))
al_sym_name = "{}<up>{}.{}</up>".format(start, dom_name, gene_name)
al_name = "{}[{}.{}]".format(start, dom_name, gene_name)
unique_name = 'FBal{:07d}'.format(count)
print("Adding allele {} {} for species {} - syn {}".format(unique_name, count, start[:4], al_name))
cursor.execute(dbxref_sql, (db_id['FlyBase'], unique_name))
al_dbxref_id = cursor.fetchone()[0]

cursor.execute(feat_sql, (al_dbxref_id, org_id, al_sym_name, count, None, 0, cvterm_id['gene']))
feature_id[al_sym_name] = allele_id = cursor.fetchone()[0]
cursor.execute(feat_sql, (al_dbxref_id, org_id, al_name, unique_name, None, 0, cvterm_id['allele']))
feature_id[al_name] = cursor.fetchone()[0]
# add synonyms
cursor.execute(syn_sql, (feature_id[al_name], cvterm_id['symbol'], al_sym_name))
syn_id = cursor.fetchone()[0]
cursor.execute(fs_sql, (syn_id, feature_id[al_name], pub_id))

cursor.execute(syn_sql, (feature_id[al_name], cvterm_id['symbol'], al_sym_name[5:])) # skip sp name
syn_id = cursor.fetchone()[0]
cursor.execute(fs_sql, (syn_id, feature_id[al_name], pub_id))
# Scer\GAL4 | gene | FBgn0014445 | Scer | alleleof
cursor.execute(feat_rel_sql, (feature_id[al_name], feature_id[start], cvterm_id['alleleof']))

# hb | gene | FBgn0001180 | Dmel | has_reg_region
unique_name = 'FBgn{:07d}'.format(count)
print("Adding gene {} for species {} - syn {}".format(unique_name, start[:4], gene_name))
cursor.execute(dbxref_sql, (db_id['FlyBase'], unique_name))
gene_dbxref_id = cursor.fetchone()[0]
cursor.execute(feat_sql, (gene_dbxref_id, org_id, gene_name, unique_name, None, 0, cvterm_id['gene']))
feature_id[gene_name] = cursor.fetchone()[0]
# add synonym
# select * from synonym where synonym_id = 6922299;
# synonym_id | name | type_id | synonym_sgml
# ------------+-------------------+---------+--------------------------
# 6922299 | Hsap\RELA[AD.Pxn] | 59978 | Hsap\RELA<up>AD.Pxn</up>
cursor.execute(syn_sql, (feature_id[gene_name], cvterm_id['symbol'], gene_name))
syn_id = cursor.fetchone()[0]
cursor.execute(fs_sql, (syn_id, feature_id[gene_name], pub_id))

cursor.execute(feat_rel_sql, (feature_id[al_name], feature_id[gene_name], cvterm_id['has_reg_region']))

# add as feature relationship
cursor.execute(feat_rel_sql, (allele_id, gene_id, cvterm_id['alleleof']))
# P{GAL4(DBD)-hb} | transgenic_transposable_element | FBtp0001259 | Ssss | associated_with

# add synonym for allele
cursor.execute(syn_sql, (al_sym_name, cvterm_id['symbol'], al_sym_name))
symbol_id = cursor.fetchone()[0]
# add feature_synonym for allele
cursor.execute(fs_sql, (symbol_id, allele_id, pub_id))
# pP{GAL4(DBD)-hb} | engineered_plasmid | FBmc0001249 | Ssss | gets_expression_data_from
3 changes: 3 additions & 0 deletions add-test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from Load.grp import add_grp_data
from Load.cell_line import add_cell_line_data
from Load.aberration import add_aberration_data
from Load.drivers import add_driver_data

conn = psycopg2.connect(database="fb_test")
cursor = conn.cursor()
Expand Down Expand Up @@ -522,6 +523,8 @@ def load_pub_author_pubprop(parsed_yaml):
# Disease Implicated Variants (DIV)
add_div_data(cursor, organism_id, cv_cvterm_id, feature_id, pub_id, db_dbxref)

# add drivers
add_driver_data(cursor, organism_id, feature_id, cvterm_id, dbxref_id, pub_id, db_id)

# add chromosome_structure_variation
print("Adding chromosome_structure_variation data.")
Expand Down
20 changes: 11 additions & 9 deletions data/cv_cvterm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ SO: ['chromosome_arm', 'chromosome', 'gene', 'mRNA', 'DNA', 'golden_path', 'ncRN
'natural population', 'cloned_region', 'engineered_region', 'transgenic_transposable_element',
'transposable_element_insertion_site', 'chromosome_band', 'allele', 'transposable_element',
'natural_transposable_element', 'gene_group', 'polypeptide', 'chromosome_breakpoint', 'engineered_plasmid', 'sgRNA',
'oligo', 'engineered_foreign_gene', 'point_mutation', 'cDNA_clone', 'TSS', 'rescue_region', 'insertion_site']
'oligo', 'engineered_foreign_gene', 'point_mutation', 'cDNA_clone', 'TSS', 'rescue_region', 'insertion_site', 'synthetic_sequence']
molecular_function: ['mRNA binding']
cellular_component: ['nucleolus', 'something' ,'extracellular space']
cellular_component: ['nucleolus', 'something' ,'extracellular space', 'endoplasmic reticulum']
biological_process: ['activation of immune response', 'defense response to other organism', 'rRNA processing']
FlyBase anatomy CV: ['embryo','dopaminergic PAM neuron 1', 'dopaminergic PAM neuron 5', 'dissociated larval fat cell',
'embryonic/larval hemolymph',
'anatomy 1', 'anatomy 2', 'anatomy 3', 'mesoderm']
####### End of order matters cv/cvterms

cell_line_cvtermprop type: ['basis']
Expand All @@ -24,8 +27,8 @@ CHEBI: []
DOID: []
disease_ontology: ['hh-1']

experimental assays: ['distribution deduced from reporter (Gal4 UAS)']
expression slots: ['stage', 'anatomy', 'assay']
experimental assays: ['distribution deduced from reporter (Gal4 UAS)', 'in situ']
expression slots: ['stage', 'anatomy', 'assay', 'cellular']

feature_cvtermprop type: [
'wt_class', 'aberr_class', 'tool_uses', 'transgene_uses property',
Expand All @@ -36,9 +39,7 @@ feature_relationshipprop type: ['fly_disease-implication_change', 'comment', 're

FlyBase: ['FlyBase analysis']
FlyBase_internal: ['pubprop type:curated_by']
FlyBase anatomy CV: ['embryo','dopaminergic PAM neuron 1', 'dopaminergic PAM neuron 5',
'embryonic/larval hemolymph',
'anatomy 1', 'anatomy 2', 'anatomy 3']

FlyBase development CV: ['late embryonic stage', 'embryonic stage', 'adult stage', 'development 1', 'development 2', 'development 3',
'wandering third instar larval stage']
FlyBase miscellaneous CV: [
Expand All @@ -52,6 +53,7 @@ FlyBase miscellaneous CV: [
'pheno1', 'pheno2', 'pheno3', 'pheno4', 'pheno5',
'photoactivatable fluorescent protein', 'protein detection tool', 'project',
'qualifier', 'reagent collection', 'RNA detection tool', 'single balancer', 'spontaneous',
'split system combination',
'transcriptome', 'umbrella project', 'unspecified']

GenBank feature qualifier: [
Expand Down Expand Up @@ -83,7 +85,7 @@ property type: [
'GO_internal_notes', 'GO_review_date', 'HDM_comment', 'OMIM_pheno_table', 'PCR_template',
'aberr_relationships',
'additional_disease-implication_change', 'allele_report_comment',
'aminoacid_rep', 'availability','balancer_status', 'bodypart_expression_marker', 'bound_moiety_comment', 'breeding_comment', 'category', 'cellular_description',
'aminoacid_rep', 'availability','balancer_status', 'bodypart_expression_text', 'bodypart_expression_marker', 'bound_moiety_comment', 'breeding_comment', 'category', 'cellular_description',
'comment', 'complementation', 'cyto_change_comment', 'cyto_loc_comment', 'curated_phenotype', 'data_analysis', 'data_type', 'data_link', 'data_link_bdsc', 'deliberate_omission',
'deleted_segment',
'derived_cyto_location', 'description', 'diopt_ortholog', 'discoverer', 'disease_associated', 'div_comment',
Expand Down Expand Up @@ -131,7 +133,7 @@ relationship type: [
'identified_with', 'maps_to_clone', 'member_gene_of', 'molec_deletes', 'molec_dups',
'molec_nondeletes', 'molec_nondups', 'molec_partdeletes', 'molec_partdups',
'nomaps_to_clone', 'nondeletes', 'nonduplicates', 'originates_from',
'overlap_inferred', 'parent_grp', 'part_deletes', 'part_duplicates', 'partof',
'overlap_inferred', 'partially_produced_by', 'parent_grp', 'part_deletes', 'part_duplicates', 'partof',
'primer_progenitor_of', 'producedby', 'progenitor', 'replacement_descendant_of', 'representative_isoform',
'recom_right_end', 'recom_left_end', 'related_tool', 'segregant_of', 'tagged_with', 'undefined_grp']

Expand Down
2 changes: 1 addition & 1 deletion triggers/multiple_seqs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ $bob$
LANGUAGE plpgsql;

SELECT make_fb_seqs(ARRAY['al', 'ti', 'tp', 'te', 'mc', 'ms', 'ba', 'ab', 'gn', 'tr', 'pp', 'og',
'cl', 'gg', 'hh', 'ig', 'lc', 'rf', 'sf', 'sn', 'st', 'tc', 'to', 'ch']);
'cl', 'gg', 'hh', 'ig', 'lc', 'rf', 'sf', 'sn', 'st', 'tc', 'to', 'ch', 'co']);

CREATE OR REPLACE FUNCTION public.feature_assignname_fn_i()
RETURNS trigger
Expand Down

0 comments on commit 9e4f018

Please sign in to comment.