Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SCRUM-4513 #1726

Open
wants to merge 7 commits into
base: alpha
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.alliancegenome.curation_api.controllers.crud;

import org.alliancegenome.curation_api.controllers.base.BaseEntityCrudController;
import org.alliancegenome.curation_api.dao.CrossReferenceDAO;
import org.alliancegenome.curation_api.interfaces.crud.BiogridOrcCrudInterface;
import org.alliancegenome.curation_api.jobs.executors.BiogridOrcExecutor;
import org.alliancegenome.curation_api.model.entities.CrossReference;
import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcIngestFmsDTO;
import org.alliancegenome.curation_api.response.APIResponse;
import org.alliancegenome.curation_api.services.CrossReferenceService;

import jakarta.annotation.PostConstruct;
import jakarta.enterprise.context.RequestScoped;
import jakarta.inject.Inject;

/**
 * Request-scoped REST controller backing {@link BiogridOrcCrudInterface}.
 *
 * <p>The inherited CRUD operations are served by the injected
 * {@link CrossReferenceService}; the bulk upload endpoint is forwarded to
 * {@link BiogridOrcExecutor}.
 */
@RequestScoped
public class BiogridOrcCrudController
	extends BaseEntityCrudController<CrossReferenceService, CrossReference, CrossReferenceDAO>
	implements BiogridOrcCrudInterface {

	@Inject
	CrossReferenceService crossReferenceService;

	@Inject
	BiogridOrcExecutor biogridOrcExecutor;

	/**
	 * Registers the injected cross reference service with the base controller
	 * once dependency injection has completed.
	 */
	@Override
	@PostConstruct
	protected void init() {
		this.setService(this.crossReferenceService);
	}

	/**
	 * Runs a BioGRID ORCS load for the records contained in the posted payload.
	 *
	 * @param dataProvider   abbreviation of the data provider taken from the request path
	 * @param biogridOrcData request payload whose {@code data} list is handed to the executor
	 * @return the response produced by {@link BiogridOrcExecutor#runLoadApi}
	 */
	@Override
	public APIResponse updateBiogridOrc(String dataProvider, BiogridOrcIngestFmsDTO biogridOrcData) {
		final var ingestRecords = biogridOrcData.getData();
		return this.biogridOrcExecutor.runLoadApi(dataProvider, ingestRecords);
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

@ApplicationScoped
public class CrossReferenceDAO extends BaseSQLDAO<CrossReference> {
Expand All @@ -32,6 +33,22 @@ public Map<String, Long> getGenesWithCrossRefs(ResourceDescriptorPage page) {
return ensemblGeneMap;
}

/**
 * Looks up the genomic entities that are linked to cross references with one of
 * the given referenced curies.
 *
 * <p>If several join rows exist for the same referenced curie, the id of the
 * row returned last wins, because the result is keyed by curie.
 *
 * @param referencedCuries referenced curies to search for; may be {@code null} or empty
 * @return a mutable map from referenced curie to genomic entity id; empty when
 *         no curies were supplied or nothing matched
 */
public Map<String, Long> getGenesWithCrossRefs(Set<String> referencedCuries) {
	Map<String, Long> idCurieMap = new HashMap<>();
	// Binding an empty collection would expand to "IN ()", which is not valid SQL on
	// every database, so there is nothing to query when no curies are given.
	if (referencedCuries == null || referencedCuries.isEmpty()) {
		return idCurieMap;
	}

	String sql = """
		select gc.genomicentity_id, cr.referencedcurie from genomicentity_crossreference as gc, crossreference as cr
		where gc.crossreferences_id = cr.id AND cr.referencedCurie IN (:referencedCuries)
		""";
	Query query = entityManager.createNativeQuery(sql);
	query.setParameter("referencedCuries", referencedCuries);

	// Each row is [genomicentity_id, referencedcurie], matching the select list above.
	@SuppressWarnings("unchecked")
	List<Object[]> rows = query.getResultList();
	for (Object[] row : rows) {
		idCurieMap.put((String) row[1], (Long) row[0]);
	}
	return idCurieMap;
}

public Integer persistAccessionGeneAssociated(Long crossReferenceID, Long geneID) {
String sql = """
insert into genomicentity_crossreference (crossreferences_id,genomicentity_id)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.alliancegenome.curation_api.interfaces.crud;

import com.fasterxml.jackson.annotation.JsonView;
import jakarta.ws.rs.*;
import jakarta.ws.rs.core.MediaType;
import org.alliancegenome.curation_api.interfaces.base.crud.BaseCreateControllerInterface;
import org.alliancegenome.curation_api.model.entities.CrossReference;
import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcIngestFmsDTO;
import org.alliancegenome.curation_api.response.APIResponse;
import org.alliancegenome.curation_api.view.View;
import org.eclipse.microprofile.openapi.annotations.tags.Tag;


/**
 * JAX-RS contract for the BioGRID ORCS endpoints, rooted at {@code /biogrid-orc}.
 *
 * <p>All operations consume and produce JSON. In addition to the bulk upload
 * declared here, the interface inherits the operations of
 * {@link BaseCreateControllerInterface} for {@link CrossReference}.
 */
@Path("/biogrid-orc")
@Tag(name = "CRUD - Biogrid Orc")
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
public interface BiogridOrcCrudInterface extends BaseCreateControllerInterface<CrossReference> {


/**
 * Bulk upload of BioGRID ORCS records via
 * {@code POST /biogrid-orc/bulk/{dataProvider}/biogridfile}.
 *
 * @param dataProvider   value of the {@code dataProvider} path segment
 * @param biogridOrcData JSON request body carrying the records to load
 * @return the API response, serialized with the {@code View.FieldsAndLists} JSON view
 */
@POST
@Path("/bulk/{dataProvider}/biogridfile")
@JsonView(View.FieldsAndLists.class)
APIResponse updateBiogridOrc(@PathParam("dataProvider") String dataProvider, BiogridOrcIngestFmsDTO biogridOrcData);

}
Original file line number Diff line number Diff line change
@@ -1,17 +1,32 @@
package org.alliancegenome.curation_api.jobs.executors;

import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

import org.alliancegenome.curation_api.exceptions.ObjectUpdateException.ObjectUpdateExceptionData;
import org.alliancegenome.curation_api.dao.CrossReferenceDAO;
import org.alliancegenome.curation_api.dao.GeneDAO;
import org.alliancegenome.curation_api.dao.ResourceDescriptorPageDAO;
import org.alliancegenome.curation_api.jobs.util.CsvSchemaBuilder;
import org.alliancegenome.curation_api.model.entities.CrossReference;
import org.alliancegenome.curation_api.model.entities.DataProvider;
import org.alliancegenome.curation_api.model.entities.Organization;
import org.alliancegenome.curation_api.model.entities.ResourceDescriptorPage;
import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
import org.alliancegenome.curation_api.model.ingest.dto.fms.BiogridOrcFmsDTO;
import org.alliancegenome.curation_api.response.APIResponse;
import org.alliancegenome.curation_api.response.LoadHistoryResponce;
import org.alliancegenome.curation_api.services.CrossReferenceService;
import org.alliancegenome.curation_api.services.DataProviderService;
import org.alliancegenome.curation_api.services.OrganizationService;
import org.alliancegenome.curation_api.util.ProcessDisplayHelper;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
Expand All @@ -23,16 +38,54 @@
import com.fasterxml.jackson.dataformat.csv.CsvSchema;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;

@ApplicationScoped
public class BiogridOrcExecutor extends LoadFileExecutor {

@Inject
ResourceDescriptorPageDAO resourceDescriptorPageDAO;

@Inject
GeneDAO geneDAO;

@Inject
CrossReferenceDAO crossRefDAO;

@Inject
CrossReferenceService crossReferenceService;

@Inject
OrganizationService organizationService;

@Inject
DataProviderService dataProviderService;

public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
try (TarArchiveInputStream tarInputStream = new TarArchiveInputStream(
new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())))) {
new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath())))) {
TarArchiveEntry tarEntry;

Set<String> biogridIds = new HashSet<>();
List<BiogridOrcFmsDTO> biogridData = new ArrayList<>();
String name = bulkLoadFileHistory.getBulkLoad().getName();
String dataProviderName = name.substring(0, name.indexOf(" "));

Organization organization = organizationService.getByAbbr(dataProviderName).getEntity();

HashMap<String, Object> rdpParams = new HashMap<>();
rdpParams.put("name", "biogrid/orcs");
ResourceDescriptorPage resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams).getSingleResult();

List<Long> dataProviderIdsBefore = new ArrayList<>(
dataProviderService.getDataProviderMap(organization, resourceDescriptorPage)
.values()
.stream()
.map(DataProvider::getId)
.toList());

dataProviderIdsBefore.removeIf(Objects::isNull);

List<Long> dataProviderIdsLoaded = new ArrayList<>();

while ((tarEntry = tarInputStream.getNextEntry()) != null) {

Expand All @@ -53,40 +106,61 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) {
.with(biogridOrcFmsSchema)
.readValues(tarInputStream.readAllBytes());

List<BiogridOrcFmsDTO> biogridData = it.readAll();
runLoad(bulkLoadFileHistory, biogridData, biogridIds);
biogridData.addAll(it.readAll());

}
runLoad(bulkLoadFileHistory, biogridData, resourceDescriptorPage, organization, dataProviderService,
dataProviderIdsLoaded);

runCleanup(dataProviderService, bulkLoadFileHistory, dataProviderName, dataProviderIdsBefore,
dataProviderIdsLoaded, "Biogrid Orc Load Type");
} catch (Exception e) {
failLoad(bulkLoadFileHistory, e);
e.printStackTrace();
}
}

private boolean runLoad(BulkLoadFileHistory history, List<BiogridOrcFmsDTO> biogridList, Set<String> biogridIds) {
private boolean runLoad(BulkLoadFileHistory history, List<BiogridOrcFmsDTO> biogridList,
ResourceDescriptorPage resourceDescriptorPage, Organization organization,
DataProviderService dataProviderService, List<Long> dataProviderIdsLoaded) {
ProcessDisplayHelper ph = new ProcessDisplayHelper();
ph.addDisplayHandler(loadProcessDisplayService);
if (CollectionUtils.isNotEmpty(biogridList)) {
String loadMessage = biogridList.get(0).getClass().getSimpleName() + " update";
ph.startProcess(loadMessage, biogridList.size());
try {
String loadMessage = "BioGrid update";
Set<String> referencedCuries = populateEntrezIdsFromFiles(biogridList, history);
ph.startProcess(loadMessage, referencedCuries.size());
updateHistory(history);

updateHistory(history);
for (BiogridOrcFmsDTO biogridOrcFmsDTO : biogridList) {
try {
if (biogridOrcFmsDTO.getIdentifierType().equals("ENTREZ_GENE")) {
String identifier = "NCBI_Gene:" + biogridOrcFmsDTO.getIdentifierId();
biogridIds.add(identifier);
Map<String, Long> genomicEntityCrossRefMap = crossReferenceService
.getGenomicEntityCrossRefMap(referencedCuries);

for (String referencedCurie : referencedCuries) {

CrossReference newCrossRef = new CrossReference();
newCrossRef.setReferencedCurie(referencedCurie);
newCrossRef.setDisplayName("BioGRID CRISPR Screen Cell Line Phenotypes");
newCrossRef.setResourceDescriptorPage(resourceDescriptorPage);

DataProvider provider = new DataProvider();
provider.setSourceOrganization(organization);
provider.setCrossReference(newCrossRef);

DataProvider entity = dataProviderService
.insertBioGridOrcDataProvider(provider, genomicEntityCrossRefMap.get(referencedCurie))
.getEntity();

if (entity != null) {
dataProviderIdsLoaded.add(entity.getId());
history.incrementCompleted();
} else {
history.incrementSkipped();

}
} catch (Exception e) {
e.printStackTrace();
history.incrementFailed();
addException(history,
new ObjectUpdateExceptionData(biogridOrcFmsDTO, e.getMessage(), e.getStackTrace()));

}
} catch (Exception e) {
e.printStackTrace();
history.incrementFailed();
ph.progressProcess();
}
updateHistory(history);
Expand All @@ -96,4 +170,42 @@ private boolean runLoad(BulkLoadFileHistory history, List<BiogridOrcFmsDTO> biog

return true;
}

/**
 * Runs a BioGRID ORCS load for records submitted through the API instead of a
 * bulk load file.
 *
 * <p>A new {@link BulkLoadFileHistory} sized to the number of submitted records
 * is persisted, the records are processed by {@code runLoad}, and the history
 * is marked finished before it is returned.
 *
 * @param dataProviderName abbreviation used to look up the source organization
 * @param biogridDTOs      records to load; {@code null} is treated as an empty submission
 * @return a {@link LoadHistoryResponce} wrapping the history of this load
 */
public APIResponse runLoadApi(String dataProviderName, List<BiogridOrcFmsDTO> biogridDTOs) {
	List<Long> dataProviderIdsLoaded = new ArrayList<>();
	Organization organization = organizationService.getByAbbr(dataProviderName).getEntity();

	HashMap<String, Object> rdpParams = new HashMap<>();
	rdpParams.put("name", "biogrid/orcs");
	ResourceDescriptorPage resourceDescriptorPage = resourceDescriptorPageDAO.findByParams(rdpParams).getSingleResult();

	// A request body without a "data" array arrives here as null; size the history
	// at zero instead of failing with a NullPointerException (runLoad already
	// tolerates a null or empty list).
	int recordCount = biogridDTOs == null ? 0 : biogridDTOs.size();

	BulkLoadFileHistory history = new BulkLoadFileHistory(recordCount);
	history = bulkLoadFileHistoryDAO.persist(history);
	runLoad(history, biogridDTOs, resourceDescriptorPage, organization, dataProviderService, dataProviderIdsLoaded);
	history.finishLoad();

	return new LoadHistoryResponce(history);
}

/**
 * Collects the distinct NCBI gene curies referenced by the given BioGRID ORCS
 * records.
 *
 * <p>Only records whose identifier type is {@code ENTREZ_GENE} contribute a
 * curie of the form {@code NCBI_Gene:<identifierId>}. Every other record is
 * counted as skipped on the history, including {@code null} entries and records
 * with a missing identifier type or identifier id.
 *
 * @param biogridList records to scan; must not be {@code null}
 * @param history     load history whose skipped counter is incremented for ignored records
 * @return the set of curies found; never {@code null}
 */
private Set<String> populateEntrezIdsFromFiles(List<BiogridOrcFmsDTO> biogridList, BulkLoadFileHistory history) {
	Set<String> biogridIds = new HashSet<>();

	for (BiogridOrcFmsDTO biogridOrcFmsDTO : biogridList) {
		// Constant-first equals keeps a missing identifier type from raising a
		// NullPointerException that would silently drop the record from the counts.
		if (biogridOrcFmsDTO == null || !"ENTREZ_GENE".equals(biogridOrcFmsDTO.getIdentifierType())) {
			history.incrementSkipped();
			continue;
		}

		// Without this check a missing id would yield the bogus curie "NCBI_Gene:null".
		Object identifierId = biogridOrcFmsDTO.getIdentifierId();
		if (identifierId == null) {
			history.incrementSkipped();
			continue;
		}

		biogridIds.add("NCBI_Gene:" + identifierId);
	}

	return biogridIds;
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
package org.alliancegenome.curation_api.model.ingest.dto.fms;

import java.util.List;

import org.alliancegenome.curation_api.model.ingest.dto.CrossReferenceDTO;
import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO;

import lombok.Data;
Expand All @@ -25,5 +22,4 @@ public class BiogridOrcFmsDTO extends BaseDTO {
private Double score5;
private String hit;
private String source;
private List<CrossReferenceDTO> crossReferenceDtos;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.alliancegenome.curation_api.model.ingest.dto.fms;

import java.util.List;

import org.alliancegenome.curation_api.model.ingest.dto.base.BaseDTO;

import lombok.Data;
import lombok.EqualsAndHashCode;

/**
 * Request payload of the BioGRID ORCS bulk endpoint: a metadata section plus
 * the list of records to load.
 *
 * <p>Accessors, {@code equals}/{@code hashCode} (including the superclass
 * state) and {@code toString} are generated by Lombok.
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class BiogridOrcIngestFmsDTO extends BaseDTO {

// NOTE(review): presumably describes the submission (release, provider, ...);
// no code shown with this class reads it - confirm against MetaDataFmsDTO.
private MetaDataFmsDTO metaData;
// The BioGRID ORCS records; BiogridOrcCrudController hands this list to
// BiogridOrcExecutor.runLoadApi.
private List<BiogridOrcFmsDTO> data;
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

@RequestScoped
public class CrossReferenceService extends BaseEntityCrudService<CrossReference, CrossReferenceDAO> {
Expand Down Expand Up @@ -109,4 +110,8 @@ public String getCrossReferenceUniqueId(CrossReference xref) {
/**
 * Delegates to {@code CrossReferenceDAO.getGenesWithCrossRefs} for the given
 * resource descriptor page.
 *
 * @param page resource descriptor page passed through to the DAO
 * @return the map produced by the DAO, unchanged
 */
public Map<String, Long> getGenomicEntityCrossRefMap(ResourceDescriptorPage page) {
	Map<String, Long> genesByCrossRef = crossReferenceDAO.getGenesWithCrossRefs(page);
	return genesByCrossRef;
}

/**
 * Delegates to {@code CrossReferenceDAO.getGenesWithCrossRefs} for the given
 * referenced curies.
 *
 * @param referencedCuries referenced curies passed through to the DAO
 * @return the DAO's map from referenced curie to genomic entity id, unchanged
 */
public Map<String, Long> getGenomicEntityCrossRefMap(Set<String> referencedCuries) {
	Map<String, Long> genesByCurie = crossReferenceDAO.getGenesWithCrossRefs(referencedCuries);
	return genesByCurie;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,21 @@ public ObjectResponse<DataProvider> insertExpressionAtlasDataProvider(DataProvid
return new ObjectResponse<>(dbEntity);
}

/**
 * Creates the BioGRID ORCS data provider for a gene unless an equivalent one
 * already exists. Existing records are never updated.
 *
 * <p>A new provider is only persisted when a gene id is available, because the
 * new cross reference is linked to that gene right after the insert.
 *
 * @param entity          data provider to insert; its cross reference and source
 *                        organization must be set
 * @param geneticEntityId id of the gene to link the new cross reference to;
 *                        {@code null} when no gene was found for the referenced curie
 * @return a response holding the already existing provider, the newly persisted
 *         provider, or no entity at all when the record was skipped for lack of a gene
 */
@Transactional
public ObjectResponse<DataProvider> insertBioGridOrcDataProvider(DataProvider entity, Long geneticEntityId) {
	String referencedCurie = entity.getCrossReference().getReferencedCurie();

	DataProvider dbEntity = getDataProvider(entity.getSourceOrganization(), referencedCurie, entity.getCrossReference().getResourceDescriptorPage());

	// we only create new records, no updates
	if (dbEntity != null) {
		return new ObjectResponse<>(dbEntity);
	}

	// Without a gene there is nothing to attach the cross reference to; persisting
	// would leave an orphan provider and a join row with a null gene id. Returning
	// no entity lets the caller count the record as skipped.
	if (geneticEntityId == null) {
		return new ObjectResponse<DataProvider>((DataProvider) null);
	}

	dataProviderDAO.persist(entity);
	crossReferenceDAO.persistAccessionGeneAssociated(entity.getCrossReference().getId(), geneticEntityId);
	return new ObjectResponse<>(entity);
}

@NotNull
public static String getFullReferencedCurie(String localReferencedCurie) {
return RESOURCE_DESCRIPTOR_PREFIX + ":" + localReferencedCurie;
Expand Down
Loading
Loading