/* ******************************************************************* * The contents of this file are subject to the Mozilla Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See * the License for the specific language governing rights and * limitations under the License. * * The Original Code is the BioWarehouse. * * The Initial Developer of the Original Code is SRI International. * Portions created by SRI International are Copyright (C) 2004. * All Rights Reserved. ******************************************************************* */ /* * PGDB PATHWAY loader for the BioSpice Data Warehouse * Thomas J Lee, SRI International, August 2002 */ #include "main.h" #include "pathway-parse.h" #include "db.h" #include "widtable.h" #ifdef ORACLE #include #endif #define DEBUG 0 extern int dataset_wid; extern int organism_wid; short ind; extern WIDTABLE pathway_wids; /******************************************************************************* Parsing of pathways is complicated by the fact that the first reference to a pathway can be before it is actually defined in the input file. Therefore we keep a hash map of pathway unique_id -> WID which is updated whenever we see a pathway name, and do a SQL INSERT at that point. Pathways that are referenced but not defined will have a Pathway entry, but are absent from the DBID and Entry tables until defined. 
******************************************************************************/ void add_pathway(int wid, char *name) { /* Insert a row into Pathway table */ if (DEBUG) printf("Adding pathway %s %d\n", name, wid); db_insert_into_pathway(wid, name); } void update_pathway(int wid, char *name) { /* Update the Name column of a row into Pathway table designated by its WID */ if (DEBUG) printf("Updating pathway %s %d\n", name, wid); db_update_pathway(wid, name); } int get_pathway_wid(char *unique_id, char *common_name) { /* If unique_id has been previously parsed but its entry not yet defined, return the WID that was associated with it. Else allocate & remember a WID, and add an entry into the Pathway table. */ int wid = find_pathway(unique_id); if (DEBUG) printf("get_pathway_wid %s\n", unique_id); if (wid) return wid; else { wid = wh_get_new_wid(); widtable_insert(pathway_wids, wid, unique_id); /* remember wid-unique_id association */ add_pathway(wid, common_name); return(wid); } } void add_pathway_links(int pathway1_wid, struct pathway_link* link) { /* Adds entries to PathwayLink to describe a chemical link between pathway1 and each of a list of pathways */ int chemical_wid = find_chemical(link->chemical); int pathway2_wid; struct stringlist* sptr; if (DEBUG) printf("add_pathway_links %d\n", pathway1_wid); if (!chemical_wid) { if (DEBUG) printf("Missing PATHWAY-LINKS compound %s\n", link->chemical); return; } for (sptr=link->pathways; sptr; sptr=sptr->next) { pathway2_wid = get_pathway_wid(sptr->string, sptr->string); if (DEBUG) printf("Adding pathway link %s: %d - %d\n", link->chemical, pathway1_wid, pathway2_wid); db_insert_into_pathwaylink(pathway1_wid, pathway2_wid, chemical_wid); /* We don't add a second entry with pathway1/2 reversed */ } if (DEBUG) printf("add_pathway_links %d DONE\n", pathway1_wid); } int select_pathway(char *name) { /* Not used */ return(db_select_pathway(name)); } void add_subpathway_for_super(int superpathway_wid, char* subpathway_name) { 
predecessor_subpathways = widnamelist_insert(predecessor_subpathways, superpathway_wid, subpathway_name); if (DEBUG) printf("Added pred %s to superpathway %d\n", subpathway_name, superpathway_wid); } void pathway_load_entry(struct pathway_entry *entry) { int pathway_wid; int super_wid; /* for super_pathway */ struct stringlist *sptr; /* to iterate over synonyms */ struct stringlist *succptr; /* to iterate over successors */ struct stringlist *predptr; /* to iterate over predecessors */ struct stringlist *rxnptr; /* to iterate over reactions from REACTION-LIST */ struct pathway_link * linkptr; int succrxn_wid; int predrxn_wid; int rxn_wid; short predrxn_wid_ind; char hypothetical; /* 'T' or 'F'; applies to each rxn of pathway */ /* Set indicator variables for optional columns, and check string lengths, numeric formats etc. */ /* ...none */ if (!entry) advise_error("Null entry passed to pathway_load_entry\n"); if (!entry->common_name) { if (DEBUG) printf("Missing COMMON-NAME attribute on Pathway %s\n", entry->unique_id); entry->common_name = (char *) strdup(entry->unique_id); } #ifdef IGNORE #endif if (DEBUG) printf("Loading pathway %s\n", entry->unique_id); #ifdef ORACLE /* Try a WHENEVER to assist in debugging silent Oracle errors */ EXEC SQL WHENEVER SQLERROR DO sql_error("Oracle error in pathway-load"); #endif /* If pathway has been added, use its WID. 
Else add it fully and remember WID */ pathway_wid = find_pathway(entry->unique_id); if (pathway_wid) { /* Update pathway, changing the Name column from its UNIQUE-ID to its COMMON-NAME */ update_pathway(pathway_wid, entry->common_name); if (DEBUG) printf("Updated pathway %s, %d\n", entry->unique_id, pathway_wid); } else { /* No previous references to pathway */ pathway_wid = get_pathway_wid(entry->unique_id, entry->common_name); if (DEBUG) printf("Added pathway %s, %d\n", entry->unique_id, pathway_wid); } /* Add SuperPathways */ for (sptr=entry->super_pathways; sptr; sptr=sptr->next) { /* If superpathway has been added, use its WID. Else add it and remember WID */ super_wid = get_pathway_wid(sptr->string, sptr->string); if (DEBUG) printf("Added superpathway %s, %s\n", entry->unique_id, sptr->string); /* Add SuperPathway */ db_insert_into_superpathway(pathway_wid, super_wid); /* Are these in BsubCyc? Are their types 'O'? */ } if (DEBUG) printf("Added superpathways for %s, %d\n", entry->unique_id, pathway_wid); /* Add all reaction pairs (from the two corresp. lists) to PathwayReaction */ predptr = entry->predecessors; for (succptr=entry->successors; succptr; succptr=succptr->next) { if (DEBUG) printf("Pathway successor %s\n", succptr->string); hypothetical = stringlist_find(succptr->string, entry->hypothetical_rxns) ? 
'T' : 'F'; succrxn_wid = find_reaction(succptr->string); if (succrxn_wid) { predrxn_wid = find_reaction(predptr->string); if (predrxn_wid) predrxn_wid_ind = INDICATE_NONNULL; else predrxn_wid_ind = INDICATE_NULL; if (DEBUG) printf("Inserting successor/pred %s/%s %c\n", succptr->string, predptr->string, hypothetical); db_insert_into_pathwayreaction(pathway_wid, succrxn_wid, hypothetical, predrxn_wid, predrxn_wid_ind); } else { pathway_parse_error(); printf("*** Error: PREDECESSORS reaction %s in pathway %s not found\n", succptr->string, entry->unique_id); } predptr = predptr->next; } /* Add all reactions from REACTION-LIST not already added (due to their absence from successors list) to PathwayReaction. Gracefully ignore any components that are subpathways. */ for (rxnptr=entry->reactions; rxnptr; rxnptr=rxnptr->next) { if (DEBUG) printf("Pathway REACTION-LIST component %s\n", rxnptr->string); if (!stringlist_find(rxnptr->string, entry->successors)) { rxn_wid = find_reaction(rxnptr->string); if (rxn_wid) { hypothetical = stringlist_find(rxnptr->string, entry->hypothetical_rxns) ? 'T' : 'F'; if (DEBUG) printf("Inserting REACTION-LIST rxn %s %c\n", rxnptr->string, hypothetical); db_insert_into_pathwayreaction(pathway_wid, rxn_wid, hypothetical, 0, INDICATE_NULL); } else if (!stringlist_find(rxnptr->string, entry->sub_pathways)) { pathway_parse_error(); printf("*** Error: component %s in pathway %s is undefined or missing from SUB-PATHWAYS\n", rxnptr->string, entry->unique_id); } } } /* Add all subpathways to global data structure for later handling. Can't INSERT into DB now since we don't know the WIDs of the subpathways yet. 
*/ for (sptr=entry->sub_pathways; sptr; sptr=sptr->next) add_subpathway_for_super(pathway_wid, sptr->string); /* Add pathway links to PathwayLink table */ for (linkptr=entry->links; linkptr; linkptr=linkptr->next) { add_pathway_links(pathway_wid, linkptr); } /* Add synonyms to SynonymTable */ for (sptr=entry->synonyms; sptr; sptr=sptr->next) wh_insert_synonymtable(pathway_wid, sptr->string); /* Add citations to CitationWIDOtherWID */ for (sptr=entry->citations; sptr; sptr=sptr->next) insert_citation(pathway_wid, sptr->string); /* Add comments to CommentTable */ for (sptr=entry->comments; sptr; sptr=sptr->next) wh_insert_comment(pathway_wid, sptr->string); /* Add DBID */ if (!entry->unique_id) entry->load_error = 1; else wh_insert_dbid(pathway_wid,entry->unique_id); /* Insert all DBLINKS as crossreferences */ insert_dblinks(pathway_wid, entry->dblinks_dbs, entry->dblinks_ids); /* Finally add Entry */ wh_insert_entry(pathway_wid, entry->load_error, 0, NULL); if (DEBUG) printf("DONE loading pathway %s\n", entry->unique_id); }