Difference between revisions of "iRefIndex Development"
PaulBoddie (talk | contribs) (→Evaluating the Data: Added InnateDB observations.) |
PaulBoddie (talk | contribs) (→Reviewing Mapper Files: Updated output.) |
||
Line 119: | Line 119: | ||
Table int_name ... | Table int_name ... | ||
_euid ... | _euid ... | ||
− | + | <incremental> | |
_idetlbl ... | _idetlbl ... | ||
entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/names/shortLabel | entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/names/shortLabel | ||
Line 125: | Line 125: | ||
entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/names/fullname | entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/names/fullname | ||
_idetncat ... | _idetncat ... | ||
− | + | 24 | |
− | + | 25 | |
− | + | 25 | |
Table int_xref ... | Table int_xref ... | ||
_euid ... | _euid ... | ||
− | + | <incremental> | |
_brefdb ... | _brefdb ... | ||
entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/primaryRef/@db | entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/primaryRef/@db | ||
− | entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/ | + | entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/secondaryRef/@db |
_brefid ... | _brefid ... | ||
entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/primaryRef/@id | entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/primaryRef/@id | ||
entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/secondaryRef/@id | entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/secondaryRef/@id | ||
_brefct ... | _brefct ... | ||
− | + | 4 | |
− | + | 5 | |
Table int_xref ... | Table int_xref ... | ||
_euid ... | _euid ... | ||
− | + | <incremental> | |
_idetdb ... | _idetdb ... | ||
entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/xref/primaryRef/@db | entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/xref/primaryRef/@db | ||
Line 150: | Line 150: | ||
entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/xref/secondaryRef/@id | entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/xref/secondaryRef/@id | ||
_idetct ... | _idetct ... | ||
− | + | 6 | |
− | + | 7 | |
Element experimentList ... | Element experimentList ... | ||
Table int_experiment ... | Table int_experiment ... | ||
_euidr ... | _euidr ... | ||
− | + | _euid | |
_iuider ... | _iuider ... | ||
− | + | _iuid | |
Element interaction ... | Element interaction ... | ||
Table int_name ... | Table int_name ... | ||
_iuid ... | _iuid ... | ||
− | + | <incremental> | |
_iuiflnm ... | _iuiflnm ... | ||
entry/interactionList/interaction/names/fullName | entry/interactionList/interaction/names/fullName | ||
_iuiflnmct ... | _iuiflnmct ... | ||
− | + | 12 | |
Table int_source ... | Table int_source ... | ||
_iuid ... | _iuid ... | ||
− | + | <incremental> | |
_itp ... | _itp ... | ||
entry/interactionList/interaction/xref | entry/interactionList/interaction/xref | ||
Line 177: | Line 177: | ||
Table int_xref ... | Table int_xref ... | ||
_iuid ... | _iuid ... | ||
− | + | <incremental> | |
_idb ... | _idb ... | ||
entry/interactionList/interaction/xref/primaryRef/@db | entry/interactionList/interaction/xref/primaryRef/@db | ||
Line 183: | Line 183: | ||
entry/interactionList/interaction/xref/primaryRef/@id | entry/interactionList/interaction/xref/primaryRef/@id | ||
_irefcat ... | _irefcat ... | ||
− | + | 0 | |
Element participant ... | Element participant ... | ||
Table int_name ... | Table int_name ... | ||
_ouid ... | _ouid ... | ||
− | + | <incremental> | |
_olb ... | _olb ... | ||
entry/interactionList/interaction/participantList/participant/interactor/names/shortLabel | entry/interactionList/interaction/participantList/participant/interactor/names/shortLabel | ||
Line 193: | Line 193: | ||
entry/interactionList/interaction/participantList/participant/interactor/names/fullName | entry/interactionList/interaction/participantList/participant/interactor/names/fullName | ||
_olbct ... | _olbct ... | ||
− | + | 13 | |
− | + | 14 | |
− | + | 15 | |
Table int_object ... | Table int_object ... | ||
_ouid ... | _ouid ... | ||
− | + | <incremental> | |
_oltyp ... | _oltyp ... | ||
entry/interactionList/interaction/participantList/participant/interactor/interactorType/names/shortLabel | entry/interactionList/interaction/participantList/participant/interactor/interactorType/names/shortLabel | ||
Line 207: | Line 207: | ||
Table int_sequence ... | Table int_sequence ... | ||
_ouid ... | _ouid ... | ||
− | + | <incremental> | |
_obsq ... | _obsq ... | ||
entry/interactionList/interaction/participantList/participant/interactor/sequence | entry/interactionList/interaction/participantList/participant/interactor/sequence | ||
Table int_xref ... | Table int_xref ... | ||
_ouid ... | _ouid ... | ||
− | + | <incremental> | |
_odb ... | _odb ... | ||
entry/interactionList/interaction/participantList/participant/interactor/xref/primaryRef/@db | entry/interactionList/interaction/participantList/participant/interactor/xref/primaryRef/@db | ||
Line 220: | Line 220: | ||
entry/interactionList/interaction/participantList/participant/interactor/xref/secondaryRef/@id | entry/interactionList/interaction/participantList/participant/interactor/xref/secondaryRef/@id | ||
_oicat ... | _oicat ... | ||
− | + | 2 | |
− | + | 3 | |
_otax ... | _otax ... | ||
entry/interactionList/interaction/participantList/participant/interactor/organism/@ncbiTaxId | entry/interactionList/interaction/participantList/participant/interactor/organism/@ncbiTaxId | ||
Line 230: | Line 230: | ||
Table int_source2object ... | Table int_source2object ... | ||
_iuidr ... | _iuidr ... | ||
− | + | _iuid | |
_what ... | _what ... | ||
entry/interactionList/interaction/participantList/participant/interactor/names | entry/interactionList/interaction/participantList/participant/interactor/names | ||
Line 238: | Line 238: | ||
entry/interactionList/interaction/participantList/participant/interactor/names | entry/interactionList/interaction/participantList/participant/interactor/names | ||
_refob ... | _refob ... | ||
− | + | _ouid | |
</pre> | </pre> | ||
Revision as of 11:33, 11 October 2010
See iRefIndex Issues and Notes for details of ongoing work to improve the iRefIndex software.
Contents
Adding Sources to iRefIndex
- Identify the location of the downloaded data.
- Evaluate the form of the data:
- For PSI-MI XML (Molecular Interaction XML) documents, check the version of the format employed by the data documents.
- For the specific version, review the format's schema and how the data uses the schema. For example, PSI-MI XML permits the specification of interactors within interaction descriptions as well as in a separate interactor list.
- Review existing, similar mapper definition files.
Evaluating the Data
The show_xml_paths.py script in the iRef_PSI_XML2RDBMS directory can be used to show the different element paths used in an XML data file to hold data items. For example:
python show_xml_paths.py --data /home/irefindex/data/MINT/2010-09-14/10023771.psi25.xml
The resulting list of paths indicates the places in the element hierarchy of a PSI-MI XML file where information is actually stored. For example:
entrySet/entry/experimentList/experimentDescription/attributeList/attribute entrySet/entry/experimentList/experimentDescription/hostOrganismList/hostOrganism/names/fullName entrySet/entry/experimentList/experimentDescription/hostOrganismList/hostOrganism/names/shortLabel entrySet/entry/experimentList/experimentDescription/interactionDetectionMethod/names/alias entrySet/entry/experimentList/experimentDescription/interactionDetectionMethod/names/fullName entrySet/entry/experimentList/experimentDescription/interactionDetectionMethod/names/shortLabel entrySet/entry/experimentList/experimentDescription/names/fullName entrySet/entry/experimentList/experimentDescription/names/shortLabel entrySet/entry/interactionList/interaction/attributeList/attribute entrySet/entry/interactionList/interaction/confidenceList/confidence/unit/names/fullName entrySet/entry/interactionList/interaction/confidenceList/confidence/unit/names/shortLabel entrySet/entry/interactionList/interaction/confidenceList/confidence/value entrySet/entry/interactionList/interaction/experimentList/experimentRef entrySet/entry/interactionList/interaction/interactionType/names/fullName entrySet/entry/interactionList/interaction/interactionType/names/shortLabel entrySet/entry/interactionList/interaction/intraMolecular entrySet/entry/interactionList/interaction/modelled entrySet/entry/interactionList/interaction/names/shortLabel entrySet/entry/interactionList/interaction/negative entrySet/entry/interactionList/interaction/participantList/participant/biologicalRole/names/fullName entrySet/entry/interactionList/interaction/participantList/participant/biologicalRole/names/shortLabel entrySet/entry/interactionList/interaction/participantList/participant/experimentalPreparationList/experimentalPreparation/names/fullName entrySet/entry/interactionList/interaction/participantList/participant/experimentalPreparationList/experimentalPreparation/names/shortLabel 
entrySet/entry/interactionList/interaction/participantList/participant/experimentalRoleList/experimentalRole/names/fullName entrySet/entry/interactionList/interaction/participantList/participant/experimentalRoleList/experimentalRole/names/shortLabel entrySet/entry/interactionList/interaction/participantList/participant/featureList/feature/featureRangeList/featureRange/endStatus/names/fullName entrySet/entry/interactionList/interaction/participantList/participant/featureList/feature/featureRangeList/featureRange/endStatus/names/shortLabel entrySet/entry/interactionList/interaction/participantList/participant/featureList/feature/featureRangeList/featureRange/isLink entrySet/entry/interactionList/interaction/participantList/participant/featureList/feature/featureRangeList/featureRange/startStatus/names/fullName entrySet/entry/interactionList/interaction/participantList/participant/featureList/feature/featureRangeList/featureRange/startStatus/names/shortLabel entrySet/entry/interactionList/interaction/participantList/participant/featureList/feature/featureType/names/fullName entrySet/entry/interactionList/interaction/participantList/participant/featureList/feature/featureType/names/shortLabel entrySet/entry/interactionList/interaction/participantList/participant/featureList/feature/names/shortLabel entrySet/entry/interactionList/interaction/participantList/participant/interactorRef entrySet/entry/interactionList/interaction/participantList/participant/names/shortLabel entrySet/entry/interactionList/interaction/participantList/participant/participantIdentificationMethodList/participantIdentificationMethod/names/alias entrySet/entry/interactionList/interaction/participantList/participant/participantIdentificationMethodList/participantIdentificationMethod/names/fullName entrySet/entry/interactionList/interaction/participantList/participant/participantIdentificationMethodList/participantIdentificationMethod/names/shortLabel 
entrySet/entry/interactorList/interactor/attributeList/attribute entrySet/entry/interactorList/interactor/interactorType/names/fullName entrySet/entry/interactorList/interactor/interactorType/names/shortLabel entrySet/entry/interactorList/interactor/names/alias entrySet/entry/interactorList/interactor/names/fullName entrySet/entry/interactorList/interactor/names/shortLabel entrySet/entry/interactorList/interactor/organism/names/fullName entrySet/entry/interactorList/interactor/organism/names/shortLabel entrySet/entry/interactorList/interactor/sequence entrySet/entry/source/attributeList/attribute entrySet/entry/source/names/fullName entrySet/entry/source/names/shortLabel
With this information, a suitable mapper file can be identified for the conversion of the XML-encoded data into tabular data to be stored in a database. In the above example, it is apparent that the experiment, interaction and interactor details reside alongside each other within each entry element:
entrySet/entry/experimentList/experimentDescription
entrySet/entry/interactionList/interaction
entrySet/entry/interactionList/interaction/participantList/participant
entrySet/entry/interactorList/interactor
In contrast, other PSI-MI XML files adopt a different structure which can be reduced to the following:
entrySet/entry/interactionList/interaction
entrySet/entry/interactionList/interaction/experimentList/experimentDescription
entrySet/entry/interactionList/interaction/participantList/participant
The different sources can be divided into a number of subformats as follows:
Subformat | Sources | Notes |
---|---|---|
Separate experiment, interaction, interactor lists | BioGRID, HPRD, IntAct, MINT, OPHID | BioGRID uses proteinInteractor instead of interactor; OPHID uses proteinParticipant, proteinInteractor |
Interaction contains experiment; separate interactor list | DIP | |
Interaction contains experiment and interactor/participant | BIND Translation, CORUM, InnateDB, MPACT, MPPI | InnateDB provides apparently redundant lists of experiments and interactors; MPPI uses proteinParticipant, proteinInteractor |
Reviewing Mapper Files
The existing mapper files can be reviewed by running the show_xml_paths.py script on one of these files, which reside in the mapper subdirectory of the iRef_PSI_XML2RDBMS directory. For example:
python show_xml_paths.py --mapper mapper/Map25_CORUM.xml
The resulting output describes the structure of the data and how the mapper will attempt to interpret that data. For example (for CORUM):
Element experimentDescription ... Table int_name ... _euid ... <incremental> _idetlbl ... entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/names/shortLabel entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/names/alias entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/names/fullname _idetncat ... 24 25 25 Table int_xref ... _euid ... <incremental> _brefdb ... entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/primaryRef/@db entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/secondaryRef/@db _brefid ... entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/primaryRef/@id entry/interactionList/interaction/experimentList/experimentDescription/bibref/xref/secondaryRef/@id _brefct ... 4 5 Table int_xref ... _euid ... <incremental> _idetdb ... entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/xref/primaryRef/@db entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/xref/secondaryRef/@db _idetid ... entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/xref/primaryRef/@id entry/interactionList/interaction/experimentList/experimentDescription/interactionDetectionMethod/xref/secondaryRef/@id _idetct ... 6 7 Element experimentList ... Table int_experiment ... _euidr ... _euid _iuider ... _iuid Element interaction ... Table int_name ... _iuid ... <incremental> _iuiflnm ... entry/interactionList/interaction/names/fullName _iuiflnmct ... 12 Table int_source ... _iuid ... <incremental> _itp ... entry/interactionList/interaction/xref _isrc ... entry/interactionList/interaction/xref _ifle ... entry/interactionList/interaction/xref Table int_xref ... _iuid ... <incremental> _idb ... 
entry/interactionList/interaction/xref/primaryRef/@db _iref ... entry/interactionList/interaction/xref/primaryRef/@id _irefcat ... 0 Element participant ... Table int_name ... _ouid ... <incremental> _olb ... entry/interactionList/interaction/participantList/participant/interactor/names/shortLabel entry/interactionList/interaction/participantList/participant/interactor/names/alias entry/interactionList/interaction/participantList/participant/interactor/names/fullName _olbct ... 13 14 15 Table int_object ... _ouid ... <incremental> _oltyp ... entry/interactionList/interaction/participantList/participant/interactor/interactorType/names/shortLabel _osrc ... entry/interactionList/interaction/participantList/participant/interactor/names _ofil ... entry/interactionList/interaction/participantList/participant/interactor/names Table int_sequence ... _ouid ... <incremental> _obsq ... entry/interactionList/interaction/participantList/participant/interactor/sequence Table int_xref ... _ouid ... <incremental> _odb ... entry/interactionList/interaction/participantList/participant/interactor/xref/primaryRef/@db entry/interactionList/interaction/participantList/participant/interactor/xref/secondaryRef/@db _orefid ... entry/interactionList/interaction/participantList/participant/interactor/xref/primaryRef/@id entry/interactionList/interaction/participantList/participant/interactor/xref/secondaryRef/@id _oicat ... 2 3 _otax ... entry/interactionList/interaction/participantList/participant/interactor/organism/@ncbiTaxId _otp ... entry/interactionList/interaction/participantList/participant/interactor/xref/primaryRef/@refType entry/interactionList/interaction/participantList/participant/interactor/xref/secondaryRef/@refType Element participantList ... Table int_source2object ... _iuidr ... _iuid _what ... entry/interactionList/interaction/participantList/participant/interactor/names _isrcr ... entry/interactionList/interaction/participantList/participant/interactor/names _ifler ... 
entry/interactionList/interaction/participantList/participant/interactor/names _refob ... _ouid
All iRefIndex Pages
Follow this link for a listing of all iRefIndex-related pages (archived and current).