PubChem Details
From Chemical Informatics and Cyberinfrastructure Collaboratory
Contents |
[edit]
Table Summary
The local mirror aims to provide a more or less comprehensive copy of the data in PubChem. Currently there are 6 tables
- pubchem_compound
- pubchem_substance
- pubchem_synonym
- pubchem_subst_syn
- pubchem_scass
- pubchem_derived
[edit]
Example Queries
- Select all the rows from the compound synonym table that contain acetyl as part of the synonym
select * from pubchem_synonym where synonym ~* 'acetyl';
- Get the unique ID's for the compounds whose synonyms contain the phrase acetyl
select * from ( select distinct on (cid) cid, ccid_synid from pubchem_synonym
where synonym ~* 'acetyl') temp order by ccid_synid;
- Get the SMILES for entries that have a specified synonym
select distinct openeye_can_smiles
from pubchem_compound where cid in
( select distinct cid from pubchem_synonym
where lower(synonym) = 'morphine');
- Get the SMILES for compounds whose SlogP is in a specified range
select distinct openeye_can_smiles
from pubchem_compound where cid in
( select cid from pubchem_derived where
((slogp >= 1.5) and (slogp <= 1.8034)) );
[edit]
Table Schema
pubchem_compound ( cid varchar(256) primary key, iupac_openeye_name text, iupac_cas_name text, iupac_name text, iupac_systematic_name text, iupac_traditional_name text, nist_inchi text, cactvs_xlogp real, cactvs_exact_mass real check(cactvs_exact_mass >= 0), openeye_mw real check(openeye_mw >= 0), openeye_can_smiles text not null, openeye_iso_smiles text, cactvs_tpsa real, total_charge smallint, heavy_atom_count integer check(heavy_atom_count >= 0), gfp bit(1024) );
pubchem_substance ( sid varchar(256) primary key, substance_version integer check (substance_version > 0), compound_id_type integer check (compound_id_type >= 0), ext_datasource_name text, ext_datasource_regid text, ob_smiles text );
pubchem_synonym (
ccid_synid bigserial not null primary key,
cid varchar(256) references pubchem_compound
on update cascade
on delete cascade,
synonym text not null,
unique(cid, synonym)
);
pubchem_subst_syn (
ccid_ssynid bigserial primary key,
sid varchar(256)
references pubchem_substance
on update cascade
on delete cascade,
synonym text not null,
unique(sid, synonym)
);
pubchem_scass (
ccid_scid bigserial primary key,
sid varchar(256) references pubchem_substance (sid)
on update cascade
on delete cascade,
cid varchar(256) references pubchem_compound (cid)
on update cascade
on delete cascade,
compound_id_type integer check (compound_id_type >= 0),
unique(sid, cid, compound_id_type)
);
pubchem_derived (
cid varchar(256) primary key
references pubchem_compound (cid)
on update cascade
on delete cascade,
SlogP double precision default null,
SMRef double precision default null
);
[edit]
Table Indices
compound_cansmi_idx on pubchem_compound (openeye_can_smiles) syn_idx on pubchem_synonym (synonym) lower_syn_index on pubchem_synonym (lower(synonym)) ssyn_idx on pubchem_subst_syn (synonym); lower_ssyn_idx on pubchem_subst_syn (lower(synonym)) sid_idx on pubchem_scass (sid) slogp_idx on pubchem_derived (slogp)
