# Copyright (c) 2014 Dan Obermiller
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# You should have received a copy of the MIT License along with this program.
# If not, see <http://opensource.org/licenses/MIT>
"""Parser and builder for CML files. On the path to deprecation."""
from copy import deepcopy
import json
from lxml import etree
from lxml import builder as lb
class CMLParser(object):
    """Parser for CML files.

    Parameters
    ----------
    CML_file : file-like object
        The (open) file that contains the data.

    Attributes
    ----------
    atoms : dict
        Maps each atom ``id`` to the text of the ``string`` element most
        recently read before that atom was closed.
    bonds : dict
        Maps each bond ``id`` to a ``(first_ref, second_ref, info)`` tuple.
        ``info`` always holds ``'order'`` (int) and ``'chirality'`` (str or
        None); additional values are kept as ``'unknown0'``, ``'unknown1'``...
    other : dict
        The attributes of the ``molecule`` element.
    molecule : dict
        The three dictionaries above under the keys ``'atoms'``, ``'bonds'``
        and ``'other_info'``. They are the same objects, not copies.

    Raises
    ------
    IndexError
        If a bond element has fewer than two ``string`` children.

    Notes
    -----
    Pretty limited on what it can actually handle. The ``molecule`` and
    ``atomArray`` tags are compared exactly, and everything read before the
    end of ``atomArray`` is treated as atom data and everything after it as
    bond data.
    """

    def __init__(self, CML_file):
        self.CML_file = CML_file
        self.bonds = {}
        self.atoms = {}
        self.other = {}
        self.molecule = {'atoms': self.atoms,
                         'bonds': self.bonds,
                         'other_info': self.other}
        self.CML_tree = etree.iterparse(self.CML_file)
        self._read_elements()
        self._normalise_bonds()

    def _read_elements(self):
        """Walk the parse events and collect the raw atom and bond data."""
        in_bonds = False
        symbol = ''
        bond_parts = []
        # With its default settings iterparse reports each element once it
        # is complete, so children are always seen before their parent.
        for _, element in self.CML_tree:
            tag = element.tag
            if tag == 'molecule':
                self.other.update(dict(element.items()))
            elif tag == 'atomArray':
                # The atom section is finished; what follows is bond data.
                in_bonds = True
                bond_parts = []
            elif not in_bonds:
                if 'string' in tag:
                    symbol = element.text
                elif 'atom' in tag:
                    self.atoms[element.get('id')] = symbol
            elif 'string' in tag:
                bond_parts.append(element.text)
            elif 'bond' in tag:
                # This substring test also matches the closing bondArray,
                # which is stored under the id None and discarded later.
                self.bonds[element.get('id')] = bond_parts
                bond_parts = []

    def _normalise_bonds(self):
        """Convert every raw list of bond strings into its final tuple."""
        # There is no None entry when the document has no bondArray, so
        # discard it without insisting that it exists.
        self.bonds.pop(None, None)
        # Iterate over a snapshot and update in place: self.molecule holds a
        # reference to this very dictionary.
        for key, parts in list(self.bonds.items()):
            self.bonds[key] = self._bond_record(parts)

    @staticmethod
    def _bond_record(parts):
        """Build ``(first_ref, second_ref, info)`` from a bond's strings."""
        first, second = parts[0], parts[1]
        extras = parts[2:]
        # A bond that only names its two atoms is a single bond with no
        # chirality.
        info = {'order': int(extras[0]) if extras else 1,
                'chirality': extras[1] if len(extras) > 1 else None}
        if info['chirality'] == 'None':
            # The literal text 'None' stands for "no chirality".
            info['chirality'] = None
        for index, value in enumerate(extras[2:]):
            info['unknown{}'.format(index)] = '{}'.format(value)
        return (first, second, info)

    def __str__(self):
        """Return the parsed molecule as JSON indented by four spaces."""
        return json.dumps(self.molecule, indent=4)
class CMLBuilder(object):
    """Object used to build a CML file.

    Parameters
    ----------
    molecule_dict : dict
        Dictionary storing all of the molecular information under the keys
        ``'atoms'`` (id -> text of the atom's ``string`` child), ``'bonds'``
        (id -> ``(first_ref, second_ref, info)`` where ``info`` has
        ``'order'`` and ``'chirality'``) and ``'other_info'`` (attributes of
        the ``molecule`` element). The dictionary is not modified.

    Attributes
    ----------
    atoms : dict
        Maps each atom id to the ``atom`` element built for it.
    bonds : dict
        Maps each bond id to the ``bond`` element built for it.
    attribs : dict
        The ``'other_info'`` mapping that was passed in.
    CML
        The assembled ``molecule`` element, with atoms and bonds sorted by
        their ``id`` attribute.
    """

    @classmethod
    def from_compound(cls, comp):
        """Generates a CMLBuilder object from a Compound object.

        Parameters
        ----------
        comp : Compound
            The compound to be written to file. Its ``atoms``, ``bonds`` and
            ``other_info`` attributes are read; none of them is modified.

        Returns
        -------
        CMLBuilder
            The builder object with the relevant information. When called on
            a subclass, an instance of that subclass is returned.
        """
        bonds = {}
        for bkey, bdata in comp.bonds.items():
            info = bdata[2]
            bonds[bkey] = (bdata[0], bdata[1],
                           {'order': str(info['order']),
                            'chirality': str(info['chirality'])})
        other = {key: str(value) for key, value in comp.other_info.items()}
        # Fresh containers are handed over, so the compound itself never has
        # to be copied.
        return cls({'atoms': dict(comp.atoms),
                    'bonds': bonds,
                    'other_info': other})

    def __init__(self, molecule_dict):
        self.attribs = molecule_dict['other_info']
        # Build new dictionaries rather than overwriting the caller's
        # entries with XML elements.
        self.atoms = {
            key: lb.E.atom(lb.E.string(symbol, builtin="elementType"),
                           id=key)
            for key, symbol in molecule_dict['atoms'].items()}
        self.bonds = {}
        for key, bond in molecule_dict['bonds'].items():
            info = bond[2]
            self.bonds[key] = lb.E.bond(
                lb.E.string(bond[0], builtin="atomRef"),
                lb.E.string(bond[1], builtin="atomRef"),
                lb.E.string(str(info['order']), builtin="order"),
                lb.E.string(str(info['chirality']), builtin="chirality"),
                id=key)
        self.CML = lb.E.molecule(
            lb.E.atomArray(*sorted(self.atoms.values(),
                                   key=self._element_id)),
            lb.E.bondArray(*sorted(self.bonds.values(),
                                   key=self._element_id)),
            **self.attribs)

    @staticmethod
    def _element_id(element):
        """Return the ``id`` attribute used to order atoms and bonds."""
        return element.get('id')

    def to_file(self, cml_file):
        """Writes the data in the builder object to file.

        Parameters
        ----------
        cml_file : file-like
            The open file to which the compound should be written.
        """
        cml_file.write(str(self))

    def __str__(self):
        """Return the pretty-printed XML text of the molecule."""
        text = etree.tostring(self.CML, pretty_print=True)
        if not isinstance(text, str):
            # Python 3: tostring returns bytes (ASCII unless another encoding
            # is requested), but __str__ has to return text.
            text = text.decode('ascii')
        return text

    def __repr__(self):
        """Return the same XML text as ``str(self)``."""
        return str(self)