@article{nandy_jcatal_perspective_2025,
  author        = {Nandy, A.},
  title         = {From Pages to Patterns: Towards Extracting Catalytic Knowledge from Structure and Text for Transition-Metal Complexes and Metal-Organic Frameworks},
  journal       = {J. Catal.},
  year          = {2025},
  internal-note = {NOTE(review): the author field originally read "Nandy, A.*"; the trailing asterisk (presumably a corresponding-author marker -- confirm) was removed because BibTeX parses it as part of the given name. Volume, pages and DOI are still missing.}
}
@article{chakraborti_vsd_2025,
  author  = {Chakraborti, P. and Mukherjee, S. and Oettinger, D. and Nandy, A. and Krishnan, Y. and Walter, M. G.},
  title   = {Mechanistic Basis of the Voltage-Sensitivity of Thiazolothiazole Dyes},
  journal = {ChemRxiv},
  year    = {2024},
  doi     = {10.26434/chemrxiv-2024-cmpzt}
}
Voltage sensitive dyes (VSDs) are versatile and powerful reporters of the membrane potential across living cell membranes. Thiazolothiazole (TTz) dyes are a relatively new and promising class of VSDs. While TTz dyes have high photostability and low cytotoxicity, the mechanistic basis of their excellent voltage sensitivity remains unknown. To answer this question, we have accessed a new generation of TTz dyes (asym-TTz) that are asymmetrically substituted, push-pull dyes. One of these molecules shows sustained cellular membrane localization, high photostability and voltage sensitivity of 9% dF/F per 100 mV. Asym-TTz derivatives exhibited strong solvatofluorochromism with large Stokes shifts and exceptionally high transition dipole moments that provided a clue towards their voltage sensitivity. By computationally analyzing their behavior within cell membranes under an electric field, we found that asym-TTz dyes show a surprising twist on either side of the TTz bridging unit in the excited state at more positive membrane potentials, which decreases dye fluorescence. This novel mechanism can be leveraged to create newer classes of highly sensitive voltage reporter dyes for diverse sensing applications.
@article{saha_annrevbiophys_2024,
  author  = {Saha, R. and Vazquez-Salazar, A. and Nandy, A. and Chen, I. A.},
  title   = {Fitness Landscapes and Evolution of Catalytic {RNA}},
  journal = {Annu. Rev. Biophys.},
  year    = {2024},
  volume  = {53},
  pages   = {109--125},
  doi     = {10.1146/annurev-biophys-030822-025038}
}
The relationship between genotype and phenotype, or the fitness landscape, is the foundation of genetic engineering and evolution. However, mapping fitness landscapes poses a major technical challenge due to the amount of quantifiable data that is required. Catalytic RNA is a special topic in the study of fitness landscapes due to its relatively small sequence space combined with its importance in synthetic biology. The combination of in vitro selection and high-throughput sequencing has recently provided empirical maps of both complete and local RNA fitness landscapes, but the astronomical size of sequence space limits purely experimental investigations. Next steps are likely to involve data-driven interpolation and extrapolation over sequence space using various machine learning techniques. We discuss recent progress in understanding RNA fitness landscapes, particularly with respect to protocells and machine representations of RNA. The confluence of technical advances may significantly impact synthetic biology in the near future.
@article{lamtyugina_activediffusion_2024,
  author        = {Lamtyugina, A. and Behera, A. K. and Nandy, A. and Floyd, C. and Vaikuntanathan, S.},
  title         = {Score-Based Generative Diffusion with “Active” Correlated Noise Sources},
  journal       = {arXiv},
  year          = {2024},
  eprint        = {2411.07233},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2411.07233}
}
Diffusion models exhibit robust generative properties by approximating the underlying distribution of a dataset and synthesizing data by sampling from the approximated distribution. In this work, we explore how the generative performance may be modulated if noise sources with temporal correlations – akin to those used in the field of active matter – are used for the destruction of the data in the forward process. Our numerical and analytical experiments suggest that the corresponding reverse process may exhibit improved generative properties.
@article{chu_ligandmanybody_2024,
  author  = {Chu, D. B. K. and González-Narváez, D. A. and Meyer, R. and Nandy, A. and Kulik, H. J.},
  title   = {Ligand Many-Body Expansion as a General Approach for Accelerating Transition Metal Complex Discovery},
  journal = {J. Chem. Inf. Model.},
  year    = {2024},
  volume  = {64},
  pages   = {9397--9412},
  doi     = {10.1021/acs.jcim.4c01728}
}
Methods that accelerate the evaluation of molecular properties are essential for chemical discovery. While some degree of ligand additivity has been established for transition metal complexes, it is underutilized in asymmetric complexes, such as the square pyramidal coordination geometries highly relevant to catalysis. To develop predictive methods beyond simple additivity, we apply a many-body expansion to octahedral and square pyramidal complexes and introduce a correction based on adjacent ligands (i.e., the cis interaction model). We first test the cis interaction model on adiabatic spin-splitting energies of octahedral Fe(II) complexes, predicting DFT-calculated values of unseen binary complexes to within an average error of 1.4 kcal/mol. Uncertainty analysis reveals the optimal basis, comprising the homoleptic and mer symmetric complexes. We next show that the cis model (i.e., the cis interaction model solved for the optimal basis) infers both DFT- and CCSD(T)-calculated model catalytic reaction energies to within 1 kcal/mol on average. The cis model predicts low-symmetry complexes with reaction energies outside the range of binary complex reaction energies. We observe that trans interactions are unnecessary for most monodentate systems but can be important for some combinations of ligands, such as complexes containing a mixture of bidentate and monodentate ligands. Finally, we demonstrate that the cis model may be combined with Δ-learning to predict CCSD(T) reaction energies from exhaustively calculated DFT reaction energies and the same fraction of CCSD(T) reaction energies needed for the cis model, achieving around 30% of the error from using the CCSD(T) reaction energies in the cis model alone.
@article{oh_mofs_2024,
  author  = {Oh, C. and Nandy, A. and Yue, S. and Kulik, H. J.},
  title   = {{MOFs} with the Stability for Practical Gas Adsorption Applications Require New Design Rules},
  journal = {ACS Appl. Mater. Interfaces},
  year    = {2024},
  volume  = {16},
  pages   = {55541--55554},
  doi     = {10.1021/acsami.4c13250}
}
Metal–organic frameworks (MOFs) have been widely studied for their ability to capture and store greenhouse gases. However, most computational discovery efforts study hypothetical MOFs without consideration of their stability, limiting the practical application of novel materials. We overcome this limitation by screening hypothetical ultrastable MOFs that have predicted high thermal and activation stability, as judged by machine learning (ML) models trained on experimental measures of stability. We enhance this set by computing the bulk modulus as a measure of mechanical stability and filter 1102 mechanically robust hypothetical MOFs from a database of ultrastable MOFs (USMOF DB). Grand Canonical Monte Carlo simulations are then employed to predict the gas adsorption properties of these hypothetical MOFs, alongside a database of experimental MOFs. We identify privileged building blocks that lead MOFs in USMOF DB to show exceptional working capacities compared to the experimental MOFs. We interpret these differences by training ML models on CO2 and CH4 adsorption in these databases, showing how poor model transferability between data sets indicates that novel design rules can be derived from USMOF DB that would not have been gathered through assessment of structurally characterized MOFs. We identify geometric features and node chemistry that will enable the rational design of MOFs with enhanced gas adsorption properties in synthetically realizable MOFs.
@article{cockayne_nist_2024,
  author  = {Cockayne, E. and McDannald, A. and Wong-Ng, W. and Chen, Y.-S. and Benedict, J. and Gándara Barragán, F. and Hendon, C. H. and Keen, D. A. and Kolb, U. and Li, L. and Ma, S. and Morris, W. and Nandy, A. and Runčevski, T. and Soukri, M. and Sriram, A. and Steckel, J. A. and Findley, J. and Wilmer, C. and Yildirim, T. and Zhou, W. and Levin, I. and Brown, C. J.},
  title   = {Integrating Crystallographic and Computational Approaches to Carbon-Capture Materials for the Mitigation of Climate Change},
  journal = {J. Mater. Chem. A},
  year    = {2024},
  volume  = {12},
  pages   = {25678--25695},
  doi     = {10.1039/D4TA04136D}
}
This article presents an overview of the current state of the art in the structure determination of microporous carbon-capture materials, as discussed at the recent NIST workshop “Integrating Crystallographic and Computational Approaches to Carbon-Capture Materials for the Mitigation of Climate Change”. The continual rise in anthropogenic CO2 concentration and its effect on climate change call for the implementation of carbon capture technologies to reduce the CO2 concentration in the atmosphere. Porous solids, including metal–organic frameworks (MOFs), are feasible candidates for gas capture and storage applications. However, determining the structure of these materials represents a significant obstacle in their development into advanced sorbents. The existing difficulties can be overcome by integrating crystallographic methods and theoretical modeling. The workshop gathered experimentalists and theorists from academia, government, and industry to review this field and identify approaches, including collaborative opportunities, required to develop tools for rapid determination of the structures of porous solid sorbents and the effect of structure on the carbon capture performance. We highlight the findings of that workshop, especially in the need for reference materials, standardized procedures and reporting of sorbent activation and adsorption measurements, standardized reporting of theoretical calculations, and round-robin structure determination.
@article{pitt_benchmarking_2024,
  author  = {Pitt, T. and Jia, H. and Azbell, T. J. and Zick, M. E. and Nandy, A. and Kulik, H. J. and Milner, P. J.},
  title   = {Benchmarking {N2O} Adsorption and Activation in Coordinatively Unsaturated Metal-Organic Frameworks},
  journal = {J. Mater. Chem. C},
  year    = {2024},
  volume  = {12},
  pages   = {3164--3174},
  doi     = {10.1039/D3TC04492K}
}
Anthropogenic emissions of N2O, the third most abundant greenhouse gas after CO2 and CH4, are contributing to global climate change. Although metal–organic frameworks (MOFs) have been widely studied as adsorbents for CO2 and CH4, less effort has focused on the use of MOFs to remove N2O from emission streams or from air. Further, N2O activation would enable its use as an inexpensive oxidant for fine chemical synthesis. Herein, we identify features that contribute to strong binding and high uptake of N2O at coordinatively unsaturated metal sites in the M2Cl2(btdd) (M = Mn, Co, Ni, Cu; btdd2− = bis(1,2,3-triazolo[4,5-b],[4′,5′-i])dibenzo[1,4]dioxin) and M2(dobdc) (M = Mg, Mn, Fe, Co, Ni, Cu, Zn; dobdc4− = 2,5-dioxido-1,4-benzenedicarboxylate) series of MOFs. Combined experimental and computational studies suggest that N2O adsorption at open-metal-sites is primarily based on electrostatic interactions, rather than π-backbonding, causing MOFs with more Lewis acidic metal centers to be superior N2O adsorbents. As a result, Mg2(dobdc) demonstrates strong binding and record-setting N2O uptake (8.75 mmol g−1 at 1 bar and 298 K). Using density functional theory (DFT) to characterize reactive intermediates and transition states, we demonstrate that N2O activation to form a M(IV)–oxo species and N2 is thermodynamically favorable in Mn2(dobdc) and Fe2(dobdc) but appears to be kinetically limited in Mn2(dobdc). Our work lays a foundation for understanding N2O adsorption and activation in MOFs, paving the way for the design of promising next-generation materials for N2O capture and utilization.
@article{jia_computational_2024,
  author  = {Jia, H. and Duan, C. and Kevlishvili, I. and Nandy, A. and Liu, M. and Kulik, H. J.},
  title   = {Computational Discovery of Co-doped Single-Atom Catalysts for Methane-to-Methanol Conversion},
  journal = {ACS Catal.},
  year    = {2024},
  volume  = {14},
  pages   = {2992--3005},
  doi     = {10.1021/acscatal.3c05506}
}
The absence of a synthetic catalyst that can selectively oxidize methane to methanol motivates extensive study of single-site catalysts that possess a high degree of tunability in their coordination environments and share similarities with natural enzymes that can catalyze this reaction. Single-atom catalysts (SACs), in particular doped graphitic SACs, have emerged as a promising family of materials due to their high atom economy and scalability, but SACs are yet to be exhaustively screened for methane-to-methanol conversion. Modulating the coordination environment near single metal sites by means of codopants, we carry out a large-scale high-throughput virtual screen of 2048 transition metal (i.e., Mn, Fe, Co, and Ru) SACs codoped with various elements (i.e., N, O, P, and S) in numerous spin and oxidation (i.e., M(II)/M(III)) states for the challenging conversion of methane to methanol. We identify that the ground-state preference is metal- and oxidation-state-dependent. We observe a weak negative correlation between the oxo formation energy (ΔE(oxo)) and the energy of hydrogen atom transfer (ΔE(HAT)), thanks to the high variability in the coordination environment. Therefore, codoped SACs demonstrate flexible tunability that disrupts linear free energy relationships in a manner similar to that of homogeneous catalysts without losing the scalability of heterogeneous catalysts. We identify energetically favorable catalyst candidates along the Pareto frontier of ΔE(oxo) and ΔE(HAT). Further kinetic analysis reveals an intermediate-spin Fe(II) SAC and a low-spin Ru(II) SAC as promising candidates that merit further experimental exploration.
@article{adamji_large-scale_2024,
  author  = {Adamji, H. and Kevlishvili, I. and Nandy, A. and Roman-Leshkov, Y. and Kulik, H. J.},
  title   = {Large-scale Comparison of {Fe} and {Ru} Polyolefin {C–H} Activation Catalysts},
  journal = {J. Catal.},
  year    = {2024},
  volume  = {431},
  pages   = {115361},
  doi     = {10.1016/j.jcat.2024.115361}
}
We performed a large-scale density functional theory comparison of polyolefin C–H hydroxylation trends across over 200 Fe and Ru catalysts that are identical except for their metal centers for the radical-rebound conversion of propane to propanol. We observed a strong spin-state dependence: higher-spin states had more favorable metal-oxo formation and isopropanol release in Ru catalysts, while hydrogen atom transfer (HAT) was more favorable in Fe catalysts. While the widely studied metal-oxo formation vs. HAT linear free-energy relationship held for Ru, it was more easily disrupted for Fe. Ru catalysts have a spin-forbidden C–H hydroxylation pathway, while Fe catalysts favor a spin-allowed, intermediate-spin pathway. Calculation of reaction coordinates on representative catalysts corroborated these spin–reactivity trends and showed comparable energetic spans for Fe and Ru analogues, as well as strong Brønsted–Evans–Polanyi relationships for both the metal-oxo formation and HAT steps, motivating expanded study of Fe catalysts.
@article{edholm_protein3d_2024,
  author  = {Edholm, F. and Nandy, A. and Reinhardt, C. and Kastner, D. W. and Kulik, H. J.},
  title   = {{Protein3D}: Enabling Analysis and Extraction of Metal-Containing Sites from the {Protein Data Bank} with {molSimplify}},
  journal = {J. Comput. Chem.},
  year    = {2024},
  volume  = {45},
  pages   = {352--361},
  doi     = {10.1002/jcc.27242}
}
Metalloenzymes catalyze a wide range of chemical transformations, with the active site residues playing a key role in modulating chemical reactivity and selectivity. Unlike smaller synthetic catalysts, a metalloenzyme active site is embedded in a larger protein, which makes interrogation of electronic properties and geometric features with quantum mechanical calculations challenging. Here we implement the ability to fetch crystallographic structures from the Protein Data Bank and analyze the metal binding sites in the program molSimplify. We show the usefulness of the newly created protein3D class to extract the local environment around non-heme iron enzymes containing a two histidine motif and prepare 372 structures for quantum mechanical calculations. Our implementation of protein3D serves to expand the range of systems molSimplify can be used to analyze and will enable high-throughput study of metal-containing active sites in proteins.
@article{yue_discovering_2023,
  author  = {Yue, S. and Nandy, A. and Kulik, H. J.},
  title   = {Discovering Molecular Coordination Environments for Selective Ion Binding Using Machine Learning},
  journal = {J. Phys. Chem. B},
  year    = {2023},
  volume  = {127},
  pages   = {10592--10600},
  doi     = {10.1021/acs.jpcb.3c06416}
}
The design of ion-selective materials with improved separation efficacy and efficiency is paramount, as current technologies fail to meet real-world deployment challenges. Selectivity in these materials can be informed by local ion binding in confined membrane ion channels. In this study, we utilize a data-driven approach to investigate design features in small molecular complexes coordinating ions as simplified models of ion channels. We curate a data set of 563 alkali metal coordinating molecular complexes (i.e., with Li+, Na+, or K+) from the Cambridge Structural Database and calculate differential ion binding energies using density functional theory. Using this information, we probe when and why structures favor exchange with alternate ions. Our analysis reveals that energetic preferences are related to ion size but are largely due to chemical interactions rather than structural reorganization. We identify unique trends in the selectivity for Li+ over other alkali ions, including the presence of N coordination atoms, planar coordination geometry, and small coordinating ring sizes. We use machine learning models to identify the key contributions of both geometric and electronic features in predicting selective ion binding. These physical insights offer preliminary guidance into the design of optimal membranes for ion selectivity.
@article{vennelakanti_assessing_2023,
  author  = {Vennelakanti, V. and Taylor, M. G. and Nandy, A. and Duan, C. and Kulik, H. J.},
  title   = {Assessing the Performance of Approximate Density Functional Theory on 95 Experimentally Characterized {Fe(II)} Spin Crossover Complexes},
  journal = {J. Chem. Phys.},
  year    = {2023},
  volume  = {159},
  pages   = {024120},
  doi     = {10.1063/5.0157187}
}
Spin crossover (SCO) complexes, which exhibit changes in spin state in response to external stimuli, have applications in molecular electronics and are challenging materials for computational design. We curate a dataset of 95 Fe(II) SCO complexes (SCO-95) from the Cambridge Structural Database that have available low- and high-temperature crystal structures and, in most cases, confirmed experimental spin transition temperatures (T1/2). We study these complexes using density functional theory (DFT) with 30 functionals spanning across multiple rungs of “Jacob’s ladder” to understand the effect of exchange–correlation functional on electronic and Gibbs free energies associated with spin crossover. We specifically assess the effect of varying the Hartree–Fock exchange fraction (aHF) in structures and properties within the B3LYP family of functionals. We identify three best-performing functionals, a modified version of B3LYP (aHF = 0.10), M06-L, and TPSSh, that accurately predict SCO behavior for the majority of the complexes. While M06-L performs well, MN15-L, a more recently developed Minnesota functional, fails to predict SCO behavior for all complexes, which could be the result of differences in datasets used for parametrization of M06-L and MN15-L and also the increased number of parameters for MN15-L. Contrary to observations from prior studies, double-hybrids with higher aHF values are found to strongly stabilize high-spin states and therefore exhibit poor performance in predicting SCO behavior. Computationally predicted T1/2 values are consistent among the three functionals but show limited correlation to experimentally reported T1/2 values. These failures are attributed to the lack of crystal packing effects and counter-anions in the DFT calculations that would be needed to account for phenomena such as hysteresis and two-step SCO behavior. The SCO-95 set thus presents opportunities for method development, both in terms of increasing model complexity and method fidelity.
@article{adamji_computationally_2023,
  author  = {Adamji, H. and Nandy, A. and Kevlishvili, I. and Roman-Leshkov, Y. and Kulik, H. J.},
  title   = {Computational Discovery of Stable Metal–Organic Frameworks for Methane-to-Methanol Catalysis},
  journal = {J. Am. Chem. Soc.},
  year    = {2023},
  volume  = {145},
  pages   = {14365--14378},
  doi     = {10.1021/jacs.3c03351}
}
The challenge of direct partial oxidation of methane to methanol has motivated the targeted search of metal–organic frameworks (MOFs) as a promising class of materials for this transformation because of their site-isolated metals with tunable ligand environments. Thousands of MOFs have been synthesized, yet relatively few have been screened for their promise in methane conversion. We developed a high-throughput virtual screening workflow that identifies MOFs from a diverse space of experimental MOFs that have not been studied for catalysis, yet are thermally stable, synthesizable, and have promising unsaturated metal sites for C–H activation via a terminal metal-oxo species. We carried out density functional theory calculations of the radical rebound mechanism for methane-to-methanol conversion on models of the secondary building units (SBUs) from 87 selected MOFs. While we showed that oxo formation favorability decreases with increasing 3d filling, consistent with prior work, previously observed scaling relations between oxo formation and hydrogen atom transfer (HAT) are disrupted by the greater diversity in our MOF set. Accordingly, we focused on Mn MOFs, which favor oxo intermediates without disfavoring HAT or leading to high methanol release energies—a key feature for methane hydroxylation activity. We identified three Mn MOFs comprising unsaturated Mn centers bound to weak-field carboxylate ligands in planar or bent geometries with promising methane-to-methanol kinetics and thermodynamics. The energetic spans of these MOFs are indicative of promising turnover frequencies for methane to methanol that warrant further experimental catalytic studies.
@article{nandy_identifying_2023,
  author  = {Nandy, A. and Taylor, M. G. and Kulik, H. J.},
  title   = {Identifying Underexplored and Untapped Regions in the Chemical Space of Transition Metal Complexes},
  journal = {J. Phys. Chem. Lett.},
  year    = {2023},
  volume  = {14},
  pages   = {5798--5804},
  doi     = {10.1021/acs.jpclett.3c01214}
}
We survey more than 240 000 crystallized mononuclear transition metal complexes (TMCs) to identify trends in preferred geometric structure and metal coordination. While we observe that an increased level of d filling correlates with a lower coordination number preference, we note exceptions, and we observe undersampling of 4d/5d transition metals and 3p-coordinating ligands. For the one-third of mononuclear TMCs that are octahedral, analysis of the 67 symmetry classes of their ligand environments reveals that complexes often contain monodentate ligands that may be removable, forming an open site amenable to catalysis. Due to their use in catalysis, we analyze trends in coordination by tetradentate ligands in terms of the capacity to support multiple metals and the variability of coordination geometry. We identify promising tetradentate ligands that co-occur in crystallized complexes with labile monodentate ligands that would lead to reactive sites. Literature mining suggests that these ligands are untapped as catalysts, motivating proposal of a promising octa-functionalized porphyrin.
@article{cytter_insights_2023,
  author  = {Cytter, Y. and Nandy, A. and Duan, C. and Kulik, H. J.},
  title   = {Insights into the Deviation from Piecewise Linearity in Transition Metal Complexes from Supervised Machine Learning Models},
  journal = {Phys. Chem. Chem. Phys.},
  year    = {2023},
  volume  = {25},
  pages   = {8103--8116},
  doi     = {10.1039/D3CP00258F}
}
Virtual high-throughput screening (VHTS) and machine learning (ML) with density functional theory (DFT) suffer from inaccuracies from the underlying density functional approximation (DFA). Many of these inaccuracies can be traced to the lack of derivative discontinuity that leads to a curvature in the energy with electron addition or removal. Over a dataset of nearly one thousand transition metal complexes typical of VHTS applications, we computed and analyzed the average curvature (i.e., deviation from piecewise linearity) for 23 density functional approximations spanning multiple rungs of “Jacob’s ladder”. While we observe the expected dependence of the curvatures on Hartree-Fock exchange, we note limited correlation of curvature values between different rungs of “Jacob’s ladder”. We train ML models (i.e., artificial neural networks or ANNs) to predict the curvature and the associated frontier orbital energies for each of these 23 functionals and then interpret differences in curvature among the different DFAs through analysis of the ML models. Notably, we observe spin to play a much more important role in determining the curvature of range-separated and double hybrids in comparison to semi-local functionals, explaining why curvature values are weakly correlated between these and other families of functionals. Over a space of 187.2k hypothetical compounds, we use our ANNs to pinpoint DFAs for which representative transition metal complexes have near-zero curvature with low uncertainty, demonstrating an approach to accelerate screening of complexes with targeted optical gaps.
@article{nandy_database_2023,
  author        = {Nandy, A. and Yue, S. and Oh, C. and Duan, C. and Terrones, G. and Chung, Y. G. and Kulik, H. J.},
  title         = {A Database of Ultrastable {MOFs} Reassembled from Stable Fragments with Machine Learning Models},
  journal       = {Matter},
  year          = {2023},
  volume        = {6},
  pages         = {1--19},
  doi           = {10.1016/j.matt.2023.03.009},
  internal-note = {NOTE(review): the page range 1--19 looks like article-local (in-press) pagination rather than the final volume pagination -- verify against the DOI landing page.}
}
High-throughput screening of hypothetical metal-organic framework (MOF) databases can uncover new materials, but their stability in real-world applications is often unknown. We leverage community knowledge and machine learning (ML) models to identify MOFs that are thermally stable and stable upon activation. We separate these MOFs into their building blocks and recombine them to make a new hypothetical MOF database of over 50,000 structures with orders of magnitude more (1) connectivity nets and (2) inorganic building blocks than were present in prior databases. This database shows a 10-fold enrichment of ultrastable MOF structures that are stable upon activation and more than 1 standard deviation more thermally stable than the average experimentally characterized MOF. For nearly 10,000 ultrastable MOFs, we compute elastic moduli to confirm that these materials have good mechanical stability, and we report methane deliverable capacities. We identify privileged metal nodes in ultrastable MOFs that optimize gas storage and mechanical stability simultaneously.
@article{yue_effects_2023,
  author  = {Yue, S. and Oh, C. and Nandy, A. and Terrones, G. and Kulik, H. J.},
  title   = {Effects of {MOF} Linker Rotation and Functionalization on Methane Uptake and Diffusion},
  journal = {Mol. Syst. Des. Eng.},
  year    = {2023},
  volume  = {8},
  pages   = {527--537},
  doi     = {10.1039/D2ME00237J}
}
The flexible degrees of freedom in metal–organic frameworks (MOFs) can have significant effects on guest molecule behavior. However, in the majority of studies applying molecular simulations to MOFs, the framework is assumed to be rigid in order to minimize computational cost. Here we assess the significance of this assumption on a representative example of methane uptake and diffusion in UiO-66. We introduce an open-source code to modify MOFs through functionalization and linker rotation and we perform Grand Canonical Monte Carlo and molecular dynamics simulations of methane in each of the functionalized and linker-rotated derivatives of UiO-66. We find that linker rotation moderately influences methane uptake and significantly influences methane diffusion. Our assessment provides ranges of property values that serve as measures of uncertainty of these two properties associated with linker rotation. We further determine that void volume fraction and minimum pore size are the features that govern methane uptake and diffusion, respectively. These findings illustrate the impact of linker rotation on MOFs and provide design principles to guide future investigations.
@article{terrones_low-cost_2023,
  author  = {Terrones, G. and Duan, C. and Nandy, A. and Kulik, H. J.},
  title   = {Low-Cost Machine Learning Prediction of Excited State Properties of Iridium-Centered Phosphors},
  journal = {Chem. Sci.},
  year    = {2023},
  volume  = {14},
  pages   = {1419--1433},
  doi     = {10.1039/D2SC06150C}
}
Prediction of the excited state properties of photoactive iridium complexes challenges ab initio methods such as time-dependent density functional theory (TDDFT) both from the perspective of accuracy and of computational cost, complicating high-throughput virtual screening (HTVS). We instead leverage low-cost machine learning (ML) models and experimental data for 1380 iridium complexes to perform these prediction tasks. We find the best-performing and most transferable models to be those trained on electronic structure features from low-cost density functional tight binding calculations. Using artificial neural network (ANN) models, we predict the mean emission energy of phosphorescence, the excited state lifetime, and the emission spectral integral for iridium complexes with accuracy competitive with or superseding that of TDDFT. We conduct feature importance analysis to determine that high cyclometalating ligand ionization potential correlates to high mean emission energy, while high ancillary ligand ionization potential correlates to low lifetime and low spectral integral. As a demonstration of how our ML models can be used for HTVS and the acceleration of chemical discovery, we curate a set of novel hypothetical iridium complexes and use uncertainty-controlled predictions to identify promising ligands for the design of new phosphors while retaining confidence in the quality of the ANN predictions.
@article{kastner_mechanistic_2023,
  author  = {Kastner, D. W. and Nandy, A. and Mehmood, R. and Kulik, H. J.},
  title   = {Mechanistic Insights Into Substrate Positioning Across Non-heme {Fe(II)}/alpha-ketoglutarate-dependent Halogenases and Hydroxylases},
  journal = {ACS Catal.},
  year    = {2023},
  volume  = {13},
  pages   = {2489--2501},
  doi     = {10.1021/acscatal.2c06241}
}
Non-heme iron halogenases and hydroxylases activate inert C–H bonds to selectively catalyze the functionalization of diverse biological products under physiological conditions. To better understand the differences in substrate positioning key to their divergent reactivities, we compiled available crystallographic and spectroscopic data, which revealed that hydroxylases prefer an acute oxo–Fe–H target angle while halogenases prefer a more obtuse angle. With molecular dynamics simulations guided by this experimental information, we simulated the representative hydroxylases TauD and VioC and the halogenases BesD and WelO5 with both acute and obtuse harmonic restraints. We identified key substrate interaction partners that maintain the angle of approach in the respective enzymes, such as Asp94 in TauD and His127 in BesD. Moreover, our simulations reveal that the protein environment in halogenases prevents the sampling of acute angles observed in hydroxylases and vice versa. To validate these classical observations, we optimized the structure with large-scale quantum mechanical (QM) simulations and confirmed that QM-derived substrate–enzyme hydrogen bond strengths were higher in the native configurations. We computed reaction barriers for the rate-limiting hydrogen atom transfer step and found them to be slightly lower from an acute angle regardless of the enzyme–substrate complex. Analysis of the halogenase reaction coordinate reveals the formation of hydrogen bonding networks between the Fe(III)-hydroxyl, monodentate succinate, and a member of the second coordination sphere that may inhibit the hydroxyl rebound.
@article{duan_rapid_2023,
  author  = {Duan, C. and Nandy, A. and Terrones, G. and Kastner, D. W. and Kulik, H. J.},
  title   = {Active Learning Exploration of Transition-Metal Complexes to Discover Method-Insensitive and Synthetically Accessible Chromophores},
  journal = {JACS Au},
  year    = {2023},
  volume  = {3},
  pages   = {391--401},
  doi     = {10.1021/jacsau.2c00547}
}
Transition-metal chromophores with earth-abundant transition metals are an important design target for their applications in lighting and nontoxic bioimaging, but their design is challenged by the scarcity of complexes that simultaneously have well-defined ground states and optimal target absorption energies in the visible region. Machine learning (ML) accelerated discovery could overcome such challenges by enabling the screening of a larger space but is limited by the fidelity of the data used in ML model training, which is typically from a single approximate density functional. To address this limitation, we search for consensus in predictions among 23 density functional approximations across multiple rungs of “Jacob’s ladder”. To accelerate the discovery of complexes with absorption energies in the visible region while minimizing the effect of low-lying excited states, we use two-dimensional (2D) efficient global optimization to sample candidate low-spin chromophores from multimillion complex spaces. Despite the scarcity (i.e., ∼0.01%) of potential chromophores in this large chemical space, we identify candidates with high likelihood (i.e., >10%) of computational validation as the ML models improve during active learning, representing a 1000-fold acceleration in discovery. Absorption spectra of promising chromophores from time-dependent density functional theory verify that 2/3 of candidates have the desired excited-state properties. The observation that constituent ligands from our leads have demonstrated interesting optical properties in the literature exemplifies the effectiveness of our construction of a realistic design space and active learning approach.
@article{cho_dft-based_2023,
  author  = {Cho, Y. and Nandy, A. and Duan, C. and Kulik, H. J.},
  title   = {{DFT}-Based Multireference Diagnostics in the Solid State: Application to Metal--Organic Frameworks},
  journal = {J. Chem. Theory Comput.},
  year    = {2023},
  volume  = {19},
  pages   = {190--197},
  doi     = {10.1021/acs.jctc.2c01033}
}
When a many-body wave function of a system cannot be captured by a single determinant, high-level multireference (MR) methods are required to properly explain its electronic structure. MR diagnostics to estimate the magnitude of such static correlation have been primarily developed for molecular systems and range from low in computational cost to as costly as the full MR calculation itself. We report the first application of low-cost MR diagnostics based on the fractional occupation number calculated with finite-temperature DFT to solid-state systems. To compare the behavior of the diagnostics on solids and molecules, we select metal–organic frameworks (MOFs) as model materials because their reticular nature provides an intuitive way to identify molecular derivatives. On a series of closed-shell MOFs, we demonstrate that the DFT-based MR diagnostics are equally applicable to solids as to their molecular derivatives. The magnitude and spatial distribution of the MR character of a MOF are found to have a good correlation with those of its molecular derivatives, which can be calculated much more affordably in comparison to those of the full MOF. The additivity of MR character discussed here suggests the set of molecular derivatives to be a good representation of a MOF for both MR detection and ultimately for MR corrections, facilitating accurate and efficient high-throughput screening of MOFs and other porous solids.
@article{duan_transferable_2023,
  author  = {Duan, C. and Nandy, A. and Meyer, R. and Arunachalam, N. and Kulik, H. J.},
  title   = {A Transferable Recommender Approach for Selecting the Best Density Functional Approximations in Chemical Discovery},
  journal = {Nat. Comput. Sci.},
  year    = {2023},
  volume  = {3},
  pages   = {38--47},
  doi     = {10.1038/s43588-022-00384-0}
}
Approximate density functional theory has become indispensable owing to its balanced cost–accuracy trade-off, including in large-scale screening. To date, however, no density functional approximation (DFA) with universal accuracy has been identified, leading to uncertainty in the quality of data generated from density functional theory. With electron density fitting and Δ-learning, we build a DFA recommender that selects the DFA with the lowest expected error with respect to the gold standard (but cost-prohibitive) coupled cluster theory in a system-specific manner. We demonstrate this recommender approach on the evaluation of vertical spin splitting energies of transition metal complexes. Our recommender predicts top-performing DFAs and yields excellent accuracy (about 2 kcal mol$^{-1}$) for chemical discovery, outperforming both individual Δ-learning models and the best conventional single-functional approach from a set of 48 DFAs. By demonstrating transferability to diverse synthesized compounds, our recommender potentially addresses the accuracy versus scope dilemma broadly encountered in computational chemistry.
@article{arunachalam_ligand_2022,
  author  = {Arunachalam, N. and Gugler, S. and Taylor, M. G. and Duan, C. and Nandy, A. and Janet, J. P. and Meyer, R. and Oldenstaedt, J. and Chu, D. B. K. and Kulik, H. J.},
  title   = {Ligand Additivity Relationships Enable Efficient Exploration of Transition Metal Chemical Space},
  journal = {J. Chem. Phys.},
  year    = {2022},
  volume  = {157},
  pages   = {184112},
  doi     = {10.1063/5.0125700}
}
To accelerate the exploration of chemical space, it is necessary to identify the compounds that will provide the most additional information or value. A large-scale analysis of mononuclear octahedral transition metal complexes deposited in an experimental database confirms an under-representation of lower-symmetry complexes. From a set of around 1000 previously studied Fe(II) complexes, we show that the theoretical space of synthetically accessible complexes formed from the relatively small number of unique ligands is significantly (∼816k) larger. For the properties of these complexes, we validate the concept of ligand additivity by inferring heteroleptic properties from a stoichiometric combination of homoleptic complexes. An improved interpolation scheme that incorporates information about cis and trans isomer effects predicts the adiabatic spin-splitting energy to around 2 kcal/mol and the HOMO level to less than 0.2 eV. We demonstrate a multi-stage strategy to discover leads from the 816k Fe(II) complexes within a targeted property region. We carry out a coarse interpolation from homoleptic complexes that we refine over a subspace of ligands based on the likelihood of generating complexes with targeted properties. We validate our approach on nine new binary and ternary complexes predicted to be in a targeted zone of discovery, suggesting opportunities for efficient transition metal complex discovery.
@article{nandy_using_2022,
  author  = {Nandy, A. and Adamji, H. and Kastner, D. W. and Vennelakanti, V. and Nazemi, A. and Liu, M. and Kulik, H. J.},
  title   = {Using Computational Chemistry to Reveal Nature's Blueprints in Single-Site Catalyst {C--H} Activation},
  journal = {ACS Catal.},
  year    = {2022},
  volume  = {12},
  number  = {15},
  pages   = {9281--9306},
  doi     = {10.1021/acscatal.2c02096}
}
The challenge of activating inert C–H bonds motivates a study of catalysts that draws from what can be accomplished by natural enzymes and translates these advantageous features into transition-metal complex (TMC) and material mimics. Inert C–H bond activation reactivity has been observed in a diverse number of predominantly iron-containing enzymes from the heme-P450s to nonheme iron α-ketoglutarate-dependent enzymes and methane monooxygenases. Computational studies have played a key role in correlating active-site variables, such as the primary coordination sphere, oxidation state, and spin state, to reactivity. TMCs, zeolites, metal–organic frameworks (MOFs), and single-atom catalysts (SACs) are synthetic inorganic materials that have been designed to incorporate Fe active sites in analogy to single sites in enzymes. In these systems, computational studies have been essential in supporting spectroscopic assignments and quantifying the effects of the metal-local environment on C–H bond reactivity. High-throughput virtual screening tools that have been widely used for bulk metal catalysis do not readily extend to the single-site inorganic catalysts where metal–ligand bonding and localized d-electrons govern reaction energetics. These localized d-electrons can also necessitate wave function theory calculations when density functional theory (DFT) is not sufficiently accurate. Where sufficient computational or experimental data can be gathered, machine learning has helped uncover more general design rules for reactivity or stability. As we continue to investigate metalloprotein active sites, we gain insights that enable us to design stable, active, and selective single-site catalysts.
@article{duan2022ml,
  author  = {Duan, C. and Nandy, A. and Adamji, H. and Kulik, H. J.},
  title   = {Machine Learning Models Predict Calculation Outcomes with the Transferability Necessary for Computational Catalysis},
  journal = {J. Chem. Theory Comput.},
  year    = {2022},
  volume  = {18},
  number  = {7},
  pages   = {4282--4292},
  doi     = {10.1021/acs.jctc.2c00331}
}
Virtual high-throughput screening (VHTS) and machine learning (ML) have greatly accelerated the design of single-site transition-metal catalysts. VHTS of catalysts, however, is often accompanied with a high calculation failure rate and wasted computational resources due to the difficulty of simultaneously converging all mechanistically relevant reactive intermediates to expected geometries and electronic states. We demonstrate a dynamic classifier approach, i.e., a convolutional neural network that monitors geometry optimizations on the fly, and exploit its good performance and transferability in identifying geometry optimization failures for catalyst design. We show that the dynamic classifier performs well on all reactive intermediates in the representative catalytic cycle of the radical rebound mechanism for the conversion of methane to methanol despite being trained on only one reactive intermediate. The dynamic classifier also generalizes to chemically distinct intermediates and metal centers absent from the training data without loss of accuracy or model confidence. We rationalize this superior model transferability as arising from the use of electronic structure and geometric information generated on-the-fly from density functional theory calculations and the convolutional layer in the dynamic classifier. When used in combination with uncertainty quantification, the dynamic classifier saves more than half of the computational resources that would have been wasted on unsuccessful calculations for all reactive intermediates being considered.
@article{nandy2022new,
  author  = {Nandy, A. and Duan, C. and Goffinet, C. and Kulik, H. J.},
  title   = {New Strategies for Direct Methane-to-Methanol Conversion from Active Learning Exploration of 16 Million Catalysts},
  journal = {JACS Au},
  year    = {2022},
  volume  = {2},
  number  = {5},
  pages   = {1200--1213},
  doi     = {10.1021/jacsau.2c00176}
}
Despite decades of effort, no earth-abundant homogeneous catalysts have been discovered that can selectively oxidize methane to methanol. We exploit active learning to simultaneously optimize methane activation and methanol release calculated with machine learning-accelerated density functional theory in a space of 16 M candidate catalysts including novel macrocycles. By constructing macrocycles from fragments inspired by synthesized compounds, we ensure synthetic realism in our computational search. Our large-scale search reveals that low-spin Fe(II) compounds paired with strong-field (e.g., P or S-coordinating) ligands have among the best energetic tradeoffs between hydrogen atom transfer (HAT) and methanol release. This observation contrasts with prior efforts that have focused on high-spin Fe(II) with weak-field ligands. By decoupling equatorial and axial ligand effects, we determine that negatively charged axial ligands are critical for more rapid release of methanol and that higher-valency metals [i.e., M(III) vs M(II)] are likely to be rate-limited by slow methanol release. With full characterization of barrier heights, we confirm that optimizing for HAT does not lead to large oxo formation barriers. Energetic span analysis reveals designs for an intermediate-spin Mn(II) catalyst and a low-spin Fe(II) catalyst that are predicted to have good turnover frequencies. Our active learning approach to optimize two distinct reaction energies with efficient global optimization is expected to be beneficial for the search of large catalyst spaces where no prior designs have been identified and where linear scaling relationships between reaction energies or barriers may be limited or unknown.
@article{cytter2022divergent,
  author  = {Cytter, Y. and Nandy, A. and Bajaj, A. and Kulik, H. J.},
  title   = {Divergent Ligand Additivity Effects in Two Types of Delocalization Errors From Approximate Density Functional Theory},
  journal = {J. Phys. Chem. Lett.},
  year    = {2022},
  volume  = {13},
  number  = {20},
  pages   = {4549--4555},
  doi     = {10.1021/acs.jpclett.2c01026}
}
The predictive accuracy of density functional theory (DFT) is hampered by delocalization errors, especially for correlated systems such as transition-metal complexes. Two complementary strategies have been developed to reduce delocalization error: eliminating the global curvature with change in charge, and applying a linear response Hubbard U as a measure of local curvature at a metal center at fixed charge in a DFT+U framework. We investigate the relationship between the two delocalization error measures as the ligand field strength is varied with the number of strong-field ligands in a series of heteroleptic complexes or by geometrically constraining the metal–ligand bond length in homoleptic octahedral complexes. We show that across these sets of complexes an inverse relationship generally exists between global and local curvatures. We find that effects of ligand substitution on both measures of delocalization are typically additive, but the quantities seldom coincide.
@article{duan2022detection,
  author  = {Duan, C. and Chu, D. B. K. and Nandy, A. and Kulik, H. J.},
  title   = {Detection of Multi-Reference Character Imbalances Enables a Transfer Learning Approach for Virtual High Throughput Screening with Coupled Cluster Accuracy at {DFT} Cost},
  journal = {Chem. Sci.},
  year    = {2022},
  volume  = {13},
  pages   = {4962--4971},
  doi     = {10.1039/D2SC00393G}
}
Appropriately identifying and treating molecules and materials with significant multi-reference (MR) character is crucial for achieving high data fidelity in virtual high-throughput screening (VHTS). Despite development of numerous MR diagnostics, the extent to which a single value of such a diagnostic indicates the MR effect on a chemical property prediction is not well established. We evaluate MR diagnostics for over 10 000 transition-metal complexes (TMCs) and compare to those for organic molecules. We observe that only some MR diagnostics are transferable from one chemical space to another. By studying the influence of MR character on chemical properties (i.e., MR effect) that involve multiple potential energy surfaces (i.e., adiabatic spin splitting, $\Delta E_{\mathrm{H-L}}$, and ionization potential, IP), we show that differences in MR character are more important than the cumulative degree of MR character in predicting the magnitude of an MR effect. Motivated by this observation, we build transfer learning models to predict CCSD(T)-level adiabatic $\Delta E_{\mathrm{H-L}}$ and IP from lower levels of theory. By combining these models with uncertainty quantification and multi-level modeling, we introduce a multi-pronged strategy that accelerates data acquisition by at least a factor of three while achieving coupled cluster accuracy (i.e., to within 1 kcal mol$^{-1}$ MAE) for robust VHTS.
@article{bajaj2022molecular,
  author  = {Bajaj, A. and Duan, C. and Nandy, A. and Taylor, M. G. and Kulik, H. J.},
  title   = {Molecular Orbital Projectors in Non-empirical {jmDFT} Recover Exact Conditions in Transition Metal Chemistry},
  journal = {J. Chem. Phys.},
  year    = {2022},
  volume  = {156},
  pages   = {184112},
  doi     = {10.1063/5.0089460}
}
Low-cost, non-empirical corrections to semi-local density functional theory are essential for accurately modeling transition-metal chemistry. Here, we demonstrate the judiciously modified density functional theory (jmDFT) approach with non-empirical U and J parameters obtained directly from frontier orbital energetics on a series of transition-metal complexes. We curate a set of nine representative Ti(III) and V(IV) d1 transition-metal complexes and evaluate their flat-plane errors along the fractional spin and charge lines. We demonstrate that while jmDFT improves upon both DFT+U and semi-local DFT with the standard atomic orbital projectors (AOPs), it does so inefficiently. We rationalize these inefficiencies by quantifying hybridization in the relevant frontier orbitals. To overcome these limitations, we introduce a procedure for computing a molecular orbital projector (MOP) basis for use with jmDFT. We demonstrate this single set of d1 MOPs to be suitable for nearly eliminating all energetic delocalization and static correlation errors. In all cases, MOP jmDFT outperforms AOP jmDFT, and it eliminates most flat-plane errors at non-empirical values. Unlike DFT+U or hybrid functionals, jmDFT nearly eliminates energetic delocalization and static correlation errors within a non-empirical framework.
@article{nandy2022mofsimplify,
  author  = {Nandy, A. and Terrones, G. and Arunachalam, N. and Duan, C. and Kastner, D. W. and Kulik, H. J.},
  title   = {{MOFSimplify}, Machine Learning Models with Extracted Stability Data of Three Thousand Metal--Organic Frameworks},
  journal = {Sci. Data},
  year    = {2022},
  volume  = {9},
  pages   = {74},
  doi     = {10.1038/s41597-022-01181-0}
}
We report a workflow and the output of a natural language processing (NLP)-based procedure to mine the extant metal–organic framework (MOF) literature describing structurally characterized MOFs and their solvent removal and thermal stabilities. We obtain over 2,000 solvent removal stability measures from text mining and 3,000 thermal decomposition temperatures from thermogravimetric analysis data. We assess the validity of our NLP methods and the accuracy of our extracted data by comparing to a hand-labeled subset. Machine learning (ML, i.e. artificial neural network) models trained on this data using graph- and pore-geometry-based representations enable prediction of stability on new MOFs with quantified uncertainty. Our web interface, MOFSimplify, provides users access to our curated data and enables them to harness that data for predictions on new MOFs. MOFSimplify also encourages community feedback on existing data and on ML model predictions for community-based active learning for improved MOF stability models.
@article{duan2022ml_materials,
  author  = {Duan, C. and Nandy, A. and Kulik, H. J.},
  title   = {Machine Learning for the Discovery and Design of Materials},
  journal = {Annu. Rev. Chem. Biomol. Eng.},
  year    = {2022},
  volume  = {13},
  pages   = {405--429},
  doi     = {10.1146/annurev-chembioeng-092320-120230}
}
Machine learning (ML) has become a part of the fabric of high-throughput screening and computational discovery of materials. Despite its increasingly central role, challenges remain in fully realizing the promise of ML. This is especially true for the practical acceleration of the engineering of robust materials and the development of design strategies that surpass trial and error or high-throughput screening alone. Depending on the quantity being predicted and the experimental data available, ML can either outperform physics-based models, be used to accelerate such models, or be integrated with them to improve their performance. We cover recent advances in algorithms and in their application that are starting to make inroads toward (a) the discovery of new materials through large-scale enumerative screening, (b) the design of materials through identification of rules and principles that govern materials properties, and (c) the engineering of practical materials by satisfying multiple objectives. We conclude with opportunities for further advancement to realize ML as a widespread tool for practical computational materials design.
@article{harper2022representations,
  author  = {Harper, D. and Nandy, A. and Arunachalam, N. and Duan, C. and Janet, J. P. and Kulik, H. J.},
  title   = {Representations and Strategies for Transferable Machine Learning Models in Chemical Discovery},
  journal = {J. Chem. Phys.},
  year    = {2022},
  volume  = {156},
  pages   = {074101},
  doi     = {10.1063/5.0082964}
}
Strategies for machine-learning (ML)-accelerated discovery that are general across material composition spaces are essential, but demonstrations of ML have been primarily limited to narrow composition variations. By addressing the scarcity of data in promising regions of chemical space for challenging targets such as open-shell transition-metal complexes, general representations and transferable ML models that leverage known relationships in existing data will accelerate discovery. Over a large set (∼1000) of isovalent transition-metal complexes, we quantify evident relationships for different properties (i.e., spin-splitting and ligand dissociation) between rows of the Periodic Table (i.e., 3d/4d metals and 2p/3p ligands). We demonstrate an extension to the graph-based revised autocorrelation (RAC) representation (i.e., eRAC) that incorporates the group number alongside the nuclear charge heuristic that otherwise overestimates dissimilarity of isovalent complexes. To address the common challenge of discovery in a new space where data are limited, we introduce a transfer learning approach in which we seed models trained on a large amount of data from one row of the Periodic Table with a small number of data points from the additional row. We demonstrate the synergistic value of the eRACs alongside this transfer learning strategy to consistently improve model performance. Analysis of these models highlights how the approach succeeds by reordering the distances between complexes to be more consistent with the Periodic Table, a property we expect to be broadly useful for other material domains.
@article{jia2022modeling,
  author  = {Jia, H. and Nandy, A. and Liu, M. and Kulik, H. J.},
  title   = {Modeling the Roles of Rigidity and Dopants in Single-Atom Methane-to-Methanol Catalysts},
  journal = {J. Mater. Chem. A},
  year    = {2022},
  volume  = {10},
  pages   = {6193--6203},
  doi     = {10.1039/D1TA08502F}
}
Doped graphitic single-atom catalysts (SACs) with isolated iron sites have similarities to natural enzymes and molecular biomimetics that can convert methane to methanol via a radical rebound mechanism with high-valent Fe(IV)=O intermediates. To understand the relationship of SACs to these homogeneous analogues, we use range-separated hybrid density functional theory (DFT) to compare the energetics and structure of the direct metal-coordinating environment in the presence of 2p (i.e., N or O) and 3p (i.e., P or S) dopants and with increasing finite graphene model flake size to mimic differences in local rigidity. While metal–ligand bond lengths in SACs are significantly shorter than those in transition-metal complexes, they remain longer than SAC mimic macrocyclic complexes. In SACs or the macrocyclic complexes, this compressed metal–ligand environment induces metal distortion out of the plane, especially when reactive species are bound to iron. As a result of this modified metal-coordination environment, we observe SACs to simultaneously favor the formation of the metal–oxo while also allowing for methanol release. This reactivity is different from what has been observed for large sets of square planar model homogeneous catalysts. Overall, our calculations recommend broader consideration of dopants (e.g., P or S) and processing conditions that allow for local distortion around the metal site in graphitic SACs.
@article{liu2022large,
  author  = {Liu, M. and Nazemi, A. and Taylor, M. G. and Nandy, A. and Duan, C. and Kulik, H. J.},
  title   = {Large-Scale Analysis of the Electronic and Geometric Properties of Bio-Inspired {Mo/W} Complexes},
  journal = {ACS Catal.},
  year    = {2022},
  volume  = {12},
  number  = {2},
  pages   = {383--396},
  doi     = {10.1021/acscatal.1c04624}
}
The design of inorganic molecular complexes for the reversible conversion of formate into CO2 inspired by formate dehydrogenase (FDH) enzymes is challenged by a lack of understanding of how to mimic the enzyme action. Here, we carry out a large-scale, high-throughput screening study on all mononuclear Mo/W complexes currently deposited in Cambridge Structural Database (CSD) that resemble the coordination environment of the molybdopterin cofactors in FDH. Using density functional theory, we systematically investigate the individual effects of metal identity, ligand identity, oxidation state, and coordination number on structural, electronic, and catalytic (i.e., H atom binding) properties. We compare our results on molecular complexes with large quantum mechanical cluster calculations on a representative FDH enzyme to understand the influence of the enzyme environment. These comparisons reveal that the enzyme environment primarily influences the metal-local geometry, and these structural variations can improve catalysis. Through a series of computational substitutions on molecular complexes of terminal chalcogen atoms and metal centers, we extend beyond CSD structures to identify the limits of varied chalcogen and metal identity. Through this analysis, we demonstrate that the enzyme primarily affects the geometric properties of the metal center, and terminal chalcogen moieties primarily influence local electronic properties.
@article{nandy2022audacity,
  author  = {Nandy, A. and Duan, C. and Kulik, H. J.},
  title   = {Audacity of Huge: Overcoming Challenges of Data Scarcity and Data Quality for Machine Learning in Computational Materials Discovery},
  journal = {Curr. Opin. Chem. Eng.},
  year    = {2022},
  volume  = {36},
  pages   = {100778},
  doi     = {10.1016/j.coche.2021.100778}
}
Machine learning (ML)-accelerated discovery requires large amounts of high-fidelity data to reveal predictive structure–property relationships. For many properties of interest in materials discovery, the challenging nature and high cost of data generation has resulted in a data landscape that is both scarcely populated and of dubious quality. Data-driven techniques starting to overcome these limitations include the use of consensus across functionals in density functional theory, the development of new functionals or accelerated electronic structure theories, and the detection of where computationally demanding methods are most necessary. When properties cannot be reliably simulated, large experimental data sets can be used to train ML models. In the absence of manual curation, increasingly sophisticated natural language processing and automated image analysis are making it possible to learn structure–property relationships from the literature. Models trained on these data sets will improve as they incorporate community feedback.
@article{vennelakanti2022effect,
  author  = {Vennelakanti, V. and Nandy, A. and Kulik, H. J.},
  title   = {The Effect of {Hartree-Fock} Exchange on Scaling Relations and Reaction Energetics for {C--H} Activation Catalysts},
  journal = {Top. Catal.},
  year    = {2022},
  volume  = {65},
  pages   = {296--311},
  doi     = {10.1007/s11244-021-01482-5}
}
High-throughput computational catalyst studies are typically carried out using density functional theory (DFT) with a single, approximate exchange-correlation functional. In open shell transition metal complexes (TMCs) that are promising for challenging reactions (e.g., C–H activation), the predictive power of DFT has been challenged, and properties are known to be strongly dependent on the admixture of Hartree-Fock (HF) exchange. We carry out a large-scale study of the effect of HF exchange on the predicted catalytic properties of over 1200 mid-row (i.e., Cr, Mn, Fe, Co) 3d TMCs for direct methane-to-methanol conversion. Reaction energy sensitivities across this set depend both on the catalytic rearrangement and ligand chemistry of the catalyst. These differences in sensitivities change both the absolute energetics predicted for a catalyst and its relative performance. Previous observations of the poor performance of global linear free energy relationships (LFERs) hold with both semi-local DFT widely employed in heterogeneous catalysis and hybrid DFT. Narrower metal/oxidation/spin-state specific LFERs perform better and are less sensitive to HF exchange than absolute reaction energetics, except in the case of some intermediate/high-spin states. Importantly, the interplay between spin-state dependent reaction energetics and exchange effects on spin-state ordering means that the choice of DFT functional strongly influences whether the minimum energy pathway is spin-conserved. Despite these caveats, LFERs involving catalysts that can be expected to have closed shell intermediates and low-spin ground states retain significant predictive power.
@article{nandy2021using,
  author  = {Nandy, A. and Duan, C. and Kulik, H. J.},
  title   = {Using Machine Learning and Data Mining to Leverage Community Knowledge for the Engineering of Stable Metal-Organic Frameworks},
  journal = {J. Am. Chem. Soc.},
  year    = {2021},
  volume  = {143},
  number  = {42},
  pages   = {17535--17547},
  doi     = {10.1021/jacs.1c07217}
}
Although the tailored metal active sites and porous architectures of MOFs hold great promise for engineering challenges ranging from gas separations to catalysis, a lack of understanding of how to improve their stability limits their use in practice. To overcome this limitation, we extract thousands of published reports of the key aspects of MOF stability necessary for their practical application: the ability to withstand high temperatures without degrading and the capacity to be activated by removal of solvent molecules. From nearly 4000 manuscripts, we use natural language processing and image analysis to obtain over 2000 solvent-removal stability measures and 3000 thermal degradation temperatures. We analyze the relationships between stability properties and the chemical and geometric structures in this set to identify limits of prior heuristics derived from smaller sets of MOFs. By training predictive machine learning (ML, i.e., Gaussian process and artificial neural network) models to encode the structure–property relationships with graph- and pore-structure-based representations, we are able to make predictions of stability orders of magnitude faster than conventional physics-based modeling or experiment. Interpretation of important features in ML models provides insights that we use to identify strategies to engineer increased stability into typically unstable 3d-transition-metal-containing MOFs that are frequently targeted for catalytic applications. We expect our approach to accelerate the time to discovery of stable, practical MOF materials for a wide range of applications.
@article{taylor2021deciphering,
  author  = {Taylor, M. G. and Nandy, A. and Lu, C. C. and Kulik, H. J.},
  title   = {Deciphering Cryptic Behavior in Bimetallic Transition Metal Complexes with Machine Learning},
  journal = {J. Phys. Chem. Lett.},
  year    = {2021},
  volume  = {12},
  number  = {40},
  pages   = {9812--9820},
  doi     = {10.1021/acs.jpclett.1c02852}
}
We demonstrate an alternative, data-driven approach to uncovering structure–property relationships for the rational design of heterobimetallic transition-metal complexes that exhibit metal–metal bonding. We tailor graph-based representations of the metal-local environment for these complexes for use in multiple linear regression and kernel ridge regression (KRR) models. We curate a set of 28 experimentally characterized complexes to develop a multiple linear regression model for oxidation potentials. We achieve good accuracy (mean absolute error of 0.25 V) and preserve transferability to unseen experimental data with a new ligand structure. We also train a KRR model on a subset of 330 structurally characterized heterobimetallics to predict the degree of metal–metal bonding. This KRR model predicts relative metal–metal bond lengths in the test set to within 5%, and analysis of key features reveals the fundamental atomic contributions (e.g., the valence electron configuration) that most strongly influence the behavior of these complexes. Our work provides guidance for rational bimetallic design, suggesting that properties, including the formal shortness ratio, should be transferable from one period to another.
@article{nandy2021computational,
  author  = {Nandy, A. and Duan, C. and Taylor, M. G. and Liu, F. and Steeves, A. H. and Kulik, H. J.},
  title   = {Computational Discovery of Transition-metal Complexes: From High-throughput Screening to Machine Learning},
  journal = {Chem. Rev.},
  year    = {2021},
  volume  = {121},
  number  = {16},
  pages   = {9927--10000},
  doi     = {10.1021/acs.chemrev.1c00347}
}
Transition-metal complexes are attractive targets for the design of catalysts and functional materials. The behavior of the metal–organic bond, while very tunable for achieving target properties, is challenging to predict and necessitates searching a wide and complex space to identify needles in haystacks for target applications. This review will focus on the techniques that make high-throughput search of transition-metal chemical space feasible for the discovery of complexes with desirable properties. The review will cover the development, promise, and limitations of “traditional” computational chemistry (i.e., force field, semiempirical, and density functional theory methods) as it pertains to data generation for inorganic molecular discovery. The review will also discuss the opportunities and limitations in leveraging experimental data sources. We will focus on how advances in statistical modeling, artificial intelligence, multiobjective optimization, and automation accelerate discovery of lead compounds and design rules. The overall objective of this review is to showcase how bringing together advances from diverse areas of computational chemistry and computer science have enabled the rapid uncovering of structure–property relationships in transition-metal chemistry. We aim to highlight how unique considerations in motifs of metal–organic bonding (e.g., variable spin and oxidation state, and bonding strength/nature) set them and their discovery apart from more commonly considered organic molecules. We will also highlight how uncertainty and relative data scarcity in transition-metal chemistry motivate specific developments in machine learning representations, model training, and in computational chemistry. Finally, we will conclude with an outlook of areas of opportunity for the accelerated discovery of transition-metal complexes.
@article{duan2021putting,
  author  = {Duan, C. and Liu, F. and Nandy, A. and Kulik, H. J.},
  title   = {Putting Density Functional Theory to the Test in Machine-Learning-Accelerated Materials Discovery},
  journal = {J. Phys. Chem. Lett.},
  year    = {2021},
  volume  = {12},
  number  = {19},
  pages   = {4628--4637},
  doi     = {10.1021/acs.jpclett.1c00631}
}
Accelerated discovery with machine learning (ML) has begun to provide the advances in efficiency needed to overcome the combinatorial challenge of computational materials design. Nevertheless, ML-accelerated discovery both inherits the biases of training data derived from density functional theory (DFT) and leads to many attempted calculations that are doomed to fail. Many compelling functional materials and catalytic processes involve strained chemical bonds, open-shell radicals and diradicals, or metal–organic bonds to open-shell transition-metal centers. Although promising targets, these materials present unique challenges for electronic structure methods and combinatorial challenges for their discovery. In this Perspective, we describe the advances needed in accuracy, efficiency, and approach beyond what is typical in conventional DFT-based ML workflows. These challenges have begun to be addressed through ML models trained to predict the results of multiple methods or the differences between them, enabling quantitative sensitivity analysis. For DFT to be trusted for a given data point in a high-throughput screen, it must pass a series of tests. ML models that predict the likelihood of calculation success and detect the presence of strong correlation will enable rapid diagnoses and adaptation strategies. These “decision engines” represent the first steps toward autonomous workflows that avoid the need for expert determination of the robustness of DFT-based materials discoveries.
@article{janet2021navigating,
  author  = {Janet, J. P. and Duan, C. and Nandy, A. and Liu, F. and Kulik, H. J.},
  title   = {Navigating Transition-Metal Chemical Space: Artificial Intelligence for First-Principles Design},
  journal = {Acc. Chem. Res.},
  year    = {2021},
  volume  = {54},
  number  = {3},
  pages   = {532--545},
  doi     = {10.1021/acs.accounts.0c00686}
}
The variability of chemical bonding in open-shell transition-metal complexes not only motivates their study as functional materials and catalysts but also challenges conventional computational modeling tools. Here, tailoring ligand chemistry can alter preferred spin or oxidation states as well as electronic structure properties and reactivity, creating vast regions of chemical space to explore when designing new materials atom by atom. Although first-principles density functional theory (DFT) remains the workhorse of computational chemistry in mechanism deduction and property prediction, it is of limited use here. DFT is both far too computationally costly for widespread exploration of transition-metal chemical space and also prone to inaccuracies that limit its predictive performance for localized d electrons in transition-metal complexes. These challenges starkly contrast with the well-trodden regions of small-organic-molecule chemical space, where the analytical forms of molecular mechanics force fields and semiempirical theories have for decades accelerated the discovery of new molecules, accurate DFT functional performance has been demonstrated, and gold-standard methods from correlated wavefunction theory can predict experimental results to chemical accuracy. The combined promise of transition-metal chemical space exploration and lack of established tools has mandated a distinct approach. In this Account, we outline the path we charted in exploration of transition-metal chemical space starting from the first machine learning (ML) models (i.e., artificial neural network and kernel ridge regression) and representations for the prediction of open-shell transition-metal complex properties. 
The distinct importance of the immediate coordination environment of the metal center as well as the lack of low-level methods to accurately predict structural properties in this coordination environment first motivated and then benefited from these ML models and representations. Once developed, the recipe for prediction of geometric, spin state, and redox potential properties was straightforwardly extended to a diverse range of other properties, including in catalysis, computational “feasibility”, and the gas separation properties of periodic metal–organic frameworks. Interpretation of selected features most important for model prediction revealed new ways to encapsulate design rules and confirmed that models were robustly mapping essential structure–property relationships. Encountering the special challenge of ensuring that good model performance could generalize to new discovery targets motivated investigation of how to best carry out model uncertainty quantification. Distance-based approaches, whether in model latent space or in carefully engineered feature space, provided intuitive measures of the domain of applicability. With all of these pieces together, ML can be harnessed as an engine to tackle the large-scale exploration of transition-metal chemical space needed to satisfy multiple objectives using efficient global optimization methods. In practical terms, bringing these artificial intelligence tools to bear on the problems of transition-metal chemical space exploration has resulted in ML-model assessments of large, multimillion compound spaces in minutes and validated new design leads in weeks instead of decades.
@article{nandy2020why,
  author  = {Nandy, A. and Kulik, H. J.},
  title   = {Why Conventional Design Rules for {C-H} Activation Fail for Open Shell Transition Metal Catalysts},
  journal = {ACS Catal.},
  year    = {2020},
  volume  = {10},
  number  = {24},
  pages   = {15033--15047},
  doi     = {10.1021/acscatal.0c04300}
}
The design of selective and active C–H activation catalysts for direct methane-to-methanol conversion is challenging. Bioinspired complexes that form high-valent metal–oxo intermediates capable of hydrogen abstraction and rebound hydroxylation are promising candidates. This promise has made them a target for computational high-throughput screening, typically simplified through the use of linear free energy relationships (LFERs). However, their mid-row transition-metal centers have numerous accessible spin and oxidation states that increase the combinatorial scale of design efforts. Here, we carry out a computational design screen of over 2500 mid-row 3d transition-metal complexes with four metals in numerous spin and oxidation states. We demonstrate the importance of spin/oxidation state in dictating design principles, limiting the generalization of strategies derived for widely studied high-spin Fe(II) catalysts to other metals or spin/oxidation states. Combined assessment of the effect of ligand-field tuning on reaction step energetics and on the identity of the ground state allows us to propose refined design strategies for spin-allowed methane-to-methanol catalysis. We observe weak coupling of energetics and design principles between reaction steps (e.g., oxo formation vs methanol release), meaning that LFERs do not generalize across our larger catalyst set. To rationalize relative reactivity in known catalysts, we instead compute independent reaction energies and propose strategies for further improvements in catalyst design.
@article{moosavi2020understanding,
  author  = {Moosavi, S. M. and Nandy, A. and Jablonka, K. M. and Ongari, D. and Janet, J. P. and Boyd, P. G. and Lee, Y. and Smit, B. and Kulik, H. J.},
  title   = {Understanding Diversity in the Metal-Organic Framework Ecosystem},
  journal = {Nat. Commun.},
  year    = {2020},
  volume  = {11},
  pages   = {4068},
  doi     = {10.1038/s41467-020-17755-8}
}
Millions of distinct metal-organic frameworks (MOFs) can be made by combining metal nodes and organic linkers. At present, over 90,000 MOFs have been synthesized and over 500,000 predicted. This raises the question whether a new experimental or predicted structure adds new information. For MOF chemists, the chemical design space is a combination of pore geometry, metal nodes, organic linkers, and functional groups, but at present we do not have a formalism to quantify optimal coverage of chemical design space. In this work, we develop a machine learning method to quantify similarities of MOFs to analyse their chemical diversity. This diversity analysis identifies biases in the databases, and we show that such bias can lead to incorrect conclusions. The developed formalism in this study provides a simple and practical guideline to see whether new structures will have the potential for new insights, or constitute a relatively small variation of existing structures.
@article{nandy2020large,
  author  = {Nandy, A. and Chu, D. B. K. and Harper, D. R. and Duan, C. and Arunachalam, N. and Cytter, Y. and Kulik, H. J.},
  title   = {Large-Scale Comparison of 3d and 4d Transition Metal Complexes Illuminates the Reduced Effect of Exchange on Second-Row Spin-State Energetics},
  journal = {Phys. Chem. Chem. Phys.},
  year    = {2020},
  volume  = {22},
  pages   = {19326--19341},
  doi     = {10.1039/D0CP02977G}
}
Density functional theory (DFT) is widely used in transition-metal chemistry, yet essential properties such as spin-state energetics in transition-metal complexes (TMCs) are well known to be sensitive to the choice of the exchange–correlation functional. Increasing the amount of exchange in a functional typically shifts the preferred ground state in first-row TMCs from low-spin to high-spin by penalizing delocalization error, but the effect on properties of second-row complexes is less well known. We compare the exchange sensitivity of adiabatic spin-splitting energies in pairs of mononuclear 3d and 4d mid-row octahedral transition-metal complexes. We analyze hundreds of complexes assembled from four metals in two oxidation states with ten small monodentate ligands that span a wide range of field strengths expected to favor a variety of ground states. We observe consistently lower but proportional sensitivity to exchange fraction among 4d TMCs with respect to their isovalent 3d TMC counterparts, leading to the largest difference in sensitivities for the strongest field ligands. The combined effect of reduced exchange sensitivities and the greater low-spin bias of most 4d TMCs means that while over one-third of 3d TMCs change ground states over a modest variation (ca. 0.0–0.3) in exchange fraction, almost no 4d TMCs do. Differences in delocalization, as judged through changes in the metal–ligand bond lengths between spin states, do not explain the distinct behavior of 4d TMCs. Instead, evaluation of potential energy curves in 3d and 4d TMCs reveals that higher exchange sensitivities in 3d TMCs are likely due to the opposing effect of exchange on the low-spin and high-spin states, whereas the effect on both spin states is more comparable in 4d TMCs.
@article{duan2020semi,
  author  = {Duan, C. and Liu, F. and Nandy, A. and Kulik, H. J.},
  title   = {Semi-supervised Machine Learning Enables the Robust Detection of Multireference Character at Low Cost},
  journal = {J. Phys. Chem. Lett.},
  year    = {2020},
  volume  = {11},
  number  = {16},
  pages   = {6640--6648},
  doi     = {10.1021/acs.jpclett.0c02018}
}
Multireference (MR) diagnostics are common tools for identifying strongly correlated electronic structure that makes single-reference (SR) methods (e.g., density functional theory or DFT) insufficient for accurate property prediction. However, MR diagnostics typically require computationally demanding correlated wave function theory (WFT) calculations, and diagnostics often disagree or fail to predict MR effects on properties. To overcome these challenges, we introduce a semi-supervised machine learning (ML) approach with virtual adversarial training (VAT) of an MR classifier using 15 WFT and DFT MR diagnostics as inputs. In semi-supervised learning, only the most extreme SR or MR points are labeled, and the remaining point labels are learned. The resulting VAT model outperforms the alternatives, as quantified by the distinct property distributions of SR- and MR-classified molecules. To reduce the cost of generating inputs to the VAT model, we leverage the VAT model’s robustness to noisy inputs by replacing WFT MR diagnostics with regression predictions in an MR decision engine workflow that preserves excellent performance. We demonstrate the transferability of our approach to larger molecules and those with distinct chemical composition from the training set. This MR decision engine demonstrates promise as a low-cost, high-accuracy approach to the automatic detection of strong correlation for predictive high-throughput screening.
@article{duan2020data,
  author  = {Duan, C. and Liu, F. and Nandy, A. and Kulik, H. J.},
  title   = {Data-Driven Approaches Can Overcome the Cost–Accuracy Trade-Off in Multireference Diagnostics},
  journal = {J. Chem. Theory Comput.},
  year    = {2020},
  volume  = {16},
  number  = {7},
  pages   = {4373--4387},
  doi     = {10.1021/acs.jctc.0c00358}
}
High-throughput computational screening typically employs methods (i.e., density functional theory or DFT) that can fail to describe challenging molecules, such as those with strongly correlated electronic structure. In such cases, multireference (MR) correlated wavefunction theory (WFT) would be the appropriate choice but remains more challenging to carry out and automate than single-reference (SR) WFT or DFT. Numerous diagnostics have been proposed for identifying when MR character is likely to have an effect on the predictive power of SR calculations, but conflicting conclusions about diagnostic performance have been reached on small data sets. We compute 15 MR diagnostics, ranging from affordable DFT-based to more costly MR-WFT-based diagnostics, on a set of 3165 equilibrium and distorted small organic molecules containing up to six heavy atoms. Conflicting MR character assignments and low pairwise linear correlations among diagnostics are also observed over this set. We evaluate the ability of existing diagnostics to predict the percent recovery of the correlation energy, %Ecorr. None of the DFT-based diagnostics are nearly as predictive of %Ecorr as the best WFT-based diagnostics. To overcome the limitation of this cost–accuracy trade-off, we develop machine learning (ML, i.e., kernel ridge regression) models to predict WFT-based diagnostics from a combination of DFT-based diagnostics and a new, size-independent 3D geometric representation. The ML-predicted diagnostics correlate as well with MR effects as their computed (i.e., with WFT) values, significantly improving over the DFT-based diagnostics on which the models were trained. These ML models thus provide a promising approach to improve upon DFT-based diagnostic accuracy while remaining suitably low cost for high-throughput screening.
@article{taylor2020seeing,
  author  = {Taylor, M. G. and Yang, T. and Lin, S. and Nandy, A. and Janet, J. P. and Duan, C. and Kulik, H. J.},
  title   = {Seeing is Believing: Experimental Spin States from Machine Learning Model Structure Predictions},
  journal = {J. Phys. Chem. A},
  year    = {2020},
  volume  = {124},
  number  = {16},
  pages   = {3286--3299},
  doi     = {10.1021/acs.jpca.0c01458}
}
Determination of ground-state spins of open-shell transition-metal complexes is critical to understanding catalytic and materials properties but also challenging with approximate electronic structure methods. As an alternative approach, we demonstrate how structure alone can be used to guide assignment of ground-state spin from experimentally determined crystal structures of transition-metal complexes. We first identify the limits of distance-based heuristics from distributions of metal–ligand bond lengths of over 2000 unique mononuclear Fe(II)/Fe(III) transition-metal complexes. To overcome these limits, we employ artificial neural networks (ANNs) to predict spin-state-dependent metal–ligand bond lengths and classify experimental ground-state spins based on agreement of experimental structures with the ANN predictions. Although the ANN is trained on hybrid density functional theory data, we exploit the method-insensitivity of geometric properties to enable assignment of ground states for the majority (ca. 80–90%) of structures. We demonstrate the utility of the ANN by data-mining the literature for spin-crossover (SCO) complexes, which have experimentally observed temperature-dependent geometric structure changes, by correctly assigning almost all (>95%) spin states in the 46 Fe(II) SCO complex set. This approach represents a promising complement to more conventional energy-based spin-state assignment from electronic structure theory at the low cost of a machine learning model.
@article{nandy2019machine,
  author  = {Nandy, A. and Zhu, J. and Janet, J. P. and Duan, C. and Getman, R. B. and Kulik, H. J.},
  title   = {Machine Learning Accelerates the Discovery of Design Rules and Exceptions in Stable Metal-Oxo Intermediate Formation},
  journal = {ACS Catal.},
  year    = {2019},
  volume  = {9},
  number  = {9},
  pages   = {8243--8255},
  doi     = {10.1021/acscatal.9b02165}
}
Metal–oxo moieties are important catalytic intermediates in the selective partial oxidation of hydrocarbons and in water splitting. Stable metal–oxo species have reactive properties that vary depending on the spin state of the metal, complicating the development of structure–property relationships. To overcome these challenges, we train machine-learning (ML) models capable of predicting metal–oxo formation energies across a range of first-row metals, oxidation states, and spin states. Using connectivity-only features tailored for inorganic chemistry as inputs to kernel ridge regression or artificial neural network (ANN) ML models, we achieve good mean absolute errors (4–5 kcal/mol) on set-aside test data across a range of ligand orientations. Analysis of feature importance for oxo formation energy prediction reveals the dominance of nonlocal, electronic ligand properties in contrast to other transition metal complex properties (e.g., spin-state or ionization potential). We enumerate the theoretical catalyst space with an ANN, revealing expected trends in oxo formation energetics, such as destabilization of the metal–oxo species with increasing d-filling, as well as exceptions, such as weak correlations with indicators of oxidative stability of the metal in the resting state or unexpected spin-state dependence in reactivity. We carry out uncertainty-aware evolutionary optimization using the ANN to explore a >37 000 candidate catalyst space. New metal and oxidation state combinations are uncovered and validated with density functional theory (DFT), including counterintuitive oxo formation energies for oxidatively stable complexes. This approach doubles the density of confirmed DFT leads in originally sparsely populated regions of property space, highlighting the potential of ML-model-driven discovery to uncover catalyst design rules and exceptions.
@article{janet2019quantitative,
  author  = {Janet, J. P. and Duan, C. and Yang, T. and Nandy, A. and Kulik, H. J.},
  title   = {A Quantitative Uncertainty Metric Controls Error in Neural Network-Driven Chemical Discovery},
  journal = {Chem. Sci.},
  year    = {2019},
  volume  = {10},
  pages   = {7913--7922},
  doi     = {10.1039/C9SC02298H}
}
Machine learning (ML) models, such as artificial neural networks, have emerged as a complement to high-throughput screening, enabling characterization of new compounds in seconds instead of hours. The promise of ML models to enable large-scale chemical space exploration can only be realized if it is straightforward to identify when molecules and materials are outside the model’s domain of applicability. Established uncertainty metrics for neural network models are either costly to obtain (e.g., ensemble models) or rely on feature engineering (e.g., feature space distances), and each has limitations in estimating prediction errors for chemical space exploration. We introduce the distance to available data in the latent space of a neural network ML model as a low-cost, quantitative uncertainty metric that works for both inorganic and organic chemistry. The calibrated performance of this approach exceeds widely used uncertainty metrics and is readily applied to models of increasing complexity at no additional cost. Tightening latent distance cutoffs systematically drives down predicted model errors below training errors, thus enabling predictive error control in chemical discovery or identification of useful data points for active learning.
@article{duan2019learning,
  author  = {Duan, C. and Janet, J. P. and Liu, F. and Nandy, A. and Kulik, H. J.},
  title   = {Learning from Failure: Predicting Electronic Structure Calculation Outcomes with Machine Learning Models},
  journal = {J. Chem. Theory Comput.},
  year    = {2019},
  volume  = {15},
  number  = {4},
  pages   = {2331--2345},
  doi     = {10.1021/acs.jctc.9b00057}
}
High-throughput computational screening for chemical discovery mandates the automated and unsupervised simulation of thousands of new molecules and materials. In challenging materials spaces, such as open shell transition metal chemistry, characterization requires time-consuming first-principles simulation that often necessitates human intervention. These calculations can frequently lead to a null result, e.g., the calculation does not converge or the molecule does not stay intact during a geometry optimization. To overcome this challenge toward realizing fully automated chemical discovery in transition metal chemistry, we have developed the first machine learning models that predict the likelihood of successful simulation outcomes. We train support vector machine and artificial neural network classifiers to predict simulation outcomes (i.e., geometry optimization result and degree of $\langle S^{2} \rangle$ deviation) for a chosen electronic structure method based on chemical composition. For these static models, we achieve an area under the curve of at least 0.95, minimizing computational time spent on nonproductive simulations and therefore enabling efficient chemical space exploration. We introduce a metric of model uncertainty based on the distribution of points in the latent space to systematically improve model prediction confidence. In a complementary approach, we train a convolutional neural network classification model on simulation output electronic and geometric structure time series data. This dynamic model generalizes more readily than the static classifier by becoming more predictive as input simulation length increases. Finally, we describe approaches for using these models to enable autonomous job control in transition metal complex discovery.
@article{janet2019designing,
  author  = {Janet, J. P. and Liu, F. and Nandy, A. and Duan, C. and Yang, T. and Lin, S. and Kulik, H. J.},
  title   = {Designing in the Face of Uncertainty: Exploiting Electronic Structure and Machine Learning Models for Discovery in Inorganic Chemistry},
  journal = {Inorg. Chem.},
  year    = {2019},
  volume  = {58},
  number  = {16},
  pages   = {10592--10606},
  doi     = {10.1021/acs.inorgchem.9b00109}
}
Recent transformative advances in computing power and algorithms have made computational chemistry central to the discovery and design of new molecules and materials. First-principles simulations are increasingly accurate and applicable to large systems with the speed needed for high-throughput computational screening. Despite these strides, the combinatorial challenges associated with the vastness of chemical space mean that more than just fast and accurate computational tools are needed for accelerated chemical discovery. In transition-metal chemistry and catalysis, unique challenges arise. The variable spin, oxidation state, and coordination environments favored by elements with well-localized d or f electrons provide great opportunity for tailoring properties in catalytic or functional (e.g., magnetic) materials but also add layers of uncertainty to any design strategy. We outline five key mandates for realizing computationally driven accelerated discovery in inorganic chemistry: (i) fully automated simulation of new compounds, (ii) knowledge of prediction sensitivity or accuracy, (iii) faster-than-fast property prediction methods, (iv) maps for rapid chemical space traversal, and (v) a means to reveal design rules on the kilocompound scale. Through case studies in open-shell transition-metal chemistry, we describe how advances in methodology and software in each of these areas bring about new chemical insights. We conclude with our outlook on the next steps in this process toward realizing fully autonomous discovery in inorganic chemistry using computational chemistry.
@article{nandy2018strategies,
  author  = {Nandy, A. and Duan, C. and Janet, J. P. and Gugler, S. O. and Kulik, H. J.},
  title   = {Strategies and Software for Machine Learning Accelerated Discovery in Transition Metal Chemistry},
  journal = {Ind. Eng. Chem. Res.},
  year    = {2018},
  volume  = {57},
  number  = {42},
  pages   = {13973--13986},
  doi     = {10.1021/acs.iecr.8b04015}
}
Machine learning the electronic structure of open shell transition metal complexes presents unique challenges, including robust and automated data set generation. Here, we introduce tools that simplify data acquisition from density functional theory (DFT) and validation of trained machine learning models using the molSimplify automatic design (mAD) workflow. We demonstrate this workflow by training and comparing the performance of LASSO, kernel ridge regression (KRR), and artificial neural network (ANN) models using heuristic, topological revised autocorrelation (RAC) descriptors we have recently introduced for machine learning inorganic chemistry. On a series of open shell transition metal complexes, we evaluate set aside test errors of these models for predicting the HOMO level and HOMO–LUMO gap. The best performing models are ANNs, which show 0.15 and 0.25 eV test set mean absolute errors on the HOMO level and HOMO–LUMO gap, respectively. Poor performing KRR models using the full 153-feature RAC set are improved to nearly the same performance as the ANNs when trained on down-selected subsets of 20–30 features. Analysis of the essential descriptors for HOMO level and HOMO–LUMO gap prediction as well as comparison to subsets previously obtained for other properties reveal the paramount importance of nonlocal, steric properties in determining frontier molecular orbital energetics. We demonstrate our model performance on diverse complexes and in the discovery of molecules with target HOMO–LUMO gaps from a large 15,000 molecule design space in minutes rather than days that full DFT evaluation would require.
@article{nandy2018nmr,
  author  = {Nandy, A. and Forse, A. C. and Witherspoon, V. J. and Reimer, J. A.},
  title   = {{NMR} Spectroscopy Reveals Adsorbate Binding Sites in the Metal-Organic Framework {UiO-66(Zr)}},
  journal = {J. Phys. Chem. C},
  year    = {2018},
  volume  = {122},
  number  = {15},
  pages   = {8295--8305},
  doi     = {10.1021/acs.jpcc.7b12628}
}
We assign $^{1}$H and $^{13}$C NMR resonances emanating from acetone, methanol, and cyclohexane adsorbed inside the pores of UiO-66(Zr). These results are informed by density functional theory (DFT) calculations, which probe the role of two competing effects inside of the pore environment: (i) nucleus independent chemical shifts (NICSs) generated by ring currents in conjugated linkers and (ii) small molecule coordination to the metal-oxyhydroxy cluster. These interactions are found to perturb the chemical shift of in-pore adsorbate relative to ex-pore adsorbate (which resides in spaces between the MOF particles). Changes in self-solvation upon adsorption may also perturb the chemical shift. Our results indicate that cyclohexane preferentially adsorbs in the tetrahedral pores of UiO-66(Zr), while acetone and methanol adsorb at the Zr–OH moieties on the metal-oxyhydroxy clusters in a more complex fashion. This method may be used to probe molecular adsorption sites and material void saturation with selected adsorbates, and with further development may eventually be used to trace in-pore chemistry of MOF materials.
@article{khirich2018measurement,
  author  = {Khirich, G. and Holliday, M. J. and Lin, J. C. and Nandy, A.},
  title   = {Measurement and Characterization of Hydrogen-Deuterium Exchange Chemistry Using Relaxation Dispersion {NMR} Spectroscopy},
  journal = {J. Phys. Chem. B},
  year    = {2018},
  volume  = {122},
  number  = {8},
  pages   = {2368--2378},
  doi     = {10.1021/acs.jpcb.7b10849}
}
One-dimensional heteronuclear relaxation dispersion NMR spectroscopy at 13C natural abundance successfully characterized the dynamics of the hydrogen–deuterium exchange reaction occurring at the Nε position in L-arginine by monitoring Cδ in varying amounts of D2O. A small equilibrium isotope effect was observed and quantified, corresponding to ΔG = −0.14 kcal mol⁻¹. A bimolecular rate constant of kD = 5.1 × 10⁹ s⁻¹ M⁻¹ was determined from the pH*-dependence of kex (where pH* is the direct electrode reading of pH in 10% D2O and kex is the nuclear spin exchange rate constant), consistent with diffusion-controlled kinetics. The measurement of ΔG serves to bridge the millisecond time scale lifetimes of the detectable positively charged arginine species with the nanosecond time scale lifetime of the nonobservable low-populated neutral arginine intermediate species, thus allowing for characterization of the equilibrium lifetimes of the various arginine species in solution as a function of fractional solvent deuterium content. Despite the system being in fast exchange on the chemical shift time scale, the magnitude of the secondary isotope shift due to the exchange reaction at Nε was accurately measured to be 0.12 ppm directly from curve-fitting D2O-dependent dispersion data collected at a single static field strength. These results indicate that relaxation dispersion NMR spectroscopy is a robust and general method for studying base-catalyzed hydrogen–deuterium exchange chemistry at equilibrium.
@article{ford2018modular,
  author  = {Ford, A. C. and Chui, W. F. and Zeng, A. Y. and Nandy, A. and Liebenberg, E. and Carraro, C. and Kazakia, G. and Alliston, T. and O'Connell, G. D.},
  title   = {A Modular Approach to Creating Large Engineered Cartilage Surfaces},
  journal = {J. Biomech.},
  year    = {2018},
  volume  = {67},
  pages   = {177--183},
  doi     = {10.1016/j.jbiomech.2017.11.035}
}
Native articular cartilage has limited capacity to repair itself from focal defects or osteoarthritis. Tissue engineering has provided a promising biological treatment strategy that is currently being evaluated in clinical trials. However, translating these techniques to the development of large engineered tissues remains a significant challenge. In this study, we present a method for developing large-scale engineered cartilage surfaces through modular fabrication. Modular Engineered Tissue Surfaces (METS) uses the well-known, but largely under-utilized self-adhesion properties of de novo tissue to create large scaffolds with nutrient channels. Compressive mechanical properties were evaluated throughout METS specimens, and the tensile mechanical strength of the bonds between attached constructs was evaluated over time. Raman spectroscopy, biochemical assays, and histology were performed to investigate matrix distribution. Results showed that by Day 14, stable connections had formed between the constructs in the METS samples. By Day 21, bonds were robust enough to form a rigid sheet and continued to increase in size and strength over time. Compressive mechanical properties and glycosaminoglycan (GAG) content of METS and individual constructs increased significantly over time. The METS technique builds on established tissue engineering accomplishments of developing constructs with GAG composition and compressive properties approaching native cartilage. This study demonstrated that modular fabrication is a viable technique for creating large-scale engineered cartilage, which can be broadly applied to many tissue engineering applications and construct geometries.
@article{barin2017highly,
  author  = {Barin, G. and Peterson, G. W. and Crocell{\`a}, V. and Xu, J. and Colwell, K. A. and Nandy, A. and Reimer, J. A. and Bordiga, S. and Long, J. R.},
  title   = {Highly Effective Ammonia Removal in a Series of {Br{\o}nsted} Acidic Porous Polymers: Investigation of Chemical and Structural Variations},
  journal = {Chem. Sci.},
  year    = {2017},
  volume  = {8},
  number  = {6},
  pages   = {4399--4409},
  doi     = {10.1039/C6SC05079D}
}
Although a widely used and important industrial gas, ammonia (NH3) is also highly toxic and presents a substantial health and environmental hazard. The development of new materials for the effective capture and removal of ammonia is thus of significant interest. The capture of ammonia at ppm-level concentrations relies on strong interactions between the adsorbent and the gas, as demonstrated in a number of zeolites and metal–organic frameworks with Lewis acidic open metal sites. However, these adsorbents typically exhibit diminished capacity for ammonia in the presence of moisture due to competitive adsorption of water and/or reduced structural stability. In an effort to overcome these challenges, we are investigating the performance of porous polymers functionalized with Brønsted acidic groups, which should possess inherent structural stability and enhanced reactivity towards ammonia in the presence of moisture. Herein, we report the syntheses of six different Brønsted acidic porous polymers exhibiting –NH3Cl, –CO2H, –SO3H, and –PO3H2 groups and featuring two different network structures with respect to interpenetration. We further report the low- and high-pressure NH3 uptake in these materials, as determined under dry and humid conditions using gas adsorption and breakthrough measurements. Under dry conditions, it is possible to achieve NH3 capacities as high as 2 mmol g⁻¹ at 0.05 mbar (50 ppm) equilibrium pressure, while breakthrough saturation capacities of greater than 7 mmol g⁻¹ are attainable under humid conditions. Chemical and structural variations deduced from these measurements also revealed an important interplay between acidic group spatial arrangement and NH3 uptake, in particular that interpenetration can promote strong adsorption even for weaker Brønsted acidic functionalities.
In situ infrared spectroscopy provided further insights into the mechanism of NH3 adsorption, revealing a proton transfer between ammonia and acidic sites as well as strong hydrogen bonding interactions in the case of the weaker carboxylic acid-functionalized polymer. These findings highlight that an increase of acidity or porosity does not necessarily correspond directly to increased NH3 capacity and advocate for the development of more fine-tuned design principles for efficient NH3 capture under a range of concentrations and conditions.
@article{bezci2015effect,
  author  = {Bezci, S. E. and Nandy, A. and O'Connell, G. D.},
  title   = {Effect of Hydration on Healthy Intervertebral Disk Mechanical Stiffness},
  journal = {J. Biomech. Eng.},
  year    = {2015},
  volume  = {137},
  number  = {10},
  pages   = {101007},
  doi     = {10.1115/1.4031416}
}
The intervertebral disk has an excellent swelling capacity to absorb water, which is thought to be largely due to the high proteoglycan composition. Injury, aging, degeneration, and diurnal loading are all noted by a significant decrease in water content and tissue hydration. The objective of this study was to evaluate the effect of hydration, through osmotic loading, on tissue swelling and compressive stiffness of healthy intervertebral disks. The wet weight of nucleus pulposus (NP) and annulus fibrosus (AF) explants following swelling was 50% or greater, demonstrating significant ability to absorb water under all osmotic loading conditions (0.015 M–3.0 M phosphate buffered saline (PBS)). Estimated NP residual strains, calculated from the swelling ratio, were approximately 1.5× greater than AF residual strains. Compressive stiffness increased with hyperosmotic loading, which is thought to be due to material compaction from osmotic-loading and the nonlinear mechanical behavior. Importantly, this study demonstrated that residual strains and material properties are greatly dependent on osmotic loading. The findings of this study support the notion that swelling properties from osmotic loading will be important for accurately describing the effect of degeneration and injury on disk mechanics. Furthermore, the tissue swelling will be an important consideration for developing biological repair strategies aimed at restoring mechanical behavior toward a healthy disk.
Aditya Nandy
Assistant Professor
UCLA
UCLA
Boelter Hall Room 4667
Los Angeles, CA 90095
© 2025 Molecular Modeling Laboratory