@article{lars,
  author        = {Lars Ruthotto and Eldad Haber},
  title         = {Deep Neural Networks motivated by Partial Differential Equations},
  journal       = {CoRR},
  volume        = {abs/1804.04272},
  year          = {2018},
  url           = {http://arxiv.org/abs/1804.04272},
  archivePrefix = {arXiv},
  eprint        = {1804.04272},
  timestamp     = {Tue, 01 May 2018 19:46:29 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1804-04272},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{Bell08,
  author    = {Bell, Bradley M. and Burke, James V.},
  editor    = {Bischof, Christian H. and B{\"u}cker, H. Martin and Hovland, Paul and Naumann, Uwe and Utke, Jean},
  title     = {Algorithmic Differentiation of Implicit Functions and Optimal Values},
  booktitle = {Advances in Automatic Differentiation},
  year      = {2008},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  pages     = {67--77},
  abstract  = {In applied optimization, an understanding of the sensitivity of the optimal value to changes in structural parameters is often essential. Applications include parametric optimization, saddle point problems, Benders decompositions, and multilevel optimization. In this paper we adapt a known automatic differentiation (AD) technique for obtaining derivatives of implicitly defined functions for application to optimal value functions. The formulation we develop is well suited to the evaluation of first and second derivatives of optimal values. The result is a method that yields large savings in time and memory. The savings are demonstrated by a Benders decomposition example using both the ADOL-C and CppAD packages. Some of the source code for these comparisons is included to aid testing with other hardware and compilers, other AD packages, as well as future versions of ADOL-C and CppAD. The source code also serves as an aid in the implementation of the method for actual applications. In addition, it demonstrates how multiple C++ operator overloading AD packages can be used with the same source code. This provides motivation for coding numerical routines where the floating point type is a C++ template parameter.},
  isbn      = {978-3-540-68942-3}
}

@article{Aravkin16,
  author        = {Aravkin, Aleksandr and Drusvyatskiy, Dmitriy and van Leeuwen, Tristan},
  title         = {Variable projection for nonsmooth models},
  journal       = {arXiv e-prints},
  keywords      = {Mathematics - Optimization and Control, Statistics - Computation, Statistics - Machine Learning, 65K05, 65K10, 86-08},
  year          = {2016},
  month         = {Jan},
  eid           = {arXiv:1601.05011},
  pages         = {arXiv:1601.05011},
  archivePrefix = {arXiv},
  eprint        = {1601.05011},
  primaryClass  = {math.OC},
  adsurl        = {https://ui.adsabs.harvard.edu/abs/2016arXiv160105011A},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System}
}

@inproceedings{Schmidt2011,
  author    = {Schmidt, Mark and Le Roux, Nicolas and Bach, Francis},
  title     = {Convergence Rates of Inexact Proximal-gradient Methods for Convex Optimization},
  booktitle = {Proceedings of the 24th International Conference on Neural Information Processing Systems},
  series    = {NIPS'11},
  year      = {2011},
  isbn      = {978-1-61839-599-3},
  location  = {Granada, Spain},
  pages     = {1458--1466},
  numpages  = {9},
  url       = {http://dl.acm.org/citation.cfm?id=2986459.2986622},
  acmid     = {2986622},
  publisher = {Curran Associates Inc.},
  address   = {USA}
}

@article{vanLeeuwen2014SISC3Dfds,
  title    = {3D frequency-domain seismic inversion with controlled sloppiness},
  journal  = {SIAM Journal on Scientific Computing},
  volume   = {36},
  number   = {5},
  year     = {2014},
  note     = {(SISC)},
  month    = {10},
  pages    = {S192--S217},
  abstract = {Seismic waveform inversion aims at obtaining detailed estimates of subsurface medium parameters, such as the spatial distribution of soundspeed, from multiexperiment seismic data. A formulation of this inverse problem in the frequency domain leads to an optimization problem constrained by a Helmholtz equation with many right-hand sides. Application of this technique to industry-scale problems faces several challenges: First, we need to solve the Helmholtz equation for high wave numbers over large computational domains. Second, the data consist of many independent experiments, leading to a large number of PDE solves. This results in high computational complexity both in terms of memory and CPU time as well as input/output costs. Finally, the inverse problem is highly nonlinear and a lot of art goes into preprocessing and regularization. Ideally, an inversion needs to be run several times with different initial guesses and/or tuning parameters. In this paper, we discuss the requirements of the various components (PDE solver, optimization method, \dots) when applied to large-scale three-dimensional seismic waveform inversion and combine several existing approaches into a flexible inversion scheme for seismic waveform inversion. The scheme is based on the idea that in the early stages of the inversion we do not need all the data or very accurate PDE solves. We base our method on an existing preconditioned Krylov solver (CARP-CG) and use ideas from stochastic optimization to formulate a gradient-based (quasi-Newton) optimization algorithm that works with small subsets of the right-hand sides and uses inexact PDE solves for the gradient calculations. We propose novel heuristics to adaptively control both the accuracy and the number of right-hand sides. We illustrate the algorithms on synthetic benchmark models for which significant computational gains can be made without being sensitive to noise and without losing the accuracy of the inverted model.},
  keywords = {block-cg, Helmholtz equation, inexact gradient, Kaczmarz method, preconditioning, Seismic inversion},
  doi      = {10.1137/130918629},
  url      = {http://epubs.siam.org/doi/abs/10.1137/130918629},
  url2     = {https://slim.gatech.edu/Publications/Public/Journals/SIAMJournalOnScientificComputing/2014/vanLeeuwen2014SISC3Dfds/vanLeeuwen2014SISC3Dfds.pdf},
  author   = {Tristan van Leeuwen and Felix J. Herrmann}
}

@article{Wu19,
  author        = {Yan Wu and Mihaela Rosca and Timothy P. Lillicrap},
  title         = {Deep Compressed Sensing},
  journal       = {CoRR},
  volume        = {abs/1905.06723},
  year          = {2019},
  url           = {http://arxiv.org/abs/1905.06723},
  archivePrefix = {arXiv},
  eprint        = {1905.06723},
  timestamp     = {Tue, 28 May 2019 12:48:08 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1905-06723},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}

@article{Veen2018,
  author        = {Van Veen, Dave and Jalal, Ajil and Soltanolkotabi, Mahdi and Price, Eric and Vishwanath, Sriram and Dimakis, Alexandros G.},
  title         = {Compressed Sensing with Deep Image Prior and Learned Regularization},
  journal       = {arXiv e-prints},
  keywords      = {Statistics - Machine Learning, Computer Science - Information Theory, Computer Science - Machine Learning},
  year          = {2018},
  month         = {Jun},
  eid           = {arXiv:1806.06438},
  pages         = {arXiv:1806.06438},
  archivePrefix = {arXiv},
  eprint        = {1806.06438},
  primaryClass  = {stat.ML},
  adsurl        = {https://ui.adsabs.harvard.edu/abs/2018arXiv180606438V},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System}
}

@article{peters2018pmf,
  title    = {Projection methods and applications for seismic nonlinear inverse problems with multiple constraints},
  journal  = {Geophysics},
  year     = {2018},
  note     = {(Published online in Geophysics)},
  abstract = {Nonlinear inverse problems are often hampered by non-uniqueness and local minima because of missing low frequencies and far offsets in the data, lack of access to good starting models, noise, and modeling errors. A well-known approach to counter these deficiencies is to include prior information on the unknown model, which regularizes the inverse problem. While conventional regularization methods have resulted in enormous progress in ill-posed (geophysical) inverse problems, challenges remain when the prior information consists of multiple pieces. To handle this situation, we propose an optimization framework that allows us to add multiple pieces of prior information in the form of constraints. Compared to additive regularization penalties, constraints have a number of advantages making them more suitable for inverse problems such as full-waveform inversion. The proposed framework is rigorous because it offers assurances that multiple constraints are imposed uniquely at each iteration, irrespective of the order in which they are invoked. To project onto the intersection of multiple sets uniquely, we employ Dykstra{\textquoteright}s algorithm that scales to large problems and does not rely on trade-off parameters. In that sense, our approach differs substantially from approaches such as Tikhonov regularization, penalty methods, and gradient filtering. None of these offer assurances, which makes them less suitable to full-waveform inversion where unrealistic intermediate results effectively derail the iterative inversion process. By working with intersections of sets, we keep expensive objective and gradient calculations unaltered, separate from projections, and we also avoid trade-off parameters. These features allow for easy integration into existing code bases. In addition to more predictable behavior, working with constraints also allows for heuristics where we build up the complexity of the model gradually by relaxing the constraints. This strategy helps to avoid convergence to local minima that represent unrealistic models. We illustrate this unique feature with examples of varying complexity.},
  keywords = {constraints, Full-waveform inversion, intersection, Optimization, projection, regularization},
  doi      = {10.1190/geo2018-0192.1},
  url      = {https://slim.gatech.edu/Publications/Public/Journals/Geophysics/2018/peters2018pmf/peters2018pmf.html},
  author   = {Bas Peters and Brendan R. Smithyman and Felix J. Herrmann}
}

@article{esser2016tvr,
  title    = {Total-variation regularization strategies in full-waveform inversion},
  journal  = {SIAM Journal on Imaging Sciences},
  volume   = {11},
  number   = {1},
  year     = {2018},
  note     = {(SIAM Journal on Imaging Sciences)},
  pages    = {376--406},
  abstract = {We propose an extended full-waveform inversion formulation that includes general convex constraints on the model. Though the full problem is highly nonconvex, the overarching optimization scheme arrives at geologically plausible results by solving a sequence of relaxed and warm-started constrained convex subproblems. The combination of box, total-variation, and successively relaxed asymmetric total-variation constraints allows us to steer free from parasitic local minima while keeping the estimated physical parameters laterally continuous and in a physically realistic range. For accurate starting models, numerical experiments carried out on the challenging 2004 BP velocity benchmark demonstrate that bound and total-variation constraints improve the inversion result significantly by removing inversion artifacts, related to source encoding, and by clearly improved delineation of top, bottom, and flanks of a high-velocity high-contrast salt inclusion. The experiments also show that for poor starting models these two constraints by themselves are insufficient to detect the bottom of high-velocity inclusions such as salt. Inclusion of the one-sided asymmetric total-variation constraint overcomes this issue by discouraging velocity lows to build up during the early stages of the inversion. To the author{\textquoteright}s knowledge the presented algorithm is the first to successfully remove the imprint of local minima caused by poor starting models and band-width limited finite aperture data.},
  keywords = {constrained optimization, Full-waveform inversion, hinge loss, salt, total variation},
  doi      = {10.1137/17M111328X},
  url      = {https://slim.gatech.edu/Publications/Public/Journals/CoRR/2016/esser2016tvr/esser2016tvr.pdf},
  url2     = {https://doi.org/10.1137/17M111328X},
  author   = {Ernie Esser and Llu{\'\i}s Guasch and Tristan van Leeuwen and Aleksandr Y. Aravkin and Felix J. Herrmann}
}

@article{Witte2019,
  author   = {Philipp A. Witte and Mathias Louboutin and Fabio Luporini and Gerard J. Gorman and Felix J. Herrmann},
  title    = {Compressive least-squares migration with on-the-fly Fourier transforms},
  journal  = {Geophysics},
  volume   = {84},
  number   = {5},
  pages    = {R655--R672},
  year     = {2019},
  doi      = {10.1190/geo2018-0490.1},
  url      = {https://doi.org/10.1190/geo2018-0490.1},
  eprint   = {https://doi.org/10.1190/geo2018-0490.1},
  abstract = {Least-squares reverse time migration is a powerful approach for true-amplitude seismic imaging of complex geologic structures, but the successful application of this method is currently hindered by its enormous computational cost, as well as its high memory requirements for computing the gradient of the objective function. We have tackled these problems by introducing an algorithm for low-cost sparsity-promoting least-squares migration using on-the-fly Fourier transforms. We formulate the least-squares migration objective function in the frequency domain (FD) and compute gradients for randomized subsets of shot records and frequencies, thus significantly reducing data movement and the number of overall wave equation solves. By using on-the-fly Fourier transforms, we can compute an arbitrary number of monochromatic FD wavefields with a time-domain (TD) modeling code, instead of having to solve individual Helmholtz equations for each frequency, which becomes computationally infeasible when moving to high frequencies. Our numerical examples demonstrate that compressive imaging with on-the-fly Fourier transforms provides a fast and memory-efficient alternative to TD imaging with optimal checkpointing, whose memory requirements for a fixed background model and source wavelet are independent of the number of time steps. Instead, the memory and additional computational costs grow with the number of frequencies and determine the amount of subsampling artifacts and crosstalk. In contrast to optimal checkpointing, this offers the possibility to trade the memory and computational costs for image quality or a larger number of iterations and is advantageous in new computing environments such as the cloud, where computing is often cheaper than memory and data movement.}
}

@article{Adler2018,
  author        = {Adler, Jonas and {\"O}ktem, Ozan},
  title         = {Deep Bayesian Inversion},
  journal       = {arXiv e-prints},
  keywords      = {Statistics - Machine Learning, Computer Science - Machine Learning, Mathematics - Statistics Theory},
  year          = {2018},
  month         = {Nov},
  eid           = {arXiv:1811.05910},
  pages         = {arXiv:1811.05910},
  archivePrefix = {arXiv},
  eprint        = {1811.05910},
  primaryClass  = {stat.ML},
  adsurl        = {https://ui.adsabs.harvard.edu/abs/2018arXiv181105910A},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System}
}

@article{fang2017uqfip,
  title    = {Uncertainty quantification for inverse problems with weak partial-differential-equation constraints},
  journal  = {Geophysics},
  volume   = {83},
  number   = {6},
  year     = {2018},
  note     = {(Geophysics)},
  pages    = {R629--R647},
  abstract = {In a statistical inverse problem, the objective is a complete statistical description of unknown parameters from noisy observations in order to quantify uncertainties of the parameters of interest. We consider inverse problems with partial-differential-equation-constraints, which are applicable to a variety of seismic problems. Bayesian inference is one of the most widely-used approaches to precisely quantify statistics through a posterior distribution, incorporating uncertainties in observed data, modeling kernel, and prior knowledge of the parameters. Typically when formulating the posterior distribution, the partial-differential-equation-constraints are required to be exactly satisfied, resulting in a highly nonlinear forward map and a posterior distribution with many local maxima. These drawbacks make it difficult to find an appropriate approximation for the posterior distribution. Another complicating factor is that traditional Markov chain Monte Carlo methods are known to converge slowly for realistically sized problems. In this work, we relax the partial-differential-equation-constraints by introducing an auxiliary variable, which allows for Gaussian deviations in the partial-differential-equations. Thus, we obtain a new bilinear posterior distribution consisting of both data and partial-differential-equation misfit terms. We illustrate that for a particular range of variance choices for the partial-differential-equation misfit term, the new posterior distribution has fewer modes and can be well-approximated by a Gaussian distribution, which can then be sampled in a straightforward manner. Since it is prohibitively expensive to explicitly construct the dense covariance matrix of the Gaussian approximation for intermediate to large-scale problems, we present a method to implicitly construct it, which enables efficient sampling. We apply this framework to two-dimensional seismic inverse problems with 1,800 and 92,455 unknown parameters. The results illustrate that our framework can produce comparable statistical quantities to those produced by conventional Markov chain Monte Carlo type methods while requiring far fewer partial-differential-equation solves, which are the main computational bottlenecks in these problems.},
  keywords = {acoustic, FWI, UQ, weak-constraint},
  doi      = {10.1190/geo2017-0824.1},
  url      = {https://slim.gatech.edu/Publications/Public/Journals/Geophysics/2018/fang2017uqfip/fang2017uqfip.html},
  author   = {Zhilong Fang and Curt Da Silva and Rachel Kuske and Felix J. Herrmann}
}

@inproceedings{han2017alternating,
  title     = {Alternating back-propagation for generator network},
  author    = {Han, Tian and Lu, Yang and Zhu, Song-Chun and Wu, Ying Nian},
  booktitle = {Thirty-First AAAI Conference on Artificial Intelligence},
  year      = {2017}
}

@inproceedings{welling2011bayesian,
  title     = {Bayesian learning via stochastic gradient Langevin dynamics},
  author    = {Welling, Max and Teh, Yee Whye},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning (ICML-11)},
  pages     = {681--688},
  year      = {2011}
}

@inproceedings{mosser2018stochastic,
  title     = {Stochastic seismic waveform inversion using generative adversarial networks as a geological prior},
  author    = {Mosser, Lukas and Dubrule, Olivier and Blunt, Martin J.},
  booktitle = {First EAGE/PESGB Workshop Machine Learning},
  year      = {2018}
}

@article{dittmer2018regularization,
  title   = {Regularization by architecture: A deep prior approach for inverse problems},
  author  = {Dittmer, S{\"o}ren and Kluth, Tobias and Maass, Peter and Baguer, Daniel Otero},
  journal = {arXiv preprint arXiv:1812.03889},
  year    = {2018}
}

@article{wu2019parametric,
  title     = {Parametric convolutional neural network-domain full-waveform inversion},
  author    = {Wu, Yulang and McMechan, George A.},
  journal   = {Geophysics},
  volume    = {84},
  number    = {6},
  pages     = {R893--R908},
  year      = {2019},
  publisher = {Society of Exploration Geophysicists}
}

@inproceedings{zhang2015deep,
  title     = {Deep learning with elastic averaging SGD},
  author    = {Zhang, Sixin and Choromanska, Anna E. and LeCun, Yann},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {685--693},
  year      = {2015}
}

@article{chaudhari2016entropy,
  title   = {Entropy-SGD: Biasing gradient descent into wide valleys},
  author  = {Chaudhari, Pratik and Choromanska, Anna and Soatto, Stefano and LeCun, Yann and Baldassi, Carlo and Borgs, Christian and Chayes, Jennifer and Sagun, Levent and Zecchina, Riccardo},
  journal = {arXiv preprint arXiv:1611.01838},
  year    = {2016}
}