\documentclass[11pt]{article}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{float}
\usepackage{graphicx}

\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}
\newcommand{\eps}{\epsilon}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbb{R}}
\newcommand{\norm}[2]{\| #1 \|_{#2}}

\newcommand{\handout}[5]{
  \noindent
  \begin{center}
  \framebox{
    \vbox{
      \hbox to 5.78in { {\bf CS 395T: Sublinear Algorithms } \hfill #2 }
      \vspace{4mm}
      \hbox to 5.78in { {\Large \hfill #5 \hfill} }
      \vspace{2mm}
      \hbox to 5.78in { {\em #3 \hfill #4} }
    }
  }
  \end{center}
  \vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribe: #4}{Lecture: #1}}

\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{assumption}[theorem]{Assumption}

% 1-inch margins, from fullpage.sty by H.Partl, Version 2, Dec. 15, 1988.
\topmargin 0pt
\advance \topmargin by -\headheight
\advance \topmargin by -\headsep
\textheight 8.9in
\oddsidemargin 0pt
\evensidemargin \oddsidemargin
\marginparwidth 0.5in
\textwidth 6.5in
\parindent 0in
\parskip 1.5ex

\begin{document}

\lecture{12 -- More compressed sensing, Oct 7, 2014}{Fall 2014}{Prof.\ Eric Price}{Tianyang Li}

\section{Overview}

In the last lecture: {\em regular compressed sensing}. In this lecture: {\em model-based compressed sensing}.

\section{Compressed sensing}

\begin{itemize}
\item $x$ is $k$-sparse
\item observe $y=Ax+e$
\item recover $\hat{x} \approx x$ where $\norm{\hat{x} - x}{2} \lesssim \norm{e}{2}$
\end{itemize}

(or $x$ is ``approximately'' $k$-sparse and we recover $\hat{x}$ where $\norm{\hat{x} - x}{2} \leq \norm{e}{2} + C \underset{\text{$k$-sparse $x'$}}{\min} \underbrace{\norm{x-x'}{}}_{\text{various norm}}$)

\paragraph{Some notes about $A$}
\begin{itemize}
\item If $A \in \R^{m \times n}$ satisfies RIP, then recovery is possible.
\item When each entry of $A$ is sampled i.i.d.\ from a Gaussian with mean 0ドル$ and variance 1ドル/m,ドル then $m=O(k \log \frac{n}{k})$ suffices.
\end{itemize}

\paragraph{How good is this?}
\begin{itemize}
\item to store the positions of the nonzero entries: $\log_2 \binom{n}{k} \approx k \log_2 \frac{n}{k}$ bits
\item to store the values of the entries: $k$ words
\end{itemize}

Define the ``sparsity ratio'' $R = \frac{n}{k}$. Compressed sensing uses $m \approx k \log R$ measurements instead of $n,ドル saving a factor of $\frac{R}{\log R}$ relative to naive sampling; storing the sparse signal directly saves a factor of approximately $R$.

Can't use $O(k)$ measurements \underline{\em in general}. \underline{\em But} we can for more structured signals, e.g.\ {\em block-sparse} signals:

\begin{figure}[H]
\centering
\includegraphics[width=.5\textwidth]{block-sparse-example.png}
\caption{$\frac{k}{B}$ ``blocks'' of length $B$ where each block is all on/all off}
\end{figure}

For block-sparse signals, the number of possible supports is $\binom{\frac{n}{B}}{\frac{k}{B}} = 2^{O(\frac{k}{B} \log \frac{n}{k})}$. When $B \geq \log \frac{n}{k}$ this is 2ドル^{O(k)},ドル so only $O(k)$ bits are needed to describe the support.
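\paragraph{A quick numerical check}
The support-counting above can be checked numerically. The following is a small Python sketch (illustrative only; the sizes $n,ドル $k,ドル $B$ are arbitrary choices, not from the lecture) comparing the number of bits needed to describe a generic $k$-sparse support with the number needed for a block-sparse support.

\begin{verbatim}
import math

n, k, B = 1 << 20, 1 << 10, 64   # arbitrary example sizes with B >= log2(n/k)
R = n // k                       # sparsity ratio R = n/k

bits_generic = math.log2(math.comb(n, k))          # log2 C(n, k)   ~ k log2(n/k)
bits_block = math.log2(math.comb(n // B, k // B))  # log2 C(n/B, k/B)

print(f"generic k-sparse support: {bits_generic:.0f} bits "
      f"(k log2 R = {k * math.log2(R):.0f})")
print(f"block-sparse support: {bits_block:.0f} bits "
      f"((k/B) log2 R = {k / B * math.log2(R):.0f})")
\end{verbatim}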
\section{Tree sparsity}

\begin{figure}[H]
\centering
\includegraphics[width=0.5\textwidth]{tree-sparsity-example.png}
\caption{Sparsity pattern is a contiguous rooted subtree}
\end{figure}

\paragraph{Number of rooted subtrees with $k$ nodes in a size-$n$ binary tree}
\begin{itemize}
\item a walk that visits every vertex of the subtree and returns to the root uses $\leq 2k$ edges
\item at each step there are at most 3 possible directions to go (left child, right child, or parent)
\end{itemize}
so the number of such subtrees is $\leq 3^{2k} = 2^{O(k)}$.

\newcommand{\F}{\mathcal{F}}
\section{Model sparsity}

$\F$ is a family of supports; each $S \in \F$ satisfies $S \subseteq [n]$ and $|S| \leq k$.

\begin{theorem}
$m = O(k + \log |\F|)$ Gaussian measurements suffice.
\end{theorem}

\newcommand{\supp}{\text{supp}}
\paragraph{Model-based compressed sensing}
Given $y=Ax+e$ with $\supp(x) \in \F,ドル recover $\hat{x}$ such that $\norm{\hat{x} - x}{2} \lesssim \norm{e}{2}$.

\paragraph{Model RIP}
For all $x$ with $\supp(x) \in \F \oplus \F = \left\{S \cup T \mid S, T \in \F \right\},ドル
\begin{equation}
\norm{Ax}{2} = (1 \pm \eps) \norm{x}{2}
\end{equation}

\paragraph{Model IHT}
\begin{equation}
x^{i+1} = H_{\F}(x^i+A^T(y-Ax^i))
\end{equation}
where $H_{\F}$ is the projection onto the model: $H_{\F}(z) = z_T$ for
\begin{equation}
T = \underset{T' \in \F}{\arg\min}\, \norm{z - z_{T'}}{2} = \underset{T' \in \F}{\arg\max}\, \norm{z_{T'}}{2}
\end{equation}
(Recall that in {\em regular} compressed sensing, if $A$ satisfies the 2ドルk$-RIP then IHT works.)

\paragraph{First iteration analysis}
\begin{equation}
z = A^T y=A^T Ax+A^T e
\end{equation}
For any $T \in \F$ and $S=\supp(x),ドル if $A$ satisfies the model RIP on $\F \oplus \F,ドル
\begin{align}
\norm{(z-x)_{S \cup T}}{2} & \leq \norm{(A^T A - I)_{(S \cup T) \times (S \cup T)}}{2} \norm{x}{2} + \norm{A^T_{S \cup T}}{2} \norm{e}{2} \\
& \leq \eps \norm{x}{2} + (1 + \eps) \norm{e}{2}
\end{align}
where $A_{S \cup T}$ denotes the columns of $A$ indexed by $S \cup T$.

For any $z,ドル with $T = \supp(H_{\F}(z)) \in \F$ the support selected by the model projection, we want
\begin{equation} \label{mcs-bound1}
\norm{x-z_T}{2} \lesssim \norm{(x-z)_{S \cup T}}{2}
\end{equation}
To prove \eqref{mcs-bound1}: since $T$ maximizes $\norm{z_{T'}}{2}$ over $T' \in \F$ and $S \in \F,ドル we have $\norm{z_T}{2} \geq \norm{z_S}{2}$ and hence $\norm{z_{T \backslash S}}{2} \geq \norm{z_{S \backslash T}}{2}$. Therefore
\begin{align}
\norm{x_{S \backslash T}}{2} & \leq \norm{(x-z)_{S \backslash T}}{2} + \norm{z_{S \backslash T}}{2} \\
& \leq \norm{(x-z)_{S \backslash T}}{2} + \norm{z_{T \backslash S}}{2} \\
\Rightarrow \norm{x_{S \backslash T}}{2}^2 &\leq 2\norm{(x-z)_{S \backslash T}}{2}^2 + 2\norm{z_{T \backslash S}}{2}^2
\end{align}
Since $x$ is zero outside $S,ドル $\norm{z_{T \backslash S}}{2} = \norm{(x-z)_{T \backslash S}}{2},ドル so
\begin{align}
\norm{x-z_T}{2}^2 &= \norm{x_{S \backslash T}}{2}^2 + \norm{z_{T \backslash S}}{2}^2 + \norm{(x-z)_{T \cap S}}{2}^2 \\
& \leq 2 \norm{(x-z)_{S \backslash T}}{2}^2 + 3 \norm{z_{T \backslash S}}{2}^2 + \norm{(x-z)_{T \cap S}}{2}^2 \\
& \leq 3 \norm{(x-z)_{S \cup T}}{2}^2
\end{align}

\paragraph{Running time}
\begin{itemize}
\item regular IHT: $O(\log \frac{\norm{x}{2}}{\norm{e}{2}})$ iterations, each dominated by a matrix--vector multiplication with $A$
\item model IHT: $O(\log \frac{\norm{x}{2}}{\norm{e}{2}})$ iterations, each a matrix--vector multiplication with $A$ plus one evaluation of $H_{\F}$
\end{itemize}

\paragraph{Computing $H_{\F}$ for trees}
\begin{itemize}
\item exact: $O(nk^2),ドル improvable to $O(nk)$
\item approximate (find $T' \in \F$ such that $\norm{z - z_{T'}}{2} \lesssim \underset{T \in \F}{\min} \norm{z - z_T}{2}$): $\tilde{O}(n)$
\end{itemize}

\section{Compressed sensing using $L^1$ minimization}

For $y = Ax+e,ドル solve
\begin{equation}
\hat{x} = \underset{x'}{\arg\min}\ \norm{x'}{1} \quad \text{subject to} \quad \norm{Ax'-y}{2} \leq \eps
\end{equation}

\begin{theorem}\label{cs-l1}
If $\eps \geq \norm{e}{2}$ and $A$ satisfies RIP or RE then $\norm{\hat{x} - x}{2} \lesssim \eps$.
\end{theorem}
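\paragraph{Example: model IHT for block-sparse signals}
As an illustration of the model-IHT iteration above, here is a minimal Python sketch in which $\F$ is the block-sparse model, so the projection $H_{\F}$ simply keeps the $k/B$ blocks of largest $\ell_2$ energy. The function names, problem sizes, iteration count, and noise level are illustrative choices, not part of the lecture.

\begin{verbatim}
import numpy as np

def block_project(z, B, blocks_kept):
    """H_F for the block-sparse model: keep the blocks_kept = k/B blocks
    of length B with the largest l2 energy, zero out everything else."""
    blocks = z.reshape(-1, B)                 # shape (n/B, B)
    energy = (blocks ** 2).sum(axis=1)
    keep = np.argsort(energy)[-blocks_kept:]  # indices of the heaviest blocks
    out = np.zeros_like(blocks)
    out[keep] = blocks[keep]
    return out.reshape(-1)

def model_iht(A, y, B, blocks_kept, iters=100):
    """Model IHT: x^{i+1} = H_F(x^i + A^T (y - A x^i)), unit step size."""
    x = np.zeros(A.shape[1])
    for _ in range(iters):
        x = block_project(x + A.T @ (y - A @ x), B, blocks_kept)
    return x

# Toy instance: n = 1024, block length B = 16, k = 32 (2 active blocks).
rng = np.random.default_rng(0)
n, B, k = 1024, 16, 32
m = 8 * k                                     # a small multiple of k rows
A = rng.normal(0.0, 1.0 / np.sqrt(m), (m, n)) # Gaussian entries, variance 1/m
x = np.zeros(n)
for b in rng.choice(n // B, k // B, replace=False):
    x[b * B:(b + 1) * B] = rng.normal(size=B)
y = A @ x + 0.01 * rng.normal(size=m)
xhat = model_iht(A, y, B, k // B)
print("relative error:", np.linalg.norm(xhat - x) / np.linalg.norm(x))
\end{verbatim}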
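\paragraph{Example: the $L^1$ program in code}
The convex program in Theorem \ref{cs-l1} can be handed to an off-the-shelf solver. Below is a hedged Python sketch assuming the cvxpy package is available; the problem sizes and noise level are arbitrary illustrative choices.

\begin{verbatim}
import numpy as np
import cvxpy as cp

rng = np.random.default_rng(1)
n, k, m = 200, 5, 60                          # m on the order of k log(n/k)
A = rng.normal(0.0, 1.0 / np.sqrt(m), (m, n))
x = np.zeros(n)
x[rng.choice(n, k, replace=False)] = rng.normal(size=k)
e = 0.01 * rng.normal(size=m)
y = A @ x + e
eps = np.linalg.norm(e)                       # assumes a bound on ||e||_2 is known

xh = cp.Variable(n)
prob = cp.Problem(cp.Minimize(cp.norm(xh, 1)),
                  [cp.norm(A @ xh - y, 2) <= eps])
prob.solve()
print("recovery error:", np.linalg.norm(xh.value - x))
\end{verbatim}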
\subsection{Restricted Eigenvalue (RE)}

\paragraph{IHT fails for $A=2I$}
\begin{align}
z &= A^T A x + A^T e \\
&= 4x + 2e
\end{align}
so the gradient step of IHT overshoots ($A^T A$ is far from the identity on sparse vectors), even though recovering $x$ from $y = 2x + e$ is easy. This motivates a condition weaker than RIP under which $\ell_1$ minimization still works.

\begin{definition}[Restricted Eigenvalue (RE)]
\begin{equation}
\frac{\norm{Az}{2}}{\norm{z}{2}} \geq \eps
\end{equation}
for every $z$ such that there exists $S$ with
\begin{align}
|S| &= k \\
\norm{z_S}{1} &\geq \alpha \norm{z_{\bar{S}}}{1}
\end{align}
\end{definition}
For example, $\eps = \frac{1}{10}$ and $\alpha = 1$.

\begin{proof}[Proof of Theorem \ref{cs-l1}]
Set $\eps = \norm{e}{2}$ and let $z = \hat{x} - x$. Note that $x$ itself is feasible, since $\norm{Ax - y}{2} = \norm{e}{2} \leq \eps,ドル so the minimizer satisfies $\norm{\hat{x}}{1} \leq \norm{x}{1}$. The constraint gives $\norm{A\hat{x} - y}{2} = \norm{Az - e}{2} \leq \norm{e}{2},ドル so
\begin{align*}
\norm{Az-e}{2}^2 &\leq \norm{e}{2}^2\\
\norm{Az}{2}^2 - 2 e^TAz + \norm{e}{2}^2&\leq \norm{e}{2}^2\\
\norm{Az}{2}^2 &\leq 2 e^T A z \leq 2 \norm{e}{2} \norm{Az}{2}\\
\Rightarrow \norm{Az}{2} &\leq 2 \norm{e}{2}
\end{align*}
For $S = \supp(x),ドル
\begin{align*}
\norm{x_S}{1} = \norm{x}{1} & \geq \norm{\hat{x}}{1} \\
& = \norm{x+z}{1} \\
& \geq \norm{(x+z)_S}{1} + \norm{z_{\bar{S}}}{1} \\
& \geq \norm{x_S}{1} + \norm{z_{\bar{S}}}{1} - \norm{z_{S}}{1}
\end{align*}
so $\norm{z_{S}}{1} \geq \norm{z_{\bar{S}}}{1}$. RE then gives $\norm{z}{2} \lesssim \norm{Az}{2} \leq 2 \norm{e}{2}$.
\end{proof}

\subsection{RIP $\Rightarrow$ RE}

\paragraph{``Shelling argument''}
Suppose $A$ satisfies the RIP of order 3ドルk$. We would like to show for any $z$ and $S \subset [n]$ of size $k$ with $\norm{z_S}{1} \geq \norm{z_{\overline{S}}}{1}$ that $\norm{Az}{2} \gtrsim \norm{z}{2}$.

Split $z$ into blocks $z^1, z^2, \dots$ of decreasing magnitude, so that $z^1$ contains the largest $k$ coordinates and each subsequent $z^i$ contains the next largest 2ドルk$ coordinates. Then for $i \geq 3$ we have
\begin{equation}
\frac{\norm{z^i}{1}}{\sqrt{2k}} \leq \norm{z^i}{2} \leq \frac{\norm{z^{i-1}}{1}}{\sqrt{2k}}
\end{equation}
(the left inequality is Cauchy--Schwarz on a block of 2ドルk$ coordinates; the right holds because every coordinate of $z^i$ is at most the average magnitude of the coordinates of $z^{i-1}$).

By assumption, $\norm{z^1}{1} \geq \norm{\sum_{i=2}^{\infty} z^i}{1}$. Then
\begin{align*}
\norm{Az}{2} & = \norm{A(z^1 + z^2 + \dots)}{2} \\
& \geq \norm{A(z^1+z^2)}{2} - \norm{Az^3}{2} - \dots\\
& \geq (1-\eps)\norm{z^1 + z^2}{2} - (1+\eps)\sum_{i=3}^\infty \norm{z^i}{2}\\
& \geq (1-\eps)\norm{z^1}{2} - \frac{1+\eps}{\sqrt{2k}}\sum_{i=2}^\infty \norm{z^i}{1}\\
& = (1-\eps)\norm{z^1}{2} - \frac{1+\eps}{\sqrt{2k}}\norm{\sum_{i=2}^\infty z^i}{1}\\
& \geq (1-\eps)\norm{z^1}{2} - \frac{1+\eps}{\sqrt{2k}}\norm{z^1}{1}\\
& \geq (1-\eps)\norm{z^1}{2} - \frac{1+\eps}{\sqrt{2}}\norm{z^1}{2}\\
& \geq \frac{1}{10} \norm{z^1}{2}
\end{align*}
for $\eps < 1/10$. Finally, every coordinate of $z - z^1$ has magnitude at most $\norm{z^1}{1}/k$ and $\norm{z - z^1}{1} \leq \norm{z^1}{1},ドル so $\norm{z - z^1}{2}^2 \leq \norm{z^1}{1}^2/k \leq \norm{z^1}{2}^2$; hence $\norm{z}{2} \leq \sqrt{2}\, \norm{z^1}{2}$ and therefore $\norm{Az}{2} \gtrsim \norm{z}{2}$.

\bibliographystyle{alpha}
\begin{thebibliography}{42}

\bibitem[CRT06]{CRT06}
E.~J. Cand\`es, J.~Romberg, and T.~Tao.
\newblock Stable signal recovery from incomplete and inaccurate measurements.
\newblock {\em Communications on Pure and Applied Mathematics}, 59(8):1207--1223, 2006.

\bibitem[BCDH10]{BCDH10}
R.~Baraniuk, V.~Cevher, M.~F. Duarte, and C.~Hegde.
\newblock Model-based compressive sensing.
\newblock {\em IEEE Transactions on Information Theory}, 56(4):1982--2001, 2010.

\end{thebibliography}

\end{document}