\documentclass[11pt]{article}
\usepackage{amsmath,amssymb,amsthm}

\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}
\newcommand{\eps}{\epsilon}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbb{R}}

\newcommand{\handout}[5]{
 \noindent
 \begin{center}
 \framebox{
 \vbox{
 \hbox to 5.78in { {\bf CS 395T: Sublinear Algorithms } \hfill #2 }
 \vspace{4mm}
 \hbox to 5.78in { {\Large \hfill #5 \hfill} }
 \vspace{2mm}
 \hbox to 5.78in { {\em #3 \hfill #4} }
 }
 }
 \end{center}
 \vspace*{4mm}
}
\newcommand{\lecture}[4]{\handout{#1}{#2}{#3}{Scribe: #4}{Lecture #1}}

\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{example}[theorem]{Example}

% 1-inch margins, from fullpage.sty by H.Partl, Version 2, Dec. 15, 1988.
\topmargin 0pt
\advance \topmargin by -\headheight
\advance \topmargin by -\headsep
\textheight 8.9in
\oddsidemargin 0pt
\evensidemargin \oddsidemargin
\marginparwidth 0.5in
\textwidth 6.5in
\parindent 0in
\parskip 1.5ex

\begin{document}

\lecture{10\&11 --- Sept 30 \& Oct 2, 2014}{Fall 2014}{Prof.\ Eric Price}{Xue Chen}

\section{Overview}

These notes cover two lectures. The first gives an introduction to $\epsilon$-covers, $\epsilon$-packings, and RIP matrices. The second describes compressed sensing and Iterative Hard Thresholding.

\section{$\epsilon$-covers and $\epsilon$-packings}

First, we define metric spaces, $\epsilon$-covers, and the covering number.

\begin{definition}[Metric Space]
A metric space is an ordered pair $(X,d)$ where $X$ is a set and $d : X \times X \to \mathbb{R}$ is a metric on $X,ドル i.e., for all $x,y \in X$:
\begin{enumerate}
\item $d(x,y)\ge 0$.
\item $d(x,y)=0 \Leftrightarrow x=y$.
\item $d(x,y)=d(y,x)$.
\item $d(x,y)\le d(x,z)+d(z,y)$ for all $z \in X$.
\end{enumerate}
\end{definition}

\begin{definition}
An $\epsilon$-cover of $X$ with respect to $d$ is a collection of points $\{x_1,\cdots,x_n\} \subseteq X$ such that $\forall y\in X,\ \min_{1\le i \le n} d(y,x_i)\le \epsilon.$
\end{definition}

\begin{definition}
The covering number $N(\epsilon,X,d)$ is the minimum size of an $\epsilon$-cover of $X$ w.r.t.\ $d$.
\end{definition}

We abuse notation and write $N$ for $N(\epsilon,X,d)$ when $X$ and $d$ are clear from context. The quantity $\log N(\epsilon,X,d)$ is called the metric entropy of $(X,d)$; it measures the number of bits needed to specify a point of $X$ up to distance $\epsilon$ in $d$.

\begin{example}
$X=[-1,1],\ d(x,y)=|x-y|.$ Then $\{0, \pm 2\epsilon, \pm 4\epsilon, \cdots \}\cap X$ is an $\epsilon$-cover of $X,ドル so $N(\epsilon,X,d)\le \frac{2}{2\epsilon}+1=1+1/\epsilon$.
\end{example}

\begin{example}
$X=[-1,1]^m,\ d(x,y)=\|x-y\|_{\infty}$. From the above example, $\big(\{0, \pm 2\epsilon, \pm 4\epsilon,\cdots \}\cap[-1,1]\big)^m$ is an $\epsilon$-cover of $X$. So $N(\epsilon, X, d)\le (1+1/\epsilon)^m$ and the metric entropy is $\log N=\Theta(m\log(\frac{1}{\epsilon}))$.
\end{example}

A closely related concept is the packing number.

\begin{definition}
An $\epsilon$-packing of $X$ w.r.t.\ $d$ is a collection of points $\{x_1,\cdots, x_n\}\subseteq X$ such that $d(x_i,x_j)\ge \epsilon$ for all $i\neq j$. The packing number $M(\epsilon,X,d)$ is the maximum size of an $\epsilon$-packing of $(X,d)$.
\end{definition}
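To make these definitions concrete, here is a small Python sketch (an illustration only; the values of $m$ and $\epsilon$ are arbitrary choices) that builds the grid $\epsilon$-cover of $[-1,1]^m$ in $\ell_\infty$ from the example above and greedily extracts an $\epsilon$-packing from a set of random points:

\begin{verbatim}
import itertools
import numpy as np

def grid_cover(m, eps):
 # Grid points {0, +-2eps, +-4eps, ...}^m restricted to [-1,1]^m: an
 # eps-cover of [-1,1]^m in the l_infinity metric (exact when 1/(2*eps)
 # is an integer, as in the demo below).
 ticks = np.arange(-1.0, 1.0 + 2 * eps, 2 * eps)
 return [np.array(p) for p in itertools.product(ticks, repeat=m)]

def greedy_packing(points, eps):
 # Keep a point only if it is at l_infinity distance >= eps from every
 # point kept so far; the result is an eps-packing of the input set.
 packing = []
 for x in points:
 if all(np.max(np.abs(x - y)) >= eps for y in packing):
 packing.append(x)
 return packing

if __name__ == "__main__":
 m, eps = 2, 0.25
 cover = grid_cover(m, eps)
 rng = np.random.default_rng(0)
 sample = rng.uniform(-1, 1, size=(2000, m))
 # Every sampled point should be within eps of some cover point.
 max_dist = max(min(np.max(np.abs(x - c)) for c in cover) for x in sample)
 print("cover size:", len(cover), " max distance to cover:", max_dist)
 print("greedy packing size:", len(greedy_packing(list(sample), eps)))
\end{verbatim}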
\begin{lemma}\label{sandwich}
$$M(2\epsilon,X,d)\le N(\epsilon,X,d) \le M(\epsilon,X,d).$$
\end{lemma}

(Proof sketch: for the first inequality, each ball of radius $\epsilon$ around a cover point contains at most one point of a 2ドル\epsilon$-packing; for the second, a maximal $\epsilon$-packing is automatically an $\epsilon$-cover, since any point at distance more than $\epsilon$ from every packing point could be added to the packing.)

In general, the difference between $\epsilon$ and 2ドル\epsilon$ will only affect constants, which we will not care about in this course. So we will freely switch between the packing number and the covering number of $(X,d)$.

\section{$N(\epsilon,B^d_q,\|\cdot\|_p)$}

Now let us consider covers of $L_q$ balls in the $L_p$ norm.

\begin{definition}
$\|x\|_p=(\sum_{i} |x_i|^p)^{1/p}$ for any $p>0$. The unit ball of dimension $d$ in $L_p$ is $B^d_p=\{x\in \mathbb{R}^d \mid \|x\|_p\le 1\}$.
\end{definition}

For $p\ge 1,ドル $d(x,y)=\|x-y\|_p$ is a metric; the triangle inequality is Minkowski's inequality, which follows from H\"older's inequality. We abuse notation and write $\|\cdot\|$ when $p$ is clear from context. One basic property of $L_p$ norms we will use in this lecture is that $\| x \|_{p_0} \le \| x \| _{p_1}$ for $p_0 \ge p_1$. We also use $Vol(S)$ to denote the volume of $S$. Another property often used in this lecture is that $\frac{Vol(\alpha B^d_p)}{Vol(\beta B^d_p)}=(\frac{\alpha}{\beta})^d$, i.e., scaling a ball by $\alpha$ in each of $d$ dimensions multiplies its volume by $\alpha^d$.

\begin{fact}
$\frac{1}{\epsilon^d} \frac{Vol(B^d_q)}{Vol(B^d_p)} \le N(\epsilon,B^d_q,\|\cdot\|_p) \le (\frac{2}{\epsilon})^d \frac{ Vol(B^d_q + \frac{\epsilon}{2}B^d_p) }{Vol(B^d_p)}$.
\end{fact}

\begin{proof}
Lower bound: Let $\{x_1,x_2,\cdots, x_N\}$ be an $\epsilon$-cover of $B^d_q$. Because $B^d_q \subseteq \cup_i (x_i+\epsilon B^d_p),ドル we have $Vol(B^d_q) \le N \cdot Vol(\epsilon B^d_p) = N \epsilon^d\, Vol(B^d_p)$.

Upper bound: Let $\{x_1,x_2,\cdots, x_M\}$ be an $\epsilon$-packing of $B^d_q$. Because all the balls $x_i + \frac{\epsilon}{2} B^d_p$ are disjoint, $\cup_i (x_i + \frac{\epsilon}{2} B^d_p) \subseteq B^d_q + \frac{\epsilon}{2} B^d_p$ (some $x_i$ may be on the surface). Therefore $M \cdot Vol(\frac{\epsilon}{2} B^d_p)\le Vol(B^d_q + \frac{\epsilon}{2} B^d_p),ドル and an upper bound on the packing number is also an upper bound on the covering number by Lemma~\ref{sandwich}.
\end{proof}

To make this simpler, let's look at a couple of cases.

\paragraph{Same norm.} If $p=q,ドル the upper bound becomes
$$\left(\frac{2}{\epsilon}\right)^d \frac{ Vol(B^d_q + \frac{\epsilon}{2}B^d_p) }{Vol(B^d_p)}=\left(\frac{2}{\epsilon}\right)^d\frac{Vol( (1 + \frac{\epsilon}{2})B^d_p )}{Vol(B^d_p)}=\left(\frac{2}{\epsilon}\right)^d\left(1+\frac{\epsilon}{2}\right)^d=\left(1+\frac{2}{\epsilon}\right)^d.$$
Therefore $\frac{1}{\epsilon^d}\le N \le (1+\frac{2}{\epsilon})^d$.

\paragraph{When $q=1$ and $p=2$.} Because $B^d_1\subseteq B^d_2$ by the norm inequality above, one upper bound is
$$\left(\frac{2}{\epsilon}\right)^d\frac{ Vol(B^d_1 + \frac{\epsilon}{2}B^d_2) }{Vol(B^d_2)}\le \left(\frac{2}{\epsilon}\right)^d\frac{ Vol(( 1 + \frac{\epsilon}{2})B^d_2) }{Vol(B^d_2)} = \left(1+\frac{2}{\epsilon}\right)^d.$$
For the lower bound, $Vol(B^d_1)=\frac{2^d}{d!}$: the ball $B^d_1$ splits into 2ドル^d$ orthants, and its intersection with each orthant is a copy of the standard simplex, which has volume $\frac{1}{d!}$. Also $Vol(B^d_2)=\frac{\pi^{d/2}}{(d/2)!}$ for even $d$~\cite{Sphere}. Therefore $\frac{1}{\epsilon^d}\cdot \frac{2^d/d!}{\pi^{d/2}/(d/2)!}\le N,ドル which gives (up to $O(d)$ additive terms)
$$d\log(1/\epsilon) - \frac{d}{2}\log d \le \log N \le d \log\left(1+\frac{2}{\epsilon}\right).$$
This gives a tight bound of $\log N = \Theta(d \log (1/\eps))$ when $\eps < 1/d$. For $\eps> 1/\sqrt{d},ドル however, the lower bound becomes trivial. In fact, the volume argument is loose in the ``large $\eps$'' setting.

We can also show that $\log N(\epsilon, B^d_1, \|\cdot\|_2) \le O(\frac{\log d}{\epsilon ^2})$ by Maurey's empirical method (see, for example, \cite{NPW2012}). For any $\vec{x}=(x_1,\cdots,x_d) \in B^d_1,ドル consider the following experiment, where $e_1,e_2,\cdots, e_d$ is the standard basis of $\mathbb{R}^d$:
\begin{enumerate}
\item Sample $z_i$ independently for $i=1,\cdots, t$: set $z_i={\rm sign}(x_j)\, e_j$ with probability $|x_j|$ for each $j\in\{1,\dots,d\},ドル and $z_i=\vec{0}$ with the remaining probability 1ドル-\|x\|_1$.
\item Let $z=\frac{1}{t} \sum_i z_i$. Then $\E[z]=x$ and
$$\E[\|x-z\|^2_2]=\sum_{j=1}^d \E\Big[\big(x_j-\tfrac{1}{t}\textstyle\sum_i (z_i)_j\big)^2\Big]=\frac{1}{t}\sum_{j=1}^d |x_j|(1-|x_j|) \le \frac{1}{t} \sum_j |x_j|\le 1/t.$$
\item Choosing $t=1/\epsilon^2,ドル we get $\E[\|z-x\|^2_2]\le \epsilon^2,ドル so some realization of $z$ satisfies $\|z-x\|_2\le \epsilon$. Every realization of $z$ is an average of $t$ vectors from $\{\pm e_1,\cdots,\pm e_d,\vec{0}\},ドル so the set of possible values of $z$ is an $\epsilon$-cover of $B^d_1$ of size at most $(2d+1)^t$. Therefore $\log N \le t\log(2d+1) = O(\frac{\log d}{\epsilon^2})$.
\end{enumerate}
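The following Python sketch (an illustration only; the dimension, $\epsilon,ドル and number of trials are arbitrary choices) simulates this experiment and checks that the average $z$ is typically within roughly $\epsilon$ of $x$ in $\ell_2$:

\begin{verbatim}
import numpy as np

def maurey_approx(x, t, rng):
 # Approximate x in the l_1 ball by an average of t vectors, each a signed
 # standard basis vector (chosen with probability |x_j|) or the zero
 # vector (with the remaining probability 1 - ||x||_1).
 d = len(x)
 probs = np.append(np.abs(x), max(0.0, 1.0 - np.sum(np.abs(x))))
 probs /= probs.sum() # guard against floating-point drift
 choices = rng.choice(d + 1, size=t, p=probs) # index d means "zero vector"
 z = np.zeros(d)
 for c in choices:
 if c < d:
 z[c] += np.sign(x[c])
 return z / t

if __name__ == "__main__":
 rng = np.random.default_rng(0)
 d, eps = 1000, 0.1
 t = int(1 / eps**2)
 x = rng.standard_normal(d)
 x /= np.sum(np.abs(x)) # put x on the boundary of the l_1 ball
 errs = [np.linalg.norm(x - maurey_approx(x, t, rng)) for _ in range(50)]
 # E[||x - z||_2^2] <= 1/t = eps^2, so typical errors should be around eps.
 print("eps =", eps, " mean l_2 error =", np.mean(errs))
\end{verbatim}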
\section{Sparse Vectors and RIP Matrices}

Let us start with some definitions.

\begin{definition}
We use $supp(x)=\{i \mid x_i\neq 0,\ 1\le i \le n\}$ to denote the support of a vector $x,ドル and we define $\|x\|_0=|supp(x)|$ and the $k$-sparse space
$$T_k=\{x : \|x\|_2\le 1, \|x\|_0\le k\}.$$
\end{definition}

It is not difficult to see that $N(\epsilon,T_k,\|\cdot \|_2)\le {n \choose k}(1+\frac{2}{\epsilon})^k$: take the union, over all ${n\choose k}$ coordinate subsets of size $k,ドル of an $\epsilon$-cover of the unit ball in that subspace (each of size at most $(1+\frac{2}{\epsilon})^k$ by the same-norm bound above). Therefore $\log N(\epsilon, T_k, \|\cdot \|_2)\le O(k \log \frac{n}{k\epsilon}).$

Now we are interested in finding a matrix $A$ with ``few'' rows that preserves the norm of \emph{every vector $x \in T_k$}, i.e., such that $\frac{\|Ax\|_2}{\|x\|_2}$ is close to 1ドル$ for every nonzero $x \in T_k$. Recall what we proved in the construction of a JL matrix: if we sample a matrix $A\in \mathbb{R}^{m\times n}$ by independently sampling each entry $a_{i,j}\sim N(0,1/m),ドル then for any \emph{fixed} $x \in \mathbb{R}^n,ドル $\|Ax\|^2_2 = (1 \pm \epsilon) \|x\|^2_2$ with probability at least 1ドル - 2e^{-\epsilon^2 m/C}$ for some constant $C$ (see Lecture 2). If $T_k$ were finite, we could take a union bound to argue that the same way of generating $A$ also works for every vector $x \in T_k$. However, $T_k$ is infinite, so we need another argument.

Instead, let $S=\{x^{(1)},\cdots, x^{(N)}\}$ be an $\epsilon$-cover (a.k.a.\ ``net'') of $T_k$ with size $N\le {n \choose k}(1+\frac{2}{\epsilon})^k$. We can decompose any $x \in T_k$ as follows:
\begin{enumerate}
\item Find $x_1 \in S$ such that $x=x_1+\epsilon x'$ for $\|x'\|_2\le 1$ and $|supp(x')|\le k$. Since $S$ is an $\epsilon$-cover, there always exists $x_1 \in S$ such that $\|x-x_1\|_2\le\epsilon$ by definition. The bound $|supp(x')|\le k$ follows from a special property of our net: because it is a union of covers over all coordinate subsets of size $k,ドル we can choose $x_1$ from the same $k$-dimensional coordinate subspace as $x$.
\item If $x' \neq 0,ドル apply the above procedure to $x'$ again to get $x'=x_2+\epsilon x''$ such that $\|x''\|_2\le 1$ and $|supp(x'')|\le k,ドル and so on.
\item Eventually, we have $x=x_1+\epsilon x_2 + \epsilon^2 x_3 + \cdots + \epsilon^{i-1}x_i + \dotsb,ドル where all the $x_i \in S$.
\end{enumerate}
Now choose $m=C_0 \log N/ \epsilon^2=O(\frac{k}{\epsilon^2}\log \frac{n}{\epsilon k})$ for a large constant $C_0$ and $\epsilon<\frac{1}{2}$; then $\|Ax_i\|_2=(1\pm \epsilon)\|x_i\|_2$ for all $x_i\in S$ with high probability, by a union bound over the net $S$. This yields, for every $x\in T_k$ with $\|x\|_2=1,ドル
\begin{align*}
\|Ax\|_2&=\|A(x_1+\epsilon x_2 + \epsilon^2 x_3 + \cdots + \epsilon^{i-1}x_i + \cdots )\|_2\\
&\le \|Ax_1\|_2 + \epsilon \|Ax_2\|_2 + \epsilon^2 \|Ax_3\|_2 + \cdots + \epsilon^{i-1} \|Ax_i\|_2 + \cdots \\
&\le (1+\epsilon)(1+\epsilon+\epsilon^2+\cdots + \epsilon^{i-1}+\cdots)\\
&\le (1+\epsilon)\frac{1}{1-\epsilon}\\
&\le 1+O(\epsilon).
\end{align*}
A symmetric calculation gives the lower bound: $\|Ax\|_2 \ge \|Ax_1\|_2 - \epsilon\|Ax_2\|_2 - \epsilon^2\|Ax_3\|_2 - \cdots \ge (1-\epsilon)\|x_1\|_2 - (1+\epsilon)\frac{\epsilon}{1-\epsilon} \ge 1-O(\epsilon),ドル using $\|x_1\|_2\ge \|x\|_2-\epsilon=1-\epsilon$. So $A$ preserves the norm of every $x\in T_k$ up to a factor of 1ドル\pm O(\epsilon)$.
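As a quick numerical sanity check of this phenomenon (not a proof, and with arbitrarily chosen parameters), one can sample such a Gaussian matrix and measure $\|Ax\|_2/\|x\|_2$ over many random $k$-sparse unit vectors:

\begin{verbatim}
import numpy as np

def sparse_distortion(m, n, k, trials, rng):
 # Sample A with i.i.d. N(0, 1/m) entries and record ||Ax||_2 / ||x||_2
 # over random k-sparse unit vectors x. Checking random x is only a
 # sanity check; it does not certify RIP, which quantifies over all x.
 A = rng.standard_normal((m, n)) / np.sqrt(m)
 ratios = []
 for _ in range(trials):
 x = np.zeros(n)
 support = rng.choice(n, size=k, replace=False)
 x[support] = rng.standard_normal(k)
 x /= np.linalg.norm(x)
 ratios.append(np.linalg.norm(A @ x)) # ||x||_2 = 1, so this is the ratio
 return min(ratios), max(ratios)

if __name__ == "__main__":
 rng = np.random.default_rng(0)
 lo, hi = sparse_distortion(m=200, n=2000, k=10, trials=1000, rng=rng)
 print("min/max of ||Ax||_2 over random 10-sparse unit x:", lo, hi)
\end{verbatim}

Of course, checking random $x$ says nothing about the worst case over all of $T_k$; that is exactly what the net argument above handles.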
\begin{definition}[Restricted Isometry Property]
An $m\times n$ matrix $A$ has the $(k,\epsilon)$-restricted isometry property (RIP) if for all $x$ with $\|x\|_0\le k,ドル
$$(1-\epsilon)\|x\|_2 \le \|Ax\|_2 \le (1+\epsilon)\|x\|_2.$$
\end{definition}

An essentially equivalent way (up to constant factors in $\epsilon$) to define RIP is: for every subset $S$ of size $k,ドル $\|(A_S)^T A_S - I\|_2 \le \epsilon,ドル where $A_S$ is the submatrix formed by the columns in $S$ and $\|A\|_p=\sup_{x\neq \vec{0}}\frac{ \|Ax\|_p}{\|x\|_p}$ is the operator norm. From now on, we assume $\epsilon$ is a small constant such as 1ドル/10$ or 1ドル/100$, unless specified otherwise.

In general, we are interested in constructing an $m \times n$ RIP matrix $A$ with the following properties:
\begin{enumerate}
\item It is easy to check whether $A$ satisfies RIP.
\item $A$ can be stored in $o(mn)$ space.
\item The product $Ax$ can be computed in $o(mk)$ time for $x \in T_k$.
\item $m$ is as small as possible.
\end{enumerate}

\begin{example}[Random (sub)Gaussian Matrix]
We generate $A$ by independently sampling a (sub)Gaussian variable (with variance 1ドル/m$) for every entry. With overwhelming probability ($\ge 1-e^{-\Omega(m)}$), $A$ is an RIP matrix, provided $m=\Omega(\frac{1}{\eps^2}k \log (n/k))$. However, we do not know how to verify that a given $A$ satisfies RIP, even though it holds with very high probability, and a dense random matrix is also bad in terms of storage and multiplication time.
\end{example}

\begin{example}[Coherent Matrix]
A matrix $A$ with $n$ columns $\{a_1,a_2,\cdots, a_n\}$ is defined to be $\alpha$-coherent iff $\frac{|\inprod{a_i,a_j}|}{\|a_i\|_2\cdot \|a_j\|_2}\le \alpha$ for all $i\neq j$. Let $A'$ be the normalized version of $A$ (normalize every column to a unit vector), with columns $a'_1,\cdots,a'_n$. We can show $A'$ is a $(k,\alpha \cdot k)$-RIP matrix: for any $k$-sparse vector $x,ドル
$$\|A'x\|^2_2=\sum_{i \in supp(x)} \sum_{j \in supp(x)}x_{i}x_j\inprod{a'_i,a'_j}\le\sum_{i \in supp(x)}x^2_i + \sum_{i\neq j}\alpha |x_ix_j| \le \|x\|^2_2+\alpha k \|x\|^2_2,$$
where the last step uses $\sum_{i\neq j}|x_i x_j|\le \|x\|_1^2 \le k\|x\|_2^2$; the lower bound $\|A'x\|_2^2\ge (1-\alpha k)\|x\|_2^2$ follows in the same way.

Coherence is easy to verify, but this approach needs a large $m$ if we want to use it to get an RIP matrix. For example, suppose we generate every $a_i$ by independently choosing $\pm 1$ in every entry. It is not too difficult to see (compute the variance of $\inprod{a_i,a_j}$) that $\alpha = \Omega(1/\sqrt{m})$ with high probability, so achieving $\alpha = O(1/k)$ forces $m\ge \Omega(1/\alpha^2) = \Omega(k^2)$. Finding an explicit RIP matrix with $m$ much smaller than $k^2$ is a challenging open problem. Some progress was made by Bourgain et al.~\cite{BDFKK11}, who obtained $m=k^{2-\delta}$ for a universal constant $\delta,ドル but $\delta$ is tiny.
\end{example}

\begin{example}[Fourier Matrix]
An $n\times n$ Fourier matrix $F$ is defined by $F_{j,k}=\omega^{jk},ドル where $\omega=e^{2\pi i/n}$ and $i$ here denotes the imaginary unit~\cite{FM}. Another way to construct a $(k,\epsilon)$-RIP matrix is to sample a row subset $S$ of size $m=O(k\log n \log^3 \log k)$ and take the (suitably rescaled) $|S|\times n$ submatrix $A$; then $A$ is an RIP matrix w.h.p.~\cite{CGV2013}. There is an algorithm that multiplies $A$ by $x$ in time $\tilde{O}(m)$ and stores $A$ in space $O(k \log^2 n \log^3 \log k)$. However, we do not know how to verify that a matrix $A$ generated this way is an RIP matrix. The same construction also works for the Hadamard matrix, and a similar one works for circulant matrices.
\end{example}
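To illustrate the kind of object this example describes, here is a small Python sketch (an illustration only; the values of $n,ドル $m,ドル and $k$ are arbitrary, and $m$ is not the bound from the theorem) of a subsampled Fourier measurement operator applied via the FFT, so the full $m\times n$ matrix is never stored:

\begin{verbatim}
import numpy as np

def subsampled_fourier(n, m, rng):
 # Choose a random set of m row indices of the n x n DFT matrix; the
 # resulting operator is represented implicitly by these indices alone.
 rows = rng.choice(n, size=m, replace=False)

 def apply(x):
 # np.fft.fft computes all n entries of Fx in O(n log n) time; we keep
 # only the m sampled rows. Each entry of F has modulus 1, so dividing
 # by sqrt(m) gives the implicit matrix unit-norm columns.
 return np.fft.fft(x)[rows] / np.sqrt(m)

 return apply

if __name__ == "__main__":
 rng = np.random.default_rng(0)
 n, m, k = 4096, 400, 10
 A = subsampled_fourier(n, m, rng)
 x = np.zeros(n)
 x[rng.choice(n, size=k, replace=False)] = rng.standard_normal(k)
 print("||Ax||_2 =", np.linalg.norm(A(x)), " ||x||_2 =", np.linalg.norm(x))
\end{verbatim}

The point of the construction is exactly this: the matrix is specified by the $m$ sampled row indices, and applying it costs one FFT.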
One interesting question about RIP matrices is how sparse an RIP matrix can be. A negative result is that every column must have at least $\Omega(k)$ nonzero entries (see HW3).

\section{Compressed Sensing}

Let $A \in \mathbb{R}^{m\times n},ドル with every column of $A$ normalized to have norm roughly 1ドル$. We are given $y=Ax+e$ for some $x\in \mathbb{R}^n$ with $\|x\|_0 \le k,ドル where $e$ is a noise vector. The goal of compressed sensing is, given $y,ドル to efficiently recover a vector $\hat{x}$ such that $\|x-\hat{x}\|_2 \le C \|e\|_2$ for some universal constant $C$.

Compressed sensing has wide applications in industry. For example, it has been used in image processing, magnetic resonance imaging (MRI), oil exploration, spectrum sensing, and feature testing. It takes advantage of the data being sparse in some basis. We first discuss the differences between compressed sensing and sparse recovery, then introduce Iterative Hard Thresholding to recover $\hat{x}$.

Compressed sensing is very similar to ``sparse recovery'' or ``heavy hitters'', problems we saw earlier in class with Count-Min and Count-Sketch. ``Compressed sensing'' and ``sparse recovery'' are terms for essentially the same problem that grew out of different communities: ``compressed sensing'' from math/statistics/signal processing, and ``sparse recovery'' from streaming algorithms in computer science. That said, there are noticeable differences in problem formulation and approaches between the two communities, so it makes sense to preserve the distinction. Note that the following list isn't formal or definitive; it reflects a general sense of the differences between two communities working in the same area, and there is work that blurs the lines.
\begin{enumerate}
\item In sparse recovery, the matrix $A$ is random and the algorithm only needs to find the correct answer w.h.p.\ over the choice of $A$ for each fixed input $x$ (as with Count-Sketch). In compressed sensing, we have to choose the matrix $A$ before seeing $y,ドル and the algorithm should be able to recover $\hat{x}$ for every $y=Ax+e$ where $x,e$ satisfy the requirements.
\item In compressed sensing, there is a noise vector $e$ and one often assumes that $x$ is exactly $k$-sparse, while in sparse recovery one generally assumes that $x$ is not $k$-sparse but that $Ax$ is observed exactly. This distinction is generally not too important, and algorithms that work in one noise model typically also work in the other.
\item Sparse recovery cares more about running time. Sparse recovery algorithms strive for $n \log^c n$ or ideally $k \log^c n$ time, while compressed sensing algorithms are often happy with $n^c$ time.
\item In sparse recovery, the algorithm is closely tied to the particular matrix it uses. In compressed sensing, the algorithm works as long as the matrix $A$ has some property $P$. For example, if $A$ satisfies RIP, then solving $\arg\min_{\hat{x}:\|A\hat{x}-y\|_2 \le \epsilon} \|\hat{x}\|_{1}$ gives a good recovery $\hat{x}$ of $x$; this can be done by convex optimization ($L_1$ minimization) or by iterative methods.
\end{enumerate}

\section{Iterative Hard Thresholding}

We focus on Iterative Hard Thresholding (IHT) in the rest of these notes. We first describe the algorithm and then give the analysis. Let $A$ be a $(C\cdot k,\epsilon)$-RIP matrix and let $H_k(x)$ denote the projection of $x$ onto its top $k$ coordinates, i.e., the vector that keeps the $k$ largest-magnitude entries of $x$ and zeroes out the rest.

Given $y=Ax+e$ where $\|x\|_0 \le k$ and $\|e\|_2$ is small, the algorithm works as follows, for an appropriate choice of $t$:
\begin{enumerate}
\item $x^{(1)}=\vec{0}$.
\item For $i=1,2,\cdots,t$:
\item $\quad x^{(i+1)}=H_k\big( x^{(i)} +A^{T} (y-Ax^{(i)}) \big)$.
\end{enumerate}
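A direct Python translation of this loop (an illustration only; the dimensions, noise level, and iteration count below are arbitrary choices, and the Gaussian matrix is only heuristically RIP-like) is:

\begin{verbatim}
import numpy as np

def hard_threshold(v, k):
 # H_k(v): keep the k largest-magnitude entries of v, zero out the rest.
 out = np.zeros_like(v)
 top = np.argsort(np.abs(v))[-k:]
 out[top] = v[top]
 return out

def iht(A, y, k, t):
 # Iterative Hard Thresholding: x <- H_k(x + A^T (y - A x)), t times.
 x = np.zeros(A.shape[1])
 for _ in range(t):
 x = hard_threshold(x + A.T @ (y - A @ x), k)
 return x

if __name__ == "__main__":
 rng = np.random.default_rng(0)
 m, n, k = 400, 1000, 10
 A = rng.standard_normal((m, n)) / np.sqrt(m) # heuristically RIP-like, not verified
 x = np.zeros(n)
 x[rng.choice(n, size=k, replace=False)] = rng.standard_normal(k)
 e = 0.01 * rng.standard_normal(m)
 x_hat = iht(A, A @ x + e, k, t=50)
 print("recovery error:", np.linalg.norm(x - x_hat),
 " noise norm:", np.linalg.norm(e))
\end{verbatim}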
We are going to prove that $\|x^{(t+1)}-x\|_2 \le O(\|e\|_2)$ for $t=O(\log \frac{\|x\|_2}{\|e\|_2})$. The intuition behind the algorithm is that $A^{T} (y-Ax^{(i)}) = A^{T} A (x-x^{(i)}) + A^T e$. Because $\|A^{T}_S A_S - I\|_2 \le \epsilon$ for any column subset $S$ of size $C\cdot k,ドル we can think of $A^T A$ as approximately the identity on sparse vectors, so $A^{T} A (x-x^{(i)}) + A^T e \approx x-x^{(i)}$.

\begin{proof}
Let $x_0=x - x^{(i)}$ and $z=A^T Ax_0 + A^T e,ドル and let $H$ be the support set of $x_0$ (so $|H|\le 2k$). Because $x^{(i+1)}=H_k(x^{(i)}+z),ドル we first bound $\|z-x_0\|_2$. For any column subset $S$ of size at most $(C-2)k,ドル we bound $\|(z-x_0)_S\|_2$ as follows:
\begin{align*}
\|(z-x_0)_S\|_2 &= \| \big((A^T A - I)x_0 + A^T e\big)_S \|_2\\
&\le \|\big( (A^T A -I) x_0 \big)_{S\cup H} \|_2 + \|(A^T e)_S\|_2\\
&\le \|(A^T A -I )_{(S \cup H) \times (S \cup H)}\|_2 \cdot \|x_0\|_2+\|A_S\|_2 \cdot \|e\|_2\\
&\le \epsilon \|x_0\|_2 + (1+\epsilon) \|e\|_2,
\end{align*}
where the last step uses the RIP of $A$ on sets of size $|S\cup H|\le Ck$.

However, our goal is to show that $\|x - x^{(i+1)}\|_2$ shrinks by a constant factor in each step. Since $x^{(i+1)}$ keeps only the top $k$ coordinates of $x^{(i)}+z,ドル we need the following lemma to convert the bound on $\|(z-x_0)_S\|_2$ into a bound on $\|x - x^{(i+1)}\|_2$.

\begin{lemma}
Let $x,z \in \mathbb{R}^n,ドル where $x$ is $k$-sparse with support set $H,ドル and let $S$ be the set of the top $k$ coordinates of $z$ (in magnitude). Then
$$\|x-z_S\|^2_2 \le 5 \|(x-z)_{H \cup S}\|^2_2.$$
\end{lemma}
\begin{proof}
Coordinates in $H\cap S$ contribute the same term $(x_j-z_j)^2$ to both sides, and coordinates outside $H\cup S$ contribute nothing. Pair up each $i\in H\setminus S$ with a distinct $j \in S \setminus H$ (recall $|S|=|H|=k,ドル so $|H\setminus S|=|S\setminus H|,ドル and $|z_j| \ge |z_i|$ by the definition of $S$). Since $x_j=0$ for $j\in S\setminus H,ドル it is enough to prove for each pair that
$$x_i^2+z_j^2 \le 5\big ( (z_i-x_i)^2+z_j^2\big ).$$
We consider two cases:
\begin{enumerate}
\item $|z_i|\ge|x_i|/2$: then $x_i^2 \le 4 z_i^2 \le 4 z_j^2$.
\item $|z_i|<|x_i|/2$: then $x_i^2 \le 4 (x_i-z_i)^2$.
\end{enumerate}
In either case the claimed inequality holds.
\end{proof}

Continuing with the proof of convergence, apply the lemma with the vector $x^{(i)}+z$ playing the role of $z$ in the lemma (its top-$k$ restriction is exactly $x^{(i+1)}$), and let $S$ be the support of $x^{(i+1)}$. Note that $|S\cup supp(x)|\le 2k \le (C-2)k$ for $C\ge 4,ドル so the bound above applies to this set. If $\epsilon<0.1$ and $\|x^{(i)}-x\|_2 \ge 12 \|e\|_2,ドル we have
\begin{align*}
\|x - x^{(i+1)}\|_2 &\le \sqrt{5}\, \|\big(x - (x^{(i)}+z)\big)_{S \cup supp(x)}\|_2 \\
&= \sqrt{5}\, \|(x_0 - z)_{S \cup supp(x)}\|_2 \\
&\le \sqrt{5}\, \epsilon \|x_0\|_2 + \sqrt{5}(1+\epsilon)\|e\|_2 \\
&\le \|x^{(i)} - x\|_2/4 + 3 \|e\|_2 \\
& \le \|x^{(i)} - x\|_2/2.
\end{align*}
So the error $\|x^{(i)}-x\|_2$ halves in every step until it drops below 12ドル\|e\|_2$. Therefore, for $t=O(\log \frac{\|x\|_2}{\|e\|_2}),ドル we have $\|x^{(t+1)}-x\|_2=O(\|e\|_2)$.
\end{proof}

\begin{thebibliography}{42}

\bibitem[Sphere]{Sphere}
\newblock Sphere. Wikipedia, https://en.wikipedia.org/wiki/Sphere.

\bibitem[NPW12]{NPW2012}
Jelani~Nelson, Eric~Price, Mary~Wootters.
\newblock New constructions of RIP matrices with fast multiplication and fewer rows.
\newblock In {\em SODA 2014}, pages 1515--1528.

\bibitem[CGV13]{CGV2013}
Mahdi~Cheraghchi, Venkatesan~Guruswami, Ameya~Velingker.
\newblock Restricted isometry of Fourier matrices and list decodability of random linear codes.
\newblock In {\em SODA 2013}, pages 432--442.

\bibitem[BDFKK11]{BDFKK11}
Jean~Bourgain, Stephen~Dilworth, Kevin~Ford, Sergei~Konyagin, Denka~Kutzarova.
\newblock Breaking the $k^2$ barrier for explicit RIP matrices.
\newblock In {\em STOC 2011}, pages 637--644.

\bibitem[FM]{FM}
\newblock Fourier Matrix. MathWorld, http://mathworld.wolfram.com/FourierMatrix.html.

\end{thebibliography}

\end{document}
