A little while ago, I had to write a little C# application to recover my HDD data (full context on this question)
To answer my problem I developed a console application which job was to recursively copy the entire folder tree of my HDD.
The code can be seen below :
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace CopyCat
{
class Program
{
private static long _fileCount = 0;
private static long _byteCount = 0;
private static long _byteProgress = 0;
private static ProgressBar _progressCount = new ProgressBar();
static void Main(string[] args)
{
Directory.Exists(args[0]);
Directory.Exists(args[1]);
FileDiscovery(args[0]);
FileCopy(args[0], args[1]);
Console.ReadLine();
}
static void FileCopy(String source, String dest)
{
try
{
foreach (var file in Directory.EnumerateFiles(source, "*", SearchOption.TopDirectoryOnly))
{
try
{
if (file == null) continue;
var oFile = File.OpenRead(file);
var dFile = File.Open(Path.Combine(dest, Path.GetFileName(file)), FileMode.Create,
FileAccess.ReadWrite);
oFile.CopyTo(dFile, 104857600);
oFile.Close();
dFile.Flush();
dFile.Close();
_byteProgress += new FileInfo(file).Length;
_progressCount.Report((double)_byteProgress / (double)_byteCount);
}
catch (Exception e)
{
Console.WriteLine("[COPY][ERROR] : Couldn't copy file : {0} => {1}", file, e.Message);
}
}
foreach (var directory in Directory.EnumerateDirectories(source, "*", SearchOption.TopDirectoryOnly))
{
if (directory == @"G:\$RECYCLE.BIN") continue;
var dir = Path.GetFileName(directory);
if (!Directory.Exists(Path.Combine(dest, dir)))
{
Directory.CreateDirectory(Path.Combine(dest, dir));
}
FileCopy(directory, Path.Combine(dest, dir));
}
}
catch (Exception exception)
{
Console.WriteLine("[COPY][WARNING] : Couldn't open directory : {0}", source);
}
}
static void FileDiscovery(String dir)
{
try
{
foreach (var file in Directory.EnumerateFiles(dir, "*", SearchOption.TopDirectoryOnly))
{
_fileCount++;
_byteCount += new FileInfo(file).Length;
}
foreach (var directory in Directory.EnumerateDirectories(dir, "*", SearchOption.TopDirectoryOnly))
{
FileDiscovery(directory);
}
}
catch (Exception exception)
{
Console.WriteLine("[DISCOVERY][WARNING] : Couldn't open directory : {0}", dir);
}
}
static String HumanReadableByteCount(long bytes, Boolean si = false, int precision = 2)
{
int unit = si ? 1000 : 1024;
if (bytes < unit) return bytes + " B";
int exp = (int)(Math.Log(bytes) / Math.Log(unit));
String pre = (si ? "kMGTPE" : "KMGTPE")[(exp - 1)] + (si ? "" : "i");
return String.Format("{0} {1}{2}", Math.Round(bytes / Math.Pow(unit, exp), precision), pre, si ? "b" : "B");
}
}
}
(Using a custom ProgressBar)
I wanted to know specifically how I could improve the copying speed.
-
\$\begingroup\$ The latest version of the code will be available here : gist.github.com/sidewinder94/d15c9aab985e12d747fc \$\endgroup\$Irwene– Irwene2016年03月09日 08:09:38 +00:00Commented Mar 9, 2016 at 8:09
2 Answers 2
This
Directory.Exists(args[0]);
Directory.Exists(args[1]);
doesn't serve any purpose because you aren't evaluating the returned boolean value. In fact its more dangerous to use it with the args
not checked for null
at all. Your application will just crash if the application is called using only one or no argument at all.
The static void FileDiscovery()
method would benefit from a better name like CalculateDirectorySize()
and by returning a long
instead of void
.
Another point to mention is that the DirectoryInfo
class contains the method GetFileSystemInfos()
which would lead to the following change
private static long CalculateDirectorySize(String dir)
{
long directorySize = 0;
var dirInfo = new DirectoryInfo(dir);
try
{
foreach (var fileInfo in dirInfo.GetFileSystemInfos("*", SearchOption.TopDirectoryOnly))
{
directorySize += fileInfo.Length;
}
foreach (var directory in Directory.EnumerateDirectories(dir, "*", SearchOption.TopDirectoryOnly))
{
directorySize += CalculateDirectorySize(directory);
}
}
catch (Exception exception)
{
Console.WriteLine("[DISCOVERY][WARNING] : Couldn't open directory : {0}", dir);
}
return directorySize;
}
I have added private
access modifier to the method, because it is a good habit to add one.
Because _fileCount
is no where used I have removed it.
The FileCopy()
method is using a strange way to copy the files by reading and writing them using FileStream
's.
A more idiomatic way would be to use one of the overloaded
File.Copy()
methods. This methods are doing under the hood the same like your code but the streams are properly closed if any exception occurs.Directory.CreateDirectory()
can be called regardless if the directory exists or not. You can just skip the check forExists()
.- Having three times
Path.Combine(dest, dir)
won't be necessary if the result is stored in a variable. - Instead of calling
GetFileName()
for a directory you should callGetDirectoryName()
this would result in the following change
private static void FileCopy(String source, String dest)
{
try
{
foreach (var file in Directory.EnumerateFiles(source, "*", SearchOption.TopDirectoryOnly))
{
try
{
File.Copy(file, Path.Combine(dest, Path.GetFileName(file)));
_byteProgress += new FileInfo(file).Length;
_progressCount.Report((double)_byteProgress / (double)_byteCount);
}
catch (Exception e)
{
Console.WriteLine("[COPY][ERROR] : Couldn't copy file : {0} => {1}", file, e.Message);
}
}
foreach (var directory in Directory.EnumerateDirectories(source, "*", SearchOption.TopDirectoryOnly))
{
if (directory == @"G:\$RECYCLE.BIN") continue;
var dir = Path.GetDirectoryName(directory);
var destination = Path.Combine(dest, dir)
Directory.CreateDirectory(destination);
FileCopy(directory, destination);
}
}
catch (Exception exception)
{
Console.WriteLine("[COPY][WARNING] : Couldn't open directory : {0}", source);
}
}
Based on the comment
I was aware of the
File.Copy()
method at the time of writing the application BUT (and that's why I included the context) this method was throwing the same error as my explorer when used : that the file was not accessible and/or corrupted.
I would like to suggest to only use this kind of copy operation if the File.Copy()
method throws an IOException
having its own method like so
private static readonly int blockSize = 104857600;
private static bool SafeFileCopy(string source, string destination) {
try
{
using(var sourceStream = File.OpenRead(source))
using(var destinationStream = File.Open(destination, FileMode.Create,
FileAccess.ReadWrite))
{
sourceStream.CopyTo(destinationStream, blockSize);
return true
}
}
catch (Exception ex)
{
// do some logging
}
return false;
}
Resulting in the former foreach
like so
foreach (var file in Directory.EnumerateFiles(source, "*", SearchOption.TopDirectoryOnly))
{
string destinationFile = Path.Combine(dest, Path.GetFileName(file));
try
{
File.Copy(file, destinationFile);
_byteProgress += new FileInfo(file).Length;
_progressCount.Report((double)_byteProgress / (double)_byteCount);
}
catch (IOException ioex)
{
if (!SafeFileCopy(file, destinationFile))
{
// do some logging here
}
}
catch (Exception e)
{
Console.WriteLine("[COPY][ERROR] : Couldn't copy file : {0} => {1}", file, e.Message);
}
}
Basically you should only catch exceptions which you can handle or which you only need to log/swallow. But you should always catch specific exceptions only. Catching Exception
itself is a bad habit so for the SafeFileCopy()
this would be for you to change.
-
\$\begingroup\$ I was aware of the
File.Copy()
method at the time of writing the application BUT (and that's why I included the context) this method was throwing the same error as my explorer when used : that the file was not accessible and/or corrupted. Which was not the case when manually copying using streams. And since the performance was less than good, I came to post the question :) \$\endgroup\$Irwene– Irwene2016年03月08日 15:03:26 +00:00Commented Mar 8, 2016 at 15:03 -
\$\begingroup\$ Thanks for the rest of your remarks, I will be applying them. \$\endgroup\$Irwene– Irwene2016年03月08日 15:04:41 +00:00Commented Mar 8, 2016 at 15:04
-
\$\begingroup\$ Would you know of any way to speed up the copy between the streams ? Or is it a question better asked on SO ? \$\endgroup\$Irwene– Irwene2016年03月08日 16:01:30 +00:00Commented Mar 8, 2016 at 16:01
-
\$\begingroup\$ Basically you have used a high blocksize and I guess that the bottleneck is your harddisc. You can ask on SO or better search first on SO. \$\endgroup\$Heslacher– Heslacher2016年03月08日 16:05:22 +00:00Commented Mar 8, 2016 at 16:05
-
\$\begingroup\$ See also stackoverflow.com/a/32237548/2655508 \$\endgroup\$Heslacher– Heslacher2016年03月08日 16:10:01 +00:00Commented Mar 8, 2016 at 16:10
Use File.Copy. They use some lower level libraries that are much faster than copying streams. This improved performance for our application quite a bit.
Here's the code that File.Copy uses to actually copy the file (notice Win32Native.CopyFile):
/// <devdoc>
/// Note: This returns the fully qualified name of the destination file.
/// </devdoc>
[System.Security.SecuritySafeCritical] // auto-generated
[ResourceExposure(ResourceScope.Machine)]
[ResourceConsumption(ResourceScope.Machine)]
internal static String InternalCopy(String sourceFileName, String destFileName, bool overwrite)
{
Contract.Requires(sourceFileName != null);
Contract.Requires(destFileName != null);
Contract.Requires(sourceFileName.Length > 0);
Contract.Requires(destFileName.Length > 0);
String fullSourceFileName = Path.GetFullPathInternal(sourceFileName);
new FileIOPermission(FileIOPermissionAccess.Read, new String[] { fullSourceFileName }, false, false).Demand();
String fullDestFileName = Path.GetFullPathInternal(destFileName);
new FileIOPermission(FileIOPermissionAccess.Write, new String[] { fullDestFileName }, false, false).Demand();
bool r = Win32Native.CopyFile(fullSourceFileName, fullDestFileName, !overwrite);
if (!r)
{
// Save Win32 error because subsequent checks will overwrite this HRESULT.
int errorCode = Marshal.GetLastWin32Error();
String fileName = destFileName;
if (errorCode != Win32Native.ERROR_FILE_EXISTS)
{
// For a number of error codes (sharing violation, path
// not found, etc) we don't know if the problem was with
// the source or dest file. Try reading the source file.
using (SafeFileHandle handle = Win32Native.UnsafeCreateFile(fullSourceFileName, FileStream.GENERIC_READ, FileShare.Read, null, FileMode.Open, 0, IntPtr.Zero))
{
if (handle.IsInvalid)
fileName = sourceFileName;
}
if (errorCode == Win32Native.ERROR_ACCESS_DENIED)
{
if (Directory.InternalExists(fullDestFileName))
throw new IOException(Environment.GetResourceString("Arg_FileIsDirectory_Name", destFileName), Win32Native.ERROR_ACCESS_DENIED, fullDestFileName);
}
}
__Error.WinIOError(errorCode, fileName);
}
return fullDestFileName;
}
-
\$\begingroup\$ If they're using
File.Copy
, then why do you need to show the code for it? It would be customary to show how the code is different if you useFile.Copy
, not howFile.Copy
itself works. \$\endgroup\$mdfst13– mdfst132016年03月10日 23:04:45 +00:00Commented Mar 10, 2016 at 23:04