I am trying to test the validity of a few thousand public proxy servers quickly. To do this, I am sending web requests using each proxy to a website such as example.com, and checking that no errors such as timeouts occur, and that the page contains a certain string value which I already know that site will contain.
Using a synchronous method like HttpWebRequest.GetResponse simply will not do for my large list. So I am using HttpWebRequest.BeginGetResponse instead, and passing in an Action object called callback and invoking that callback upon completion or failure of the request.
I want to know if there is a better way to achieve the concurrency I require. Using HttpWebRequest.GetResponseAsync does not actually decrease the time it takes to test the whole list, although, unlike GetResponse, it does not block the UI thread.
I haven't implemented input validation on some of the classes yet, so please disregard that when answering. I also haven't added a timeout check using ThreadPool.RegisterWaitForSingleObject yet; please disregard that as well.
namespace HelperLib
{
/// <summary>
/// A proxy endpoint (address and port) that can be checked by sending a
/// request through it and inspecting the page that comes back.
/// </summary>
public class Proxy
{
    /// <summary>Host part of the proxy address, as passed to the constructor.</summary>
    public string IP { get; set; }

    /// <summary>Port part of the proxy address, as passed to the constructor.</summary>
    public int Port { get; set; }

    /// <summary>
    /// This class never assigns this property; presumably the caller records
    /// the outcome of <see cref="TestValidityAsync"/> here (TODO confirm).
    /// </summary>
    public bool IsValid { get; set; }

    public Proxy(string ip, int port)
    {
        //TODO Validate input.
        this.IP = ip;
        this.Port = port;
    }

    /// <summary>
    /// Requests <c>test.URL</c> through this proxy and reports the outcome
    /// through <paramref name="callback"/>.
    /// </summary>
    /// <param name="test">URL to fetch plus the text the page must / must not contain.</param>
    /// <param name="callback">
    /// Receives <c>true</c> when a body was read, it contains
    /// <c>test.Validation</c>, and (when <c>test.Invalidation</c> is set) it does
    /// not contain <c>test.Invalidation</c>; <c>false</c> otherwise. The second
    /// argument is the exception reported by the request helper, or null.
    /// </param>
    public void TestValidityAsync(ProxyTest test, Action<bool, Exception> callback)
    {
        var request = HttpWebRequest.Create(test.URL);
        request.Proxy = new WebProxy(this.ToString());
        WebUtility.GetResponseAsync(request, (requestState, ex) =>
        {
            // A response object can exist while the body is still null: the
            // helper assigns the response before it reads the stream, and a
            // failed read leaves responseString unset. Treat that as a failed
            // test instead of dereferencing null.
            string body = requestState.response != null ? requestState.responseString : null;
            callback.Invoke(IsExpectedPage(body, test), ex);
        });
    }

    /// <summary>
    /// Decides whether a downloaded page satisfies the test's
    /// validation / invalidation markers. A null body never passes.
    /// </summary>
    private static bool IsExpectedPage(string body, ProxyTest test)
    {
        if (body == null || !body.Contains(test.Validation))
        {
            return false;
        }

        // The invalidation marker is optional; when present it must be absent from the page.
        return test.Invalidation == null || !body.Contains(test.Invalidation);
    }

    /// <summary>Formats the proxy as <c>IP:Port</c>.</summary>
    public override string ToString()
    {
        return this.IP + ":" + this.Port;
    }
}
}
namespace HelperLib
{
/// <summary>
/// Describes one proxy check: the URL to request, the text the page must
/// contain, and optionally a text the page must not contain.
/// </summary>
public class ProxyTest
{
    /// <summary>Address requested through the proxy.</summary>
    public string URL { get; set; }

    /// <summary>Text that must be present in the downloaded page.</summary>
    public string Validation { get; set; }

    /// <summary>Text that must be absent from the downloaded page; null when unused.</summary>
    public string Invalidation { get; set; }

    /// <summary>Creates a test with no invalidation text.</summary>
    public ProxyTest(string url, string validation)
        : this(url, validation, null)
    {
    }

    /// <summary>Creates a test with both a required and a forbidden text.</summary>
    public ProxyTest(string url, string validation, string invalidation)
    {
        URL = url;
        Validation = validation;
        Invalidation = invalidation;
    }
}
}
namespace HelperLib
{
/// <summary>
/// Callback-based helper around <see cref="WebRequest.BeginGetResponse"/> that
/// downloads the response body as a string.
/// </summary>
public class WebUtility
{
    /// <summary>
    /// Starts <paramref name="request"/> via BeginGetResponse and arranges for
    /// <paramref name="callback"/> to be invoked once the body has been read or
    /// the request has failed.
    /// </summary>
    /// <param name="request">The request to send.</param>
    /// <param name="callback">
    /// Receives the <see cref="RequestState"/> for this request and the exception
    /// that stopped it, or null when the body was read completely.
    /// </param>
    public static void GetResponseAsync(WebRequest request, Action<RequestState, Exception> callback)
    {
        //Send a non-blocking asynchronous web request.
        var requestState = new RequestState();
        requestState.request = request;
        requestState.responseCallback = callback;
        requestState.request.BeginGetResponse(new AsyncCallback(ResponseCallback), requestState);
    }

    /// <summary>
    /// Completion handler for BeginGetResponse: finishes the request, reads the
    /// whole body into <c>responseString</c>, then notifies the caller once.
    /// </summary>
    private static async void ResponseCallback(IAsyncResult ar)
    {
        var requestState = (RequestState)ar.AsyncState;
        Exception failure = null;
        try
        {
            requestState.response = requestState.request.EndGetResponse(ar);
            requestState.responseStream = requestState.response.GetResponseStream();
            using (var sr = new StreamReader(requestState.responseStream))
            {
                requestState.responseString = await sr.ReadToEndAsync();
            }
        }
        catch (Exception ex)
        {
            // Remember the failure; the caller is told below. Note that response
            // may already be set here while responseString is still null.
            failure = ex;
        }

        // Invoke the callback outside the try block so that an exception thrown
        // by the callback itself is not caught above and does not cause the
        // callback to run a second time for the same request.
        requestState.responseCallback.Invoke(requestState, failure);
    }
}
}
namespace HelperLib
{
/// <summary>
/// Mutable bag of per-request data handed through BeginGetResponse as the
/// async state object and later passed to the completion callback.
/// </summary>
public class RequestState
{
    // --- request side ---

    /// <summary>The request being executed.</summary>
    public WebRequest request;

    /// <summary>Delegate to notify when the request finishes or fails.</summary>
    public Action<RequestState, Exception> responseCallback;

    // --- response side (null until the corresponding stage is reached) ---

    /// <summary>The response returned by EndGetResponse.</summary>
    public WebResponse response;

    /// <summary>The body stream obtained from <see cref="response"/>.</summary>
    public Stream responseStream;

    /// <summary>The complete body text once it has been read.</summary>
    public string responseString;

    // --- scratch storage; not referenced by the helper code shown with this class ---

    /// <summary>Pre-allocated, initially empty text accumulator.</summary>
    public StringBuilder responseBuilder = new StringBuilder();

    /// <summary>Pre-allocated 1024-byte buffer.</summary>
    public byte[] responseBuffer = new byte[1024];
}
}
Speed comparison vs HttpClient
// Benchmark: fires `iterations` requests through HelperLib.WebUtility at once,
// blocks until every completion callback has run, then prints the elapsed time.
private void MethodA()
{
    // Start signalled when there is nothing to wait for, otherwise WaitOne
    // below would block forever on an empty run.
    using (ManualResetEvent mre = new ManualResetEvent(iterations <= 0))
    {
        Stopwatch sw = Stopwatch.StartNew();
        ServicePointManager.DefaultConnectionLimit = 500;
        int responseCount = 0;
        for (int i = 0; i < iterations; i++)
        {
            WebRequest req = WebRequest.Create("http://example.com");
            HelperLib.WebUtility.GetResponseAsync(req, (requestState, ex) =>
            {
                // Callbacks for different requests can run concurrently, so a
                // plain ++ can lose updates and never reach `iterations`.
                // Interlocked makes the count exact and guarantees that exactly
                // one callback observes the final value.
                if (Interlocked.Increment(ref responseCount) == iterations)
                {
                    mre.Set();
                }
            });
        }
        mre.WaitOne();
        Debug.WriteLine(sw.ElapsedMilliseconds);
        //Outputs 816
    }
}
// Benchmark baseline: downloads the page `iterations` times with one
// HttpClient, awaiting each download before starting the next, then prints
// the elapsed time.
private async void MethodB()
{
    var timer = Stopwatch.StartNew();
    using (var http = new HttpClient())
    {
        int completed = 0;
        while (completed < iterations)
        {
            // The downloaded text is not used; only the elapsed time matters.
            await http.GetStringAsync("http://example.com");
            completed++;
        }
    }
    timer.Stop();
    Debug.WriteLine(timer.ElapsedMilliseconds);
    //Outputs 2744
}
1 Answer 1
Consider the following based on the example originally provided
// One client instance shared by every request.
static HttpClient client = new HttpClient();

// Starts `iterations` downloads without awaiting them individually, waits for
// all of them together, then prints the elapsed time.
private async Task MethodB() {
    var timer = Stopwatch.StartNew();
    var pending = new List<Task<string>>();
    int started = 0;
    while (started < iterations) {
        // Not awaited here, so the downloads overlap.
        pending.Add(client.GetStringAsync("http://example.com"));
        started++;
    }
    string[] responses = await Task.WhenAll(pending);
    timer.Stop();
    Debug.WriteLine(timer.ElapsedMilliseconds);
}
`HttpClient` should be created once and used for the entire lifecycle of the application.
Avoid `async void` functions. Instead, have them return a `Task`.
Tasks can be executed simultaneously using `Task.WhenAll`.
As for scale, group the requests into manageable batches and execute them asynchronously.
You can add a cancellation token to time out any requests that take longer than a predefined duration.
This would mean that for a batch it will only go as long as the set time out.
Each request can be encapsulated in its own function to handle exceptions or response functionality.
-
Very helpful, thanks. I will test this shortly, and if it performs as well as or better than my own implementation using BeginGetResponse, I will mark this as the answer. It certainly is less complicated to write. – JohnWick, Apr 3, 2018 at 1:43
-
Wow, using your method cut the time in half to 400ms. You rock! – JohnWick, Apr 3, 2018 at 1:54
Explore related questions
See similar questions with these tags.
HttpClient
and its async API \$\endgroup\$Task.WhenAll
? \$\endgroup\$