This class works as a single instance of a worker, a worker reads from a bank of queue items from a MySQL database table, although that logic is abstracted into ScraperQueueDao
class. One process runs only one worker, although many processes are spread across multiple servers.
This code is only used by me for personal projects, so the usability side of things doesn't matter as much as they would usually. I would like to primarily focus on improving my code, specifically the 70 line method ProcessQueueItem
.
public class ScraperWorker
{
private readonly ScraperQueue _scraperQueue;
private readonly Dictionary<string, ISocialEventHandler> _eventHandlers;
private readonly ScraperWorkerDao _scraperWorkerDao;
private readonly ScraperQueueDao _scraperQueueDao;
private bool _isProcessing;
public ScraperWorker(
ScraperQueue scraperQueue,
Dictionary<string, ISocialEventHandler> eventHandlers,
ScraperWorkerDao scraperWorkerDao,
ScraperQueueDao scraperQueueDao)
{
_scraperQueue = scraperQueue;
_eventHandlers = eventHandlers;
_scraperWorkerDao = scraperWorkerDao;
_scraperQueueDao = scraperQueueDao;
Process();
}
public void Start()
{
_isProcessing = true;
}
public void Stop()
{
_isProcessing = false;
}
public void Process()
{
new Thread(() =>
{
while (true)
{
RenewWorkerStatus(); // Checks if we have paused the worker via an external service.
if (!_isProcessing || !_scraperQueue.TryGetItem(out var item))
{
Console.WriteLine("Either the worker is paused, or the queue is empty.");
Thread.Sleep(TimeSpan.FromSeconds(30));
continue;
}
_scraperWorkerDao.UpdateWorkerLastSeen(StaticState.WorkerId);
var eventHandler = ResolveEventHandlerFromItem(item.Item);
eventHandler.SetCurrentItem(item.Item);
var result = ProcessQueueItem(item, eventHandler);
_scraperQueueDao.StoreItemResultInDatabase(result);
item.MarkAsComplete();
Thread.Sleep(500);
}
}).Start();
}
private void RenewWorkerStatus()
{
_isProcessing = _scraperWorkerDao.IsWorkerRunning(StaticState.WorkerId);
}
private ScraperQueueItemResult ProcessQueueItem(ScraperQueueItem item, ISocialEventHandler eventHandler)
{
try
{
Console.WriteLine("Started processing queue item: " + item.Item);
if (eventHandler.IsLoginNeeded())
{
eventHandler.Login();
}
eventHandler.NavigateToProfile(item.Item);
if (!eventHandler.TryWaitForProfileToLoad())
{
return new ScraperQueueItemResult(
item.Id, "Method 'TryWaitForProfileToLoad' returned false.", eventHandler.GetPageSource(), false
);
}
var profile = eventHandler.CreateProfile();
if (!profile.ShouldScrape())
{
return new ScraperQueueItemResult(
item.Id, "Method 'ShouldScrape' returned false.", eventHandler.GetPageSource(), false
);
}
if (profile.ShouldCollectConnections())
{
profile.Connections = eventHandler.GetConnections();
var filteredConnections = eventHandler.GetFilteredConnections(profile.Connections);
_scraperQueue.AddItems(eventHandler.ConvertConnectionsToQueueItems(filteredConnections));
}
if (profile.ShouldSave())
{
if (!profile.IsPrivate)
{
profile.Posts = eventHandler.GetPosts(profile.Username);
}
profile.Save();
Console.WriteLine("We successfully saved - " + profile.Url);
return new ScraperQueueItemResult(
item.Id, "success", eventHandler.GetPageSource(), true
);
}
else
{
return new ScraperQueueItemResult(
item.Id, "Method 'ShouldSave' returned false.", eventHandler.GetPageSource(), false
);
}
}
catch (Exception e)
{
new Client(new Configuration(StaticState.BugSnagApiKey)).Notify(e);
return new ScraperQueueItemResult(
item.Id, e.Message, eventHandler.GetPageSource(), false
);
}
}
private ISocialEventHandler ResolveEventHandlerFromItem(string item)
{
var domain = new Uri(item).Host.Replace("www.", "");
if (_eventHandlers.ContainsKey(domain))
{
return _eventHandlers[domain];
}
throw new Exception($"Failed to resolve event handler for domain {domain}");
}
public bool IsProcessing()
{
return _isProcessing;
}
}
-
\$\begingroup\$ I'm not sure why this has 2 downvotes or close votes for being broken, as the description doesn't say it's broken anywhere. And asking for a refactor is inline with what Code Review deems is on-topic. \$\endgroup\$Peilonrayz– Peilonrayz ♦2020年08月29日 14:32:02 +00:00Commented Aug 29, 2020 at 14:32
-
1\$\begingroup\$ Hey whilst I changed "refactor" to "improving my code" it would be great if you could clarify what you want refactored and why. \$\endgroup\$Peilonrayz– Peilonrayz ♦2020年08月29日 15:17:07 +00:00Commented Aug 29, 2020 at 15:17
1 Answer 1
All try
/ catch
blocks have a tendency to add more code to a function.
I see 2 ways to improve(decrease the code) the specified function, both include applying the Single Responsibility Principle states:
that every module, class, or function should have responsibility over a single part of the functionality provided by the software, and that responsibility should be entirely encapsulated by that module, class or function.
This means taking the contents of the more complicated if
statements and putting them into their own member functions. You can then either reference these functions directly or have a dictionary of functions to call in particular cases.