The aim of the program is to read the yahoo finance website in order to find potentially profitable stocks based on their past performance.
I have some code that works but I think it is of poor quality. I am trying to scrape data from a website. If the internet connection fails, the program should wait 5 seconds before retrying.
Program.cs
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace EarningEdge
{
class Program
{
static void Main(string[] args)
{
DateTime currDate = new DateTime(Constants.startYear, Constants.startMonth, Constants.startDay);
DateTime endDate = new DateTime(Constants.endYear, Constants.endMonth, Constants.endDay);
DateTime weekDate = currDate.AddDays(7 - (currDate.Day % 7 + 2));
string currDir = Environment.CurrentDirectory + @"\Stock\";
System.IO.Directory.CreateDirectory(currDir);
while (currDate.CompareTo(endDate) < 0)
{
if (currDate.DayOfWeek.Equals(DayOfWeek.Saturday))
{
weekDate = weekDate.AddDays(7);
}
// Do nothing on the weekend. For thy shall rest on the sabbath.
if (currDate.DayOfWeek.Equals(DayOfWeek.Saturday) || currDate.DayOfWeek.Equals(DayOfWeek.Sunday))
{
currDate = currDate.AddDays(1);
continue;
}
HtmlWeb web = new HtmlWeb();
//var html = @"https://finance.yahoo.com/calendar/earnings?from=2019年01月20日&to=2019年01月26日&day=2019年01月25日";
var html = @"https://finance.yahoo.com/calendar/earnings?from=" + weekDate.AddDays(-6).Year + "-" +
weekDate.AddDays(-6).Month.ToString("D2") + "-" + (weekDate.AddDays(-6).Day.ToString("D2")) +
"&to=" + weekDate.Year + "-" + weekDate.Month.ToString("D2") + "-" + weekDate.Day.ToString("D2")
+ "&day=" + currDate.Year + "-" + currDate.Month.ToString("D2") + "-" + currDate.Day.ToString("D2");
HtmlDocument htmlDoc = null;
try
{
htmlDoc = web.Load(html);
}
catch (Exception e)
{
System.IO.File.AppendAllText(currDir + @"ErrorLog.txt",
"\nDate of Earning Report : " + currDate.ToString("dd-MM-yyyy")
+ "\nError occured at : " + DateTime.Now.ToString("dd-MM-yyyy")
+ "\nError message : " + e.Message
+ "\nInner message : " + e.InnerException
+ "\nHtml : " + html
+ "\n\n");
// I don't need to worry about UI responsiveness so this should suffice.
System.Threading.Thread.Sleep(5000);
// Continue to re-run the above code after 5 seconds.
continue;
}
if (htmlDoc.DocumentNode != null)
{
var nodes = htmlDoc.DocumentNode.SelectNodes("//tbody");
foreach (var node in nodes.Descendants("tr"))
{
var counter = 0;
var row = node.ChildNodes;
string symbol = null;
foreach (var col in row)
{
// The Symbol Column of the table.
if (counter == Constants.Symbol)
{
symbol = col.InnerText;
break;
}
counter++;
//Console.WriteLine(symbol);
}
// For each company row access the earning history.
if (symbol != null)
{
// Even if the stock isn't optionable, I will still record it.
// For Academic purposes.
//// Check if the stock is optionable.
//html = @"https://finance.yahoo.com/quote/" + symbol + "/options?p=" + symbol;
////html = @"https://finance.yahoo.com/quote/FMBM/options?p=FMBM";
//while (true)
//{
// try
// {
// htmlDoc = web.Load(html);
// }
// catch (Exception e)
// {
// System.IO.File.AppendAllText(currDir + @"ErrorLog.txt",
// "\nDate of Earning Report : " + currDate.ToString("dd-MM-yyyy")
// + "\nError occured at : " + DateTime.Now.ToString("dd-MM-yyyy")
// + "\nSymbol : " + symbol
// + "\nError message : " + e.Message
// + "\nInner message : " + e.InnerException
// + "\nHtml : " + html
// + "\n\n");
// // I don't need to worry about UI responsiveness so this should suffice.
// System.Threading.Thread.Sleep(5000);
// continue;
// }
// break;
//}
//if (htmlDoc.DocumentNode != null)
//{
// var options = htmlDoc.DocumentNode.SelectNodes("//div[@id='Main']").Descendants("Section").ElementAt(0).InnerText;
// if (options == "Options data is not available")
// {
// continue;
// }
//}
html = @"https://finance.yahoo.com/quote/" + symbol + "/analysis?p=" + symbol;
//html = @"https://finance.yahoo.com/quote/MSG/analysis?p=MSG";
//htmlDoc = web.Load(html);
while (true)
{
try
{
htmlDoc = web.Load(html);
}
catch (Exception e)
{
System.IO.File.AppendAllText(currDir + @"ErrorLog.txt",
"\nDate of Earning Report : " + currDate.ToString("dd-MM-yyyy")
+ "\nError occured at : " + DateTime.Now.ToString("dd-MM-yyyy")
+ "\nSymbol : " + symbol
+ "\nError message : " + e.Message
+ "\nInner message : " + e.InnerException
+ "\nHtml : " + html
+ "\n\n");
// I don't need to worry about UI responsiveness so this should suffice.
System.Threading.Thread.Sleep(5000);
continue;
}
break;
}
if (htmlDoc.DocumentNode != null)
{
var earningNode = htmlDoc.DocumentNode.SelectNodes("//section[@data-test='qsp-analyst']");
// No earning history avaliable.
if (earningNode == null)
{
continue;
}
var tables = earningNode.Descendants("table");
var earnings = tables.ElementAt(2).Descendants("tbody").ElementAt(0).Descendants("tr").ElementAt(3);
var growthStr = tables.ElementAt(5).Descendants("tbody").ElementAt(0).Descendants("tr").ElementAt(0).Descendants("td").ElementAt(1).InnerText.Trim('%');
float growth;
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine("Date: " + currDate.ToString("dd-MM-YY"));
Console.WriteLine("Symbol : " + symbol);
if (float.TryParse(growthStr, out growth))
{
Console.WriteLine("Growth : " + growth);
}
float q1;
if (float.TryParse(earnings.Descendants("td").ElementAt(1).InnerText.Trim('%'), out q1))
{
Console.WriteLine(q1);
}
float q2;
if (float.TryParse(earnings.Descendants("td").ElementAt(2).InnerText.Trim('%'), out q2))
{
Console.WriteLine(q2);
}
float q3;
if (float.TryParse(earnings.Descendants("td").ElementAt(3).InnerText.Trim('%'), out q3))
{
Console.WriteLine(q3);
}
float q4;
if (float.TryParse(earnings.Descendants("td").ElementAt(4).InnerText.Trim('%'), out q4))
{
Console.WriteLine(q4);
}
if (q1 < 0 || q2 < 0 || q3 < 0 || q4 < 0)
{
continue;
}
if (growth < 80)
{
continue;
}
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine(symbol + " : Is expecting a 3 week climb");
Console.ForegroundColor = ConsoleColor.Green;
System.IO.File.AppendAllText(currDir + @"3WeekClimb.txt", currDate.ToString("dd-MM-yyyy") + " symbol: " + symbol + "\n");
// Store this stock in the database. Expect a 3 week climb.
// Manually check the chart to see if the climb already happened.
}
}
}
}
currDate = currDate.AddDays(1);
}
}
}
}
Constants.cs
namespace EarningEdge
{
static class Constants
{
public const int Symbol = 1;
public const int startDay = 29;
public const int startMonth = 1;
public const int startYear= 2019;
public const int endDay = 28;
public const int endMonth = 2;
public const int endYear= 2019;
}
}
Is there a more elegant way of doing this?
Is this bad practice? My intention is to have an infinite loop just in case the internet dies the program will continue to run, polling the website every 5 seconds until the internet is back online.
This is a console app using c#. I am using htmlAgilityPack to load the web page.
1 Answer 1
Don't split up dates into year/month/day, instead use a static DateTime
. Which means you need to rename your Constants
class to something like Setup
or Configuration
.
Your Main
is a 200+ lines long method. You should split this up into smaller methods and maybe even move some of those to dedicated classes.
Don't create your own (primitive) logging system, instead use an established platform like NLog or Serilog.
I'm somewhat baffled by the meaning of public const int Symbol = 1;
. Especially since there is also a variable named symbol
. Names should convey a meaning, and that seems to be totally lacking here.
Why do you loop through all the columns in a row when you only need the second one?
This code is repeated four times with only minimal changes:
float q1;
if (float.TryParse(earnings.Descendants("td").ElementAt(1).InnerText.Trim('%'), out q1))
{
Console.WriteLine(q1);
}
Don't copy-paste code; instead move this to a dedicated method.
Do not pointlessly abbreviate: currDate
is harder to read than currentdate
and doesn't gain you anything.
main()
function. \$\endgroup\$web
is). You could edit to provide a larger, more self-contained, block of code, provided you accept that answers can address any perceived issues with the code and not just your personal concerns with it. \$\endgroup\$