Improved code at: Email a notification when detecting changes on a website - follow-up
Improved code at: Email a notification when detecting changes on a website - follow-up
Improved code at: Email a notification when detecting changes on a website - follow-up
Improved code at: Email a notification when detecting changes on a website - follow-up
UPDATE (suggested changes from Caridorc):
#!/usr/bin/env python3
import urllib.request, hashlib, time, html2text, smtplib, datetime, argparse, difflib, logging
class urlchange:
def __init__(self, url):
self.url = url
self.urlhash = self.createhash()
self.content = self.getcontent()
loggerdate = datetime.infodatetime.now("Start).strftime( "%d.%m.%Y %H:%M:%S" )
print(date+": Start Monitoring... hash: {self"+self.urlhash}".format(**locals()))
def getcontent(self):
#Try to get data
try:
urldata = urllib.request.urlopen(self.url).read().decode("utf-8","ignore")
urldata = html2text.html2text(urldata)
except urllib.error.HTTPError as e:
if hasattrprint(e"Can't open url: ", "reason"self.url):
return urldata
logger.critical("Can't reach server: {e.reason}".format(**locals()))
def createhash(self):
#create exit(1)hash
urldata elif= hasattrself.getcontent(e, "code"):.encode("utf-8")
md5hash = hashlib.md5()
logger.critical("Request was not fulfilled: {e.code}"md5hash.format(**localsupdate())urldata)
return md5hash.hexdigest()
exit def comparehash(1self):
date = datetime.datetime.now().strftime( "%d.%m.%Y else%H:%M:%S" )
loggerif(self.criticalcreatehash("Connection) Error!"== self.urlhash):
print(date+": Nothing has exit(1changed")
return urldata
return False
def createhash(self)else:
#create hash
print(date+": Something has changed")
urldata = self.getcontent().encode("utf-8")
md5hash =if(not hashlibargs.md5(nodiff):
md5hash.update(urldata)
return md5hashprint(self.hexdigestdiff())
def comparehash(self):
if(self.createhash() ==not selfargs.urlhashnomail):
logger.info("Nothing has changed")
try:
return False
else:
sendmail("Url has changed!","The Url logger"+self.info("Somethingurl+" has changed")
changed at "+date+" .\n\nNew content:\n"+self.diff())
date = datetime.datetime.now().strftime( "%d.%m.%Y %H:%M:%S" )
except:
if(not args.nomail):
sendmail("Url has changed!","The Url {self"+self.url}url+" has changed at {date}"+date+" .".format(**locals()))
ifelif(not args.nodiffnomail):
diff = self.diffsendmail()
"Url has changed!","The Url "+self.url+" has changed at "+date+" logger.info("{diff}".format(**locals()))
if(notreturn args.nomail):True
def diff(self):
#second mail because diff#what couldhas bechaged
time consuming
start, end = 0, 0
newcontent = diffself.encodegetcontent("ascii","ignore")
#start of changes
sendmail("Urlfor difference!"i,"Thej Urlin {enumerate(self.url}content):
has changed at {date} .\n\nNew content\n{diff}".format if(**localsi<len())newcontent) and j != newcontent[i]):
return True
start=i
def diff(self):
result = "" break
newcontent#end =of self.getcontent()changes
s =for difflib.SequenceMatcher(Nonei,j in enumerate(reversed(self.content,newcontent)):
for tag, i1, i2, j1,if( j2(len(newcontent)-(i+1))>0 inand s.get_opcodesj != newcontent[len(newcontent)-(i+1)]):
if(tag == "insert" or tag == "replaced"end=len(newcontent):-i
result += newcontent[j1:j2]break
return resultnewcontent[start:end]
def sendmail(subject,message):
try:
server = smtplib.SMTP("smtp.server.com",587)
server.set_debuglevel(0)
server.ehlo()
server.starttls()
server.login("[email protected]","password")
except Exception as inst:
logger.citical("SMTP Connection Error: {inst.args}".format(**localsprint()))
"Can't connect to the SMTP exit(1server!")
date = datetime.datetime.now().strftime( "%d.%m.%Y %H:%M:%S" )
msg = "From: email@server.com\nSubjectde\nSubject: {subject}\nDate%s\nDate: {%s\n\n%s""" % (subject, date}\n\n{, message}""".format(**locals())
server.sendmail("email@server.com"de","email2@server.com"de",msg)
server.quit()
logging.infoprint("emaildate+": email was sent")
# Command-line interface: one positional URL plus polling/notification switches.
parser = argparse.ArgumentParser(
    description="Monitor if a website has changed.",
)
parser.add_argument("url", help="url that should be monitored")
parser.add_argument(
    "-t", "--time",
    help="seconds between checks (default: 600)",
    default=600,
    type=int,
)
parser.add_argument("-nd", "--nodiff", action="store_true", help="show no difference")
parser.add_argument("-n", "--nomail", action="store_true", help="no email is sent")
args = parser.parse_args()

# Configure the root logger: timestamped messages at INFO level and above.
logging.basicConfig(
    datefmt="%d.%m.%Y %H:%M:%S:",
    format="%(asctime)s %(message)s",
)
logger = logging.getLogger()
logger.setLevel(logging.INFO)

url1 = urlchange(args.url)
# Wait one interval before each check and stop at the first detected change.
while True:
    time.sleep(args.time)
    if url1.comparehash():
        break
UPDATE (suggested changes from Caridorc):
#!/usr/bin/env python3
import urllib.request, hashlib, time, html2text, smtplib, datetime, argparse, difflib, logging
class urlchange:
    """Watch one URL and report when its text rendering changes.

    The page is downloaded, converted to plain text with ``html2text`` and
    summarised as an MD5 hex digest.  ``comparehash`` re-downloads the page
    and compares digests against the snapshot taken at construction time.

    Relies on the module-level names ``logger``, ``args`` (with ``nomail``
    and ``nodiff`` attributes) and ``sendmail`` defined elsewhere in the file.
    """

    def __init__(self, url):
        """Take the reference snapshot of *url*."""
        self.url = url
        # Fetch once so the stored hash and the stored content are guaranteed
        # to describe the same snapshot of the page.
        self.content = self.getcontent()
        self.urlhash = self.createhash(self.content)
        logger.info("Start Monitoring... hash %s", self.urlhash)

    def getcontent(self):
        """Download the page and return it as plain text.

        Logs a critical message and exits with status 1 if the URL cannot
        be opened.
        """
        try:
            # The context manager closes the HTTP response even on read errors.
            with urllib.request.urlopen(self.url) as response:
                raw = response.read()
        except urllib.error.URLError as e:
            # HTTPError is a URLError subclass that carries both ``code`` and
            # ``reason``, so ``code`` must be tested first to tell them apart.
            if hasattr(e, "code"):
                logger.critical("Request was not fulfilled: %s", e.code)
            elif hasattr(e, "reason"):
                logger.critical("Can't reach server: %s", e.reason)
            else:
                logger.critical("Connection Error!")
            raise SystemExit(1)
        return html2text.html2text(raw.decode("utf-8", "ignore"))

    def createhash(self, content=None):
        """Return the MD5 hex digest of *content*.

        When *content* is omitted the page is downloaded first.
        """
        if content is None:
            content = self.getcontent()
        return hashlib.md5(content.encode("utf-8")).hexdigest()

    def comparehash(self):
        """Check the page once; return True if it differs from the snapshot.

        On a change, optionally mails a notice and logs/mails the new text,
        depending on ``args.nomail`` and ``args.nodiff``.
        """
        # One download serves both the hash comparison and the diff.
        newcontent = self.getcontent()
        if self.createhash(newcontent) == self.urlhash:
            logger.info("Nothing has changed")
            return False

        logger.info("Something has changed")
        date = datetime.datetime.now().strftime("%d.%m.%Y %H:%M:%S")
        if not args.nomail:
            sendmail(
                "Url has changed!",
                "The Url {} has changed at {} .".format(self.url, date),
            )
        if not args.nodiff:
            diff = self.diff(newcontent)
            logger.info("%s", diff)
            if not args.nomail:
                # second mail because diff could be time consuming
                # Drop non-ASCII characters; str.encode returns a new object,
                # so the result has to be kept for this to have any effect.
                ascii_diff = diff.encode("ascii", "ignore").decode("ascii")
                sendmail(
                    "Url difference!",
                    "The Url {} has changed at {} .\n\nNew content\n{}".format(
                        self.url, date, ascii_diff
                    ),
                )
        return True

    def diff(self, newcontent=None):
        """Return the text that is new relative to the stored snapshot.

        Concatenates every span of *newcontent* that difflib reports as
        inserted or replaced.  When *newcontent* is omitted the page is
        downloaded first.
        """
        if newcontent is None:
            newcontent = self.getcontent()
        matcher = difflib.SequenceMatcher(None, self.content, newcontent)
        # difflib's opcode tags are "replace", "delete", "insert" and "equal".
        return "".join(
            newcontent[j1:j2]
            for tag, _i1, _i2, j1, j2 in matcher.get_opcodes()
            if tag in ("insert", "replace")
        )
def sendmail(subject, message):
    """Send a plain-text notification mail over SMTP with STARTTLS.

    subject -- text for the Subject header
    message -- mail body

    Logs a critical message and exits with status 1 if connecting or
    logging in fails.  Uses the module-level ``logger``.
    """
    try:
        server = smtplib.SMTP("smtp.server.com", 587)
        server.set_debuglevel(0)
        server.ehlo()
        server.starttls()
        server.login("[email protected]", "password")
    except Exception as inst:
        logger.critical("SMTP Connection Error: %s", inst.args)
        raise SystemExit(1)

    date = datetime.datetime.now().strftime("%d.%m.%Y %H:%M:%S")
    msg = "From: email@server.com\nSubject: {}\nDate: {}\n\n{}".format(
        subject, date, message
    )
    try:
        server.sendmail("email@server.com", "email2@server.com", msg)
    finally:
        # Close the session even when the send itself fails.
        server.quit()
    logger.info("email was sent")
# Command-line interface: one positional URL plus polling/notification switches.
parser = argparse.ArgumentParser(
    description="Monitor if a website has changed.",
)
parser.add_argument("url", help="url that should be monitored")
parser.add_argument(
    "-t", "--time",
    help="seconds between checks (default: 600)",
    default=600,
    type=int,
)
parser.add_argument("-nd", "--nodiff", action="store_true", help="show no difference")
parser.add_argument("-n", "--nomail", action="store_true", help="no email is sent")
args = parser.parse_args()

# Configure the root logger: timestamped messages at INFO level and above.
logging.basicConfig(
    datefmt="%d.%m.%Y %H:%M:%S:",
    format="%(asctime)s %(message)s",
)
logger = logging.getLogger()
logger.setLevel(logging.INFO)

url1 = urlchange(args.url)
# Wait one interval before each check and stop at the first detected change.
while True:
    time.sleep(args.time)
    if url1.comparehash():
        break
#!/usr/bin/env python3
import urllib.request, hashlib, time, html2text, smtplib, datetime, argparse
class urlchange:
    """Watch one URL and report when its text rendering changes.

    The page is downloaded, converted to plain text with ``html2text`` and
    summarised as an MD5 hex digest.  ``comparehash`` re-downloads the page
    and compares digests against the snapshot taken at construction time.

    Relies on the module-level names ``args`` (with ``nomail`` and
    ``nodiff`` attributes) and ``sendmail`` defined elsewhere in the file.
    """

    def __init__(self, url):
        """Take the reference snapshot of *url* and announce the start."""
        self.url = url
        # Fetch once so the stored hash and the stored content are guaranteed
        # to describe the same snapshot of the page.
        self.content = self.getcontent()
        self.urlhash = self.createhash(self.content)
        date = datetime.datetime.now().strftime("%d.%m.%Y %H:%M:%S")
        print(date + ": Start Monitoring... hash: " + self.urlhash)

    def getcontent(self):
        """Download the page and return it as plain text.

        Prints a message and re-raises the underlying error when the URL
        cannot be opened (OSError covers urllib's URLError/HTTPError;
        ValueError is raised for a malformed URL).
        """
        try:
            # The context manager closes the HTTP response even on read errors.
            with urllib.request.urlopen(self.url) as response:
                raw = response.read()
        except (OSError, ValueError):
            print("Can't open url: ", self.url)
            # Nothing was downloaded, so there is no value to return; surface
            # the real error instead of failing later on an unbound local.
            raise
        return html2text.html2text(raw.decode("utf-8", "ignore"))

    def createhash(self, content=None):
        """Return the MD5 hex digest of *content*.

        When *content* is omitted the page is downloaded first.
        """
        if content is None:
            content = self.getcontent()
        return hashlib.md5(content.encode("utf-8")).hexdigest()

    def comparehash(self):
        """Check the page once; return True if it differs from the snapshot.

        On a change, prints the new text unless ``args.nodiff`` is set and
        sends a notification mail unless ``args.nomail`` is set.
        """
        date = datetime.datetime.now().strftime("%d.%m.%Y %H:%M:%S")
        # One download serves both the hash comparison and the diff.
        newcontent = self.getcontent()
        if self.createhash(newcontent) == self.urlhash:
            print(date + ": Nothing has changed")
            return False

        print(date + ": Something has changed")
        notice = "The Url " + self.url + " has changed at " + date + " ."
        if args.nodiff:
            if not args.nomail:
                sendmail("Url has changed!", notice)
            return True

        changes = self.diff(newcontent)
        print(changes)
        if not args.nomail:
            try:
                sendmail("Url has changed!", notice + "\n\nNew content:\n" + changes)
            except Exception:
                # Retry with the bare notice -- presumably for the case where
                # the diff text itself cannot be mailed.
                sendmail("Url has changed!", notice)
        return True

    def diff(self, newcontent=None):
        """Return the changed middle section of the new page text.

        Strips the longest common prefix and the longest common suffix
        shared with the stored snapshot and returns what is left of
        *newcontent*.  When *newcontent* is omitted the page is downloaded
        first.
        """
        if newcontent is None:
            newcontent = self.getcontent()
        old = self.content
        shared = min(len(old), len(newcontent))

        # start of changes
        start = 0
        while start < shared and old[start] == newcontent[start]:
            start += 1

        # end of changes; the suffix must not overlap the prefix already matched
        tail = 0
        while tail < shared - start and old[-1 - tail] == newcontent[-1 - tail]:
            tail += 1

        return newcontent[start:len(newcontent) - tail]
def sendmail(subject, message):
    """Send a plain-text notification mail over SMTP with STARTTLS.

    subject -- text for the Subject header
    message -- mail body

    Prints a message and re-raises the underlying error if connecting or
    logging in fails.
    """
    try:
        server = smtplib.SMTP("smtp.server.com", 587)
        server.set_debuglevel(0)
        server.ehlo()
        server.starttls()
        server.login("[email protected]", "password")
    except (smtplib.SMTPException, OSError):
        print("Can't connect to the SMTP server!")
        # Without a usable session there is nothing to send through, so
        # surface the real error instead of failing on an unbound ``server``.
        raise

    date = datetime.datetime.now().strftime("%d.%m.%Y %H:%M:%S")
    msg = "From: email@server.de\nSubject: %s\nDate: %s\n\n%s" % (subject, date, message)
    try:
        server.sendmail("email@server.de", "email2@server.de", msg)
    finally:
        # Close the session even when the send itself fails.
        server.quit()
    print(date + ": email was sent")
# Command-line interface: one positional URL plus polling/notification switches.
parser = argparse.ArgumentParser(
    description="Monitor if a website has changed.",
)
parser.add_argument("url", help="url that should be monitored")
parser.add_argument(
    "-t", "--time",
    help="seconds between checks (default: 600)",
    default=600,
    type=int,
)
parser.add_argument("-nd", "--nodiff", action="store_true", help="show no difference")
parser.add_argument("-n", "--nomail", action="store_true", help="no email is sent")
args = parser.parse_args()

url1 = urlchange(args.url)
# Wait one interval before each check and stop at the first detected change.
while True:
    time.sleep(args.time)
    if url1.comparehash():
        break