Skip to main content
Code Review

Return to Question

Question Protected by Community Bot
edited tags; edited tags
Link
200_success
  • 145.6k
  • 22
  • 190
  • 479
deleted 41 characters in body
Source Link
Jamal
  • 35.2k
  • 13
  • 134
  • 238

Here is a small script I wrote to get the HNews ask section and display them without using a web browser. I'm just looking for feedback on how to improve my style/coding logic/overall code.

#!/usr/bin/python
'''This script gets the headlines from hacker news ask section'''
import urllib2
import HTMLParser
import re
class HNParser(HTMLParser.HTMLParser):
    '''Collect the text found inside links whose href starts with "i".

    Feed HTML with ``feed()``; captured text fragments accumulate, in
    document order, in ``self.data``.  Presumably the "i" prefix is meant
    to select Hacker News ``item?id=...`` links -- confirm against the
    page markup.
    '''

    def __init__(self):
        # The base initializer is called explicitly (not via super()) so
        # the class also works where HTMLParser is an old-style class.
        HTMLParser.HTMLParser.__init__(self)
        # Text fragments captured while inside a matching link.
        self.data = []
        # Nesting depth of <a> tags seen since capture started; 0 = idle.
        self.recording = 0

    def handle_starttag(self, tag, attribute):
        '''Start capturing on a matching <a>, or track nested <a> tags.

        ``attribute`` is the list of ``(name, value)`` pairs supplied by
        the parser for the start tag.
        '''
        if tag != 'a':
            return
        if self.recording:
            # Already capturing: count the nested <a> so that its end tag
            # does not stop the capture early.
            self.recording += 1
            return
        # Start only when some href attribute begins with "i".  The value
        # is checked for truthiness first because an empty or valueless
        # href ('' or None) must not raise, and a tag without any
        # qualifying href must never start a capture.
        wanted = any(
            name == 'href' and value and value.startswith('i')
            for name, value in attribute
        )
        if wanted:
            self.recording = 1

    def handle_endtag(self, tag):
        '''Leave one level of <a> nesting while a capture is active.'''
        if tag == 'a' and self.recording:
            self.recording -= 1

    def handle_data(self, data):
        '''Store text that appears while a capture is active.'''
        if self.recording:
            self.data.append(data)
# Fetch the Hacker News "ask" page and print the captured link texts.
HOST = 'http://news.ycombinator.com/ask'
parser = HNParser()
f = urllib2.urlopen(HOST)
try:
    rawHTML = f.read()
finally:
    # Release the connection even if the read fails; try/finally is used
    # because the response object is not a context manager on Python 2.
    f.close()
parser.feed(rawHTML)
# Flush any input still buffered in the parser *before* reading results.
parser.close()
# Print every second captured entry (indices 0, 2, 4, ...).  Presumably the
# odd-indexed entries are the per-item comment links -- confirm against
# the page markup.
for headline in parser.data[::2]:
    # Parenthesised single-argument print behaves the same on Python 2.
    print(headline)

Any comments would be great. Thanks!

Here is a small script I wrote to get the HNews ask section and display them without using a web browser. Just looking for feedback on how to improve my style/coding logic/overall code.

#!/usr/bin/python
'''This script gets the headlines from hacker news ask section'''
import urllib2
import HTMLParser
import re
class HNParser(HTMLParser.HTMLParser):
    '''Collect the text found inside links whose href starts with "i".

    Feed HTML with ``feed()``; captured text fragments accumulate, in
    document order, in ``self.data``.  Presumably the "i" prefix is meant
    to select Hacker News ``item?id=...`` links -- confirm against the
    page markup.
    '''

    def __init__(self):
        # The base initializer is called explicitly (not via super()) so
        # the class also works where HTMLParser is an old-style class.
        HTMLParser.HTMLParser.__init__(self)
        # Text fragments captured while inside a matching link.
        self.data = []
        # Nesting depth of <a> tags seen since capture started; 0 = idle.
        self.recording = 0

    def handle_starttag(self, tag, attribute):
        '''Start capturing on a matching <a>, or track nested <a> tags.

        ``attribute`` is the list of ``(name, value)`` pairs supplied by
        the parser for the start tag.
        '''
        if tag != 'a':
            return
        if self.recording:
            # Already capturing: count the nested <a> so that its end tag
            # does not stop the capture early.
            self.recording += 1
            return
        # Start only when some href attribute begins with "i".  The value
        # is checked for truthiness first because an empty or valueless
        # href ('' or None) must not raise, and a tag without any
        # qualifying href must never start a capture.
        wanted = any(
            name == 'href' and value and value.startswith('i')
            for name, value in attribute
        )
        if wanted:
            self.recording = 1

    def handle_endtag(self, tag):
        '''Leave one level of <a> nesting while a capture is active.'''
        if tag == 'a' and self.recording:
            self.recording -= 1

    def handle_data(self, data):
        '''Store text that appears while a capture is active.'''
        if self.recording:
            self.data.append(data)
# Fetch the Hacker News "ask" page and print the captured link texts.
HOST = 'http://news.ycombinator.com/ask'
parser = HNParser()
f = urllib2.urlopen(HOST)
try:
    rawHTML = f.read()
finally:
    # Release the connection even if the read fails; try/finally is used
    # because the response object is not a context manager on Python 2.
    f.close()
parser.feed(rawHTML)
# Flush any input still buffered in the parser *before* reading results.
parser.close()
# Print every second captured entry (indices 0, 2, 4, ...).  Presumably the
# odd-indexed entries are the per-item comment links -- confirm against
# the page markup.
for headline in parser.data[::2]:
    # Parenthesised single-argument print behaves the same on Python 2.
    print(headline)

Any comments would be great. Thanks!

Here is a small script I wrote to get the HNews ask section and display them without using a web browser. I'm just looking for feedback on how to improve my style/coding logic/overall code.

#!/usr/bin/python
'''This script gets the headlines from hacker news ask section'''
import urllib2
import HTMLParser
import re
class HNParser(HTMLParser.HTMLParser):
    '''Collect the text found inside links whose href starts with "i".

    Feed HTML with ``feed()``; captured text fragments accumulate, in
    document order, in ``self.data``.  Presumably the "i" prefix is meant
    to select Hacker News ``item?id=...`` links -- confirm against the
    page markup.
    '''

    def __init__(self):
        # The base initializer is called explicitly (not via super()) so
        # the class also works where HTMLParser is an old-style class.
        HTMLParser.HTMLParser.__init__(self)
        # Text fragments captured while inside a matching link.
        self.data = []
        # Nesting depth of <a> tags seen since capture started; 0 = idle.
        self.recording = 0

    def handle_starttag(self, tag, attribute):
        '''Start capturing on a matching <a>, or track nested <a> tags.

        ``attribute`` is the list of ``(name, value)`` pairs supplied by
        the parser for the start tag.
        '''
        if tag != 'a':
            return
        if self.recording:
            # Already capturing: count the nested <a> so that its end tag
            # does not stop the capture early.
            self.recording += 1
            return
        # Start only when some href attribute begins with "i".  The value
        # is checked for truthiness first because an empty or valueless
        # href ('' or None) must not raise, and a tag without any
        # qualifying href must never start a capture.
        wanted = any(
            name == 'href' and value and value.startswith('i')
            for name, value in attribute
        )
        if wanted:
            self.recording = 1

    def handle_endtag(self, tag):
        '''Leave one level of <a> nesting while a capture is active.'''
        if tag == 'a' and self.recording:
            self.recording -= 1

    def handle_data(self, data):
        '''Store text that appears while a capture is active.'''
        if self.recording:
            self.data.append(data)
# Fetch the Hacker News "ask" page and print the captured link texts.
HOST = 'http://news.ycombinator.com/ask'
parser = HNParser()
f = urllib2.urlopen(HOST)
try:
    rawHTML = f.read()
finally:
    # Release the connection even if the read fails; try/finally is used
    # because the response object is not a context manager on Python 2.
    f.close()
parser.feed(rawHTML)
# Flush any input still buffered in the parser *before* reading results.
parser.close()
# Print every second captured entry (indices 0, 2, 4, ...).  Presumably the
# odd-indexed entries are the per-item comment links -- confirm against
# the page markup.
for headline in parser.data[::2]:
    # Parenthesised single-argument print behaves the same on Python 2.
    print(headline)
Tweeted twitter.com/#!/StackCodeReview/status/151624869677768704
Renamed title to be more specific to the code presented
Link

Review my HNews "ask section" page scraping Python script to make it more pythonic

Source Link
Greg Brown
  • 185
  • 1
  • 1
  • 4
Loading
lang-py

AltStyle によって変換されたページ (->オリジナル) /