|
| 1 | +from selenium import webdriver |
| 2 | +from email_validator import validate_email, EmailNotValidError |
| 3 | +import csv |
| 4 | + |
def LinkedInEmailScraper(userEmail, userPassword,
                         postUrl='[INSERT URL TO LINKEDIN POST]',
                         maxLoadMore=3):
    '''
    Collects e-mail addresses that commenters left under a LinkedIn post.

    Opens the post in Chrome, follows the comment button's link to the
    login form, signs in, expands older comments up to ``maxLoadMore``
    times, and for every comment keeps the text of the first link in its
    paragraph when that text validates as an e-mail address.

    Args:
        userEmail: e-mail address typed into the login form.
        userPassword: password typed into the login form.
        postUrl: URL of the post to scrape, e.g.
            'https://www.linkedin.com/posts/faangpath_hiring-womxn-ghc2020-activity-6721287139721650176-QFCV/'.
        maxLoadMore: maximum number of clicks on the "show previous
            comments" button; raise it to walk further back in the thread.

    Returns:
        dict mapping the commenter name (innerHTML of the name element) to
        the validated e-mail address. A later comment by the same name
        overwrites an earlier one.

    Raises:
        Any Selenium error raised while opening the page or logging in is
        propagated; the browser is closed first.
    '''
    # Imported locally so this function stays self-contained. ``By``
    # locators work on Selenium 3 and 4, whereas the ``find_element_by_*``
    # helpers were removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    emailList = {}
    browser = webdriver.Chrome()

    try:
        browser.get(postUrl)  # visits page of the desired post

        # implicitly_wait() does not pause the script: it sets how long each
        # element lookup keeps polling before giving up.
        browser.implicitly_wait(5)

        # NOTE(review): absolute XPaths depend on LinkedIn's current markup.
        commentButton = browser.find_element(
            By.XPATH, '/html/body/main/section[1]/section[1]/div/div[3]/a[2]')
        browser.get(commentButton.get_attribute('href'))  # leads to the login form

        emailField = browser.find_element(By.XPATH, '//*[@id="username"]')
        passwordField = browser.find_element(By.XPATH, '//*[@id="password"]')
        emailField.send_keys(userEmail)
        passwordField.send_keys(userPassword)
        browser.find_element(
            By.XPATH,
            '//*[@id="app__container"]/main/div[3]/form/div[3]/button').submit()

        commentSection = browser.find_element(By.CSS_SELECTOR, '.comments-comments-list')

        for _ in range(maxLoadMore):
            try:
                showPrevious = commentSection.find_element(
                    By.CLASS_NAME, 'comments-comments-list__show-previous-container')
                showPrevious.find_element(By.TAG_NAME, 'button').click()
            except Exception:
                # Best effort: a missing button means there is nothing more
                # to load. ``Exception`` (not a bare except) lets Ctrl-C through.
                print('End of checking comments')
                break

        # NOTE(review): with a 20 s lookup timeout, every comment lacking a
        # name, paragraph or link below costs up to 20 s before it is skipped.
        browser.implicitly_wait(20)

        for comment in commentSection.find_elements(By.TAG_NAME, 'article'):
            try:
                nameElement = comment.find_element(By.CLASS_NAME, 'hoverable-link-text')
                paragraph = comment.find_element(By.TAG_NAME, 'p')
                candidate = paragraph.find_element(By.TAG_NAME, 'a').get_attribute('innerHTML')
                commenterEmail = validate_email(candidate).email  # raises if not a valid address
                commenterName = nameElement.get_attribute('innerHTML')
            except Exception:
                # Comment without a usable name/link, or the link text is
                # not a valid e-mail address: skip it.
                continue

            emailList[commenterName] = commenterEmail
    finally:
        # Always close Chrome, even when a lookup above fails.
        browser.quit()

    return emailList
| 56 | + |
def DictToCSV(input_dict, output_path='./LinkedIn Email Scraper/emails.csv'):
    '''
    Writes a name -> email dictionary to a two-column CSV file.

    The file starts with a ``name,email`` header followed by one row per
    dictionary entry, in the dictionary's iteration order. Fields that
    contain commas, quotes or line breaks are quoted by ``csv.writer`` so
    the file stays parseable.

    Args:
        input_dict: mapping of commenter name to e-mail address.
        output_path: destination file; it is overwritten if it exists.

    Raises:
        OSError: if the file cannot be opened for writing (for example
            when the parent directory does not exist).
    '''
    # newline='' hands line-ending control to the csv module, as its docs
    # require; lineterminator='\n' keeps the original one-row-per-'\n' layout.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['name', 'email'])
        writer.writerows(input_dict.items())
| 66 | + |
if __name__ == '__main__':
    # Script entry point: scrape the configured post and save the result.
    # Credentials are read from the environment when set, so they do not
    # have to be written into this file; otherwise replace the placeholders.
    import os

    userEmail = os.environ.get(
        'LINKEDIN_EMAIL', '[INSERT YOUR EMAIL ADDRESS FOR LINKEDIN ACCOUNT]')
    userPassword = os.environ.get(
        'LINKEDIN_PASSWORD', '[INSERT YOUR PASSWORD FOR LINKEDIN ACCOUNT]')

    emailList = LinkedInEmailScraper(userEmail, userPassword)
    DictToCSV(emailList)
| 73 | + |
| 74 | + |
| 75 | + |
| 76 | + |
| 77 | + |
| 78 | + |
| 79 | + |
| 80 | + |
0 commit comments