|
1 | 1 | import requests
|
| 2 | +import json |
| 3 | +import os |
2 | 4 | from bs4 import BeautifulSoup
|
3 | 5 |
|
4 | 6 | # to scrape title
|
@@ -79,13 +81,35 @@ def getImage(soup, url):
|
79 | 81 | if ((not "http://" in url) or (not "https://" in url)):
|
80 | 82 | url = "https://" + url
|
81 | 83 |
|
82 | | -# getting the html |
83 | | -r = requests.get(url) |
84 | | -soup = BeautifulSoup(r.text, "html.parser") |
85 | | - |
86 | 84 | # printing values
|
87 | | -print("\nTitle : ", getTitle(soup)) |
88 | | -print("Description : ", getDesc(soup)) |
89 | | -print("URL : ", url) |
90 | | -print("Image link : ", getImage(soup, url)) |
91 | | -print("\n--END--\n") |
| 85 | + |
| 86 | +# first check in the DB |
| 87 | +db = {} |
| 88 | +# create file if it doesn't exist |
| 89 | +if not os.path.exists('Link-Preview/db.json'): |
| 90 | + f = open('Link-Preview/db.json', 'w') |
| 91 | + f.write("{}") |
| 92 | + f.close() |
| 93 | + |
| 94 | +with open('Link-Preview/db.json', 'r') as file: |
| 95 | + db = json.loads(file.read()) |
| 96 | +db["mj"] = { |
| 97 | + "name": "madhav" |
| 98 | +} |
| 99 | +print(db) |
| 100 | + |
| 101 | +# write the updated db back to the file |
| 102 | +with open('Link-Preview/db.json', 'w') as file: |
| 103 | + json.dump(db, file) |
| 104 | + |
| 105 | +# if not found in the db, fetch via request |
| 106 | + |
| 107 | +# getting the html |
| 108 | +# r = requests.get(url) |
| 109 | +# soup = BeautifulSoup(r.text, "html.parser") |
| 110 | + |
| 111 | +# print("\nTitle : ", getTitle(soup)) |
| 112 | +# print("Description : ", getDesc(soup)) |
| 113 | +# print("URL : ", url) |
| 114 | +# print("Image link : ", getImage(soup, url)) |
| 115 | +# print("\n--END--\n") |
0 commit comments