|
1 | | -from cProfile import label |
2 | | -from re import S |
3 | | -import spacy |
4 | 1 | from spacy.lang.en.stop_words import STOP_WORDS |
5 | | -from spacy import load, displacy |
6 | 2 | import en_core_web_sm |
7 | 3 | from string import punctuation |
8 | 4 | from heapq import nlargest |
9 | 5 | import spacy_streamlit |
| 6 | +import requests |
| 7 | +import json |
| 8 | +from bs4 import BeautifulSoup |
10 | 9 |
|
11 | 10 |
|
12 | 11 | nlp= en_core_web_sm.load() |
@@ -50,6 +49,36 @@ def sentence_score(sentence_tokens, word_frequencies): |
50 | 49 | return sentence_score |
51 | 50 |
|
52 | 51 |
|
def fetch_news_links(max_articles=10):
    """Return BBC News article URLs about India fetched from NewsAPI.

    Queries the NewsAPI ``/v2/everything`` endpoint (source ``bbc-news``,
    query ``india``, language ``en``), inspects at most the first
    ``max_articles`` articles of the response and keeps the URLs that
    contain ``"/news/"``.

    Args:
        max_articles: Maximum number of leading articles to inspect. The
            returned list may be shorter, because articles whose URL does
            not contain ``"/news/"`` are dropped.

    Returns:
        A list of article URL strings, in the order returned by the API.

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or a non-2xx HTTP status.
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError: If the decoded payload has no ``"articles"`` entry.
    """
    import os  # local import so this block does not depend on a file-level one

    # NOTE(security): the fallback key below was committed to source control
    # and should be rotated. Set NEWSAPI_KEY in the environment to override it.
    api_key = (
        os.environ.get("NEWSAPI_KEY") or "3af9b5b135cc4e90b4a5d87807716cd1"
    )

    endpoint = "https://newsapi.org/v2/everything"
    query = {
        "sources": "bbc-news",
        "q": "india",
        "language": "en",
        "apiKey": api_key,
    }
    headers = {
        "Accept": "*/*",
        "User-Agent": "Thunder Client (https://www.thunderclient.com)",
    }

    # A timeout keeps the caller from hanging forever on a stalled connection.
    response = requests.get(endpoint, params=query, headers=headers, timeout=10)
    # Surface API errors (bad key, rate limit) as an HTTP error instead of a
    # confusing KeyError on the missing "articles" field further down.
    response.raise_for_status()
    articles = json.loads(response.text)["articles"]

    # Slicing bounds the work up front instead of looping over every article
    # and doing nothing once the limit is reached.
    return [
        article["url"]
        for article in articles[:max_articles]
        # `or ""` tolerates articles whose "url" is missing or null.
        if "/news/" in (article.get("url") or "")
    ]
| 79 | + |
| 80 | + |
| 81 | + |
53 | 82 | def get_summary(text): |
54 | 83 | doc = nlp(text) |
55 | 84 | tokens = [token.text for token in doc] |
|
0 commit comments