Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5a71459

Browse files
[update]comment to the code
1 parent 12202c7 commit 5a71459

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

‎DocumentSummaryCreater/preprocessing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,26 @@
22
import neologdn
33

44
class EnglishCorpus:
5+
def __init__(self):
    """Load the spaCy English pipeline used by all later analysis steps."""
    # One-time load of the small English model; every method reuses self.nlp.
    self.nlp = spacy.load("en_core_web_sm")
78

9+
def preprocessing(self, text: str) -> str:
    """Strip line breaks from *text* and normalize special characters.

    Returns the flattened, neologdn-normalized string.
    """
    flattened = text.replace("\n", "")
    # neologdn unifies character variants (full/half width, repeated marks).
    return neologdn.normalize(flattened)
1315

16+
def make_sentence_list(self, sentences: str) -> list:
    """Split *sentences* into spaCy sentence spans.

    Runs the loaded pipeline over the text, stores the resulting
    sentence spans on the instance (read later by ``make_corpus``),
    and returns them as a list.
    """
    doc = self.nlp(sentences)
    # Materialize once: ``doc.sents`` yields a one-shot generator, and the
    # original code stored that generator on self, so make_corpus() could
    # only ever be iterated a single time. A list is safely re-iterable.
    sentence_list = list(doc.sents)
    self.ginza_sents_object = sentence_list
    return sentence_list
2023

24+
# Put a space between words
2125
def make_corpus(self) -> list:
2226
corpus = []
2327
for s in self.ginza_sents_object:

‎DocumentSummaryCreater/summary_make.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@
66
from sumy.summarizers.lex_rank import LexRankSummarizer
77

88
def summarize_sentences(sentences:str, language="english") -> list:
9+
# Preparation sentences
910
corpus_maker = EnglishCorpus()
1011
preprocessed_sentences = corpus_maker.preprocessing(sentences)
1112
preprocessed_sentence_list = corpus_maker.make_sentence_list(preprocessed_sentences)
1213
corpus = corpus_maker.make_corpus()
1314
parser = PlaintextParser.from_string(" ".join(corpus), Tokenizer(language))
1415

16+
# Call the summarization algorithm and do the summarization
1517
summarizer = LexRankSummarizer()
16-
1718
summarizer.stop_words = get_stop_words(language)
1819
summary = summarizer(document=parser.document, sentences_count=len(corpus)*2//10)
1920

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /