Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5a71459

Browse files
[update]comment to the code
1 parent 12202c7 commit 5a71459

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

‎DocumentSummaryCreater/preprocessing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,26 @@
22
import neologdn
33

44
class EnglishCorpus:
5+
def __init__(self):
    """Load the spaCy English pipeline used by all later analysis steps."""
    # One-time load of the small English model; every method reuses self.nlp.
    self.nlp = spacy.load("en_core_web_sm")
78

9+
def preprocessing(self, text: str) -> str:
    """Strip line breaks from *text* and normalize special characters.

    Returns the flattened, neologdn-normalized string.
    """
    flattened = text.replace("\n", "")
    # neologdn unifies character variants (full/half width, repeated marks).
    return neologdn.normalize(flattened)
1315

16+
def make_sentence_list(self, sentences: str) -> list:
    """Split *sentences* into spaCy sentence spans.

    Runs the loaded pipeline over the text, stores the resulting
    sentence spans on the instance (read later by ``make_corpus``),
    and returns them as a list.
    """
    doc = self.nlp(sentences)
    # Materialize once: ``doc.sents`` yields a one-shot generator, and the
    # original code stored that generator on self, so make_corpus() could
    # only ever be iterated a single time. A list is safely re-iterable.
    sentence_list = list(doc.sents)
    self.ginza_sents_object = sentence_list
    return sentence_list
2023

24+
# Put a space between words
2125
def make_corpus(self) -> list:
2226
corpus = []
2327
for s in self.ginza_sents_object:

‎DocumentSummaryCreater/summary_make.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@
66
from sumy.summarizers.lex_rank import LexRankSummarizer
77

88
def summarize_sentences(sentences:str, language="english") -> list:
9+
# Preparation sentences
910
corpus_maker = EnglishCorpus()
1011
preprocessed_sentences = corpus_maker.preprocessing(sentences)
1112
preprocessed_sentence_list = corpus_maker.make_sentence_list(preprocessed_sentences)
1213
corpus = corpus_maker.make_corpus()
1314
parser = PlaintextParser.from_string(" ".join(corpus), Tokenizer(language))
1415

16+
# Call the summarization algorithm and do the summarization
1517
summarizer = LexRankSummarizer()
16-
1718
summarizer.stop_words = get_stop_words(language)
1819
summary = summarizer(document=parser.document, sentences_count=len(corpus)*2//10)
1920

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /