|
1 | 1 | import string
|
2 | 2 | import nltk
|
| 3 | +import sys |
| 4 | +import os |
3 | 5 |
|
4 | | -fp=open(r"C:\Users\ZAVERI SANYA\Desktop\Amazing-Python-Scripts\Remove_POS_hindi_text\\Tagged_Hindi_Corpus.txt",mode="r",encoding="utf-8") #opens the hindi_tagged_corpus.txt file |
5 | | -fd=open(r"C:\Users\ZAVERI SANYA\Desktop\Amazing-Python-Scripts\Remove_POS_hindi_text\\Only_Hindi.txt",mode="a",encoding="utf-8") |
# Ask the user where the POS-tagged Hindi corpus lives, e.g.
# C:\Users\ZAVERI SANYA\Desktop\Amazing-Python-Scripts\Remove_POS_hindi_text\\Tagged_Hindi_Corpus.txt
user_input = input(' Enter file location of your Tagged Hindi Text: ')
# Validate with an explicit check instead of `assert`: assert statements are
# removed when Python runs with -O, which would silently skip this check.
if not os.path.exists(user_input):
    raise FileNotFoundError("I did not find the file at, " + user_input)
fp = open(user_input, mode="r", encoding="utf-8")  # tagged corpus chosen by the user
print("Hooray we found your file!")

# Ask where the tag-free text should be written, e.g.
# C:\Users\ZAVERI SANYA\Desktop\Amazing-Python-Scripts\Remove_POS_hindi_text\Only_Hindi.txt
user_answer = input(' Enter file location where you wish to get your Only Hindi Text file: ')
# Opened in append mode, so text already in the output file is kept.
fd = open(user_answer, mode="a", encoding="utf-8")
6 | 15 | data=fp.read()
|
7 | 16 | data_token=nltk.tokenize.word_tokenize(data) #data tokenization
|
8 | 17 | words=[]
|
|
18 | 27 | for word in words:
|
19 | 28 | str+=word+" " #it concatenates the words
|
20 | 29 | fd.write(str) #writes to only_hindi.txt file
|
| 30 | +print("Hooray your Only Hindi Text file is ready...Please Check!") |
21 | 31 | fp.close()
|
22 | 32 | fd.close()
|
0 commit comments