Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9d2940e

Browse files
Create 0-0 NLP.ipynb
1 parent d10f5c3 commit 9d2940e

File tree

1 file changed

+171
-0
lines changed

1 file changed

+171
-0
lines changed

0-0 NLP.ipynb

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 61,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"['Hello How are you?']\n"
13+
]
14+
}
15+
],
16+
"source": [
17+
"\n",
18+
"import nltk\n",
19+
"\n",
20+
"from nltk.tokenize import sent_tokenize, word_tokenize\n",
21+
" \n",
22+
"srt='Java is a general purpose programming language that is class-based, object-oriented, and designed to have as few implementation dependencies as possible.'\n",
23+
"srtt=\"Hello How are you?\"\n",
24+
"print(sent_tokenize(srtt))\n",
25+
"\n"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": 62,
31+
"metadata": {},
32+
"outputs": [
33+
{
34+
"name": "stdout",
35+
"output_type": "stream",
36+
"text": [
37+
"['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n",
38+
"['is', 'is', 'a', 'sample', 'sentence', ',', 'showing', 'off', 'the', 'stop', 'words', 'java', 'python', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?']\n",
39+
"['sample', 'sentence', 'showing', 'stop', 'words', 'java', 'python']\n"
40+
]
41+
}
42+
],
43+
"source": [
44+
"\n",
45+
"import nltk\n",
46+
"from nltk.corpus import stopwords\n",
47+
"from nltk.tokenize import sent_tokenize, word_tokenize\n",
48+
" \n",
49+
"srt='is is a sample sentence, showing off the stop words java python ???????????????'\n",
50+
"stop=stopwords.words('english')\n",
51+
"print(stop)\n",
52+
"word=word_tokenize(srt)\n",
53+
"\n",
54+
"print(word)\n",
55+
"\n",
56+
"filtr=[]\n",
57+
"punc=[',','?',\"/\",\"<\",\">\"]\n",
58+
"\n",
59+
"for w in word:\n",
60+
" if w not in stop and w not in punc: \n",
61+
" filtr.append(w)\n",
62+
"\n",
63+
" \n",
64+
"print(filtr)"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": 69,
70+
"metadata": {},
71+
"outputs": [
72+
{
73+
"name": "stdout",
74+
"output_type": "stream",
75+
"text": [
76+
"hello\n",
77+
"Hi\n",
78+
"do\n",
79+
"do\n",
80+
"do\n"
81+
]
82+
}
83+
],
84+
"source": [
85+
"\n",
86+
"import nltk\n",
87+
"from nltk.stem import PorterStemmer\n",
88+
"from nltk.tokenize import sent_tokenize, word_tokenize\n",
89+
"\n",
90+
"ps=PorterStemmer()\n",
91+
"new=\"Hello Hi doing doing doing\"\n",
92+
"\n",
93+
"word=word_tokenize(new)\n",
94+
"\n",
95+
"for w in word:\n",
96+
" print(ps.stem(w))\n"
97+
]
98+
},
99+
{
100+
"cell_type": "code",
101+
"execution_count": 17,
102+
"metadata": {},
103+
"outputs": [
104+
{
105+
"name": "stdout",
106+
"output_type": "stream",
107+
"text": [
108+
"cat\n",
109+
"goose\n",
110+
"good\n"
111+
]
112+
}
113+
],
114+
"source": [
115+
"\n",
116+
"import nltk\n",
117+
"from nltk.stem import WordNetLemmatizer\n",
118+
"from nltk.tokenize import sent_tokenize, word_tokenize\n",
119+
"\n",
120+
"lemmatizer=WordNetLemmatizer()\n",
121+
"\n",
122+
"print(lemmatizer.lemmatize(\"cats\"))\n",
123+
"print(lemmatizer.lemmatize(\"geese\"))\n",
124+
"print(lemmatizer.lemmatize(\"better\",pos=\"a\"))"
125+
]
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": 35,
130+
"metadata": {},
131+
"outputs": [
132+
{
133+
"data": {
134+
"text/plain": [
135+
"[('They', 'PRP'), ('to', 'TO'), ('permit', 'VB')]"
136+
]
137+
},
138+
"execution_count": 35,
139+
"metadata": {},
140+
"output_type": "execute_result"
141+
}
142+
],
143+
"source": [
144+
"text=\"Hello Hi doing doing doing\"\n",
145+
"new=['They','to', 'permit']\n",
146+
"nltk.pos_tag(new)"
147+
]
148+
}
149+
],
150+
"metadata": {
151+
"kernelspec": {
152+
"display_name": "Python 3",
153+
"language": "python",
154+
"name": "python3"
155+
},
156+
"language_info": {
157+
"codemirror_mode": {
158+
"name": "ipython",
159+
"version": 3
160+
},
161+
"file_extension": ".py",
162+
"mimetype": "text/x-python",
163+
"name": "python",
164+
"nbconvert_exporter": "python",
165+
"pygments_lexer": "ipython3",
166+
"version": "3.7.4"
167+
}
168+
},
169+
"nbformat": 4,
170+
"nbformat_minor": 4
171+
}

0 commit comments

Comments
(0)

Page converted by AltStyle (-> original) /