Commit 18eb082

Merge pull request avinashkranjan#1585 from okaditya84/master

Added sentiment analyzer program

2 parents: 7d2be06 + 2df2a60
File tree

5 files changed: +385 additions, 0 deletions


Sentiment Analyser/README.md

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@

# Sentiment Analyser

This is a sentiment analyser that takes in a sentence and returns its sentiment.
It assigns a rating from 0 to 1 to the emotions present in the sentence.
It uses NLP and ML algorithms to do so.
The word embeddings are trained with word2vec on the IMDB movie-review dataset, and the classification model is an RNN.

## Setup instructions

- Install Python 3.6 or above
- Install the required packages using the following command

```bash
pip install -r requirements.txt
```

- First execute the word2vec.ipynb file to train the word embeddings
- Then execute the RNN(w2v).ipynb file to train and evaluate the model
- Adjust the epochs and batch size based on your system configuration and the accuracy you want
- It will then create a CSV file with the predicted sentiment for each review

## Output

Check the screenshot in this folder for the output.

## Author(s)

- [Aditya Jethani](https://github.com/okaditya84)
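Editor's note: the README describes scoring a single sentence, while the notebooks below only evaluate on the IMDB test set. A minimal sketch of how one sentence could be scored with the artifacts RNN(w2v).ipynb saves (model.pkl and tokenizer.pkl), assuming those files exist in the working directory; note that pickling Keras models is fragile, and Keras's own model.save is generally more robust:

```python
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Assumes model.pkl and tokenizer.pkl were written by RNN(w2v).ipynb
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)
with open('tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

sentence = "This movie was surprisingly good."
seq = tokenizer.texts_to_sequences([sentence])
padded = pad_sequences(seq, maxlen=100)  # must match the training maxlen

prob = float(model.predict(padded)[0][0])  # sigmoid output in [0, 1]
print('positive' if prob > 0.5 else 'negative', round(prob, 3))
```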

Sentiment Analyser/RNN(w2v).ipynb

Lines changed: 197 additions & 0 deletions
@@ -0,0 +1,197 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e5997f58",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/15\n",
"196/196 [==============================] - 59s 272ms/step - loss: 0.6775 - acc: 0.5654 - val_loss: 0.6538 - val_acc: 0.6173 - lr: 0.0010\n",
"Epoch 2/15\n",
"196/196 [==============================] - 52s 265ms/step - loss: 0.6585 - acc: 0.6066 - val_loss: 0.6501 - val_acc: 0.6184 - lr: 0.0010\n",
"Epoch 3/15\n",
"196/196 [==============================] - 55s 280ms/step - loss: 0.6488 - acc: 0.6216 - val_loss: 0.6378 - val_acc: 0.6365 - lr: 0.0010\n",
"Epoch 4/15\n",
"196/196 [==============================] - 58s 294ms/step - loss: 0.6427 - acc: 0.6304 - val_loss: 0.6505 - val_acc: 0.6307 - lr: 0.0010\n",
"Epoch 5/15\n",
"196/196 [==============================] - ETA: 0s - loss: 0.6337 - acc: 0.6417\n",
"Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.\n",
"196/196 [==============================] - 57s 291ms/step - loss: 0.6337 - acc: 0.6417 - val_loss: 0.6780 - val_acc: 0.6249 - lr: 0.0010\n",
"Epoch 6/15\n",
"196/196 [==============================] - 55s 280ms/step - loss: 0.6225 - acc: 0.6509 - val_loss: 0.6244 - val_acc: 0.6507 - lr: 1.0000e-04\n",
"Epoch 7/15\n",
"196/196 [==============================] - 55s 280ms/step - loss: 0.6213 - acc: 0.6510 - val_loss: 0.6274 - val_acc: 0.6496 - lr: 1.0000e-04\n",
"Epoch 8/15\n",
"196/196 [==============================] - 55s 281ms/step - loss: 0.6181 - acc: 0.6549 - val_loss: 0.6220 - val_acc: 0.6522 - lr: 1.0000e-04\n",
"Epoch 9/15\n",
"196/196 [==============================] - 56s 285ms/step - loss: 0.6180 - acc: 0.6551 - val_loss: 0.6195 - val_acc: 0.6536 - lr: 1.0000e-04\n",
"Epoch 10/15\n",
"196/196 [==============================] - 56s 284ms/step - loss: 0.6165 - acc: 0.6585 - val_loss: 0.6242 - val_acc: 0.6512 - lr: 1.0000e-04\n",
"Epoch 11/15\n",
"196/196 [==============================] - 56s 287ms/step - loss: 0.6176 - acc: 0.6549 - val_loss: 0.6187 - val_acc: 0.6538 - lr: 1.0000e-04\n",
"Epoch 12/15\n",
"196/196 [==============================] - 56s 285ms/step - loss: 0.6148 - acc: 0.6585 - val_loss: 0.6178 - val_acc: 0.6574 - lr: 1.0000e-04\n",
"Epoch 13/15\n",
"196/196 [==============================] - 56s 287ms/step - loss: 0.6160 - acc: 0.6599 - val_loss: 0.6160 - val_acc: 0.6574 - lr: 1.0000e-04\n",
"Epoch 14/15\n",
"196/196 [==============================] - 54s 275ms/step - loss: 0.6142 - acc: 0.6572 - val_loss: 0.6156 - val_acc: 0.6571 - lr: 1.0000e-04\n",
"Epoch 15/15\n",
"196/196 [==============================] - 54s 274ms/step - loss: 0.6136 - acc: 0.6595 - val_loss: 0.6162 - val_acc: 0.6576 - lr: 1.0000e-04\n",
"196/196 [==============================] - 7s 35ms/step - loss: 0.6162 - acc: 0.6576\n",
"Test score: 0.6162243485450745\n",
"Test accuracy: 0.6576399803161621\n"
]
}
],
"source": [
"from keras.datasets import imdb\n",
"from keras.models import Sequential\n",
"from keras.layers import LSTM, Dense, Embedding, Dropout\n",
"from keras.callbacks import EarlyStopping, ReduceLROnPlateau\n",
"from keras.preprocessing.text import Tokenizer\n",
"from gensim.models import Word2Vec\n",
"import numpy as np\n",
"import pandas as pd\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"import pickle\n",
"# Load the IMDB dataset and split it into training and test sets\n",
"(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)\n",
"\n",
"# Tokenize the text and convert it to sequences\n",
"tokenizer = Tokenizer(num_words=10000)\n",
"x_train_str = [str(text) for text in x_train]\n",
"tokenizer.fit_on_texts(x_train_str)\n",
"x_train = tokenizer.texts_to_sequences(x_train_str)\n",
"x_test_str = [str(text) for text in x_test]\n",
"x_test = tokenizer.texts_to_sequences(x_test_str)\n",
"\n",
"# Pad the sequences to a fixed length\n",
"maxlen = 100\n",
"x_train = pad_sequences(x_train, maxlen=maxlen)\n",
"x_test = pad_sequences(x_test, maxlen=maxlen)\n",
"\n",
"# Load pre-trained Word2Vec model\n",
"w2v_model = Word2Vec.load('w2v_model.bin')\n",
"\n",
"# Create embedding matrix\n",
"word_index = tokenizer.word_index\n",
"embedding_matrix = np.zeros((len(word_index) + 1, 100))\n",
"for word, i in word_index.items():\n",
" if word in w2v_model.wv.key_to_index:\n",
" embedding_matrix[i] = w2v_model.wv[word]\n",
"\n",
"# Define the model architecture\n",
"model = Sequential()\n",
"model.add(Embedding(len(word_index) + 1, 100, weights=[embedding_matrix], input_length=maxlen, trainable=False))\n",
"model.add(Dropout(0.2))\n",
"model.add(LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))\n",
"model.add(LSTM(32, dropout=0.2, recurrent_dropout=0.2))\n",
"model.add(Dense(1, activation='sigmoid'))\n",
"\n",
"# Compile the model\n",
"model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])\n",
"\n",
"# Define early stopping and learning rate reduction callbacks\n",
"early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min')\n",
"reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1, mode='min')\n",
"\n",
"# Train the model\n",
"history = model.fit(\n",
" x_train, y_train,\n",
" batch_size=128,\n",
" epochs=15,\n",
" validation_data=(x_test, y_test),\n",
" callbacks=[early_stopping, reduce_lr]\n",
")\n",
"# Save the model in pickle format\n",
"pickle.dump(model, open('model.pkl', 'wb'))\n",
"# Save the tokenizer in pickle format\n",
"pickle.dump(tokenizer, open('tokenizer.pkl', 'wb'))\n",
"\n",
"\n",
"# Evaluate the model on the test set\n",
"score, acc = model.evaluate(x_test, y_test, batch_size=128)\n",
"print('Test score:', score)\n",
"print('Test accuracy:', acc)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7885e840",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"id": "43521b94",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"782/782 [==============================] - 11s 14ms/step\n",
"Saved results to CSV file.\n"
]
}
],
"source": [
"# Get predicted probabilities on the test set\n",
"y_pred_prob = model.predict(x_test)\n",
"\n",
"# Convert probabilities to classes (single sigmoid output: threshold at 0.5;\n",
"# np.argmax over axis=1 of an (n, 1) array would always return 0)\n",
"y_pred = (y_pred_prob > 0.5).astype(int).flatten()\n",
"\n",
"# Convert the integer labels to sentiment strings\n",
"sentiments = ['negative', 'positive']\n",
"y_test_str = np.array([sentiments[label] for label in y_test])\n",
"y_pred_str = np.array([sentiments[label] for label in y_pred])\n",
"\n",
"# Store the results in a CSV file\n",
"results = pd.DataFrame({'Review': x_test_str, 'Actual Sentiment': y_test_str, 'Predicted Sentiment': y_pred_str})\n",
"results.to_csv('imdb_sentiments.csv', index=False)\n",
"\n",
"print('Saved results to CSV file.')\n"
]
166+
},
167+
{
168+
"cell_type": "code",
169+
"execution_count": null,
170+
"id": "92a7b5ad",
171+
"metadata": {},
172+
"outputs": [],
173+
"source": []
174+
}
175+
],
176+
"metadata": {
177+
"kernelspec": {
178+
"display_name": "Python 3 (ipykernel)",
179+
"language": "python",
180+
"name": "python3"
181+
},
182+
"language_info": {
183+
"codemirror_mode": {
184+
"name": "ipython",
185+
"version": 3
186+
},
187+
"file_extension": ".py",
188+
"mimetype": "text/x-python",
189+
"name": "python",
190+
"nbconvert_exporter": "python",
191+
"pygments_lexer": "ipython3",
192+
"version": "3.11.1"
193+
}
194+
},
195+
"nbformat": 4,
196+
"nbformat_minor": 5
197+
}
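A note on the prediction cell above: because the model ends in a single sigmoid unit, model.predict returns an (n, 1) array of probabilities, so np.argmax(..., axis=1), as in the original commit, would label every review 0 ("negative"). Thresholding at 0.5 recovers the intended labels. A minimal illustration:

```python
import numpy as np

# Shape (n, 1): one sigmoid probability per review
y_pred_prob = np.array([[0.9], [0.2], [0.7]])

print(np.argmax(y_pred_prob, axis=1))             # [0 0 0]: always class 0
print((y_pred_prob > 0.5).astype(int).flatten())  # [1 0 1]: correct labels
```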

Sentiment Analyser/requirements.txt

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
absl-py==1.4.0
asttokens==2.2.1
astunparse==1.6.3
backcall==0.2.0
cachetools==5.3.0
certifi==2022.12.7
charset-normalizer==3.1.0
colorama==0.4.6
comm==0.1.2
debugpy==1.6.6
decorator==5.1.1
executing==1.2.0
flatbuffers==23.3.3
gast==0.4.0
gensim==4.3.1
google-auth==2.16.2
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
grpcio==1.51.3
h5py==3.8.0
idna==3.4
ipykernel==6.21.3
ipython==8.11.0
jax==0.4.6
jedi==0.18.2
jupyter_client==8.0.3
jupyter_core==5.3.0
keras==2.12.0
libclang==15.0.6.1
Markdown==3.4.1
MarkupSafe==2.1.2
matplotlib-inline==0.1.6
nest-asyncio==1.5.6
numpy==1.23.5
oauthlib==3.2.2
opt-einsum==3.3.0
packaging==23.0
pandas==1.5.3
parso==0.8.3
pickleshare==0.7.5
platformdirs==3.1.1
prompt-toolkit==3.0.38
protobuf==4.22.1
psutil==5.9.4
pure-eval==0.2.2
pyasn1==0.4.8
pyasn1-modules==0.2.8
Pygments==2.14.0
python-dateutil==2.8.2
pytz==2022.7.1
pywin32==305
pyzmq==25.0.1
requests==2.28.2
requests-oauthlib==1.3.1
rsa==4.9
scipy==1.10.1
six==1.16.0
smart-open==6.3.0
stack-data==0.6.2
tensorboard==2.12.0
tensorboard-data-server==0.7.0
tensorboard-plugin-wit==1.8.1
tensorflow==2.12.0rc1
tensorflow-estimator==2.12.0
tensorflow-intel==2.12.0rc1
tensorflow-io-gcs-filesystem==0.31.0
termcolor==2.2.0
tornado==6.2
traitlets==5.9.0
typing_extensions==4.5.0
urllib3==1.26.15
wcwidth==0.2.6
Werkzeug==2.2.3
wincertstore==0.2
wrapt==1.14.1

Sentiment Analyser/word2vec.ipynb

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ex-oKOQ95wAu",
"outputId": "0eef58ac-1fb5-4879-df27-bc9378bef581"
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Y0NOzxRF5zzT"
},
"outputs": [],
"source": [
"from gensim.models import Word2Vec\n",
"from keras.datasets import imdb\n",
"\n",
"# Load the IMDB dataset\n",
"(x_train, _), (x_test, _) = imdb.load_data(num_words=10000)\n",
"\n",
"# Convert the sequences of word indexes to lists of words\n",
"word_index = imdb.get_word_index()\n",
"index_to_word = {i: word for word, i in word_index.items()}\n",
"index_to_word[0] = '<PAD>'\n",
"index_to_word[1] = '<START>'\n",
"index_to_word[2] = '<UNK>'\n",
"x_train = [[index_to_word.get(i, '') for i in seq] for seq in x_train]\n",
"x_test = [[index_to_word.get(i, '') for i in seq] for seq in x_test]\n",
"\n",
"# Train the Word2Vec model\n",
"w2v_model = Word2Vec(sentences=x_train + x_test, vector_size=100, window=5, min_count=1, workers=4, epochs=10)\n",
"\n",
"# Save the trained model to a file\n",
"w2v_model.save('w2v_model.bin')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
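A quick sanity check on the embeddings saved above, as a sketch assuming w2v_model.bin exists in the working directory (most_similar is gensim's standard nearest-neighbour query):

```python
from gensim.models import Word2Vec

# Load the embeddings trained by word2vec.ipynb
w2v_model = Word2Vec.load('w2v_model.bin')

# Vector size should match the vector_size=100 used at training time
print(w2v_model.wv['great'].shape)  # (100,)

# Nearest neighbours in embedding space; related review vocabulary
# should cluster together if training went well
for word, similarity in w2v_model.wv.most_similar('great', topn=5):
    print(f'{word}: {similarity:.3f}')
```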
