Attribute error message

Question 1

import nltk
import random
from nltk.corpus import movie_reviews
from nltk.classify.scikitlearn import SklearnClassifier
import pickle
import sys
sys.getdefaultencoding()
import os
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import SGDClassifier
from nltk.classify import ClassifierI
from statistics import mode
from nltk.tokenize import word_tokenize
class VoteClassifier(ClassifierI):
 def __int__(self, *classifiers):
 self._classifiers = classifiers
 def classify(self, features):
 votes = [ ]
 for c in self._classifiers:
 v = c.classify(features)
 votes.append(v)
 return mode(votes)
 def confidence(self, features):
 votes = [ ]
 for c in self._classifiers:
 v = c.classify(features)
 votes.append(v)
 choice_votes = votes.count(mode(votes))
 conf = choice_votes / len(votes)
 return conf
short_pos = os.open("positive.txt", os.O_RDONLY).read()
short_neg = os.open("negative.txt", os.O_RDONLY).read()
documents = [ ]
for r in short_pos.split('\n'):
 documents.append( (r, "pos") )
for r in short_neg.split('\n'):
 documents.append( (r, "neg") )
 all_words = [ ]
 short_pos_words = word_tokenize(short_pos)
 short_neg_words = word_tokenize(short_neg)
 for w in short_pos_words:
 all_words.append(w.lower())
 for w in short_neg_words:
 all_words.append(w.lower())
all_words = nltk.FreqDist(all_words)
word_features = list(all_words.keys())[:5000]
def find_features(document):
 words = set(document)
 features = {}
 for w in word_features:
 features[w] = (w in words) 
 return features
# print ((find_features(movie_reviews.words('neg/cv000_29416.txt'))))
featuresets = [(find_features(rev), category) for (rev, category) in documents] random.shuffle(featuresets)
#training with increased data collection 
training_set = featuresets[:10000]
#testing with increased data Collection
testing_set = featuresets[10000:]
#define and train classifier
classifier = nltk.NaiveBayesClassifier.train(training_set)
#testing classifier
#print("Classifier accuracy percent:",(nltk.classify.accuracy(classifier, testing_set))*100)
#show the 15 most valuable words when it comes to positive or negative reviews
#classifier.show_most_informative_features(15)
#saving classifier
#save_classifier = open("naivebayes.pickle", "wb")
#pickle.dump(classifier, save_classifier)
#save_classifier.close()
#loading classifier
#classifier_f = open("naivebayes.pickle", "rb")
#classifier = pickle.load(classifier_f)
#classifier_f.close()
print("Original Naive Bayes Alogrithm acurracy percent:", (nltk.classify.accuracy(classifier, testing_set))*100)
 classifier.show_most_informative_features(15)
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)
print("MultinomialNB accuracy percent:", nltk.classify.accuracy(MNB_classifier, testing_set))
BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
BernoulliNB_classifier.train(training_set)
print("BernoulliNB accuracy percent:", nltk.classify.accuracy(BernoulliNB_classifier, testing_set))
SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
SGDClassifier_classifier.train(training_set)
print("SGDclassifier accuracy percent:", nltk.classify.accuracy(SGDClassifier_classifier, testing_set))
#SVC_classifier = SklearnClassifier(SVC())
#SVC_classifier.train(training_set)
#print("SVC accuracy percent:", nltk.classify.accuracy(SVC_classifier, testing_set))
voted_classifier = VoteClassifier(classifier, 
 SGDClassifier_classifier,
 MNB_classifier, 
 BernoulliNB_classifier )
print("voted_classifier accuracy percent:", (nltk.classify.accuracy(voted_classifier, training_set))*100)
#print ("Classification:", voted_classifier.classify(testing_set[0][0]), "Confidence%:", voted_classifier.confidence(testing_set[0][0])*100)
#print ("Classification:", voted_classifier.classify(testing_set[1][0]), "Confidence%:", voted_classifier.confidence(testing_set[1][0])*100)
#print ("Classification:", voted_classifier.classify(testing_set[2][0]), "Confidence%:", voted_classifier.confidence(testing_set[2][0])*100)
#print ("Classification:", voted_classifier.classify(testing_set[3][0]), "Confidence%:", voted_classifier.confidence(testing_set[3][0])*100)
#print ("Classification:", voted_classifier.classify(testing_set[4][0]), "Confidence%:", voted_classifier.confidence(testing_set[4][0])*100)
#print ("Classification:", voted_classifier.classify(testing_set[5][0]), "Confidence%:", voted_classifier.confidence(testing_set[5][0])*100)

When running the above code I get the error,

short_pos = os.open("positive.txt", os.O_RDONLY).read()
AttributeError: 'int' object has no attribute 'read'

Why is this error occurring, and how do I prevent this error from appearing again?

Question 2

It is because you are trying to call .read() method on a return value from os.open(), which returns an int (file descriptor), not a file-like object.

I think what you meant to do was using a simple

with open('filename.txt', 'r') as f:
 text = f.read()

Or if you really want a one-liner:

text = open('filename.txt', 'r').read()

Those two lines:

short_pos = os.open("positive.txt", os.O_RDONLY).read()
short_neg = os.open("negative.txt", os.O_RDONLY).read()

Should be changed to:

with open("positive.txt", 'r') as f:
 short_pos = f.read()
with open("negative.txt", 'r') as f:
 short_neg = f.read()

Also, instead of reading the contents of the whole file and then splitting those by a \n like this:

for r in short_pos.split('\n'): # This .split()
 documents.append( (r, "pos") )
for r in short_neg.split('\n'): # And this .split()
 documents.append( (r, "neg") )

instead of using str.split() it would be much better idea to read the file using .readlines() instead of read() in the first place. The former will return a list of lines from the file stream, and you don't have to worry about different line ending schemes that different operating systems use.

Question 3

Thank You very much. But I now have the error: return codecs.ascii_decode(input, self.errors)[0] UnicodeDecodeError: 'ascii' codec can't decode byte 0xf3 in position 4645: ordinal not in range(128)

Question 4

@A.Lona It doesn't seem to originate from the code that you posted, therefore I can't really be certain, but I've got a hunch, that you are trying to decode something that already is decoded into ascii/utf. iIf the input variable is something from the file that you are opening, then if you were using os.open() to open this file, the output would be a byte-string, which you would have to decode with some kind of codec. You are now opening your file with a simple open(), therefore the output is already a standard string that doesn't require any decoding. Could you post full stack trace?

Błażej Michalik 5,20247 silver badges65 bronze badges · Accepted Answer · 2017-02-14 14:08:40Z

It is because you are trying to call .read() method on a return value from os.open(), which returns an int (file descriptor), not a file-like object.

I think what you meant to do was using a simple

with open('filename.txt', 'r') as f:
 text = f.read()

Or if you really want a one-liner:

text = open('filename.txt', 'r').read()

Those two lines:

short_pos = os.open("positive.txt", os.O_RDONLY).read()
short_neg = os.open("negative.txt", os.O_RDONLY).read()

Should be changed to:

with open("positive.txt", 'r') as f:
 short_pos = f.read()
with open("negative.txt", 'r') as f:
 short_neg = f.read()

Also, instead of reading the contents of the whole file and then splitting those by a \n like this:

for r in short_pos.split('\n'): # This .split()
 documents.append( (r, "pos") )
for r in short_neg.split('\n'): # And this .split()
 documents.append( (r, "neg") )

instead of using str.split() it would be much better idea to read the file using .readlines() instead of read() in the first place. The former will return a list of lines from the file stream, and you don't have to worry about different line ending schemes that different operating systems use.

Thank You very much. But I now have the error: return codecs.ascii_decode(input, self.errors)[0] UnicodeDecodeError: 'ascii' codec can't decode byte 0xf3 in position 4645: ordinal not in range(128)
@A.Lona It doesn't seem to originate from the code that you posted, therefore I can't really be certain, but I've got a hunch, that you are trying to decode something that already is decoded into ascii/utf. iIf the input variable is something from the file that you are opening, then if you were using os.open() to open this file, the output would be a byte-string, which you would have to decode with some kind of codec. You are now opening your file with a simple open(), therefore the output is already a standard string that doesn't require any decoding. Could you post full stack trace?

CollectivesTM on Stack Overflow

Attribute error message

1 Answer 1

2 Comments

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

CollectivesTM on Stack Overflow

1 Answer 1

2 Comments

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related