Problem with encoding in Python

Question 1

I'm fairly new to python, and I'm with some problems with encoding.

Please see the code:

# -*- coding: utf-8 -*-
import config # Ficheiro de configuracao
import twitter
import random
import sqlite3
import time
import bitly_api #https://github.com/bitly/bitly-api-python
import feedparser
class TwitterC:
 def logToDatabase(self, tweet, timestamp):
 # Will log to the database
 database = sqlite3.connect('database.db') # Create a database file
 cursor = database.cursor() # Create a cursor
 cursor.execute("CREATE TABLE IF NOT EXISTS twitter(id_tweet INTEGER AUTO_INCREMENT PRIMARY KEY, tweet TEXT, timestamp TEXT);") # Make a table
 # Assign the values for the insert into
 msg_ins = tweet
 timestamp_ins = timestamp
 values = [msg_ins, timestamp_ins]
 # Insert data into the table
 cursor.execute("INSERT INTO twitter(tweet, timestamp) VALUES(?, ?)", values)
 database.commit() # Save our changes
 database.close() # Close the connection to the database
 def shortUrl(self, url):
 bit = bitly_api.Connection(config.bitly_username, config.bitly_key) # Instanciar a API
 return bit.shorten(url) # Encurtar o URL
 def updateTwitterStatus(self, update): 
 short = self.shortUrl(update["url"]) # Vou encurtar o URL
 update_str = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres
 # I will see how much characters have the message, if more than 140, delete some chars
 length_message = len(update_str)
 if length_message > 140:
 length_url = len(short['url'])
 count_message = 136 - length_url
 shorten_msg = update["msg"][0:count_message] + '... '
 update_str = shorten_msg + short['url']
 # Will post to twitter and print the posted text
 api = twitter.Api(consumer_key=config.consumer_key, 
 consumer_secret=config.consumer_secret, 
 access_token_key=config.access_token_key, 
 access_token_secret=config.access_token_secret)
 status = api.PostUpdate(update_str) # Fazer o update
 msg = status.text # Vou gravar o texto enviado para a variavel 'msg'
 # Vou gravar p a Base de Dados
 self.logToDatabase(msg, time.time())
 print msg # So p mostrar o texto enviado. Comentar esta linha de futuro.
# Exemplo base 
#x = TwitterC()
#x.updateTwitterStatus({"url": "http://xyz.com/?cat=28", "msg": "Some tips about PostgreSQL Administration?"})
# Solucao para um misto de feeds e frases feitas
# Vou escolher uma fonte ao acaso
p = range(2) # tem o 0 e o 1
p = random.choice(p)
if p == 0: # Escolhe TEXT UPDATES
 # Vou escolher um text update ao acaso
 text_a_enviar = random.choice(config.text_updates)
 update_to_send = text_a_enviar 
elif p == 1: # Escolhe FEEDS UPDATES
 '''# Vou escolher um feed ao acaso
 feed_a_enviar = random.choice(config.feeds_updates)
 # Vou apanhar o conteudo do feed
 d = feedparser.parse(feed_a_enviar["feedurl"])
 # Vou definir quantos feeds quero ter no i
 i = range(8)
 # Vou meter para "updates" 10 entradas do feed
 updates = []
 for i in range(8):
 updates.append([{"url": feed_a_enviar["linktoourpage"], "msg": d.entries[i].summary + ", "}])
 # Vou escolher ums entrada ao acaso
 update_to_send = random.choice(updates)'''
# Vou postar p o Twitter 
x = TwitterC()
x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})

The code have some lines but the problem is in this line:

x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})

This line have a character with an accent "à" and this causes the problem here:

def updateTwitterStatus(self, update): 
 short = self.shortUrl(update["url"]) # Vou encurtar o URL
 update_str = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres
 ...

More precisely in this line:

update_str = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres

The output of the error is this:

x.updateTwitterStatus({"url": "http://xyz.com", "msg": "favoritos à distancia"})
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 48: ordinal not in range(128)

Any clues on how to solve this?

Question 2

farmdev.com/talks/unicode

Question 3

Also joelonsoftware.com/articles/Unicode.html

Question 4

Try adding from __future__ import unicode_literals at the top if your file. Alternatively you can prefix every string with a ́u ́, ie u"favoritos à distancia"

Make sure your file is actually saved as utf-8 too!

Jochen Ritzel 108k33 gold badges205 silver badges196 bronze badges · Accepted Answer · 2011-03-31 23:52:08Z

Try adding from __future__ import unicode_literals at the top if your file. Alternatively you can prefix every string with a ́u ́, ie u"favoritos à distancia"

Make sure your file is actually saved as utf-8 too!

CollectivesTM on Stack Overflow

Problem with encoding in Python

1 Answer 1

Comments

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

CollectivesTM on Stack Overflow

1 Answer 1

Comments

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related