1

I'm fairly new to python, and I'm with some problems with encoding.

Please see the code:

# -*- coding: utf-8 -*-
import config # Ficheiro de configuracao
import twitter
import random
import sqlite3
import time
import bitly_api #https://github.com/bitly/bitly-api-python
import feedparser
class TwitterC:
 def logToDatabase(self, tweet, timestamp):
 # Will log to the database
 database = sqlite3.connect('database.db') # Create a database file
 cursor = database.cursor() # Create a cursor
 cursor.execute("CREATE TABLE IF NOT EXISTS twitter(id_tweet INTEGER AUTO_INCREMENT PRIMARY KEY, tweet TEXT, timestamp TEXT);") # Make a table
 # Assign the values for the insert into
 msg_ins = tweet
 timestamp_ins = timestamp
 values = [msg_ins, timestamp_ins]
 # Insert data into the table
 cursor.execute("INSERT INTO twitter(tweet, timestamp) VALUES(?, ?)", values)
 database.commit() # Save our changes
 database.close() # Close the connection to the database
 def shortUrl(self, url):
 bit = bitly_api.Connection(config.bitly_username, config.bitly_key) # Instanciar a API
 return bit.shorten(url) # Encurtar o URL
 def updateTwitterStatus(self, update): 
 short = self.shortUrl(update["url"]) # Vou encurtar o URL
 update_str = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres
 # I will see how much characters have the message, if more than 140, delete some chars
 length_message = len(update_str)
 if length_message > 140:
 length_url = len(short['url'])
 count_message = 136 - length_url
 shorten_msg = update["msg"][0:count_message] + '... '
 update_str = shorten_msg + short['url']
 # Will post to twitter and print the posted text
 api = twitter.Api(consumer_key=config.consumer_key, 
 consumer_secret=config.consumer_secret, 
 access_token_key=config.access_token_key, 
 access_token_secret=config.access_token_secret)
 status = api.PostUpdate(update_str) # Fazer o update
 msg = status.text # Vou gravar o texto enviado para a variavel 'msg'
 # Vou gravar p a Base de Dados
 self.logToDatabase(msg, time.time())
 print msg # So p mostrar o texto enviado. Comentar esta linha de futuro.
# Exemplo base 
#x = TwitterC()
#x.updateTwitterStatus({"url": "http://xyz.com/?cat=28", "msg": "Some tips about PostgreSQL Administration?"})
# Solucao para um misto de feeds e frases feitas
# Vou escolher uma fonte ao acaso
p = range(2) # tem o 0 e o 1
p = random.choice(p)
if p == 0: # Escolhe TEXT UPDATES
 # Vou escolher um text update ao acaso
 text_a_enviar = random.choice(config.text_updates)
 update_to_send = text_a_enviar 
elif p == 1: # Escolhe FEEDS UPDATES
 '''# Vou escolher um feed ao acaso
 feed_a_enviar = random.choice(config.feeds_updates)
 # Vou apanhar o conteudo do feed
 d = feedparser.parse(feed_a_enviar["feedurl"])
 # Vou definir quantos feeds quero ter no i
 i = range(8)
 # Vou meter para "updates" 10 entradas do feed
 updates = []
 for i in range(8):
 updates.append([{"url": feed_a_enviar["linktoourpage"], "msg": d.entries[i].summary + ", "}])
 # Vou escolher ums entrada ao acaso
 update_to_send = random.choice(updates)'''
# Vou postar p o Twitter 
x = TwitterC()
x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})

The code have some lines but the problem is in this line:

x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})

This line have a character with an accent "à" and this causes the problem here:

def updateTwitterStatus(self, update): 
 short = self.shortUrl(update["url"]) # Vou encurtar o URL
 update_str = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres
 ...

More precisely in this line:

update_str = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres

The output of the error is this:

x.updateTwitterStatus({"url": "http://xyz.com", "msg": "favoritos à distancia"})
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 48: ordinal not in range(128)

Any clues on how to solve this?

asked Mar 31, 2011 at 22:31
2

1 Answer 1

1

Try adding from __future__ import unicode_literals at the top if your file. Alternatively you can prefix every string with a ́u ́, ie u"favoritos à distancia"

Make sure your file is actually saved as utf-8 too!

answered Mar 31, 2011 at 23:52
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.