2

I am trying the code below, but I am getting an error.

# Build train.db once: stream final_features.csv into a SQLite table named
# 'data', 15000 rows at a time, keeping a continuous integer index across
# chunks.
if not os.path.isfile('train.db'):
    disk_engine = create_engine('sqlite:///train.db')
    start = dt.datetime.now()

    # Column layout: 29 named feature columns followed by 96 embedding
    # dimensions per question ('0_x'..'95_x' and '0_y'..'95_y').
    base_cols = ['Unnamed: 0', 'id', 'is_duplicate', 'cwc_min', 'cwc_max',
                 'csc_min', 'csc_max', 'ctc_min', 'ctc_max', 'last_word_eq',
                 'first_word_eq', 'abs_len_diff', 'mean_len',
                 'token_set_ratio', 'token_sort_ratio', 'fuzz_ratio',
                 'fuzz_partial_ratio', 'longest_substr_ratio', 'freq_qid1',
                 'freq_qid2', 'q1len', 'q2len', 'q1_n_words', 'q2_n_words',
                 'word_Common', 'word_Total', 'word_share', 'freq_q1+q2',
                 'freq_q1-q2']
    column_names = (base_cols
                    + ['{}_x'.format(i) for i in range(96)]
                    + ['{}_y'.format(i) for i in range(96)])

    chunksize = 15000
    j = 0
    index_start = 1
    for df in pd.read_csv('final_features.csv', names=column_names,
                          chunksize=chunksize, iterator=True,
                          encoding='utf-8'):
        # Shift this chunk's index so it continues where the last one ended.
        df.index += index_start
        j += 1
        print('{} rows'.format(j * chunksize))
        # SQLite limits the number of bound parameters per statement
        # (SQLITE_MAX_VARIABLE_NUMBER, 999 in older builds). With ~222
        # columns per row, an unbatched INSERT can exceed that limit and
        # raise "OperationalError: too many SQL variables". Passing
        # chunksize here makes pandas issue the INSERTs in small batches
        # that stay under the limit.
        df.to_sql('data', disk_engine, if_exists='append', chunksize=100)
        index_start = df.index[-1] + 1

This is the output that I am getting:

 15000 rows
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
 1192 parameters,
-> 1193 context)
 1194 except BaseException as e:
~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
 506 def do_execute(self, cursor, statement, parameters, context=None):
--> 507 cursor.execute(statement, parameters)
 508 
OperationalError: too many SQL variables
The above exception was the direct cause of the following exception:
OperationalError Traceback (most recent call last)
<ipython-input-83-b376654c990a> in <module>()
 14 j+=1
 15 print('{} rows'.format(j*chunksize))
---> 16 df.to_sql('data', disk_engine, if_exists='append')
 17 index_start = df.index[-1] + 1
~\Anaconda3\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
 2125 ... df2.to_excel(writer, sheet_name='Sheet_name_2')
 2126 
-> 2127 ExcelWriter can also be used to append to an existing Excel file:
 2128 
 2129 >>> with pd.ExcelWriter('output.xlsx',
~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype)
 448 index=True,
 449 index_label=None,
--> 450 chunksize=None,
 451 dtype=None,
 452 method=None,
~\Anaconda3\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
 1147 
 1148 @staticmethod
-> 1149 def _query_iterator(
 1150 result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
 1151 ):
~\Anaconda3\lib\site-packages\pandas\io\sql.py in insert(self, chunksize)
 661 ----------
 662 conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
--> 663 keys : list of str
 664 Column names
 665 data_iter : generator of list
~\Anaconda3\lib\site-packages\pandas\io\sql.py in _execute_insert(self, conn, keys, data_iter)
 636 return str(CreateTable(self.table).compile(self.pd_sql.connectable))
 637 
--> 638 def _execute_create(self):
 639 # Inserting table into database, add to MetaData object
 640 self.table = self.table.tometadata(self.pd_sql.meta)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object, *multiparams, **params)
 946 raise exc.ObjectNotExecutableError(object)
 947 else:
--> 948 return meth(self, multiparams, params)
 949 
 950 def _execute_function(self, func, multiparams, params):
~\Anaconda3\lib\site-packages\sqlalchemy\sql\elements.py in _execute_on_connection(self, connection, multiparams, params)
 267 def _execute_on_connection(self, connection, multiparams, params):
 268 if self.supports_execution:
--> 269 return connection._execute_clauseelement(self, multiparams, params)
 270 else:
 271 raise exc.ObjectNotExecutableError(self)
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_clauseelement(self, elem, multiparams, params)
 1058 compiled_sql,
 1059 distilled_params,
-> 1060 compiled_sql, distilled_params
 1061 )
 1062 if self._has_events or self.engine._has_events:
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
 1198 parameters,
 1199 cursor,
-> 1200 context)
 1201 
 1202 if self._has_events or self.engine._has_events:
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
 1411 util.raise_from_cause(
 1412 sqlalchemy_exception,
-> 1413 exc_info
 1414 )
 1415 else:
~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
 201 exc_type, exc_value, exc_tb = exc_info
 202 cause = exc_value if exc_value is not exception else None
--> 203 reraise(type(exception), exception, tb=exc_tb, cause=cause)
 204 
 205 if py3k:
~\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
 184 value.__cause__ = cause
 185 if value.__traceback__ is not tb:
--> 186 raise value.with_traceback(tb)
 187 raise value
 188 
~\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
 1191 statement,
 1192 parameters,
-> 1193 context)
 1194 except BaseException as e:
 1195 self._handle_dbapi_exception(
~\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
 505 
 506 def do_execute(self, cursor, statement, parameters, context=None):
--> 507 cursor.execute(statement, parameters)
 508 
 509 def do_execute_no_params(self, cursor, statement, context=None):

I have already tried various chunksize values, but it didn't work. Can anyone please suggest how to fix this error? I am running this code in a Jupyter notebook. I already have updated versions of pandas and the other libraries, so there is no compatibility issue.

desertnaut
60.8k32 gold badges155 silver badges183 bronze badges
asked Apr 1, 2020 at 10:09

2 Answers 2

2

Based on a few tests, I'm not sure that the @Scratch'N'Purr answer is completely correct.

For instance, define a SQLite table with, say, 58 columns. Then, try to insert a Pandas df with only, say, 830 rows and 58 columns, with a statement like:

df.to_sql("tableName", conn, if_exists="append", index=False, method="multi")

You will get the op error: sqlite3.OperationalError: too many SQL variables

I also read that there is an error on

"line 2143, in _execute_insert_multi
 conn.execute(self.insert_statement(num_rows=len(data_list)), flattened_data)"

I'm not an expert there, but maybe (could someone enlighten me on that?) all the placeholders for every row that must be inserted end up "flattened" into the same query? As a result, we end up with more placeholders than SQLite can accept.

In my case, by modifying the query and setting a chunksize limit, I could get rid of the error:

df.to_sql("tableName", conn, if_exists="append", chunksize=100, index=False, method="multi")

From that, I must infer that it does impact the number of placeholders used in the query. So, yes, it seems that the chunksize could, under some circumstances, resolve such a problem.

answered Nov 1, 2023 at 12:15
Sign up to request clarification or add additional context in comments.

1 Comment

answer helped me. I was getting the same, operational error: too many sql variables, while trying to insert a pandas df having only 566 rows and 76 columns. Setting the chunksize limit to 100 fixed the issue for me. I cannot attest to the explanation being accurate, but the issue resolved, nonetheless.
0

This error is related to the number of parameters being passed to sqlite3. In essence, what's happening behind the scenes is that there's a SQL query being issued to the db engine: INSERT INTO myTable (col1, col2, col3,..., col_n) VALUES (?, ?, ?,..., ?), where the ? are the values from your dataframe being passed to the database.

This error occurred because your dataframe is very wide (has lots of columns), so during insertion, many parameters are being passed into the SQL statement. You can actually see in the error stack that justifies my explanation:

--> 507 cursor.execute(statement, parameters)

Simply, SQLite can only handle a limited number of parameters being passed. This is simply a limitation of SQLite. You can scroll down to #9 in this page for more info on this.

Setting chunksize will not resolve your problem. My suggestion is using another db like postgres or mysql.

answered Apr 1, 2020 at 10:46

1 Comment

The width or number of columns alone does not limit SQLite. Please check @stephane 's answer for the correct explanation. Note that the answer (setting chunksize) is correct but the reasoning for it is not.

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.