Database PostgreSQL 9.6.6 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-16), 64-bit
- this is on an Amazon AWS instance.
I recently identified some orphan data in one of the databases I look after when it caused a cron job to fail - this orphan data however was in violation of a foreign key constraint. After digging through the logs I found that these little queries had been ran (sadly I don't know by who - I presume their reason was to get around some triggers that discourage mass-deleting):
SET session_replication_role = replica;
delete from bmis.companies where company_id = 38;
SET session_replication_role = DEFAULT;
The data stored in bmis.companies
is the absolute top-level data in this system - everything stems from it. I've manually been through the tables and ran deletes that basically boil down to this pseudo code:
delete from
schema.table
where
not exists(
select
*
from
schema.parent_table
where
parent_table.id = table.parent_id
);
However I'm only human and it's entirely possible that I may have missed something - there are many tables in this system. I'm wondering if there's any reasonably easy-to-implement way of identifying data already inside the database that is in violation of the foreign keys, or even just identifying tables that contain this data?
1 Answer 1
So, after much Googling I came up with the below to locate foreign key violations in my dataset - I had indeed missed a single table.
If you're wondering why I don't use information_schema.table_constraints
, information_schema.key_column_usage
, and information_schema.constraint_column_usage
it's because using those tables completley falls apart for multi-column foreign keys.
Hope this helps someone someday:
do language plpgsql $$
declare
_schemas varchar[];
_fkey record;
_sql text;
_and text;
_i integer;
_test record;
begin
_schemas := array['my_schema_one', 'my_schema_two']::varchar[];
create temporary table __foreign_key_violations (
table_name varchar,
column_names varchar[],
foreign_table_name varchar,
foreign_column_names varchar[]
) on commit drop;
for _fkey in(
with
__tables as(
select
quote_ident(tables.table_schema) || '.' || quote_ident(tables.table_name) as table_name
from
information_schema.tables
where
tables.table_type = 'BASE TABLE'
and table_schema = any(_schemas)
order by
tables.table_schema asc,
tables.table_name asc
),
__raw_fkeys as(
select
__tables.table_name,
pg_catalog.pg_get_constraintdef(pg_constraint.oid, true) as fkeydef
from
__tables
inner join pg_catalog.pg_constraint on(
pg_constraint.conrelid = __tables.table_name::regclass
and pg_constraint.contype = 'f'
)
)
select
__raw_fkeys.table_name,
string_to_array(regexp_replace(__raw_fkeys.fkeydef, E'^FOREIGN KEY \\((.*?)\\).*$', E'\1円'), ', ') as column_names,
regexp_replace(__raw_fkeys.fkeydef, E'^FOREIGN KEY .*? REFERENCES (.*?)\\(.*$', E'\1円') as foreign_table_name,
string_to_array(regexp_replace(__raw_fkeys.fkeydef, E'^FOREIGN KEY .*? REFERENCES .*?\\((.*?)\\).*$', E'\1円'), ', ') as foreign_column_names
from
__raw_fkeys
) loop
_sql := '
select
true as violations
from
' || _fkey.table_name || ' as local_table
where
not exists(
select
*
from
' || _fkey.foreign_table_name || ' as foreign_table
where';
for _i in 1 .. array_upper(_fkey.column_names, 1) loop
_and := '';
if _i > 1 then
_and := 'and ';
end if;
_sql := _sql || '
' || _and || 'local_table.' || quote_ident(_fkey.column_names[_i]) || ' is not null
and foreign_table.' || quote_ident(_fkey.foreign_column_names[_i]) || ' = local_table.' || quote_ident(_fkey.column_names[_i]);
end loop;
_sql := _sql || '
)';
for _i in 1 .. array_upper(_fkey.column_names, 1) loop
_sql := _sql || '
and local_table.' || quote_ident(_fkey.column_names[_i]) || ' is not null';
end loop;
_sql := _sql || '
limit 1
';
execute _sql into _test;
if _test.violations = true then
insert into __foreign_key_violations (
table_name,
column_names,
foreign_table_name,
foreign_column_names
) values (
_fkey.table_name,
_fkey.column_names,
_fkey.foreign_table_name,
_fkey.foreign_column_names
);
end if;
end loop; -- _fkey in..
end;
$$;
select * from __foreign_key_violations;
Explore related questions
See similar questions with these tags.