JAVA开发（记一次删除完全相同pgSQL数据库记录只保留一条）

news2026/2/12 3:34:08

进行数据管理时，无效数据可能会对生产力和决策质量造成严重的影响。如何发现和处理无效数据变得愈发重要。一起来唠唠你会如何处理无效数据吧~

方向一：介绍无效数据的概念

最近遇到 PostgreSQL 数据库表中出现大量重复数据的情况，需要对每组重复记录只保留一条、删除其余的记录。一条条手工删除显然不切实际，还是需要通过 SQL 批量计算来删除。

方向二：无效数据的处理方法

实施步骤：

1.对原表进行备份

2.创建一个表结构完全相同的临时表，接收备份表中的数据。

-- Copy every row of the backup table into a scratch table that the later
-- steps read from.
CREATE TABLE tmp_0524_1 AS
SELECT *
FROM public.m_user_bak20230524;

3.找出重复的数据：

-- Collect every non-null phone that occurs more than once in tmp_0524_1,
-- together with its occurrence count (cn).
-- IF EXISTS keeps the script runnable when the table has not been created yet;
-- a bare DROP TABLE raises an error in that case.
DROP TABLE IF EXISTS tmp_0524_2;
CREATE TABLE tmp_0524_2 AS
SELECT
    phone,
    count(*) AS cn
FROM tmp_0524_1
WHERE phone IS NOT NULL
GROUP BY phone
HAVING count(*) > 1;

4.用另一个临时表接收重复手机号对应的记录（仅限 opt_user 为空的记录）：

-- Stage the full rows whose phone is listed in tmp_0524_2 (the duplicated
-- phones), limited to rows where opt_user is NULL.
-- IF EXISTS keeps the script runnable when the table has not been created yet;
-- a bare DROP TABLE raises an error in that case.
DROP TABLE IF EXISTS tmp_0524_3_1;
CREATE TABLE tmp_0524_3_1 AS
SELECT t.*
FROM tmp_0524_1 AS t
WHERE t.opt_user IS NULL
  AND t.phone IN (SELECT d.phone FROM tmp_0524_2 AS d);

5.建一个表结构一样的空表，多增加一个iid字段

-- Create an empty table with the columns of tmp_0524_3_1 plus a leading iid
-- column.
-- The source table has to be aliased as t for t.* to resolve; without the
-- alias the statement is rejected (no FROM-clause entry named t).
-- iid is typed as bigint so that comparisons and max(iid) are numeric; an
-- untyped '' literal would make the column text and order '9' after '10'.
CREATE TABLE tmp_0524_3 AS
SELECT
    CAST(NULL AS bigint) AS iid,
    t.*
FROM tmp_0524_3_1 AS t
WITH NO DATA;

6.创建一个序列，并将其设为 iid 字段的默认值，使该字段自增

-- Sequence that feeds tmp_0524_3.iid: starts at 1, steps by 1, default
-- lower/upper bounds, cache size 1.
CREATE SEQUENCE tmp_0524_3_id_seq
    INCREMENT BY 1
    START WITH 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;

-- Rows inserted without an explicit iid take the next sequence value.
ALTER TABLE tmp_0524_3
    ALTER COLUMN iid SET DEFAULT nextval('tmp_0524_3_id_seq');

7.插入数据：

-- Load the staged duplicate rows into tmp_0524_3.
-- iid is left out of the column list, so each row receives the column's
-- default value.
INSERT INTO public.tmp_0524_3 (
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
)
SELECT
    src.id, src.user_id, src.phone, src.user_name, src.id_card,
    src.wx_account, src.state, src.app_id, src.app_name, src.head_icon,
    src.sex, src.sex_name, src.reg_time, src.user_state, src.user_state_name,
    src.open_id, src.opt_user, src.birthday_type, src.birthday, src.org_name,
    src.last_trading_time, src.org_code, src.union_id, src.orig_phone, src.update_time,
    src.belong_code, src.belong_name, src.data_type, src.client_num, src.client_name,
    src.country_code, src.inviter, src.channel_id, src.channel_name, src.company
FROM tmp_0524_3_1 AS src;

8.每个手机号只保留 iid 最大的一条，删除其余重复数据：

-- Keep one row per phone (the one with the greatest iid) and delete the rest.
-- A row is removed when another row with the same phone has a larger iid.
-- Written with EXISTS rather than NOT IN (subquery): NOT IN evaluates to
-- UNKNOWN for every row as soon as the subquery yields a NULL, which would
-- silently delete nothing.
-- Rows with a NULL phone are never matched by the equality below; the staging
-- steps only load rows whose phone is non-null, so none are expected here.
DELETE FROM tmp_0524_3 AS dup
WHERE EXISTS (
    SELECT 1
    FROM tmp_0524_3 AS keeper
    WHERE keeper.phone = dup.phone
      AND keeper.iid > dup.iid
);

9.删除原始表中这些重复手机号对应的数据：

-- Remove from the live table the rows that the deduplicated set replaces.
-- The table is schema-qualified so this statement targets the same
-- public.m_user that the re-insert step writes to, whatever the search_path.
-- Only rows with opt_user IS NULL are deleted: the staged data was restricted
-- to opt_user IS NULL, so rows with a non-null opt_user would be deleted here
-- without ever being re-inserted.
-- NOTE(review): run this delete and the following re-insert inside a single
-- transaction so a failure in between cannot leave the phones without rows.
DELETE FROM public.m_user AS u
WHERE u.opt_user IS NULL
  AND u.phone IN (SELECT d.phone FROM tmp_0524_3 AS d);

10.将处理好的数据插回原始表：

-- Write the deduplicated rows back into the live table.
-- The helper column iid is not part of the column list and is not copied.
INSERT INTO public.m_user (
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
)
SELECT
    kept.id, kept.user_id, kept.phone, kept.user_name, kept.id_card,
    kept.wx_account, kept.state, kept.app_id, kept.app_name, kept.head_icon,
    kept.sex, kept.sex_name, kept.reg_time, kept.user_state, kept.user_state_name,
    kept.open_id, kept.opt_user, kept.birthday_type, kept.birthday, kept.org_name,
    kept.last_trading_time, kept.org_code, kept.union_id, kept.orig_phone, kept.update_time,
    kept.belong_code, kept.belong_name, kept.data_type, kept.client_num, kept.client_name,
    kept.country_code, kept.inviter, kept.channel_id, kept.channel_name, kept.company
FROM tmp_0524_3 AS kept;

方向三：如何减少无效数据

在并发系统中，插入数据时应当加锁（或在数据库中建立唯一约束），避免重复插入数据。

方向四：实际案例举例

案例：

最近遇到 PostgreSQL 数据库表中出现大量重复数据的情况，需要对每组重复记录只保留一条、删除其余的记录。一条条手工删除显然不切实际，还是需要通过 SQL 批量计算来删除。

实施步骤：

1.对原表进行备份

2.创建一个表结构完全相同的临时表，接收备份表中的数据。

-- Copy every row of the backup table into a scratch table that the later
-- steps read from.
CREATE TABLE tmp_0524_1 AS
SELECT *
FROM public.m_user_bak20230524;

3.找出重复的数据：

-- Collect every non-null phone that occurs more than once in tmp_0524_1,
-- together with its occurrence count (cn).
-- IF EXISTS keeps the script runnable when the table has not been created yet;
-- a bare DROP TABLE raises an error in that case.
DROP TABLE IF EXISTS tmp_0524_2;
CREATE TABLE tmp_0524_2 AS
SELECT
    phone,
    count(*) AS cn
FROM tmp_0524_1
WHERE phone IS NOT NULL
GROUP BY phone
HAVING count(*) > 1;

4.用另一个临时表接收重复手机号对应的记录（仅限 opt_user 为空的记录）：

-- Stage the full rows whose phone is listed in tmp_0524_2 (the duplicated
-- phones), limited to rows where opt_user is NULL.
-- IF EXISTS keeps the script runnable when the table has not been created yet;
-- a bare DROP TABLE raises an error in that case.
DROP TABLE IF EXISTS tmp_0524_3_1;
CREATE TABLE tmp_0524_3_1 AS
SELECT t.*
FROM tmp_0524_1 AS t
WHERE t.opt_user IS NULL
  AND t.phone IN (SELECT d.phone FROM tmp_0524_2 AS d);

5.建一个表结构一样的空表，多增加一个iid字段

-- Create an empty table with the columns of tmp_0524_3_1 plus a leading iid
-- column.
-- The source table has to be aliased as t for t.* to resolve; without the
-- alias the statement is rejected (no FROM-clause entry named t).
-- iid is typed as bigint so that comparisons and max(iid) are numeric; an
-- untyped '' literal would make the column text and order '9' after '10'.
CREATE TABLE tmp_0524_3 AS
SELECT
    CAST(NULL AS bigint) AS iid,
    t.*
FROM tmp_0524_3_1 AS t
WITH NO DATA;

6.创建一个序列，并将其设为 iid 字段的默认值，使该字段自增

-- Sequence that feeds tmp_0524_3.iid: starts at 1, steps by 1, default
-- lower/upper bounds, cache size 1.
CREATE SEQUENCE tmp_0524_3_id_seq
    INCREMENT BY 1
    START WITH 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;

-- Rows inserted without an explicit iid take the next sequence value.
ALTER TABLE tmp_0524_3
    ALTER COLUMN iid SET DEFAULT nextval('tmp_0524_3_id_seq');

7.插入数据：

-- Load the staged duplicate rows into tmp_0524_3.
-- iid is left out of the column list, so each row receives the column's
-- default value.
INSERT INTO public.tmp_0524_3 (
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
)
SELECT
    src.id, src.user_id, src.phone, src.user_name, src.id_card,
    src.wx_account, src.state, src.app_id, src.app_name, src.head_icon,
    src.sex, src.sex_name, src.reg_time, src.user_state, src.user_state_name,
    src.open_id, src.opt_user, src.birthday_type, src.birthday, src.org_name,
    src.last_trading_time, src.org_code, src.union_id, src.orig_phone, src.update_time,
    src.belong_code, src.belong_name, src.data_type, src.client_num, src.client_name,
    src.country_code, src.inviter, src.channel_id, src.channel_name, src.company
FROM tmp_0524_3_1 AS src;

8.每个手机号只保留 iid 最大的一条，删除其余重复数据：

-- Keep one row per phone (the one with the greatest iid) and delete the rest.
-- A row is removed when another row with the same phone has a larger iid.
-- Written with EXISTS rather than NOT IN (subquery): NOT IN evaluates to
-- UNKNOWN for every row as soon as the subquery yields a NULL, which would
-- silently delete nothing.
-- Rows with a NULL phone are never matched by the equality below; the staging
-- steps only load rows whose phone is non-null, so none are expected here.
DELETE FROM tmp_0524_3 AS dup
WHERE EXISTS (
    SELECT 1
    FROM tmp_0524_3 AS keeper
    WHERE keeper.phone = dup.phone
      AND keeper.iid > dup.iid
);

9.删除原始表中这些重复手机号对应的数据：

-- Remove from the live table the rows that the deduplicated set replaces.
-- The table is schema-qualified so this statement targets the same
-- public.m_user that the re-insert step writes to, whatever the search_path.
-- Only rows with opt_user IS NULL are deleted: the staged data was restricted
-- to opt_user IS NULL, so rows with a non-null opt_user would be deleted here
-- without ever being re-inserted.
-- NOTE(review): run this delete and the following re-insert inside a single
-- transaction so a failure in between cannot leave the phones without rows.
DELETE FROM public.m_user AS u
WHERE u.opt_user IS NULL
  AND u.phone IN (SELECT d.phone FROM tmp_0524_3 AS d);

10.将处理好的数据插回原始表：

-- Write the deduplicated rows back into the live table.
-- The helper column iid is not part of the column list and is not copied.
INSERT INTO public.m_user (
    id, user_id, phone, user_name, id_card,
    wx_account, state, app_id, app_name, head_icon,
    sex, sex_name, reg_time, user_state, user_state_name,
    open_id, opt_user, birthday_type, birthday, org_name,
    last_trading_time, org_code, union_id, orig_phone, update_time,
    belong_code, belong_name, data_type, client_num, client_name,
    country_code, inviter, channel_id, channel_name, company
)
SELECT
    kept.id, kept.user_id, kept.phone, kept.user_name, kept.id_card,
    kept.wx_account, kept.state, kept.app_id, kept.app_name, kept.head_icon,
    kept.sex, kept.sex_name, kept.reg_time, kept.user_state, kept.user_state_name,
    kept.open_id, kept.opt_user, kept.birthday_type, kept.birthday, kept.org_name,
    kept.last_trading_time, kept.org_code, kept.union_id, kept.orig_phone, kept.update_time,
    kept.belong_code, kept.belong_name, kept.data_type, kept.client_num, kept.client_name,
    kept.country_code, kept.inviter, kept.channel_id, kept.channel_name, kept.company
FROM tmp_0524_3 AS kept;