I tried to create a piece of code that helps me remove duplicates from my SQL database and reset the IDs as well. I am not sure that it's error-proof.
import mysql.connector
# Database connection parameters.
# NOTE: every value below is a placeholder -- replace it with the real
# setting for your MySQL server before running the script.
host = 'host'
database = 'database'
user = 'user'
password = 'password'
# Must be an integer. 3306 is the default MySQL port; change it if your
# server listens elsewhere.
port = 3306
# Deletes every row for which another row with a higher ID has identical
# content, so only the newest copy of each article is kept.  The NULL-safe
# operator <=> is used instead of = because `NULL = NULL` is not true in SQL;
# with plain = any duplicate that has a NULL column would never be removed.
DELETE_DUPLICATES_QUERY = """
    DELETE t1 FROM articles t1
    INNER JOIN articles t2
    WHERE
        t1.ID < t2.ID AND
        t1.headline <=> t2.headline AND
        t1.description <=> t2.description AND
        t1.link <=> t2.link AND
        t1.date <=> t2.date AND
        t1.hour <=> t2.hour
"""

# Renumbers the surviving rows 1..N.  The statements are kept separate
# because each cursor.execute() call is given exactly one statement.
# NOTE(review): if other tables reference articles.ID, renumbering will
# invalidate (or be blocked by) those references -- confirm before running.
RESET_PRIMARY_KEY_STATEMENTS = (
    "SET @new_id = 0",
    # ORDER BY ID makes rows get their new number in ascending order, so a new
    # ID is never larger than the row's old one and cannot collide with an ID
    # that a not-yet-updated row still holds.
    "UPDATE articles SET ID = (@new_id := @new_id + 1) ORDER BY ID",
    # Asks the server to restart the counter; presumably the engine bumps it
    # to just above the highest existing ID (InnoDB does) -- TODO confirm for
    # other storage engines.
    "ALTER TABLE articles AUTO_INCREMENT = 1",
)


def _execute_and_commit(conn, statements):
    """Run each SQL statement in order on one cursor, then commit.

    Args:
        conn: An open database connection providing ``cursor()`` and
            ``commit()``.
        statements: Iterable of SQL strings, each holding one statement.

    The cursor is always closed, even when a statement fails.  Exceptions are
    left to propagate so the caller can roll back and report them.
    """
    cursor = conn.cursor()
    try:
        for statement in statements:
            cursor.execute(statement)
        conn.commit()
    finally:
        cursor.close()


def main():
    """Connect to MySQL, delete duplicate articles and renumber their IDs.

    Uses the module-level connection parameters (host, database, user,
    password, port).  Database errors are printed rather than raised, and any
    uncommitted work is rolled back.  The connection is closed on every path.
    """
    # Initialised up front so the cleanup code below can tell whether
    # connect() ever succeeded.
    conn = None
    try:
        # Establishing the connection
        conn = mysql.connector.connect(
            host=host,
            database=database,
            user=user,
            password=password,
            port=port,
        )
        if not conn.is_connected():
            return
        print('Connected to the database')

        # Remove duplicates
        _execute_and_commit(conn, [DELETE_DUPLICATES_QUERY])
        print('Duplicates removed')

        # Reset primary key
        _execute_and_commit(conn, RESET_PRIMARY_KEY_STATEMENTS)
        print('Primary key reset')
    except mysql.connector.Error as e:
        print(f"Error: {e}")
        # Discard whatever the failed step changed but did not commit.
        if conn is not None and conn.is_connected():
            conn.rollback()
    finally:
        if conn is not None and conn.is_connected():
            conn.close()
            print('Database connection closed')


if __name__ == "__main__":
    main()
For reference
Table name : articles
ID
headline
description
link
date
hour