Using MySQL Partitions (a Python example)

Today somebody had a problem with expiring a large table (a Serendipity Blog table).
In MySQL InnoDB, tables are physically ordered by primary key (InnoDB data is a B+ tree, a balanced tree where the data pages are the leaves of the tree). If you are expiring old data from such a log table, you are deleting from the left hand side of the tree, and since it is a balanced tree, that triggers a lot of rebalancing - hence it is very slow.
If you rename the old table and INSERT … SELECT the data you want to keep back into the original table, that can be faster.
But if the data you want to keep is larger than memory, the indexing of the data will still be slow. A nice way to handle log tables are partitions. Here is an example. It’s not very cleaned up, but it works on my system.
#! /usr/bin/env python --
# setting up the python environment:
# pip install virtualenv
# virtualenv partitions
# cd partitions
# source bin/activate
# pip install --update pip
# pip install mysqlclient click
# setting up the MySQL:
# create user demo@localhost identified by "pfrtlng";
# grant all on demo.* to demo@localhost;
# Testing:
# ./partitions drop --name keks
# ./partitions create --name keks
# ./partitions fill --name keks
# mysql -u demo -ppfrtlng demo -e 'select * from information_schema.partitions where table_name = "keks"'
# ./partitions add --name keks
# mysql -u demo -ppfrtlng demo -e 'select * from information_schema.partitions where table_name = "keks"'
# ./partitions add --name keks
# ./partitions add --name keks
# ./partitions add --name keks
# mysql -u demo -ppfrtlng demo -e 'select * from information_schema.partitions where table_name = "keks"'
# ./partitions dropbyname --name keks --pname p0
# mysql -u demo -ppfrtlng demo -e 'select * from information_schema.partitions where table_name = "keks"'
# ./partitions dropbyvalue --name keks --valuebelow 500001
# mysql -u demo -ppfrtlng demo -e 'select * from information_schema.partitions where table_name = "keks"'
# ./partitions drop --name keks
import click
import MySQLdb
import random
import string
from pprint import pprint
# A lot of SQL collected here
db_config = dict(
host = "localhost",
user = "demo",
passwd = "pfrtlng",
db = "demo",
)
sql_drop_table = 'drop table %s'
sql_create_table = """create table %s (
counter_id integer not null primary key auto_increment,
data varchar(64) not null
) %s
"""
sql_partition_clause = 'partition by range (counter_id) ( %s )'
sql_partition_range_clause = 'partition p%d values less than (%d),'
sql_insert_into = 'insert into %s ( counter_id, data ) values ( %d, "%s" )'
sql_find_partitions = """select partition_name, partition_description
from information_schema.partitions
where table_schema = '%s'
and table_name = '%s'
order by cast(PARTITION_DESCRIPTION as signed) desc
limit 2"""
sql_alter_table_add_partition = 'alter table %s add partition ( partition %s values less than (%d))'
sql_alter_table_drop_partition = 'alter table %s drop partition %s'
sql_find_partition_by_value = """select partition_name
from information_schema.partitions
where table_schema = '%s'
and table_name = '%s'
and cast(partition_description as signed) < %d"""
###########
# create a db connection
db = MySQLdb.connect(**db_config)
@click.group(help='Test row expiration with and without partitions.')
def partitions():
pass
@partitions.command()
@click.option('--name', default='demo', help='Table name to drop')
def drop(name):
cmd = sql_drop_table % name
# click.echo('CMD: %s' % cmd)
try:
c = db.cursor()
c.execute(cmd)
click.echo('Table "%s" dropped.' % name)
except MySQLdb.OperationalError as e:
click.echo('Table "%s" did not exist.' % name)
@partitions.command()
@click.option('--name', default='demo', help='Table name to create')
@click.option('--partitioned/--no-partitioned', default=True, help='Create table partitioned?')
@click.option('--size', default=1000000, help='Expected table size')
@click.option('--psize', default=100000, help='Partition size')
def create(name, partitioned, size, psize):
pcmd = ''
# add partitioning clause to create table statement
if (partitioned):
ppcmd = ''
counter = 0
# add all the ranges
for r in range(0, size+1, psize):
ppcmd = ppcmd + ( sql_partition_range_clause % (counter, r))
counter = counter + 1
# remove the trailing comma
pcmd = sql_partition_clause % ( ppcmd.rstrip(',') )
# complete the create table statement
cmd = sql_create_table % ( name, pcmd )
c = db.cursor()
try:
c.execute(cmd)
click.echo('Table "%s" created %s partitions.' % ( name, "with" if partitioned else "without"))
except MySQLdb.OperationalError as e:
click.echo('Table "%s" already exists.' % name)
@partitions.command()
@click.option('--name', default='demo', help='Table name to insert into')
@click.option('--size', default=1000000, help='Number of rows to load into the table.')
def fill(name, size):
for i in range(1, size):
str = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
cmd = sql_insert_into % (name, i, str)
c = db.cursor()
try:
c.execute(cmd)
except MySQLdb.Error as e:
click.echo("MySQL Error: %s" % e)
# commit every 1000 statements
if (i % 10000 == 0):
db.commit()
# one final commit
db.commit()
@partitions.command()
@click.option('--name', default='demo', help='Table name to add partition to')
@click.option('--size', default=None, type=click.INT, help='Partition size in id count')
def add(name, size):
global db_config # we need the schema name
schema = db_config['db']
cmd = sql_find_partitions % ( schema, name ); # find me the two last partitions from I_S.PARTITIONS
# click.echo("Sql: %s" % cmd)
c = db.cursor(MySQLdb.cursors.DictCursor)
c.execute(cmd)
r = c.fetchall()
# if no size was given, we take the interval from the two highest numbered partitions as default
if size is None:
size = int(r[0]['partition_description']) - int(r[1]['partition_description'])
limit = int(r[0]['partition_description']) + size
# we automatically calculate the new partition name p(XX+1) from the last pXX
pname = 'p' + str(int(r[0]['partition_name'][1:]) + 1)
cmd = sql_alter_table_add_partition % ( name, pname, limit )
# click.echo("Sql: %s" % cmd)
c = db.cursor()
try:
c.execute(cmd)
click.echo('Partition %s has been added (values less than %d, that is a %d step size.)' % (pname, limit, size))
except MySQLdb.Error as e:
click.echo("MySQL Error: %s" % e )
exit(1)
@partitions.command()
@click.option('--name', default='demo', help='Table name to drop partition from')
@click.option('--pname', help='Drop partition by name pXXX')
def dropbyname(name, pname):
if pname is None:
click.echo('I need a --pname')
exit(1)
cmd = sql_alter_table_drop_partition % ( name, pname)
# click.echo('Sql: %s' % cmd)
c = db.cursor()
try:
c.execute(cmd)
click.echo("Dropped partition named %s" % pname)
except MySQLdb.Error as e:
click.echo("MySQL Error: %s" % e)
exit(1)
@partitions.command()
@click.option('--name', default='demo', help='Table name to drop partitions from')
@click.option('--valuebelow', type=click.INT, help='Drop all partitions with values below X')
def dropbyvalue(name, valuebelow):
global db_config # we need the schema name
schema = db_config['db']
if valuebelow is None:
click.echo('I need a --valuebelow')
exit(1)
cmd = sql_find_partition_by_value % ( schema, name, valuebelow)
# click.echo("Sql: %s" % cmd)
c = db.cursor()
try:
c.execute(cmd)
except MySQL.Error as e:
click.echo("MySQL Error: %s" % e)
exit(1)
result = c.fetchall() # reseult[rownr][0] is partition name
for row in result:
cmd = sql_alter_table_drop_partition % (name, row[0])
# click.echo("Sql: %s" % cmd)
c = db.cursor()
try:
c.execute(cmd)
click.echo("Dropped partition named %s" % row[0])
except MySQL.Error as e:
click.echo("MySQL Error: %s" % e)
partitions()