Mass inserts, updates: SQLite vs MySQL (update: delayed inserts)
Lessons learned:
- SQLite performs inserts 8-14 times faster than InnoDB / MyISAM
- SQLite performs updates 4-8 times faster than InnoDB and as fast as MyISAM
- SQLite performs selects 2 times faster than InnoDB and 3 times slower than MyISAM
- SQLite requires 2.6 times less disk space than InnoDB but 1.7 times more than MyISAM
- Allowing null values or using synchronous=NORMAL makes inserts 5-10 percent faster in SQLite
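For reference, the synchronous setting from the last point is a single pragma (it also appears in the test script below; the path here is illustrative):

$db = new SQLite3('/tmp/example.db'); // illustrative path
$db->exec('PRAGMA synchronous=NORMAL'); // fsync less often than the default FULL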
Using SQLite instead of MySQL can be a great alternative on certain architectures. Especially if you can partition the data into several SQLite databases (e.g. one database per user) and limit parallel transactions on one database. Replication, backup and restore can be done easily over the file system.
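A minimal sketch of that partitioning idea (the paths and the helper name are illustrative, not part of the benchmark below):

// one SQLite file per user, so writers for different users never contend
function user_db($userId) {
    return new SQLite3('/data/users/'.(int)$userId.'.db');
}
// backup/restore is a plain file copy (run while no transaction is open)
copy('/data/users/42.db', '/backup/users/42.db');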
The results:
(MySQL 5.6.5, default config without binlog; SQLite 3.7.7; PHP 5.4.5; 2 x 1.4 GHz CPU; 5400 rpm disk)
[chart: insert / update 200k (1 process)]
Using delayed inserts makes MySQL even slower:
[chart: insert 200k (1 process), delayed inserts]
[chart: insert / update 2 x 200k (2 processes) and 4 x 200k (4 processes)]
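The delayed-insert variant is not shown in the script below; presumably it just swapped the plain INSERT for MySQL's INSERT DELAYED syntax (MyISAM only, deprecated in later MySQL versions), along these lines:

$db->query('INSERT DELAYED INTO foo VALUES ('.$i.', '.($i*2).')');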
Here is the test script:
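// Each run prints: name, write/insert time, read/select time, update time
// (and, for SQLite/MySQL, the table size in MB); times are in seconds.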
// CSV
$csv = tempnam('/tmp', 'csv');
$start = microtime(true);
$fp = fopen($csv, 'w');
for ($i=0; $i<200000; $i++) fputcsv($fp, array($i, $i*2));
fclose($fp);
echo 'csv '.number_format(microtime(true)-$start, 2);
$start = microtime(true);
$fp = fopen($csv, 'r');
$sum = 0;
while (!feof($fp)) $sum += @array_pop(fgetcsv($fp)); // @ silences the by-reference notice and the false fgetcsv() returns at EOF
fclose($fp);
echo ' '.number_format(microtime(true)-$start, 2);
$start = microtime(true);
$fp = fopen($csv, 'r');
$fp2 = fopen($csv.'2', 'w');
while (!feof($fp)) {
    $data = fgetcsv($fp);
    if (!$data) continue; // fgetcsv() returns false at EOF
    fputcsv($fp2, array($data[0], $data[1]+2));
}
fclose($fp);
fclose($fp2);
echo ' '.number_format(microtime(true)-$start, 2)."\n";
// JSON
$json = tempnam('/tmp', 'json');
$start = microtime(true);
$fp = fopen($json, 'w');
for ($i=0; $i<200000; $i++) fwrite($fp, '['.$i.','.($i*2).']'."\n");
fclose($fp);
echo 'json '.number_format(microtime(true)-$start, 2);
$start = microtime(true);
$fp = fopen($json, 'r');
$sum = 0;
while (!feof($fp)) $sum += @array_pop(json_decode(fgets($fp))); // @ silences the by-reference notice and the false fgets() returns at EOF
fclose($fp);
echo ' '.number_format(microtime(true)-$start, 2);
$start = microtime(true);
$fp = fopen($json, 'r');
$fp2 = fopen($json.'2', 'w');
while (!feof($fp)) {
    $data = json_decode(fgets($fp));
    if (!$data) continue; // fgets() returns false at EOF
    fputs($fp2, '['.$data[0].','.($data[1]+2).']'."\n");
}
fclose($fp);
fclose($fp2);
echo ' '.number_format(microtime(true)-$start, 2)."\n";
// SQLite
// 100,000 was the best transaction size in several runs
sqlite_test(':memory:');
sqlite_test(':memory:', 'not null');
sqlite_test('/tmp/test1a.db', '', 'sync_norm'); // use test2..n for n processes
sqlite_test('/tmp/test1b.db');
sqlite_test('/tmp/test1c.db', 'not null');
function sqlite_test($file, $null='', $opt=false) {
    $db = new SQLite3($file);
    if ($opt) $db->exec('PRAGMA synchronous=NORMAL'); // fewer fsyncs than the default FULL
    $db->exec('CREATE TABLE foo (i INT '.$null.', i2 INT '.$null.')');
    $start = microtime(true);
    $db->exec('begin');
    for ($i=0; $i<200000; $i++) {
        $db->exec('INSERT INTO foo VALUES ('.$i.', '.($i*2).')');
        if ($i%100000==0) { // commit in batches of 100,000 (also fires at $i=0, harmless)
            $db->exec('commit');
            $db->exec('begin');
        }
    }
    $db->exec('commit');
    echo "sqlite $file $null $opt ".number_format(microtime(true)-$start, 2);
    $start = microtime(true);
    $db->query('SELECT sum(i2) FROM foo')->fetchArray();
    echo ' '.number_format(microtime(true)-$start, 2);
    $start = microtime(true);
    $db->exec('UPDATE foo SET i2=i2+2');
    echo ' '.number_format(microtime(true)-$start, 2);
    echo ' '.number_format(@filesize($file)/1048576, 2)."\n"; // MB; @ because :memory: has no file
}
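// Sketch, not benchmarked in the original post: prepared statements avoid
// re-parsing the INSERT 200k times and would likely speed SQLite up further
// (the function and file names are illustrative).
function sqlite_test_prepared($file) {
    $db = new SQLite3($file);
    $db->exec('CREATE TABLE foo (i INT, i2 INT)');
    $stmt = $db->prepare('INSERT INTO foo VALUES (?, ?)');
    $db->exec('begin');
    for ($i=0; $i<200000; $i++) {
        $stmt->bindValue(1, $i, SQLITE3_INTEGER);
        $stmt->bindValue(2, $i*2, SQLITE3_INTEGER);
        $stmt->execute();
        $stmt->reset(); // make the statement reusable for the next row
    }
    $db->exec('commit');
}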
// MySQL
// 30,000 was the best transaction size in several runs
mysql_test('memory', 'not null');
mysql_test('memory');
mysql_test('csv', 'not null');
mysql_test('myisam', 'not null');
mysql_test('myisam');
mysql_test('innodb', 'not null');
mysql_test('innodb');
function mysql_test($e, $null='') {
    $db = new mysqli('127.0.0.1', 'root', '', 'test'); // use test2..n for n processes
    $db->query('DROP TABLE IF EXISTS foo');
    $db->query('CREATE TABLE foo (i INT '.$null.', i2 INT '.$null.') ENGINE='.$e);
    $start = microtime(true);
    $db->query('begin');
    for ($i=0; $i<200000; $i++) {
        $db->query('INSERT INTO foo VALUES ('.$i.', '.($i*2).')');
        if ($i%30000==0) { // commit in batches of 30,000
            $db->query('commit');
            $db->query('begin');
        }
    }
    $db->query('commit');
    echo "mysql $e $null ".number_format(microtime(true)-$start, 2);
    $start = microtime(true);
    $db->query('SELECT sum(i2) FROM foo')->fetch_row();
    echo ' '.number_format(microtime(true)-$start, 2);
    $start = microtime(true);
    $db->query('UPDATE foo SET i2=i2+2');
    echo ' '.number_format(microtime(true)-$start, 2);
    $row = $db->query('SHOW TABLE STATUS')->fetch_array(); // stats for foo, the only table
    if (!$row['Data_length']) $row['Data_length'] = filesize('.../test/foo.CSV'); // the CSV engine reports Data_length 0, so stat its data file (path elided in the original)
    echo ' '.number_format($row['Data_length']/1048576, 2)."\n"; // MB
}
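Also untested here, but batching many rows into one INSERT statement is the usual way to speed up MySQL bulk loads; a sketch (the 1,000-row batch size is an assumption, not a measured optimum):

// multi-row INSERT: one statement per 1,000 rows instead of one per row
$db = new mysqli('127.0.0.1', 'root', '', 'test');
$db->query('DROP TABLE IF EXISTS foo');
$db->query('CREATE TABLE foo (i INT, i2 INT) ENGINE=innodb');
$values = array();
for ($i=0; $i<200000; $i++) {
    $values[] = '('.$i.','.($i*2).')';
    if (count($values) == 1000) {
        $db->query('INSERT INTO foo VALUES '.implode(',', $values));
        $values = array();
    }
}
if ($values) $db->query('INSERT INTO foo VALUES '.implode(',', $values)); // flush the remainder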