Быстрый генератор случайных имён-фамилий для базы данных
Относительно быстрый генератор псевдослучайного списка имён-фамилий и рандомных данных в базу. Фокус заключается в записи группы сгенерированных данных сперва во временную таблицу в памяти, потом блокировке основной таблицы и переносе данных туда из временной, после чего блокировка снимается, а цикл повторяется необходимое количество раз.
#!/usr/bin/perl
use strict;
use warnings;
use open ':std', ':encoding(UTF-8)';
use DBI;
use Time::HiRes qw( usleep ualarm gettimeofday tv_interval nanosleep
clock_gettime clock_getres clock_nanosleep clock
stat lstat );
use Math::Random::MT::Auto qw(rand irand shuffle gaussian),
'/dev/urandom'; # Speeds up generation of random numbers. May cause errors on non-Unix systems
use constant TOTAL => 650000; # Total number of lines to be inserted
use constant PERSTEP => 1000; # Size of temporary table (speeds up everything). Values between 200 and 2000 are tested
use constant VERBOSE => 1; # Set 1 to generate more output
my $db_host = 'localhost';
my $db_user = 'tester';
my $db_name = 'testbase';
my $db_pass = 'your_password';
my $names_file = 'name_utf.txt'; # Text file with list of names. One per line
my $lastnames_file = 'family_utf.txt'; # Text file with list of surnames. One per line
my $db_string = "DBI:mysql:dbname=$db_name;host=$db_host";
my $dblink = DBI->connect(
$db_string,
$db_user,
$db_pass,
{PrintError => 1}
) or die "Cannot connect to database: $@";
# -----------------------------------------------------------------------
sub geninfo
{
my @names;
my @lastnames;
my $totalt0 = [gettimeofday]; # Counting total time from this moment
# Populate the names array
open(my $fh, '<:encoding(UTF-8)', $names_file)
or die "Could not open file '$names_file' $!";
while (my $row = <$fh>)
{
chomp $row;
push (@names, $row);
}
close $fh;
# Populate the surnames array
open($fh, '<:encoding(UTF-8)', $lastnames_file)
or die "Could not open file '$lastnames_file' $!";
while (my $row = <$fh>)
{
chomp $row;
push (@lastnames, $row);
}
close $fh;
# Creating temporary table to speed things up
# Temporary table structure must be (almost) identical to target table structure. In this example ID column is omitted
$dblink->prepare("CREATE TEMPORARY TABLE `persons_temp` (
`name` varchar(24) NOT NULL,
`secondname` varchar(24) NOT NULL,
`rnum` int(8) DEFAULT NULL,
`mtime` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00' ON UPDATE CURRENT_TIMESTAMP
) ENGINE=MEMORY")->execute();
my $query = "INSERT INTO `persons_temp` (`name`, `secondname`, `rnum`, `mtime`) VALUES (?, ?, ?, FROM_UNIXTIME(?))";
my $statement = $dblink->prepare($query);
for (my $num = 0; $num < TOTAL;)
{
my $t0 = [gettimeofday]; # Counting one iteration time from this moment
$dblink->prepare("DELETE FROM `persons_temp`")->execute(); # Empty temporary table first (it's filled with previous iteration results)
for (my $stepnum = 0; $stepnum < PERSTEP and $num < TOTAL; $stepnum++, $num++)
{
my $pname = ucfirst($names[rand(0+@names)]); # Take random name and uppercase first letter
my $psname = ucfirst($lastnames[rand(0+@lastnames)]); # Take random surname and uppercase first letter
my $rnd = int(rand(999999999)); # Random number
my $dt = time; # Timestamp
$statement->execute($pname, $psname, $rnd, $dt); # Inserting one row to temporary table
print "Adding record number " . ($num + 1) . ": $pname $psname $rnd\n" if VERBOSE;
}
# Copy data from temporary table to destination table FAST
$dblink->prepare("LOCK TABLE `persons` WRITE, `persons_temp` READ")->execute();
$dblink->prepare("INSERT INTO `persons` (`name`, `secondname`, `rnum`, `mtime`) SELECT * FROM `persons_temp`")->execute();
$dblink->prepare("UNLOCK TABLES;")->execute();
print "Step takes: " . tv_interval ( $t0 ) . "s\n";
}
my $interval = timeinterval(tv_interval($totalt0)); # Convert total time to human-readable format with some damn creepy function (below)
print "\nJob done in $interval\n";
undef($query);
$dblink->prepare("UNLOCK TABLES;")->execute();
$dblink->prepare("ALTER TABLE `persons` ENABLE KEYS;")->execute();
}
sub timeinterval
{
my( $seconds ) = (@_ );
if ( $seconds < 60 )
{
# less than a minute
return( $seconds . "s" );
}
if ( $seconds <= ( 3600 ) )
{
# less than an hour
return( int($seconds/ 60 ) . "m:" . timeinterval($seconds%60) );
}
if ( $seconds <= ( 86400 ) )
{
# less than a day
return( int($seconds/3600) . "h:" . timeinterval($seconds%3600) );
}
if ( $seconds <= ( 60 * 60 * 24 * 7 ) )
{
# less than a week
return( int($seconds/(86400) ) . "d, " . timeinterval($seconds%86400) );
}
# fall-back weeks
return( int( $seconds/(60*60*24*7)) . " weeks" );
}
# -----------------------------------------------------------------------
geninfo();
$dblink->disconnect();
__END__
Пример вывода:
...
...
Adding record number 649995: Мануил Рыбнов 966212308
Adding record number 649996: Евстигней Девятков 513734457
Adding record number 649997: Бадри Колмыченко 396553781
Adding record number 649998: Август Хлудев 3156068
Adding record number 649999: Бронислав Воскресенский 309781404
Adding record number 650000: Мелентий Веретин 19724141
Step takes: 0.156238s
Job done in 3m:8s
Использовалась такая таблица:
CREATE TABLE `persons` (
`id` int(16) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(24) NOT NULL,
`secondname` varchar(24) NOT NULL,
`rnum` int(8) DEFAULT NULL,
`mtime` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00' ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
KEY `FULLNAME` (`name`,`secondname`)
) ENGINE=InnoDB AUTO_INCREMENT=664951 DEFAULT CHARSET=utf8;
Само собой, скрипт нужно перепилить в районе CREATE TEMPORARY TABLE
(и несколько последующих строк), чтобы он удовлетворял вашей схеме.