Быстрый генератор случайных имён-фамилий для базы данных
03 Mar 2015
Относительно быстрый генератор псевдослучайного списка имён-фамилий и рандомных данных в базу. Фокус заключается в записи группы сгенерированных данных сперва во временную таблицу в памяти, потом блокировке основной таблицы и переносе данных туда из временной, после чего блокировка снимается, а цикл повторяется необходимое количество раз.
#!/usr/bin/perl
use strict ;
use warnings ;
use open ' :std ', ' :encoding(UTF-8) ';
use DBI ;
use Time:: HiRes qw( usleep ualarm gettimeofday tv_interval nanosleep
clock_gettime clock_getres clock_nanosleep clock
stat lstat ) ;
use Math::Random::MT:: Auto qw(rand irand shuffle gaussian) ,
' /dev/urandom '; # Speeds up generation of random numbers. May cause errors on non-Unix systems
use constant TOTAL => 650000 ; # Total number of lines to be inserted
use constant PERSTEP => 1000 ; # Size of temporary table (speeds up everything). Values between 200 and 2000 are tested
use constant VERBOSE => 1 ; # Set 1 to generate more output
my $db_host = ' localhost ';
my $db_user = ' tester ';
my $db_name = ' testbase ';
my $db_pass = ' your_password ';
my $names_file = ' name_utf.txt '; # Text file with list of names. One per line
my $lastnames_file = ' family_utf.txt '; # Text file with list of surnames. One per line
my $db_string = " DBI:mysql:dbname= $db_name ;host= $db_host ";
my $dblink = DBI -> connect (
$db_string ,
$db_user ,
$db_pass ,
{ PrintError => 1 }
) or die " Cannot connect to database: $@ ";
# -----------------------------------------------------------------------
sub geninfo
{
my @names ;
my @lastnames ;
my $totalt0 = [ gettimeofday ]; # Counting total time from this moment
# Populate the names array
open ( my $fh , ' <:encoding(UTF-8) ', $names_file )
or die " Could not open file ' $names_file ' $! ";
while ( my $row = < $fh > )
{
chomp $row ;
push ( @names , $row );
}
close $fh ;
# Populate the surnames array
open ( $fh , ' <:encoding(UTF-8) ', $lastnames_file )
or die " Could not open file ' $lastnames_file ' $! ";
while ( my $row = < $fh > )
{
chomp $row ;
push ( @lastnames , $row );
}
close $fh ;
# Creating temporary table to speed things up
# Temporary table structure must be (almost) identical to target table structure. In this example ID column is omitted
$dblink -> prepare (" CREATE TEMPORARY TABLE `persons_temp` (
`name` varchar(24) NOT NULL,
`secondname` varchar(24) NOT NULL,
`rnum` int(8) DEFAULT NULL,
`mtime` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00' ON UPDATE CURRENT_TIMESTAMP
) ENGINE=MEMORY ") -> execute ();
my $query = " INSERT INTO `persons_temp` (`name`, `secondname`, `rnum`, `mtime`) VALUES (?, ?, ?, FROM_UNIXTIME(?)) ";
my $statement = $dblink -> prepare ( $query );
for ( my $num = 0 ; $num < TOTAL ;)
{
my $t0 = [ gettimeofday ]; # Counting one iteration time from this moment
$dblink -> prepare (" DELETE FROM `persons_temp` ") -> execute (); # Empty temporary table first (it's filled with previous iteration results)
for ( my $stepnum = 0 ; $stepnum < PERSTEP and $num < TOTAL ; $stepnum ++ , $num ++ )
{
my $pname = ucfirst ( $names [ rand ( 0 + @names )]); # Take random name and uppercase first letter
my $psname = ucfirst ( $lastnames [ rand ( 0 + @lastnames )]); # Take random surname and uppercase first letter
my $rnd = int ( rand ( 999999999 )); # Random number
my $dt = time ; # Timestamp
$statement -> execute ( $pname , $psname , $rnd , $dt ); # Inserting one row to temporary table
print " Adding record number " . ( $num + 1 ) . " : $pname $psname $rnd \n " if VERBOSE ;
}
# Copy data from temporary table to destination table FAST
$dblink -> prepare (" LOCK TABLE `persons` WRITE, `persons_temp` READ ") -> execute ();
$dblink -> prepare (" INSERT INTO `persons` (`name`, `secondname`, `rnum`, `mtime`) SELECT * FROM `persons_temp` ") -> execute ();
$dblink -> prepare (" UNLOCK TABLES; ") -> execute ();
print " Step takes: " . tv_interval ( $t0 ) . " s \n ";
}
my $interval = timeinterval ( tv_interval ( $totalt0 )); # Convert total time to human-readable format with some damn creepy function (below)
print " \n Job done in $interval \n ";
undef ( $query );
$dblink -> prepare (" UNLOCK TABLES; ") -> execute ();
$dblink -> prepare (" ALTER TABLE `persons` ENABLE KEYS; ") -> execute ();
}
sub timeinterval
{
my ( $seconds ) = ( @ _ );
if ( $seconds < 60 )
{
# less than a minute
return ( $seconds . " s " );
}
if ( $seconds <= ( 3600 ) )
{
# less than an hour
return ( int ( $seconds / 60 ) . " m: " . timeinterval ( $seconds % 60 ) );
}
if ( $seconds <= ( 86400 ) )
{
# less than a day
return ( int ( $seconds / 3600 ) . " h: " . timeinterval ( $seconds % 3600 ) );
}
if ( $seconds <= ( 60 * 60 * 24 * 7 ) )
{
# less than a week
return ( int ( $seconds / ( 86400 ) ) . " d, " . timeinterval ( $seconds % 86400 ) );
}
# fall-back weeks
return ( int ( $seconds / ( 60 * 60 * 24 * 7 )) . " weeks " );
}
# -----------------------------------------------------------------------
geninfo ();
$dblink -> disconnect ();
__END__
Пример вывода:
...
...
Adding record number 649995: Мануил Рыбнов 966212308
Adding record number 649996: Евстигней Девятков 513734457
Adding record number 649997: Бадри Колмыченко 396553781
Adding record number 649998: Август Хлудев 3156068
Adding record number 649999: Бронислав Воскресенский 309781404
Adding record number 650000: Мелентий Веретин 19724141
Step takes: 0.156238s
Job done in 3m:8s
Использовалась такая таблица:
CREATE TABLE `persons` (
`id` int ( 16 ) unsigned NOT NULL AUTO_INCREMENT ,
`name` varchar ( 24 ) NOT NULL ,
`secondname` varchar ( 24 ) NOT NULL ,
`rnum` int ( 8 ) DEFAULT NULL ,
`mtime` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00' ON UPDATE CURRENT_TIMESTAMP ,
PRIMARY KEY ( `id` ),
KEY `FULLNAME` ( `name` , `secondname` )
) ENGINE = InnoDB AUTO_INCREMENT = 664951 DEFAULT CHARSET = utf8 ;
Само собой, скрипт нужно перепилить в районе CREATE TEMPORARY TABLE
(и несколько последующих строк), чтобы он удовлетворял вашей схеме.