#!/dataserfs/libs-2024-01-11/bin/perl -w

use strict;


#
# spc + svc
# this is ddu-find-old-in-tree.service
#                     ^^^^^^^^
#





# Scrub the shell-related environment variables we inherited, then pin
# PATH to a fixed, minimal value.
delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)};
$ENV{PATH} = '/bin:/usr/bin';

use DBI;
use File::FcntlLock;
use Carp::Assert;

# Lock file used by main_thing() to make sure only one instance runs.
my $lockfile = '/var/run/ddu-find-old-trees.lock';
# Log file; currently only referenced by commented-out redirection code.
my $logfile  = '/var/log/ddu-find-old-trees.log';

# Leading pieces of the DBI data source name. OpenDB() joins these with
# ';' and appends host/database (and optionally port).
my @MysqlOptions = (
	'DBI:MariaDB:',
	'mariadb_server_prepare=0',
	'mariadb_read_default_file=/etc/my.cnf',
);

# Minimum "treeblocks" value an old directory tree must have before it
# is recorded in the result set.
my $MIN_TREE_BLOCKS = 1000;












# #####################################################################
sub CloseDB {
	# Disconnect the given DBI database handle.
	#   $dbh - database handle; must be a true value (asserted).
	# Returns whatever $dbh->disconnect() returns.
	my ($dbh) = @_;

	assert($dbh);

	return $dbh->disconnect();
} # CloseDB



# #####################################################################
# $port is optional
sub OpenDB {
	# Open a DBI connection using the file-level @MysqlOptions.
	#   $host, $database, $db_user, $db_pass - required, must be true
	#   $port                                - optional; only added to
	#                                          the DSN when defined
	# Returns the handle from DBI->connect(), which is undef when the
	# connection could not be established.
	my ($host, $database, $db_user, $db_pass, $port) = @_;

	assert($host);
	assert($database);
	assert($db_user);
	assert($db_pass);

	# Collect the DSN components first, then glue them together with ';'.
	my @dsn_parts = (@MysqlOptions, "host=$host", "database=$database");
	if ( defined $port ) {
		push(@dsn_parts, "port=$port");
	}
	my $dsn = join(';', @dsn_parts);

	my $dbh = DBI->connect($dsn, $db_user, $db_pass);
	return $dbh;
} # OpenDB
# #####################################################################



























# #####################################################################
sub save_old_trees {
	# Replace the old_trees rows for one (scanno, age_days) pair with the
	# current contents of the "found" table joined against the scan's
	# tree table.
	#   $dbh       - database handle
	#   $site, $root, $root_sha1, $begutc, $scanno
	#              - stored verbatim in every inserted row
	#   $age_days  - age threshold the rows belong to
	#   $treetable - name of the tree table; interpolated into the SQL
	# All arguments must be true values (asserted).
	# Returns the result of the final INSERT's $dbh->do().
	my ($dbh, $site, $root, $age_days, $scanno, $root_sha1,
	    $treetable, $begutc) = @_;

	foreach my $required ($dbh, $site, $root, $age_days, $scanno,
			      $root_sha1, $treetable, $begutc) {
		assert($required);
	}

	# Step 1: throw away what an earlier run stored for this pair.
	my $purge_sql = qq{
		DELETE FROM old_trees
		WHERE	age_days = ?
		AND	scanno = ?
		};
	$dbh->do($purge_sql, undef, $age_days, $scanno);

	# Step 2: copy every found directory, enriched with the per-node
	# columns from the tree table. The six placeholders are the constant
	# columns shared by all inserted rows.
	my $copy_sql = qq{
		INSERT INTO old_trees
		(scanno,root,root_sha1,age_days,begutc,site,
		 dirname,
		 uid,gid,treeblocks,treenumfiles,treenumdirs,
		 treemtime,treeatime,nodenum,username,groupname)
		SELECT	?,?,?,?,?,?,
			found.dirname,
			t.uid,t.gid,t.treeblocks,t.treenumfiles,t.treenumdirs,
			t.treemtime,t.treeatime,t.nodenum,
			t.username,t.groupname
		FROM	found
		JOIN	$treetable AS t
			ON	found.nodenum = t.nodenum
		};
	my @row_constants = ($scanno, $root, $root_sha1, $age_days, $begutc, $site);

	return $dbh->do($copy_sql, undef, @row_constants);
} # save_old_trees














# #####################################################################
sub analyze_age_for_one_scan {
	# Find the "old" directory trees of one scan for one age threshold and
	# store them via save_old_trees().
	#
	#   $dbh      - database handle
	#   $site     - site of the scan (matched against scans.site)
	#   $root     - root path of the scan (matched against scans.root)
	#   $age_days - age threshold in days
	# All arguments must be true values (asserted).
	#
	# Returns 0 on success, -1 when no matching row exists in "scans".
	# Database errors are only raised as exceptions if the handle was
	# configured with RaiseError by the caller.
	#
	# Method: walk the tree table level by level, starting at the rows
	# whose parent_nodenum is 0. A directory counts as old when both its
	# mtime and treemtime are before the cutoff.
	#   - old and treeblocks >= $MIN_TREE_BLOCKS : recorded in "found"
	#   - not old                                : its children are
	#                                              examined next round
	#   - old but below the block threshold      : neither recorded nor
	#                                              descended into
	my ($dbh, $site, $root, $age_days) = @_;

	assert($dbh);
	assert($site);
	assert($root);
	assert($age_days);

	# NOTE(review): the lookup uses LIMIT 1 without ORDER BY, so if more
	# than one scans row shares this site and root, which one is used is
	# up to the server - confirm (site, root) is unique.
	my ($scanno, $root_sha1, $begutc) = $dbh->selectrow_array(qq{
			SELECT	scanno,root_sha1,begutc
			FROM	scans
			WHERE	site = ?
			AND	root = ?
			LIMIT 1
			}, undef,
			$site,
			$root);

	if (!$scanno) {
		print STDERR "scan not found: $site,$root\n";
		return(-1);
	}

	# Per-scan tree table; the name is interpolated into the SQL below.
	my $treetable = "tree_$root_sha1";

	# Start from fresh scratch tables. DROP TEMPORARY restricts the drop
	# to this session's temporary tables, so a permanent table that
	# happens to share one of these names can never be removed by accident.
	foreach my $scratch (qw(found search level)) {
		$dbh->do(qq{ DROP TEMPORARY TABLE IF EXISTS $scratch });
	}

	# found: result set, one row per old tree.
	$dbh->do(qq{ CREATE TEMPORARY TABLE found(
			nodenum int(11) unsigned not null,
			dirname varchar(20000) not null,
			PRIMARY KEY(nodenum)
			)});

	# search: directories whose children are examined in the next round.
	$dbh->do(qq{ CREATE TEMPORARY TABLE search(
			parent_nodenum int(11) unsigned not null,
			dirname varchar(20000) not null
			)});

	# level: all children of the current "search" rows, with verdict.
	$dbh->do(qq{ CREATE TEMPORARY TABLE level(
			nodenum int(11) unsigned not null,
			dirname varchar(20000) not null,
			treeblocks bigint unsigned not null,
			blocks bigint unsigned not null,
			old_found integer,
			key(old_found)
			)});

	# Seed the walk with pseudo parent 0, carrying the root path.
	$dbh->do(qq{ INSERT INTO search
			SET parent_nodenum = 0,
			dirname = ?},
			undef,
			$root);

	# Cutoff as a server-side user variable, referenced by the level query.
	$dbh->do(qq{ SET \@CUTOFF_OLD=UNIX_TIMESTAMP() - ? * 24 * 3600 }, undef, $age_days);

	while (1) {
		# Stop once there is nothing left to descend into.
		my ($count) = $dbh->selectrow_array(qq{
			SELECT COUNT(*) FROM search
		});
		last if !$count;

		$dbh->do(qq{ DELETE FROM level });

		#
		# quirk: we need the root at level0, i.e. parent_nodenum=0,
		#	and the dirname there is not useful.
		#
		# 2023/5/2:
		#	- we do not have reliable atime.
		#	- we remove all atime from old_found. it was:
		#		IF(mtime < \@CUTOFF_OLD AND treemtime < \@CUTOFF_OLD
		#		   AND atime < \@CUTOFF_OLD AND treeatime < \@CUTOFF_OLD,1,0),
		#
		$dbh->do(qq{
			INSERT INTO level
			(nodenum,dirname,old_found,blocks,treeblocks)
			SELECT 	nodenum,					# nodenum
				IF(search.parent_nodenum = 0,
				   search.dirname,
				   CONCAT(search.dirname,'/',tree.dirname)), 	# dirname
				IF(mtime < \@CUTOFF_OLD AND
				   treemtime < \@CUTOFF_OLD,1,0),		# old_found
				blocks,treeblocks				# blocks,treeblocks
			FROM 	$treetable AS tree
			JOIN	search
				ON	tree.parent_nodenum= search.parent_nodenum
			});

		# Old and big enough: record as a result.
		$dbh->do(qq{
			INSERT IGNORE INTO found
			(nodenum,dirname)
			SELECT nodenum,dirname
			FROM 	level
			WHERE	old_found = 1
			AND	treeblocks >= ?
			}, undef,
			$MIN_TREE_BLOCKS);

		# Not old: these become the parents for the next round.
		$dbh->do(qq{ DELETE FROM search });
		$dbh->do(qq{
			INSERT IGNORE INTO search
			(parent_nodenum,dirname)
			SELECT 	nodenum,dirname
			FROM 	level
			WHERE	old_found = 0
			});
	} # while

	save_old_trees($dbh, $site, $root, $age_days, $scanno, $root_sha1, $treetable, $begutc);

	return(0);
} #analyze_age_for_one_scan




















# #####################################################################
sub main_thing {
	# Daemon main loop.
	#
	#   $age_days_str - optional comma-separated list of age thresholds
	#                   in days; defaults to "10,30,90,120,180,365".
	#
	# Takes an exclusive fcntl lock on $lockfile so only one instance
	# runs (a second instance exits quietly with status 0), connects to
	# the database and then, once per second, fetches up to 10 scans with
	# need_analysis > 0 and runs analyze_age_for_one_scan() on each of
	# them for every age threshold.
	#
	# Dies on an invalid age list, if the lock file cannot be opened or
	# locked, or if the database connection fails. Errors inside one
	# polling round are caught, logged to STDERR, and the loop goes on.
	my ($age_days_str) = @_;

	$age_days_str = "10,30,90,120,180,365" if( !$age_days_str);
	#$age_days_str = "90,120,180,365" if( !$age_days_str);

	# Validate the thresholds up front. A zero or empty entry would
	# otherwise only blow up on the assert inside the polling loop, AFTER
	# the scan has already been marked as done, so the remaining ages of
	# that scan would silently never be analysed.
	my @age_days = split(/,/, $age_days_str);
	s/\A\s+|\s+\z//g foreach @age_days;
	die("no age_days values in '$age_days_str'\n") if( !@age_days);
	foreach my $age (@age_days) {
		if( $age !~ /\A\d+(?:\.\d+)?\z/ || $age <= 0 ) {
			die("invalid age_days value '$age' in '$age_days_str'\n");
		}
	} # foreach

	#open(STDOUT,">>$logfile") || die( "logfile $!");
	#open(STDERR,">>$logfile") || die( "logfile $!");

	# lock to make sure we run only once
	# open for append ( i.e. create if needed )
	# The lexical handle lives until the end of this sub, so the lock is
	# held for the whole lifetime of the polling loop.
	open(my $lock_fh, '>>', $lockfile) || die( "lockfile $!");
	my $fs = File::FcntlLock->new( l_type => F_WRLCK );
	if ( ! defined $fs->lock($lock_fh, F_SETLK) ) {
		# unable to lock: another instance holding it is not an error
		exit (0) if ( $fs->error =~ /already locked/ );
		die("unable to lock: $!, " . $fs->error);
	} # if

	my $db       = "ddu";
	my $db_host  = "localhost";
	my $db_user  = "ddu";
	my $db_pw    = "ddu";
	my $dbh = OpenDB($db_host,$db,$db_user,$db_pw) ||
			die("cannot connect to db '$db' on '$db_host' as '$db_user'");

	# From here on every failing statement throws; the eval below catches.
	$dbh->{RaiseError} = 1;
	$dbh->{AutoCommit} = 1;

	# #########################################################################
	#
	# find all scans with need_analysis
	#
	# NOTE(review): no reconnect is attempted here; if the connection is
	# lost, every round will fail and log "CAUGHT" - confirm this is the
	# intended behaviour.
	#
	while(sleep(1)) {
		eval {
			my $scans = $dbh->selectall_arrayref(qq{
				SELECT	scanno,root,site
				FROM	scans
				WHERE	need_analysis > 0
				ORDER	BY need_analysis
				LIMIT 10
				},  {Slice => {}});

			foreach my $scan (@$scans) {
				# Progress output only when stdin is a terminal.
				if( -t 0) { print STDERR "daemon: $scan->{scanno}, $scan->{root}\n";}

				# mark done before: do not hang
				# (a scan that makes the analysis die is thereby
				# not retried on every round)
				$dbh->do(qq{
					UPDATE scans
					SET	need_analysis = 0
					WHERE	scanno = ?
					},undef,
					$scan->{scanno});

				foreach my $age (@age_days) {
					analyze_age_for_one_scan($dbh, $scan->{site},
							$scan->{root}, $age);
				} # foreach
			} # foreach
		}; # eval
		if( $@ ){
			print STDERR "CAUGHT: $@\n";
		} # if
	} # while
	# #########################################################################

	# Only reached when sleep() returns 0 and the loop ends.
	CloseDB($dbh);
	close($lock_fh);
} # main_thing



# Script entry point. main_thing() only looks at its first argument: an
# optional comma-separated list of age thresholds in days.
main_thing(@ARGV);
