#!/usr/bin/ruby
# Copyright 2007, Daniel Erat
# All rights reserved.

require 'fileutils'
require 'open3'
require 'optparse'
require 'shellwords'
require 'socket'
require 'S3'
require 'thread'
require 'time'

# File whose first line is split on whitespace and passed as the arguments
# to S3::AWSAuthConnection.new (presumably the AWS access key ID and secret
# key -- verify against the S3 library).
AUTH_FILE = "#{ENV['HOME']}/.s3_auth"

##
# Create full and incremental backups using GNU tar.
#
# State is kept under a data directory containing three subdirectories:
# "logs" (one log file per BackupCreator instance), "snapshots" (GNU tar
# listed-incremental snapshot files of completed S3 backups) and "temp"
# (archives and snapshots that are still being produced).
#
class BackupCreator
  # Matches snapshot filenames of the form
  #   <name>_<time>_full.lst
  #   <name>_<time>_inc_<base time>.lst
  # where each time is "%Y%m%d-%H%M%S", optionally followed by the "-N"
  # uniquifier appended by get_unique_time.  Group 1 is the backup's own
  # time (not the base time).
  SNAPSHOT_TIME_RE =
    /\A.*_(\d{8}-\d{6}(?:-\d+)?)_(?:full|inc_\d{8}-\d{6}(?:-\d+)?)\.lst\z/

  ##
  # constructor
  #
  # Creates the data directories if necessary and opens a new log file
  # named after the current time.
  #
  # @param data_dir  if nil, data is stored in $HOME/.backup; otherwise
  #                  this directory is used
  # @param prefix    string prepended to the name of every archive file
  # @param tar_cmd   path to GNU tar binary (if it's not in $PATH)
  #
  def initialize(data_dir=nil, prefix='backup', tar_cmd='tar')
    @data_dir = data_dir || "#{ENV['HOME']}/.backup"
    @prefix = prefix
    @tar_cmd = tar_cmd
    create_dirs_if_needed
    @log_file = File.new(
      "#{log_dir}/#{Time.now.strftime '%Y%m%d-%H%M%S'}.log", 'w')
    @log_to_stderr = false
  end

  # When true, every logged message is also printed to stderr.
  attr_accessor :log_to_stderr

  def log_dir() "#{@data_dir}/logs" end
  def snapshot_dir() "#{@data_dir}/snapshots" end
  def temp_dir() "#{@data_dir}/temp" end
  private :log_dir, :snapshot_dir, :temp_dir

  ##
  # Create the data directory and its subdirectories, including any missing
  # parent directories.
  #
  def create_dirs_if_needed
    FileUtils.mkdir_p [@data_dir, log_dir, snapshot_dir, temp_dir]
  end
  private :create_dirs_if_needed

  ##
  # Log the current time and a message to the log file.  If @log_to_stderr
  # is set, the message is also logged to stderr.
  #
  # @param msg  message to log; will be newline-terminated if needed
  #
  def log(msg)
    # FIXME: print caller?
    line = "#{Time.now.strftime '%H:%M:%S'} #{msg}"
    line << "\n" unless line.end_with?("\n")
    $stderr.print line if @log_to_stderr
    @log_file.print line
    @log_file.flush
  end
  private :log

  ##
  # Log a message prefixed with "FATAL: " and abort by raising.
  #
  # @param msg  description of the failure
  # @raise RuntimeError always
  #
  def fatal(msg)
    log "FATAL: #{msg}"
    raise RuntimeError, msg
  end
  private :fatal

  ##
  # Make a timestamp unique among the stored snapshots of a backup name.
  # If a snapshot for name/time already exists, "-0", "-1", ... is appended
  # until no snapshot matches.
  #
  # @param name  backup name
  # @param time  timestamp string in "%Y%m%d-%H%M%S" format
  #
  # FIXME: worthwhile?
  def get_unique_time(name, time)
    suffix = nil
    until Dir.glob("#{snapshot_dir}/#{name}_#{time}#{suffix}_*").empty?
      suffix = suffix ? "-#{suffix[1..-1].to_i + 1}" : '-0'
    end
    "#{time}#{suffix}"
  end

  ##
  # Get the full path to the last backup's snapshot.
  # Returns the snapshot's filename if found and nil otherwise.  "Last" is
  # the lexically greatest filename, which orders snapshots by timestamp.
  #
  # @param name  backup name
  # @param type  last backup's desired type: :full or :inc
  #
  def find_last_backup_snapshot(name, type)
    pattern =
      case type
      when :full then "#{name}_*_full.lst"
      when :inc  then "#{name}_*_inc_*.lst"
      else fatal "invalid backup type #{type}"
      end
    Dir.glob("#{snapshot_dir}/#{pattern}").max
  end
  private :find_last_backup_snapshot

  ##
  # Given a snapshot filename, extract the snapshot's time.
  # Returns the time as a string in "%Y%m%d-%H%M%S" format (plus the "-N"
  # uniquifier, if the snapshot has one) if found and nil otherwise.
  #
  # @param snapshot  snapshot filename; full path optional
  #
  def get_time_from_snapshot(snapshot)
    snapshot =~ SNAPSHOT_TIME_RE ? Regexp.last_match(1) : nil
  end
  private :get_time_from_snapshot

  ##
  # Build the shell command that writes the (optionally encrypted) archive.
  # Every user-supplied value is shell-escaped; @tar_cmd is inserted as-is.
  #
  def build_backup_command(paths, exclude, snapshot_path, gpg_identity,
                           tar_path)
    exclude_args =
      exclude.map { |p| "--exclude=#{Shellwords.escape(p)}" }.join(' ')
    path_args = paths.map { |p| Shellwords.escape(p) }.join(' ')
    cmd = "#@tar_cmd -c --anchored #{exclude_args} " \
          "--listed-incremental=#{Shellwords.escape(snapshot_path)} " \
          "#{path_args} "
    if gpg_identity
      cmd += "| gpg --encrypt " \
             "--recipient=#{Shellwords.escape(gpg_identity)} "
    end
    cmd + ">#{Shellwords.escape(tar_path)}"
  end
  private :build_backup_command

  ##
  # Run a shell command, copying everything it prints on stderr to the log.
  # Aborts via fatal if the command reports a non-zero exit status.
  #
  def run_and_log(cmd)
    log "running \"#{cmd}\""
    Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thr|
      stdin.close
      stdout.close  # the command redirects its own stdout to the archive
      stderr.each_line { |l| log l }
      # Very old Open3 implementations do not yield a wait thread; in that
      # case the exit status is unavailable and cannot be checked.
      # NOTE(review): for a pipeline this is presumably the status of the
      # last command (gpg) -- confirm against the shell in use.
      status = wait_thr && wait_thr.value
      fatal "command failed: #{status}" if status && !status.success?
    end
  end
  private :run_and_log

  ##
  # Copy an archive to S3 while a helper thread reports progress on stderr
  # once per second.  Aborts via fatal unless S3 answers with success.
  #
  def upload_to_s3(s3, bucket, tar_file, tar_path)
    File.open(tar_path, 'rb') do |f|
      log "copying #{tar_path} to #{bucket}:#{tar_file}"
      size = File.size tar_path
      copy_done = false
      Thread.abort_on_exception = true
      reporter = Thread.new do
        reports = 0
        shown = ""
        loop do
          # Sample the flag first so that one final report is printed
          # after the copy has finished.
          finished = copy_done
          cur = f.tell
          percent = size > 0 ? 100.0 * cur / size : 100.0
          $stderr.write "\b" * shown.length
          shown = "copied #{'%.2f' % percent}% (#{cur}/#{size})"
          $stderr.write shown
          reports += 1
          log shown if reports % 10 == 0  # only every tenth report
          break if finished
          sleep 1
        end
        $stderr.write "\n"
      end
      begin
        r = s3.put bucket, tar_file, f
      ensure
        # Stop the reporter even if the upload raised.
        copy_done = true
        reporter.join
      end
      if not r.http_response.is_a? Net::HTTPSuccess
        fatal "S3 upload failed: got #{r.http_response.code}: " \
              "#{r.http_response.message}"
      end
    end
  end
  private :upload_to_s3

  ##
  # Create a backup.
  #
  # The archive is first written to the temp directory.  When an S3
  # connection is given the archive is uploaded, its snapshot file is moved
  # to the snapshot directory and the temp archive is deleted.  Otherwise
  # the snapshot file is deleted and the archive is moved to dest_dir.
  #
  # @param name          backup name (e.g. the machine's hostname)
  # @param type          backup type: :full, :inc_from_last_full, or
  #                      :inc_from_last_inc (which will create an
  #                      incremental backup from the last full if no
  #                      previous incremental is available)
  # @param paths         paths to be backed up (a string or an array)
  # @param exclude       globs matching absolute paths of files and
  #                      directories to be excluded (a string or an array)
  # @param gpg_identity  GPG identity to use to encrypt tar file.  This is
  #                      currently required: a nil identity is rejected
  # @param s3            Amazon S3 connection to use to copy archive.  if
  #                      nil, archive will instead be copied to dest_dir
  # @param bucket        Amazon S3 bucket name
  # @param dest_dir      local directory where archive will be copied if
  #                      S3 isn't being used
  # @raise RuntimeError  on invalid arguments, if no base snapshot exists
  #                      for an incremental backup, or if the archive
  #                      command or the S3 upload fails
  #
  def backup(name, type, paths, exclude, gpg_identity, s3, bucket, dest_dir)
    fatal "missing GPG identity" if not gpg_identity
    if s3
      fatal "S3 bucket name needed" if not bucket
    else
      fatal "no S3 connection given, but dest dir not supplied" if not dest_dir
    end
    paths = [paths] if paths.is_a?(String)
    exclude = [exclude] if exclude.is_a?(String)
    exclude ||= []
    fatal "no paths supplied" if paths.nil? or paths.empty?

    time = get_unique_time name, Time.now.strftime('%Y%m%d-%H%M%S')
    log "creating backup \"#{name}\" of type #{type} at #{time}"
    log "paths are #{paths.join ', '}"
    log "exclusions are #{exclude.join ', '}"
    log "additionally excluding temp directory #{temp_dir}"
    exclude = exclude + [temp_dir]  # never mutate the caller's array

    old_snapshot_path = nil
    case type
    when :full
      full_name = "#{name}_#{time}_full"
    when :inc_from_last_full, :inc_from_last_inc
      old_snapshot_path = find_last_backup_snapshot(
        name, type == :inc_from_last_full ? :full : :inc)
      if not old_snapshot_path and type == :inc_from_last_inc
        # If we didn't find an incremental backup, we'll look for a full
        # backup -- better than nothing.
        old_snapshot_path = find_last_backup_snapshot name, :full
      end
      old_snapshot_path or fatal "no previous backup"
      log "inc backup will be based on #{old_snapshot_path}"
      last_time = get_time_from_snapshot(old_snapshot_path) or
        fatal "couldn't get time from #{old_snapshot_path}"
      full_name = "#{name}_#{time}_inc_#{last_time}"
    else
      fatal "invalid backup type #{type}"
    end
    log "backup's full name is #{full_name}"

    # tar updates the snapshot file in place, so work on a copy of the base
    # snapshot and leave the original untouched.
    snapshot_path = "#{temp_dir}/#{full_name}.lst"
    FileUtils.cp(old_snapshot_path, snapshot_path) if old_snapshot_path

    tar_file = "#{@prefix}_#{full_name}.tar"
    tar_file += '.gpg' if gpg_identity
    tar_path = "#{temp_dir}/#{tar_file}"
    log "backup will be written to #{tar_path}"

    run_and_log build_backup_command(
      paths, exclude, snapshot_path, gpg_identity, tar_path)

    if s3
      upload_to_s3 s3, bucket, tar_file, tar_path

      # Only a successfully uploaded backup may serve as the base of later
      # incremental backups, so the snapshot is promoted last.
      dest = "#{snapshot_dir}/#{full_name}.lst"
      log "renaming #{snapshot_path} to #{dest}"
      FileUtils.mv snapshot_path, dest

      log "deleting temp archive #{tar_path}"
      File.delete tar_path
    else
      log "deleting file list #{snapshot_path}"
      File.delete snapshot_path

      # FileUtils.mv also works when dest_dir is on another filesystem.
      dest = "#{dest_dir}/#{tar_file}"
      log "renaming #{tar_path} to #{dest}"
      FileUtils.mv tar_path, dest
    end
  end
end

##
# Command-line entry point: parse the options, open an S3 connection if a
# bucket was requested, and run a single backup of the remaining arguments.
#
def main
  bucket = nil
  dest_dir = nil
  exclude = []
  type = :full
  gpg = nil
  name = Socket.gethostname
  s3 = nil

  o = OptionParser.new
  # NOTE(review): nothing in this file creates the bucket, despite what the
  # help text below says -- confirm whether the S3 library does.
  o.on('-b', '--bucket=BUCKET',
       'S3 bucket name where backup will be',
       'copied (will be created if it doesn\'t ',
       'exist)') { |v| bucket = v }
  o.on('-d', '--destination_dir=DIR',
       'if --bucket is unsupplied, ',
       'the archive will be copied here') { |v| dest_dir = v }
  o.on('-e', '--exclude=GLOB',
       'glob of files to exclude from backup') { |v| exclude << v }
  o.on('-f', '--full',
       'perform a full backup (default)') { |v| type = :full }
  o.on('-g', '--gpg_identity=ID',
       'ID of GPG public key to use when',
       'encrypting archive') { |v| gpg = v }
  o.on('-i', '--incremental',
       'perform an incremental backup based',
       'on the last incremental backup') { |v| type = :inc_from_last_inc }
  o.on('-j', '--incremental_from_full',
       'perform an incremental backup based ',
       'on the last full backup') { |v| type = :inc_from_last_full }
  o.on('-n', '--name=NAME',
       'backup name; used to name the archive') { |v| name = v }
  # Whatever is left after option parsing is the list of paths to back up.
  paths = o.parse(*ARGV)

  if bucket
    credentials = File.open(AUTH_FILE) { |f| f.readline.split }
    s3 = S3::AWSAuthConnection.new(*credentials)
  end

  BackupCreator.new.backup name, type, paths, exclude, gpg, s3, bucket,
                           dest_dir
end

main