Change files to be deleted in batches instead of one-by-one (#23302)

This commit is contained in:
Eugen Rochko 2023-06-26 14:17:41 +02:00 committed by GitHub
parent ae30a60b1f
commit bb4756c823
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 114 additions and 29 deletions

103
app/lib/attachment_batch.rb Normal file
View File

@ -0,0 +1,103 @@
# frozen_string_literal: true
class AttachmentBatch
# Maximum amount of objects you can delete in an S3 API call. It's
# important to remember that this does not correspond to the number
# of records in the batch, since records can have multiple attachments
LIMIT = 1_000
# Attributes generated and maintained by Paperclip (not all of them
# are always used on every class, however)
NULLABLE_ATTRIBUTES = %w(
file_name
content_type
file_size
fingerprint
created_at
updated_at
).freeze
# Styles that are always present even when not explicitly defined
BASE_STYLES = %i(original).freeze
attr_reader :klass, :records, :storage_mode
def initialize(klass, records)
@klass = klass
@records = records
@storage_mode = Paperclip::Attachment.default_options[:storage]
@attachment_names = klass.attachment_definitions.keys
end
def delete
remove_files
batch.delete_all
end
def clear
remove_files
batch.update_all(nullified_attributes) # rubocop:disable Rails/SkipsModelValidations
end
private
def batch
klass.where(id: records.map(&:id))
end
def remove_files
keys = []
logger.debug { "Preparing to delete attachments for #{records.size} records" }
records.each do |record|
@attachment_names.each do |attachment_name|
attachment = record.public_send(attachment_name)
styles = BASE_STYLES | attachment.styles.keys
next if attachment.blank?
styles.each do |style|
case @storage_mode
when :s3
logger.debug { "Adding #{attachment.path(style)} to batch for deletion" }
keys << attachment.style_name_as_path(style)
when :filesystem
logger.debug { "Deleting #{attachment.path(style)}" }
FileUtils.remove_file(attachment.path(style))
when :fog
logger.debug { "Deleting #{attachment.path(style)}" }
attachment.directory.files.new(key: attachment.path(style)).destroy
end
end
end
end
return unless storage_mode == :s3
# We can batch deletes over S3, but there is a limit of how many
# objects can be processed at once, so we have to potentially
# separate them into multiple calls.
keys.each_slice(LIMIT) do |keys_slice|
logger.debug { "Deleting #{keys_slice.size} objects" }
bucket.delete_objects(delete: {
objects: keys_slice.map { |key| { key: key } },
quiet: true,
})
end
end
def bucket
@bucket ||= records.first.public_send(@attachment_names.first).s3_bucket
end
def nullified_attributes
@attachment_names.flat_map { |attachment_name| NULLABLE_ATTRIBUTES.map { |attribute| "#{attachment_name}_#{attribute}" } & klass.column_names }.index_with(nil)
end
def logger
Rails.logger
end
end

View File

@ -15,15 +15,15 @@ class Vacuum::MediaAttachmentsVacuum
private private
def vacuum_cached_files! def vacuum_cached_files!
media_attachments_past_retention_period.find_each do |media_attachment| media_attachments_past_retention_period.find_in_batches do |media_attachments|
media_attachment.file.destroy AttachmentBatch.new(MediaAttachment, media_attachments).clear
media_attachment.thumbnail.destroy
media_attachment.save
end end
end end
def vacuum_orphaned_records! def vacuum_orphaned_records!
orphaned_media_attachments.in_batches.destroy_all orphaned_media_attachments.find_in_batches do |media_attachments|
AttachmentBatch.new(MediaAttachment, media_attachments).delete
end
end end
def media_attachments_past_retention_period def media_attachments_past_retention_period

View File

@ -10,14 +10,6 @@ class ClearDomainMediaService < BaseService
private private
def invalidate_association_caches!(status_ids)
# Normally, associated models of a status are immutable (except for accounts)
# so they are aggressively cached. After updating the media attachments to no
# longer point to a local file, we need to clear the cache to make those
# changes appear in the API and UI
Rails.cache.delete_multi(status_ids.map { |id| "statuses/#{id}" })
end
def clear_media! def clear_media!
clear_account_images! clear_account_images!
clear_account_attachments! clear_account_attachments!
@ -25,31 +17,21 @@ class ClearDomainMediaService < BaseService
end end
def clear_account_images! def clear_account_images!
blocked_domain_accounts.reorder(nil).find_each do |account| blocked_domain_accounts.reorder(nil).find_in_batches do |accounts|
account.avatar.destroy if account.avatar&.exists? AttachmentBatch.new(Account, accounts).clear
account.header.destroy if account.header&.exists?
account.save
end end
end end
def clear_account_attachments! def clear_account_attachments!
media_from_blocked_domain.reorder(nil).find_in_batches do |attachments| media_from_blocked_domain.reorder(nil).find_in_batches do |attachments|
affected_status_ids = [] AttachmentBatch.new(MediaAttachment, attachments).clear
attachments.each do |attachment|
affected_status_ids << attachment.status_id if attachment.status_id.present?
attachment.file.destroy if attachment.file&.exists?
attachment.type = :unknown
attachment.save
end
invalidate_association_caches!(affected_status_ids) unless affected_status_ids.empty?
end end
end end
def clear_emojos! def clear_emojos!
emojis_from_blocked_domains.destroy_all emojis_from_blocked_domains.find_in_batches do |custom_emojis|
AttachmentBatch.new(CustomEmoji, custom_emojis).delete
end
end end
def blocked_domain def blocked_domain