From 5b1eb09d546120cb456990e15a740d994011013f Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 24 Jan 2024 10:38:10 +0100 Subject: [PATCH] Add annual reports for accounts (#28693) --- .../api/v1/annual_reports_controller.rb | 30 +++++++++ app/lib/annual_report.rb | 43 +++++++++++++ app/lib/annual_report/archetype.rb | 49 +++++++++++++++ .../commonly_interacted_with_accounts.rb | 22 +++++++ .../annual_report/most_reblogged_accounts.rb | 22 +++++++ app/lib/annual_report/most_used_apps.rb | 22 +++++++ app/lib/annual_report/percentiles.rb | 62 +++++++++++++++++++ app/lib/annual_report/source.rb | 16 +++++ app/lib/annual_report/time_series.rb | 30 +++++++++ app/lib/annual_report/top_hashtags.rb | 22 +++++++ app/lib/annual_report/top_statuses.rb | 21 +++++++ app/lib/annual_report/type_distribution.rb | 20 ++++++ app/models/generated_annual_report.rb | 37 +++++++++++ app/presenters/annual_reports_presenter.rb | 23 +++++++ .../rest/annual_report_serializer.rb | 5 ++ .../rest/annual_reports_serializer.rb | 7 +++ app/workers/generate_annual_report_worker.rb | 11 ++++ app/workers/scheduler/indexing_scheduler.rb | 2 + config/routes/api.rb | 6 ++ ...1033014_create_generated_annual_reports.rb | 17 +++++ db/schema.rb | 14 ++++- 21 files changed, 480 insertions(+), 1 deletion(-) create mode 100644 app/controllers/api/v1/annual_reports_controller.rb create mode 100644 app/lib/annual_report.rb create mode 100644 app/lib/annual_report/archetype.rb create mode 100644 app/lib/annual_report/commonly_interacted_with_accounts.rb create mode 100644 app/lib/annual_report/most_reblogged_accounts.rb create mode 100644 app/lib/annual_report/most_used_apps.rb create mode 100644 app/lib/annual_report/percentiles.rb create mode 100644 app/lib/annual_report/source.rb create mode 100644 app/lib/annual_report/time_series.rb create mode 100644 app/lib/annual_report/top_hashtags.rb create mode 100644 app/lib/annual_report/top_statuses.rb create mode 100644 app/lib/annual_report/type_distribution.rb create mode 100644 app/models/generated_annual_report.rb create mode 100644 app/presenters/annual_reports_presenter.rb create mode 100644 app/serializers/rest/annual_report_serializer.rb create mode 100644 app/serializers/rest/annual_reports_serializer.rb create mode 100644 app/workers/generate_annual_report_worker.rb create mode 100644 db/migrate/20240111033014_create_generated_annual_reports.rb diff --git a/app/controllers/api/v1/annual_reports_controller.rb b/app/controllers/api/v1/annual_reports_controller.rb new file mode 100644 index 000000000..9bc8e68ac --- /dev/null +++ b/app/controllers/api/v1/annual_reports_controller.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +class Api::V1::AnnualReportsController < Api::BaseController + before_action -> { doorkeeper_authorize! :read, :'read:accounts' }, only: :index + before_action -> { doorkeeper_authorize! :write, :'write:accounts' }, except: :index + before_action :require_user! + before_action :set_annual_report, except: :index + + def index + with_read_replica do + @presenter = AnnualReportsPresenter.new(GeneratedAnnualReport.where(account_id: current_account.id).pending) + @relationships = StatusRelationshipsPresenter.new(@presenter.statuses, current_account.id) + end + + render json: @presenter, + serializer: REST::AnnualReportsSerializer, + relationships: @relationships + end + + def read + @annual_report.view! + render_empty + end + + private + + def set_annual_report + @annual_report = GeneratedAnnualReport.find_by!(account_id: current_account.id, year: params[:id]) + end +end diff --git a/app/lib/annual_report.rb b/app/lib/annual_report.rb new file mode 100644 index 000000000..cf4297f2a --- /dev/null +++ b/app/lib/annual_report.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +class AnnualReport + include DatabaseHelper + + SOURCES = [ + AnnualReport::Archetype, + AnnualReport::TypeDistribution, + AnnualReport::TopStatuses, + AnnualReport::MostUsedApps, + AnnualReport::CommonlyInteractedWithAccounts, + AnnualReport::TimeSeries, + AnnualReport::TopHashtags, + AnnualReport::MostRebloggedAccounts, + AnnualReport::Percentiles, + ].freeze + + SCHEMA = 1 + + def initialize(account, year) + @account = account + @year = year + end + + def generate + return if GeneratedAnnualReport.exists?(account: @account, year: @year) + + GeneratedAnnualReport.create( + account: @account, + year: @year, + schema_version: SCHEMA, + data: data + ) + end + + private + + def data + with_read_replica do + SOURCES.each_with_object({}) { |klass, hsh| hsh.merge!(klass.new(@account, @year).generate) } + end + end +end diff --git a/app/lib/annual_report/archetype.rb b/app/lib/annual_report/archetype.rb new file mode 100644 index 000000000..ea9ef366d --- /dev/null +++ b/app/lib/annual_report/archetype.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +class AnnualReport::Archetype < AnnualReport::Source + # Average number of posts (including replies and reblogs) made by + # each active user in a single year (2023) + AVERAGE_PER_YEAR = 113 + + def generate + { + archetype: archetype, + } + end + + private + + def archetype + if (standalone_count + replies_count + reblogs_count) < AVERAGE_PER_YEAR + :lurker + elsif reblogs_count > (standalone_count * 2) + :booster + elsif polls_count > (standalone_count * 0.1) # standalone_count includes posts with polls + :pollster + elsif replies_count > (standalone_count * 2) + :replier + else + :oracle + end + end + + def polls_count + @polls_count ||= base_scope.where.not(poll_id: nil).count + end + + def reblogs_count + @reblogs_count ||= base_scope.where.not(reblog_of_id: nil).count + end + + def replies_count + @replies_count ||= base_scope.where.not(in_reply_to_id: nil).where.not(in_reply_to_account_id: @account.id).count + end + + def standalone_count + @standalone_count ||= base_scope.without_replies.without_reblogs.count + end + + def base_scope + @account.statuses.where(id: year_as_snowflake_range) + end +end diff --git a/app/lib/annual_report/commonly_interacted_with_accounts.rb b/app/lib/annual_report/commonly_interacted_with_accounts.rb new file mode 100644 index 000000000..af5e854c2 --- /dev/null +++ b/app/lib/annual_report/commonly_interacted_with_accounts.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class AnnualReport::CommonlyInteractedWithAccounts < AnnualReport::Source + SET_SIZE = 40 + + def generate + { + commonly_interacted_with_accounts: commonly_interacted_with_accounts.map do |(account_id, count)| + { + account_id: account_id, + count: count, + } + end, + } + end + + private + + def commonly_interacted_with_accounts + @account.statuses.reorder(nil).where(id: year_as_snowflake_range).where.not(in_reply_to_account_id: @account.id).group(:in_reply_to_account_id).having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('in_reply_to_account_id, count(*) AS total')) + end +end diff --git a/app/lib/annual_report/most_reblogged_accounts.rb b/app/lib/annual_report/most_reblogged_accounts.rb new file mode 100644 index 000000000..e3e8a7c90 --- /dev/null +++ b/app/lib/annual_report/most_reblogged_accounts.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class AnnualReport::MostRebloggedAccounts < AnnualReport::Source + SET_SIZE = 10 + + def generate + { + most_reblogged_accounts: most_reblogged_accounts.map do |(account_id, count)| + { + account_id: account_id, + count: count, + } + end, + } + end + + private + + def most_reblogged_accounts + @account.statuses.reorder(nil).where(id: year_as_snowflake_range).where.not(reblog_of_id: nil).joins(reblog: :account).group('accounts.id').having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('accounts.id, count(*) as total')) + end +end diff --git a/app/lib/annual_report/most_used_apps.rb b/app/lib/annual_report/most_used_apps.rb new file mode 100644 index 000000000..85ff1ff86 --- /dev/null +++ b/app/lib/annual_report/most_used_apps.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class AnnualReport::MostUsedApps < AnnualReport::Source + SET_SIZE = 10 + + def generate + { + most_used_apps: most_used_apps.map do |(name, count)| + { + name: name, + count: count, + } + end, + } + end + + private + + def most_used_apps + @account.statuses.reorder(nil).where(id: year_as_snowflake_range).joins(:application).group('oauth_applications.name').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('oauth_applications.name, count(*) as total')) + end +end diff --git a/app/lib/annual_report/percentiles.rb b/app/lib/annual_report/percentiles.rb new file mode 100644 index 000000000..9fe4698ee --- /dev/null +++ b/app/lib/annual_report/percentiles.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +class AnnualReport::Percentiles < AnnualReport::Source + def generate + { + percentiles: { + followers: (total_with_fewer_followers / (total_with_any_followers + 1.0)) * 100, + statuses: (total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100, + }, + } + end + + private + + def followers_gained + @followers_gained ||= @account.passive_relationships.where("date_part('year', follows.created_at) = ?", @year).count + end + + def statuses_created + @statuses_created ||= @account.statuses.where(id: year_as_snowflake_range).count + end + + def total_with_fewer_followers + @total_with_fewer_followers ||= Follow.find_by_sql([<<~SQL.squish, { year: @year, comparison: followers_gained }]).first.total + WITH tmp0 AS ( + SELECT follows.target_account_id + FROM follows + INNER JOIN accounts ON accounts.id = follows.target_account_id + WHERE date_part('year', follows.created_at) = :year + AND accounts.domain IS NULL + GROUP BY follows.target_account_id + HAVING COUNT(*) < :comparison + ) + SELECT count(*) AS total + FROM tmp0 + SQL + end + + def total_with_fewer_statuses + @total_with_fewer_statuses ||= Status.find_by_sql([<<~SQL.squish, { comparison: statuses_created, min_id: year_as_snowflake_range.first, max_id: year_as_snowflake_range.last }]).first.total + WITH tmp0 AS ( + SELECT statuses.account_id + FROM statuses + INNER JOIN accounts ON accounts.id = statuses.account_id + WHERE statuses.id BETWEEN :min_id AND :max_id + AND accounts.domain IS NULL + GROUP BY statuses.account_id + HAVING count(*) < :comparison + ) + SELECT count(*) AS total + FROM tmp0 + SQL + end + + def total_with_any_followers + @total_with_any_followers ||= Follow.where("date_part('year', follows.created_at) = ?", @year).joins(:target_account).merge(Account.local).count('distinct follows.target_account_id') + end + + def total_with_any_statuses + @total_with_any_statuses ||= Status.where(id: year_as_snowflake_range).joins(:account).merge(Account.local).count('distinct statuses.account_id') + end +end diff --git a/app/lib/annual_report/source.rb b/app/lib/annual_report/source.rb new file mode 100644 index 000000000..1ccb62267 --- /dev/null +++ b/app/lib/annual_report/source.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class AnnualReport::Source + attr_reader :account, :year + + def initialize(account, year) + @account = account + @year = year + end + + protected + + def year_as_snowflake_range + (Mastodon::Snowflake.id_at(DateTime.new(year, 1, 1))..Mastodon::Snowflake.id_at(DateTime.new(year, 12, 31))) + end +end diff --git a/app/lib/annual_report/time_series.rb b/app/lib/annual_report/time_series.rb new file mode 100644 index 000000000..a144bac0d --- /dev/null +++ b/app/lib/annual_report/time_series.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +class AnnualReport::TimeSeries < AnnualReport::Source + def generate + { + time_series: (1..12).map do |month| + { + month: month, + statuses: statuses_per_month[month] || 0, + following: following_per_month[month] || 0, + followers: followers_per_month[month] || 0, + } + end, + } + end + + private + + def statuses_per_month + @statuses_per_month ||= @account.statuses.reorder(nil).where(id: year_as_snowflake_range).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h + end + + def following_per_month + @following_per_month ||= @account.active_relationships.where("date_part('year', created_at) = ?", @year).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h + end + + def followers_per_month + @followers_per_month ||= @account.passive_relationships.where("date_part('year', created_at) = ?", @year).group(:period).pluck(Arel.sql("date_part('month', created_at)::int AS period, count(*)")).to_h + end +end diff --git a/app/lib/annual_report/top_hashtags.rb b/app/lib/annual_report/top_hashtags.rb new file mode 100644 index 000000000..488dacb1b --- /dev/null +++ b/app/lib/annual_report/top_hashtags.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class AnnualReport::TopHashtags < AnnualReport::Source + SET_SIZE = 40 + + def generate + { + top_hashtags: top_hashtags.map do |(name, count)| + { + name: name, + count: count, + } + end, + } + end + + private + + def top_hashtags + Tag.joins(:statuses).where(statuses: { id: @account.statuses.where(id: year_as_snowflake_range).reorder(nil).select(:id) }).group(:id).having('count(*) > 1').order(total: :desc).limit(SET_SIZE).pluck(Arel.sql('COALESCE(tags.display_name, tags.name), count(*) AS total')) + end +end diff --git a/app/lib/annual_report/top_statuses.rb b/app/lib/annual_report/top_statuses.rb new file mode 100644 index 000000000..112e5591c --- /dev/null +++ b/app/lib/annual_report/top_statuses.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +class AnnualReport::TopStatuses < AnnualReport::Source + def generate + top_reblogs = base_scope.order(reblogs_count: :desc).first&.id + top_favourites = base_scope.where.not(id: top_reblogs).order(favourites_count: :desc).first&.id + top_replies = base_scope.where.not(id: [top_reblogs, top_favourites]).order(replies_count: :desc).first&.id + + { + top_statuses: { + by_reblogs: top_reblogs, + by_favourites: top_favourites, + by_replies: top_replies, + }, + } + end + + def base_scope + @account.statuses.with_public_visibility.joins(:status_stat).where(id: year_as_snowflake_range).reorder(nil) + end +end diff --git a/app/lib/annual_report/type_distribution.rb b/app/lib/annual_report/type_distribution.rb new file mode 100644 index 000000000..fc12a6f1f --- /dev/null +++ b/app/lib/annual_report/type_distribution.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class AnnualReport::TypeDistribution < AnnualReport::Source + def generate + { + type_distribution: { + total: base_scope.count, + reblogs: base_scope.where.not(reblog_of_id: nil).count, + replies: base_scope.where.not(in_reply_to_id: nil).where.not(in_reply_to_account_id: @account.id).count, + standalone: base_scope.without_replies.without_reblogs.count, + }, + } + end + + private + + def base_scope + @account.statuses.where(id: year_as_snowflake_range) + end +end diff --git a/app/models/generated_annual_report.rb b/app/models/generated_annual_report.rb new file mode 100644 index 000000000..43c97d710 --- /dev/null +++ b/app/models/generated_annual_report.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: generated_annual_reports +# +# id :bigint(8) not null, primary key +# account_id :bigint(8) not null +# year :integer not null +# data :jsonb not null +# schema_version :integer not null +# viewed_at :datetime +# created_at :datetime not null +# updated_at :datetime not null +# + +class GeneratedAnnualReport < ApplicationRecord + belongs_to :account + + scope :pending, -> { where(viewed_at: nil) } + + def viewed? + viewed_at.present? + end + + def view! + update!(viewed_at: Time.now.utc) + end + + def account_ids + data['most_reblogged_accounts'].pluck('account_id') + data['commonly_interacted_with_accounts'].pluck('account_id') + end + + def status_ids + data['top_statuses'].values + end +end diff --git a/app/presenters/annual_reports_presenter.rb b/app/presenters/annual_reports_presenter.rb new file mode 100644 index 000000000..001e1d37b --- /dev/null +++ b/app/presenters/annual_reports_presenter.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +class AnnualReportsPresenter + alias read_attribute_for_serialization send + + attr_reader :annual_reports + + def initialize(annual_reports) + @annual_reports = annual_reports + end + + def accounts + @accounts ||= Account.where(id: @annual_reports.flat_map(&:account_ids)).includes(:account_stat, :moved_to_account, user: :role) + end + + def statuses + @statuses ||= Status.where(id: @annual_reports.flat_map(&:status_ids)).with_includes + end + + def self.model_name + @model_name ||= ActiveModel::Name.new(self) + end +end diff --git a/app/serializers/rest/annual_report_serializer.rb b/app/serializers/rest/annual_report_serializer.rb new file mode 100644 index 000000000..1fb5ddb5c --- /dev/null +++ b/app/serializers/rest/annual_report_serializer.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +class REST::AnnualReportSerializer < ActiveModel::Serializer + attributes :year, :data, :schema_version +end diff --git a/app/serializers/rest/annual_reports_serializer.rb b/app/serializers/rest/annual_reports_serializer.rb new file mode 100644 index 000000000..ea9572be1 --- /dev/null +++ b/app/serializers/rest/annual_reports_serializer.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class REST::AnnualReportsSerializer < ActiveModel::Serializer + has_many :annual_reports, serializer: REST::AnnualReportSerializer + has_many :accounts, serializer: REST::AccountSerializer + has_many :statuses, serializer: REST::StatusSerializer +end diff --git a/app/workers/generate_annual_report_worker.rb b/app/workers/generate_annual_report_worker.rb new file mode 100644 index 000000000..7094c1ab9 --- /dev/null +++ b/app/workers/generate_annual_report_worker.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class GenerateAnnualReportWorker + include Sidekiq::Worker + + def perform(account_id, year) + AnnualReport.new(Account.find(account_id), year).generate + rescue ActiveRecord::RecordNotFound, ActiveRecord::RecordNotUnique + true + end +end diff --git a/app/workers/scheduler/indexing_scheduler.rb b/app/workers/scheduler/indexing_scheduler.rb index 5c985e25a..f52d0141d 100644 --- a/app/workers/scheduler/indexing_scheduler.rb +++ b/app/workers/scheduler/indexing_scheduler.rb @@ -24,6 +24,8 @@ class Scheduler::IndexingScheduler end end + private + def indexes [AccountsIndex, TagsIndex, PublicStatusesIndex, StatusesIndex] end diff --git a/config/routes/api.rb b/config/routes/api.rb index 0fe9f69ab..853a44e0e 100644 --- a/config/routes/api.rb +++ b/config/routes/api.rb @@ -51,6 +51,12 @@ namespace :api, format: false do resources :scheduled_statuses, only: [:index, :show, :update, :destroy] resources :preferences, only: [:index] + resources :annual_reports, only: [:index] do + member do + post :read + end + end + resources :announcements, only: [:index] do scope module: :announcements do resources :reactions, only: [:update, :destroy] diff --git a/db/migrate/20240111033014_create_generated_annual_reports.rb b/db/migrate/20240111033014_create_generated_annual_reports.rb new file mode 100644 index 000000000..2a755fb14 --- /dev/null +++ b/db/migrate/20240111033014_create_generated_annual_reports.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +class CreateGeneratedAnnualReports < ActiveRecord::Migration[7.1] + def change + create_table :generated_annual_reports do |t| + t.belongs_to :account, null: false, foreign_key: { on_cascade: :delete }, index: false + t.integer :year, null: false + t.jsonb :data, null: false + t.integer :schema_version, null: false + t.datetime :viewed_at + + t.timestamps + end + + add_index :generated_annual_reports, [:account_id, :year], unique: true + end +end diff --git a/db/schema.rb b/db/schema.rb index cbe54c1db..50f4e7189 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2024_01_09_103012) do +ActiveRecord::Schema[7.1].define(version: 2024_01_11_033014) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -516,6 +516,17 @@ ActiveRecord::Schema[7.1].define(version: 2024_01_09_103012) do t.index ["target_account_id"], name: "index_follows_on_target_account_id" end + create_table "generated_annual_reports", force: :cascade do |t| + t.bigint "account_id", null: false + t.integer "year", null: false + t.jsonb "data", null: false + t.integer "schema_version", null: false + t.datetime "viewed_at" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["account_id", "year"], name: "index_generated_annual_reports_on_account_id_and_year", unique: true + end + create_table "identities", force: :cascade do |t| t.string "provider", default: "", null: false t.string "uid", default: "", null: false @@ -1226,6 +1237,7 @@ ActiveRecord::Schema[7.1].define(version: 2024_01_09_103012) do add_foreign_key "follow_requests", "accounts", name: "fk_76d644b0e7", on_delete: :cascade add_foreign_key "follows", "accounts", column: "target_account_id", name: "fk_745ca29eac", on_delete: :cascade add_foreign_key "follows", "accounts", name: "fk_32ed1b5560", on_delete: :cascade + add_foreign_key "generated_annual_reports", "accounts" add_foreign_key "identities", "users", name: "fk_bea040f377", on_delete: :cascade add_foreign_key "imports", "accounts", name: "fk_6db1b6e408", on_delete: :cascade add_foreign_key "invites", "users", on_delete: :cascade