From 5a10cead6bb333126f65a9ea9642819fe4ad62af Mon Sep 17 00:00:00 2001
From: Hugo Peixoto
Date: Fri, 28 Jul 2023 03:01:40 +0100
Subject: [PATCH] Coisas a funcionar

---
 .gitignore     |   2 +
 database.rb    |  30 ++++++++++++++
 gems.locked    |  47 ++++++++++++++++++++++
 gems.rb        |   9 +++++
 import-csv.rb  |  39 ++++++++++++++++++
 index.html.erb |  20 ++++++++++
 main.rb        |  58 +++++++++++++++++++++++++++
 models.rb      |  13 ++++++
 saca-sapos     | 106 +++++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 324 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 database.rb
 create mode 100644 gems.locked
 create mode 100644 gems.rb
 create mode 100644 import-csv.rb
 create mode 100644 index.html.erb
 create mode 100644 main.rb
 create mode 100644 models.rb
 create mode 100755 saca-sapos

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8386a83
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/videos/
+/videos.db
diff --git a/database.rb b/database.rb
new file mode 100644
index 0000000..31e8d04
--- /dev/null
+++ b/database.rb
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# SPDX-FileCopyrightText: 2023 Hugo Peixoto
+
+# Connects ActiveRecord and creates the `videos` and `downloads` tables
+# when they do not exist yet. Loaded by main.rb and import-csv.rb.
+
+require 'active_record'
+require 'sqlite3'
+
+# No explicit configuration is passed here.
+# NOTE(review): presumably the target database comes from the DATABASE_URL
+# environment variable -- confirm against the deployment setup.
+ActiveRecord::Base.establish_connection
+ActiveRecord::Base.logger = Logger.new(STDOUT)
+ActiveRecord::Base.logger.level = :warn
+ActiveRecord::Schema.define do
+  # One row per known video; `metadata` keeps the JSON record it was imported from.
+  create_table :videos, if_not_exists: true do |t|
+    t.string :randname, index: { unique: true }
+    t.json :metadata
+  end
+
+  # One row per download reported by a client.
+  create_table :downloads, if_not_exists: true do |t|
+    t.references :video, foreign_key: true
+    t.bigint :size
+    t.string :sha256
+    t.string :email
+  end
+end
diff --git a/gems.locked b/gems.locked
new file mode 100644
index 0000000..6fa5e90
--- /dev/null
+++ b/gems.locked
@@ -0,0 +1,47 @@
+GEM
+  remote: https://rubygems.org/
+  specs:
+    activemodel (7.0.6)
+      activesupport (= 7.0.6)
+    activerecord (7.0.6)
+      activemodel (= 7.0.6)
+      activesupport (= 7.0.6)
+    activesupport (7.0.6)
+      concurrent-ruby (~> 1.0, >= 1.0.2)
+      i18n (>= 1.6, < 2)
+      minitest (>= 5.1)
+      tzinfo (~> 2.0)
+    concurrent-ruby (1.2.2)
+    i18n (1.14.1)
+      concurrent-ruby (~> 1.0)
+    minitest (5.19.0)
+    mustermann (3.0.0)
+      ruby2_keywords (~> 0.0.1)
+    nio4r (2.5.9)
+    puma (6.3.0)
+      nio4r (~> 2.0)
+    rack (2.2.7)
+    rack-protection (3.0.6)
+      rack
+    ruby2_keywords (0.0.5)
+    sinatra (3.0.6)
+      mustermann (~> 3.0)
+      rack (~> 2.2, >= 2.2.4)
+      rack-protection (= 3.0.6)
+      tilt (~> 2.0)
+    sqlite3 (1.6.3-x86_64-linux)
+    tilt (2.2.0)
+    tzinfo (2.0.6)
+      concurrent-ruby (~> 1.0)
+
+PLATFORMS
+  x86_64-linux
+
+DEPENDENCIES
+  activerecord
+  puma
+  sinatra
+  sqlite3
+
+BUNDLED WITH
+   2.4.16
diff --git a/gems.rb b/gems.rb
new file mode 100644
index 0000000..0e6828b
--- /dev/null
+++ b/gems.rb
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# SPDX-FileCopyrightText: 2023 Hugo Peixoto
+
+source 'https://rubygems.org'
+
+gem 'activerecord'
+gem 'puma'
+gem 'sinatra'
+gem 'sqlite3'
diff --git a/import-csv.rb b/import-csv.rb
new file mode 100644
index 0000000..45d227f
--- /dev/null
+++ b/import-csv.rb
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# SPDX-FileCopyrightText: 2023 Hugo Peixoto
+
+# Imports videos from a file that holds one JSON document per line.
+# Usage: ruby import-csv.rb FILE
+# A video whose "randname" is already stored is left untouched, so the
+# same file can be imported again.
+
+require 'json'
+require './database.rb'
+require './models.rb'
+
+abort "usage: ruby import-csv.rb FILE" if ARGV.empty?
+
+processed = 0
+batch_size = 2000
+
+# File.foreach reads the file line by line instead of loading all of it
+# into memory before the first batch.
+File.foreach(ARGV[0]).each_slice(batch_size) do |lines|
+  start = Time.now
+
+  # One transaction per batch.
+  ActiveRecord::Base.transaction do
+    lines.each do |line|
+      next if line.strip.empty?
+
+      metadata = JSON.parse(line)
+      Video.where(randname: metadata["randname"]).first_or_create({ metadata: metadata })
+    end
+  end
+
+  finish = Time.now
+
+  # Count the lines actually read: the final batch is usually shorter than
+  # batch_size, so adding batch_size would overstate the progress.
+  processed += lines.size
+  puts "done #{processed} (#{finish - start})"
+end
diff --git a/index.html.erb b/index.html.erb
new file mode 100644
index 0000000..e32336f
--- /dev/null
+++ b/index.html.erb
@@ -0,0 +1,20 @@
+

Vamos arquivar o SAPO Vídeos

+ +
    +
  1. Faz download do programa
  2. +
  3. + Corre o programa a partir de uma pasta com espaço suficiente para descarregar o número de vídeos especificado: +
    +    $ cd /pasta/com/bastante/espaço
    +    $ ./saca-sapos download 1000 hugo.peixoto@gmail.com
    +    
    +
  4. +
  5. Quando o programa acabar, já está! Corre novamente para descarregar mais uns quantos vídeos.
  6. +
  7. Eventualmente vamos contactar-te a pedir que nos mandes os vídeos que descarregaste
  8. +
+ +

Estatísticas

+ +

Número de vídeos descarregados: <%= stats[:downloads]["total"].to_i %>

+

Tamanho total: <%= stats[:downloads]["bytes"].to_i / 1024 / 1024 %> MiB

+

Tamanho médio: <%# with no downloads yet "total" is 0; show 0 rather than raise ZeroDivisionError %><%= stats[:downloads]["total"].to_i.zero? ? 0 : stats[:downloads]["bytes"].to_i / 1024 / 1024 / stats[:downloads]["total"].to_i %> MiB/vídeo

diff --git a/main.rb b/main.rb
new file mode 100644
index 0000000..2bed7a9
--- /dev/null
+++ b/main.rb
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# SPDX-FileCopyrightText: 2023 Hugo Peixoto
+
+# Sinatra application for the archiving effort: serves the landing page,
+# hands out random video names to clients and records the downloads they
+# report back.
+
+require 'json'
+require 'sinatra'
+
+require './database.rb'
+require './models.rb'
+
+# Largest number of video names one /videos.json request may ask for; also
+# advertised to clients through /config.json.
+MAX_AMOUNT = 100_000
+
+# Landing page with instructions and aggregate download statistics.
+get '/' do
+  # The subquery groups downloads by video, so a video that was downloaded
+  # several times is counted once.
+  downloads = ActiveRecord::Base.connection.execute("
+    SELECT
+      COUNT(1) AS total,
+      SUM(size) AS bytes
+    FROM (select size from downloads group by video_id) as x;
+  ")[0]
+
+  stats = { downloads: downloads }
+
+  ERB
+    .new(File.read("index.html.erb"), trim_mode: "<>-")
+    .result_with_hash(stats: stats)
+end
+
+# Responds with {"videos": [...]} holding `amount` randomly chosen video
+# names, or {"error": "greedy"} when more than MAX_AMOUNT are requested.
+get '/videos.json' do
+  content_type 'application/json'
+
+  # Clamp negative values to 0: SQLite treats a negative LIMIT as "no upper
+  # bound", so ?amount=-1 would return every name and bypass MAX_AMOUNT.
+  amount = [params['amount'].to_i, 0].max
+
+  if amount > MAX_AMOUNT
+    { error: "greedy" }.to_json
+  else
+    { videos: Video.order("RANDOM()").limit(amount).pluck(:randname) }.to_json
+  end
+end
+
+# Records a download reported by a client. The body is a JSON object with
+# "video_id" (a video's randname), "size", "sha256" and "email".
+# Responds 400 when the body is not a JSON object and 404 when no video has
+# that randname.
+post '/video' do
+  content_type 'application/json'
+
+  begin
+    data = JSON.parse(request.body.read)
+  rescue JSON::ParserError
+    halt 400, { error: "invalid json" }.to_json
+  end
+  halt 400, { error: "invalid json" }.to_json unless data.is_a?(Hash)
+
+  video = Video.find_by(randname: data["video_id"])
+  halt 404, { error: "unknown video" }.to_json unless video
+
+  Download.create!(
+    video: video,
+    size: data["size"],
+    sha256: data["sha256"],
+    email: data["email"],
+  )
+
+  { status: 'ok' }.to_json
+end
+
+# Settings the saca-sapos client fetches before it starts downloading.
+get '/config.json' do
+  content_type 'application/json'
+  {
+    videos_url: "https://sapo.pxto.pt/videos.json",
+    upload_url: "https://sapo.pxto.pt/video",
+    max_amount: MAX_AMOUNT,
+  }.to_json
+end
diff --git a/models.rb b/models.rb
new file mode 100644
index 0000000..dbc9862
--- /dev/null
+++ b/models.rb
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# SPDX-FileCopyrightText: 2023 Hugo Peixoto
+
+require 'active_record'
+require 'date'
+
+# A video to be archived; has many downloads.
+class Video < ActiveRecord::Base
+  has_many :downloads
+end
+
+# One download of a video.
+class Download < ActiveRecord::Base
+  belongs_to :video
+end
diff --git a/saca-sapos b/saca-sapos
new file mode 100755
index 0000000..cfd18b9
--- /dev/null
+++ b/saca-sapos
@@ -0,0 +1,179 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: AGPL-3.0-only
+# SPDX-FileCopyrightText: 2023 Hugo Peixoto
+
+# Client for the SAPO Vídeos archiving effort.
+#
+# Usage:
+#   saca-sapos download AMOUNT EMAIL   download AMOUNT random videos into ./videos
+#   saca-sapos single VIDEO_ID         download one video (run by "download" via xargs)
+#   saca-sapos stats                   summarise what is stored in ./videos
+#   saca-sapos test                    check that the server can be reached
+#
+# Environment: PROCS sets the number of parallel downloads (default 4).
+
+set -ueo pipefail
+
+# Prints the accepted invocations to stderr.
+usage() {
+  echo "Uso: $0 download <quantidade> <email> | single <video> | stats | test" >&2
+}
+
+# Aborts the script when the command named $1 is not installed.
+test_command() {
+  if ! command -v "$1" > /dev/null; then
+    echo "$1: comando não instalado" >&2
+    exit 1
+  fi
+}
+
+test_command curl
+test_command jq
+test_command xargs
+test_command sha256sum
+test_command stat
+test_command awk
+
+PROCS=${PROCS:=4}
+SERVER="https://sapo.pxto.pt/config.json"
+export SAPO_EXEC="$0"
+
+# Prints the server configuration as JSON. --fail turns HTTP errors into a
+# non-zero exit status instead of printing the error page.
+get_config() {
+  curl -fs "$SERVER"
+}
+
+# Asks the server for random video names and downloads them in parallel.
+#   $1  number of videos to fetch
+#   $2  contact e-mail, sent to the server with every download report
+indexer() {
+  AMOUNT="$1"
+  export SAPO_CONTACT="$2"
+
+  # Reject anything that is not a plain non-negative integer before it
+  # reaches the numeric comparison and the request URL.
+  case "$AMOUNT" in
+    ''|*[!0-9]*)
+      echo "Quantidade inválida: $AMOUNT" >&2
+      exit 1
+      ;;
+  esac
+
+  CONFIG="$(get_config)"
+
+  # Assign first and export afterwards: "export VAR=$(cmd)" would hide a
+  # failing cmd from "set -e". jq -e fails when the key is missing.
+  SAPO_CONFIG_VIDEOS_URL="$(echo "$CONFIG" | jq -er .videos_url)"
+  SAPO_CONFIG_UPLOAD_URL="$(echo "$CONFIG" | jq -er .upload_url)"
+  SAPO_CONFIG_MAX="$(echo "$CONFIG" | jq -er .max_amount)"
+  export SAPO_CONFIG_VIDEOS_URL SAPO_CONFIG_UPLOAD_URL SAPO_CONFIG_MAX
+
+  if [ "$AMOUNT" -gt "$SAPO_CONFIG_MAX" ]; then
+    echo "Não podes pedir mais do que $SAPO_CONFIG_MAX vídeos!" >&2
+    exit 1
+  fi
+
+  VIDEOS="$(curl -s "$SAPO_CONFIG_VIDEOS_URL?amount=$AMOUNT")"
+
+  if [ "$(echo "$VIDEOS" | jq -r .error)" = "greedy" ]; then
+    echo "Não podes pedir mais do que $SAPO_CONFIG_MAX vídeos!" >&2
+    exit 1
+  fi
+
+  mkdir -p videos
+  echo "$VIDEOS" |
+    jq -r '.videos[]' |
+    xargs "-P$PROCS" -I {} "$SAPO_EXEC" single {}
+}
+
+# Downloads video $1 into videos/$1.mp4 and reports it to the server.
+# A video that is already on disk is neither fetched nor reported again.
+# Reads SAPO_CONTACT and SAPO_CONFIG_UPLOAD_URL, which "download" exports.
+download() {
+  VIDEO_ID="$1"
+  VIDEO_URL="https://rd.videos.sapo.pt/$VIDEO_ID/mov/1"
+  FILENAME="videos/$VIDEO_ID.mp4"
+  PARTIAL="$FILENAME.part"
+  STATUS="repetido"
+
+  if [ ! -f "$FILENAME" ]; then
+    # Stop before transferring anything if the reporting settings are missing.
+    : "${SAPO_CONTACT?}" "${SAPO_CONFIG_UPLOAD_URL?}"
+
+    # Fetch under a temporary name and rename once complete, so that an
+    # interrupted transfer is not taken for a finished video on the next
+    # run. --fail keeps HTTP error pages from being saved as videos.
+    trap 'rm -f "$PARTIAL"' EXIT
+    if ! curl -fLs "$VIDEO_URL" -o "$PARTIAL"; then
+      echo "falhou: $VIDEO_ID" >&2
+      return 1
+    fi
+    mv "$PARTIAL" "$FILENAME"
+    STATUS="descarregado"
+  fi
+
+  SHA=$(sha256sum "$FILENAME" | awk '{print $1}')
+  SIZE=$(stat --format=%s "$FILENAME")
+  MB=$(( SIZE / 1024 / 1024 ))
+
+  if [ "$STATUS" = "descarregado" ]; then
+    PAYLOAD="$(jq -rc --null-input \
+      --arg video_id "$VIDEO_ID" \
+      --arg sha "$SHA" \
+      --arg email "$SAPO_CONTACT" \
+      --argjson size "$SIZE" \
+      '{video_id: $video_id, sha256: $sha, size: $size, email: $email}')"
+    curl -s "$SAPO_CONFIG_UPLOAD_URL" -H 'content-type: application/json' -d "$PAYLOAD" > /dev/null
+  fi
+
+  echo "$STATUS: $VIDEO_ID / sha256:$SHA / ${MB} MiB"
+}
+
+case "${1:-}" in
+  "download")
+    if [ "$#" -lt 3 ]; then
+      usage
+      exit 1
+    fi
+    indexer "$2" "$3"
+    ;;
+  "single")
+    if [ "$#" -lt 2 ]; then
+      usage
+      exit 1
+    fi
+    download "$2"
+    ;;
+  "stats")
+    BYTES=0
+    NUM=0
+    if [ -d videos ]; then
+      BYTES="$(du -sb videos/ | awk '{print $1}')"
+      NUM="$(ls -1 videos/ | wc -l)"
+    fi
+    MB=$(( BYTES / 1024 / 1024 ))
+
+    # Nothing downloaded yet: report 0 instead of dividing by zero.
+    if [ "$NUM" -gt 0 ]; then
+      MB_PER_VIDEO=$(( MB / NUM ))
+    else
+      MB_PER_VIDEO=0
+    fi
+
+    echo "Número de vídeos: $NUM"
+    echo "Tamanho total: $MB MiB"
+    echo "Tamanho médio: $MB_PER_VIDEO MiB/vídeo"
+    ;;
+  "test")
+    if ! get_config > /dev/null; then
+      echo "A comunicação com o servidor falhou: $SERVER" >&2
+      exit 1
+    fi
+    echo "Tudo pronto!"
+    ;;
+  *)
+    usage
+    exit 1
+    ;;
+esac