Coisas a funcionar

This commit is contained in:
Hugo Peixoto 2023-07-28 03:01:40 +01:00
commit 5a10cead6b
9 changed files with 301 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/videos/
/videos.db

22
database.rb Normal file
View File

@ -0,0 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-only
# SPDX-FileCopyrightText: 2023 Hugo Peixoto <hugo.peixoto@gmail.com>
require 'active_record'
require 'sqlite3'
# Open the database connection. No arguments are passed here, so the
# connection settings have to come from outside this file.
# NOTE(review): presumably a DATABASE_URL environment variable or an
# ActiveRecord configuration set up elsewhere — confirm.
ActiveRecord::Base.establish_connection
# Send ActiveRecord's log output to standard output, warnings and above only.
ActiveRecord::Base.logger = Logger.new(STDOUT)
ActiveRecord::Base.logger.level = :warn
# Declare the schema at load time. Both tables use `if_not_exists: true`, so
# loading this file against an already-initialised database does not fail.
ActiveRecord::Schema.define do
# videos: one row per video, identified by a unique `randname`, with the
# video's metadata stored as JSON.
create_table :videos, if_not_exists: true do |t|
t.string :randname, index: { unique: true }
t.json :metadata
end
# downloads: one row per reported download of a video, with the reported
# file size, SHA-256 digest and e-mail address.
create_table :downloads, if_not_exists: true do |t|
t.references :video, foreign_key: true
t.bigint :size
t.string :sha256
t.string :email
end
end

47
gems.locked Normal file
View File

@ -0,0 +1,47 @@
GEM
remote: https://rubygems.org/
specs:
activemodel (7.0.6)
activesupport (= 7.0.6)
activerecord (7.0.6)
activemodel (= 7.0.6)
activesupport (= 7.0.6)
activesupport (7.0.6)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 1.6, < 2)
minitest (>= 5.1)
tzinfo (~> 2.0)
concurrent-ruby (1.2.2)
i18n (1.14.1)
concurrent-ruby (~> 1.0)
minitest (5.19.0)
mustermann (3.0.0)
ruby2_keywords (~> 0.0.1)
nio4r (2.5.9)
puma (6.3.0)
nio4r (~> 2.0)
rack (2.2.7)
rack-protection (3.0.6)
rack
ruby2_keywords (0.0.5)
sinatra (3.0.6)
mustermann (~> 3.0)
rack (~> 2.2, >= 2.2.4)
rack-protection (= 3.0.6)
tilt (~> 2.0)
sqlite3 (1.6.3-x86_64-linux)
tilt (2.2.0)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
PLATFORMS
x86_64-linux
DEPENDENCIES
activerecord
puma
sinatra
sqlite3
BUNDLED WITH
2.4.16

9
gems.rb Normal file
View File

@ -0,0 +1,9 @@
# SPDX-License-Identifier: AGPL-3.0-only
# SPDX-FileCopyrightText: 2023 Hugo Peixoto <hugo.peixoto@gmail.com>
# All gems are fetched from the public RubyGems index.
source 'https://rubygems.org'
# Database access layer.
gem 'activerecord'
# Web server.
gem 'puma'
# Web framework providing the route DSL.
gem 'sinatra'
# SQLite database driver.
gem 'sqlite3'

24
import-csv.rb Normal file
View File

@ -0,0 +1,24 @@
# SPDX-License-Identifier: AGPL-3.0-only
# SPDX-FileCopyrightText: 2023 Hugo Peixoto <hugo.peixoto@gmail.com>
require 'json'
require './database.rb'
require './models.rb'
# Imports video metadata into the `videos` table.
#
# Usage: ruby import-csv.rb FILE
#
# FILE holds one JSON document per line. Each document is stored as the
# `metadata` of the video whose `randname` matches the document's "randname"
# key; videos that already exist are left untouched.
input_path = ARGV[0]
abort "usage: #{$PROGRAM_NAME} FILE" if input_path.nil?

batch_size = 2000
imported = 0

# Stream the file lazily instead of loading it whole, and commit one
# transaction per batch so each batch is written in a single commit.
File.foreach(input_path).each_slice(batch_size) do |lines|
  # Monotonic clock: wall-clock adjustments must not skew the measured duration.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  ActiveRecord::Base.transaction do
    lines.each do |line|
      # Tolerate blank lines (e.g. a trailing newline at the end of the file).
      next if line.strip.empty?

      metadata = JSON.parse(line)
      Video.where(randname: metadata["randname"]).first_or_create({ metadata: metadata })
    end
  end

  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  # Count the lines actually processed: the last batch is usually shorter
  # than batch_size.
  imported += lines.size
  puts "done #{imported} (#{elapsed})"
end

20
index.html.erb Normal file
View File

@ -0,0 +1,20 @@
<h1>Vamos arquivar o SAPO Vídeos</h1>
<ol>
<li>Faz download do <a href="">programa</a></li>
<li>
Corre o programa a partir de uma pasta com espaço suficiente para descarregar o número de vídeos especificado:
<pre>
$ cd /pasta/com/bastante/espaço
$ ./saca-sapos download 1000 hugo.peixoto@gmail.com
</pre>
</li>
<li>Quando o programa acabar, já está! Corre novamente para descarregar mais uns quantos vídeos.</li>
<li>Eventualmente vamos contactar-te a pedir que nos mandes os vídeos que descarregaste</li>
</ol>
<h2>Estatísticas</h2>
<%# Compute the figures once; the template receives `stats[:downloads]` with "total" and "bytes" entries. %>
<% total_downloads = stats[:downloads]["total"].to_i %>
<% total_mib = stats[:downloads]["bytes"].to_i / 1024 / 1024 %>
<%# With no downloads recorded yet the total is 0; guard the division so the page still renders. %>
<% average_mib = total_downloads.zero? ? 0 : total_mib / total_downloads %>
<p>Número de vídeos descarregados: <%= total_downloads %></p>
<p>Tamanho total: <%= total_mib %> MiB</p>
<p>Tamanho médio: <%= average_mib %> MiB/vídeo</p>

58
main.rb Normal file
View File

@ -0,0 +1,58 @@
# SPDX-License-Identifier: AGPL-3.0-only
# SPDX-FileCopyrightText: 2023 Hugo Peixoto <hugo.peixoto@gmail.com>
require 'sinatra'
require './database.rb'
require './models.rb'
# Home page: renders index.html.erb with aggregate download statistics.
# The query keeps a single row per video_id, so a video reported more than
# once contributes one row to the count and to the byte total.
get '/' do
  totals_sql = "
SELECT
COUNT(1) AS total,
SUM(size) AS bytes
FROM (select size from downloads group by video_id) as x;
"
  totals = ActiveRecord::Base.connection.execute(totals_sql)[0]
  page = ERB.new(File.read("index.html.erb"), trim_mode: "<>-")
  page.result_with_hash(stats: { downloads: totals })
end
# Returns up to `amount` randomly chosen video names as JSON.
#
# Query parameters:
#   amount - number of videos wanted; a missing or non-numeric value is
#            read as 0 by `to_i`, which yields an empty list.
#
# Responses:
#   { videos: [...] }            on success
#   { error: "greedy" }          when amount exceeds 100_000
#   { error: "invalid amount" }  (HTTP 400) when amount is negative
get '/videos.json' do
  content_type 'application/json'
  amount = params['amount'].to_i
  if amount.negative?
    # SQLite treats a negative LIMIT as "no limit", so a negative amount
    # would bypass the cap below and return every video.
    status 400
    { error: "invalid amount" }.to_json
  elsif amount > 100_000
    { error: "greedy" }.to_json
  else
    { videos: Video.order("RANDOM()").limit(amount).pluck(:randname) }.to_json
  end
end
# Records a download reported by a client.
#
# Expects a JSON object in the request body with the keys "video_id" (the
# video's randname), "size", "sha256" and "email".
#
# Responses:
#   { status: "ok" }  when the download was stored
#   HTTP 400          when the body is not valid JSON or not a JSON object
#   HTTP 404          when no video matches "video_id"
post '/video' do
  content_type 'application/json'

  begin
    data = JSON.parse(request.body.read)
  rescue JSON::ParserError
    # Malformed input is a client error, not a server failure.
    halt 400, { error: 'invalid json' }.to_json
  end
  halt 400, { error: 'invalid payload' }.to_json unless data.is_a?(Hash)

  video = Video.find_by(randname: data["video_id"])
  halt 404, { error: 'unknown video' }.to_json if video.nil?

  Download.create!(
    video: video,
    size: data["size"],
    sha256: data["sha256"],
    email: data["email"],
  )
  { status: 'ok' }.to_json
end
# Serves the client configuration as JSON: the endpoint that lists videos,
# the endpoint that receives download reports, and the largest amount of
# videos a client may ask for.
get '/config.json' do
  content_type 'application/json'
  client_config = {
    videos_url: "https://sapo.pxto.pt/videos.json",
    upload_url: "https://sapo.pxto.pt/video",
    max_amount: 100_000,
  }
  client_config.to_json
end

13
models.rb Normal file
View File

@ -0,0 +1,13 @@
# SPDX-License-Identifier: AGPL-3.0-only
# SPDX-FileCopyrightText: 2023 Hugo Peixoto <hugo.peixoto@gmail.com>
require 'active_record'
require 'date'
# ActiveRecord model for a video. Each video may have any number of
# associated Download records.
class Video < ActiveRecord::Base
has_many :downloads
end
# ActiveRecord model for one reported download. Every download belongs to
# exactly one Video.
class Download < ActiveRecord::Base
belongs_to :video
end

106
saca-sapos Executable file
View File

@ -0,0 +1,106 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: AGPL-3.0-only
# SPDX-FileCopyrightText: 2023 Hugo Peixoto <hugo.peixoto@gmail.com>
# Strict mode: -u makes expanding an unset variable an error, -e exits when a
# command fails, and -o pipefail makes a pipeline fail if any stage fails.
set -ueo pipefail
# Abort the script when a required external command is not installed.
#   $1 - name of the command to look up with `command -v`
# Prints an error to stderr and exits with status 1 when the command is
# missing; returns 0 otherwise.
test_command() {
  if ! command -v "$1" > /dev/null; then
    # Diagnostics go to stderr so they never mix with regular output.
    echo "$1: comando não instalado" >&2
    # Exit from the current shell directly instead of from a subshell, so the
    # abort does not depend on `set -e` being active.
    exit 1
  fi
}
# Check up front that the external tools listed here are installed.
for required_tool in curl jq xargs sha256sum stat; do
  test_command "$required_tool"
done

# Number of parallel downloads; defaults to 4 unless PROCS is already set.
PROCS="${PROCS:-4}"

# Address of the server-side configuration document.
SERVER="https://sapo.pxto.pt/config.json"

# Path of this script, exported so that xargs can re-invoke it per video.
SAPO_EXEC="$0"
export SAPO_EXEC
# Print the server configuration document (JSON) to stdout.
# Returns non-zero when the request fails, including when the server answers
# with an HTTP error status.
get_config() {
  # -f: treat HTTP error responses as failures instead of printing the error
  #     page as if it were the configuration.
  # -sS: no progress meter, but still show the error message on failure.
  curl -fsS "$SERVER"
}
# Fetch a list of videos from the server and download them in parallel.
#   $1 - number of videos to request (non-negative integer)
#   $2 - contact e-mail reported alongside each download
# Exports SAPO_CONTACT and the SAPO_CONFIG_* variables so the per-video
# child processes started through xargs can read them.
indexer() {
  local amount="$1"
  local config videos

  # Reject anything that is not a plain non-negative integer before it is
  # used in a numeric comparison and interpolated into a URL.
  case "$amount" in
    ''|*[!0-9]*)
      echo "Número de vídeos inválido: $amount" >&2
      exit 1
      ;;
  esac

  SAPO_CONTACT="$2"

  # Assign first and export afterwards: `export VAR="$(cmd)"` would hide a
  # failing command from `set -e`.
  config="$(get_config)"
  SAPO_CONFIG_VIDEOS_URL="$(echo "$config" | jq -r .videos_url)"
  SAPO_CONFIG_UPLOAD_URL="$(echo "$config" | jq -r .upload_url)"
  SAPO_CONFIG_MAX="$(echo "$config" | jq -r .max_amount)"
  export SAPO_CONTACT SAPO_CONFIG_VIDEOS_URL SAPO_CONFIG_UPLOAD_URL SAPO_CONFIG_MAX

  # A configuration without a numeric max_amount cannot be compared against.
  case "$SAPO_CONFIG_MAX" in
    ''|*[!0-9]*)
      echo "Configuração inválida recebida de $SERVER" >&2
      exit 1
      ;;
  esac

  if [ "$amount" -gt "$SAPO_CONFIG_MAX" ]; then
    echo "Não podes pedir mais do que $SAPO_CONFIG_MAX vídeos!" >&2
    exit 1
  fi

  # -f: an HTTP error must abort here rather than be parsed as a video list.
  videos="$(curl -fsS "$SAPO_CONFIG_VIDEOS_URL?amount=$amount")"

  # The server enforces the same limit and signals it with this error value.
  if [ "$(echo "$videos" | jq -r .error)" = "greedy" ]; then
    echo "Não podes pedir mais do que $SAPO_CONFIG_MAX vídeos!" >&2
    exit 1
  fi

  mkdir -p videos

  # Re-invoke this script once per video, PROCS downloads at a time.
  echo "$videos" |
    jq -r '.videos[]' |
    xargs "-P$PROCS" -I {} "$SAPO_EXEC" single {}
}
# Download a single video and report it to the server.
#   $1 - identifier of the video
# Reads SAPO_CONTACT and SAPO_CONFIG_UPLOAD_URL from the environment.
# A video whose file already exists is not fetched or reported again; only
# its digest and size are printed.
# Returns 1 when the video cannot be downloaded.
download() {
  local video_id="$1"
  local video_url="https://rd.videos.sapo.pt/$video_id/mov/1"
  local filename="videos/$video_id.mp4"
  local partial="$filename.part"
  local outcome="repetido"
  local sha size mb payload

  if [ ! -f "$filename" ]; then
    mkdir -p videos
    # Download into a temporary name and rename only on success, so an
    # interrupted transfer is never mistaken for a finished video later.
    # -f: an HTTP error must fail instead of saving the error page as a video.
    if ! curl -fLsS "$video_url" -o "$partial"; then
      rm -f "$partial"
      echo "falhou: $video_id" >&2
      return 1
    fi
    mv "$partial" "$filename"
    outcome="descarregado"
  fi

  sha=$(sha256sum "$filename" | awk '{print $1}')
  size=$(stat --format=%s "$filename")
  mb=$(( size / 1024 / 1024 ))

  # Only freshly downloaded videos are reported to the server.
  if [ "$outcome" = "descarregado" ]; then
    payload="$(jq -rc --null-input \
      --arg video_id "$video_id" \
      --arg sha "$sha" \
      --arg email "$SAPO_CONTACT" \
      --argjson size "$size" \
      '{video_id: $video_id, sha256: $sha, size: $size, email: $email}')"
    # -f: a rejected report must not be silently counted as delivered.
    curl -fsS "$SAPO_CONFIG_UPLOAD_URL" -H 'content-type: application/json' -d "$payload" > /dev/null
  fi

  echo "$outcome: $video_id / sha256:$sha / ${mb} MiB"
}
# Print the accepted invocations to stderr.
usage() {
  echo "Utilização:" >&2
  echo "  $0 download <número de vídeos> <email>" >&2
  echo "  $0 stats" >&2
  echo "  $0 test" >&2
}

# Dispatch on the sub-command. `${1:-}` keeps `set -u` from aborting with an
# "unbound variable" error when the script is run without arguments.
case "${1:-}" in
  "download")
    if [ "$#" -lt 3 ]; then
      usage
      exit 1
    fi
    indexer "$2" "$3"
    ;;
  "single")
    # Internal entry point: invoked once per video by indexer through xargs.
    if [ "$#" -lt 2 ]; then
      usage
      exit 1
    fi
    download "$2"
    ;;
  "stats")
    # -s: report one total for the folder even if it contains sub-folders.
    BYTES="$(du -sb videos/ | awk '{print $1}')"
    NUM="$(ls -1 videos/ | wc -l)"
    MB=$(( BYTES / 1024 / 1024 ))
    # An empty folder would otherwise abort with a division by zero.
    if [ "$NUM" -gt 0 ]; then
      MB_PER_VIDEO=$(( MB / NUM ))
    else
      MB_PER_VIDEO=0
    fi
    echo "Número de vídeos: $NUM"
    echo "Tamanho total: $MB MiB"
    echo "Tamanho médio: $MB_PER_VIDEO MiB/vídeo"
    ;;
  "test")
    if ! get_config > /dev/null; then
      echo "A comunicação com o servidor falhou: $SERVER" >&2
      exit 1
    fi
    echo "Tudo pronto!"
    ;;
  *)
    # Unknown or missing sub-command: explain instead of silently doing nothing.
    usage
    exit 1
    ;;
esac