diff --git a/get-category.sh b/get-category.sh
new file mode 100755
--- /dev/null
+++ b/get-category.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Download pages 1..<pages> of a videos.sapo.pt category listing into
+# data/category-<category>.<page>.json, one file per page.
+#
+# Usage: ./get-category.sh <category> <pages>
+# Env:   SAPO_TOKEN  optional token override (defaults to the built-in one)
+
+set -u
+
+# Lowercase on purpose: TERM is the terminal-type environment variable, and
+# assigning to it would change what child processes see when it is exported.
+term="${1:?usage: $0 <category> <pages>}"
+pages="${2:?usage: $0 <category> <pages>}"
+
+TOKEN="${SAPO_TOKEN:-581a26491893263804e888240fad1cf7}"
+
+if ! [[ "$pages" =~ ^[0-9]+$ ]]; then
+  echo "pages must be a non-negative integer, got: $pages" >&2
+  exit 2
+fi
+pages=$((10#$pages)) # force base 10 so "08" is not read as octal
+
+mkdir -p data
+
+for ((page = 1; page <= pages; page++)); do
+  echo "category: $term.$page"
+  ./get.sh category "$term" "$page" "$TOKEN" > "data/category-$term.$page.json"
+done
diff --git a/get-failures.sh b/get-failures.sh
new file mode 100755
--- /dev/null
+++ b/get-failures.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+#
+# Re-fetch the search pages listed in failures.txt (one "<term> <page>"
+# pair per line) into data/<term>.<page>.json.
+#
+# Currently disabled: the guard below exits before anything is fetched.
+
+TOKEN="${SAPO_TOKEN:-581a26491893263804e888240fad1cf7}"
+
+echo "not working" >&2
+exit 1
+
+# Unreachable until the guard above is removed. Delegates to get.sh so the
+# request is built in one place.
+while read -r term page; do
+  echo "$term.$page"
+  ./get.sh search "$term" "$page" "$TOKEN" > "data/$term.$page.json"
+done < failures.txt
diff --git a/get-search.sh b/get-search.sh
new file mode 100755
--- /dev/null
+++ b/get-search.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Download pages 1..<pages> of videos.sapo.pt search results for a term
+# into data/<term>.<page>.json, one file per page.
+#
+# Usage: ./get-search.sh <term> <pages>
+# Env:   SAPO_TOKEN  optional token override (defaults to the built-in one)
+
+set -u
+
+# Lowercase on purpose: TERM is the terminal-type environment variable, and
+# assigning to it would change what child processes see when it is exported.
+term="${1:?usage: $0 <term> <pages>}"
+pages="${2:?usage: $0 <term> <pages>}"
+
+TOKEN="${SAPO_TOKEN:-581a26491893263804e888240fad1cf7}"
+
+if ! [[ "$pages" =~ ^[0-9]+$ ]]; then
+  echo "pages must be a non-negative integer, got: $pages" >&2
+  exit 2
+fi
+pages=$((10#$pages)) # force base 10 so "08" is not read as octal
+
+mkdir -p data
+
+for ((page = 1; page <= pages; page++)); do
+  echo "search: $term.$page"
+  ./get.sh search "$term" "$page" "$TOKEN" > "data/$term.$page.json"
+done
diff --git a/get-tag.sh b/get-tag.sh
new file mode 100755
--- /dev/null
+++ b/get-tag.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Download every result page for a tag into data/tag-<hex>.<page>.json,
+# where <hex> is the tag hex-encoded with xxd. The number of pages is read
+# from the total_pages field of the first page's response.
+#
+# Usage: ./get-tag.sh <tag>
+# Env:   SAPO_TOKEN  optional token override (defaults to the built-in one)
+
+set -u
+
+# Lowercase on purpose: TERM is the terminal-type environment variable, and
+# assigning to it would change what child processes see when it is exported.
+term="${1:?usage: $0 <tag>}"
+# xxd -p wraps long output, so strip newlines to keep the name on one line.
+encterm="$(printf '%s' "$term" | xxd -p | tr -d '\n')"
+
+TOKEN="${SAPO_TOKEN:-581a26491893263804e888240fad1cf7}"
+
+pages="$(./get.sh tag "$term" 1 "$TOKEN" | jq -r '.total_pages')"
+
+echo "$encterm $pages" >&2
+
+# Stop instead of looping on "null" or an empty value when the first
+# request failed or returned something unexpected.
+if ! [[ "$pages" =~ ^[0-9]+$ ]]; then
+  echo "could not read total_pages for tag: $term" >&2
+  exit 1
+fi
+
+mkdir -p data
+
+for ((page = 1; page <= pages; page++)); do
+  echo "tag: $term.$page"
+  ./get.sh tag "$term" "$page" "$TOKEN" > "data/tag-$encterm.$page.json"
+done
diff --git a/get-username.sh b/get-username.sh
new file mode 100755
--- /dev/null
+++ b/get-username.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+#
+# Download every video-list page for a user into
+# data/username.<hex>.<page>.json, where <hex> is the user name hex-encoded
+# with xxd. The number of pages is read from the total_pages field of the
+# first page's response.
+#
+# Usage: ./get-username.sh <username>
+# Env:   SAPO_TOKEN  optional token override (defaults to the built-in one)
+
+set -u
+
+# Lowercase on purpose: TERM is the terminal-type environment variable, and
+# assigning to it would change what child processes see when it is exported.
+term="${1:?usage: $0 <username>}"
+# xxd -p wraps long output, so strip newlines to keep the name on one line.
+encterm="$(printf '%s' "$term" | xxd -p | tr -d '\n')"
+
+TOKEN="${SAPO_TOKEN:-581a26491893263804e888240fad1cf7}"
+
+pages="$(./get.sh username "$term" 1 "$TOKEN" | jq -r '.total_pages')"
+
+echo "$encterm $pages" >&2
+
+# Stop instead of looping on "null" or an empty value when the first
+# request failed or returned something unexpected.
+if ! [[ "$pages" =~ ^[0-9]+$ ]]; then
+  echo "could not read total_pages for username: $term" >&2
+  exit 1
+fi
+
+mkdir -p data
+
+for ((page = 1; page <= pages; page++)); do
+  echo "username: $term.$page" >&2
+  ./get.sh username "$term" "$page" "$TOKEN" > "data/username.$encterm.$page.json"
+done
diff --git a/get.sh b/get.sh
new file mode 100755
--- /dev/null
+++ b/get.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+#
+# Fetch one page from a videos.sapo.pt AJAX endpoint and write the response
+# body to stdout.
+#
+# Usage: ./get.sh <search|category|username|tag> <term> <page> <token>
+#
+# <term> is placed into the URL as-is, so it must already be URL-safe.
+# Exits 1 on a missing argument or unknown type; otherwise the exit status
+# is curl's.
+
+set -u
+
+if (($# < 4)); then
+  echo "usage: $0 <search|category|username|tag> <term> <page> <token>" >&2
+  exit 1
+fi
+
+kind="$1"
+# Lowercase on purpose: TERM is the terminal-type environment variable and
+# must not be overwritten for curl.
+term="$2"
+page="$3"
+token="$4"
+
+# GET the URL in $1 with the request headers shared by all endpoints.
+# --compressed replaces the hand-written Accept-Encoding header: curl then
+# only advertises encodings it can decode and decompresses the body, so
+# callers always receive plain text.
+fetch() {
+  curl -sS --compressed \
+    -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0' \
+    -H 'Accept: text/javascript,text/xml,application/xml,application/xhtml+xml,text/html,application/json;q=0.9,text/plain;q=0.8,video/x-mng,image/png,image/jpeg,image/gif;q=0.2,*/*;q=0.1' \
+    -H 'Accept-Language: en-US' \
+    -H 'X-Requested-With: XMLHttpRequest' \
+    -H "Cookie: language=pt; sso_tld=POR; bsu-v3-api=1689792348549; sv_token=$token" \
+    "$1"
+}
+
+case "$kind" in
+  search)
+    fetch "https://videos.sapo.pt/ajax/search?q=$term&type=videos&token=$token&nocache=9638&page=$page&order=rel"
+    ;;
+  category)
+    fetch "https://videos.sapo.pt/ajax/category/$term?token=$token&nocache=9544&page=$page&order=releve"
+    ;;
+  username)
+    fetch "https://videos.sapo.pt/ajax/video?username=$term&token=$token&page=$page"
+    ;;
+  tag)
+    fetch "https://videos.sapo.pt/ajax/searchbytag?q=$term&type=videos&token=$token&page=$page&order=releve&limit=20"
+    ;;
+  *)
+    echo "unknown request type: $kind" >&2
+    exit 1
+    ;;
+esac