Compare commits

...

5 Commits

Author SHA1 Message Date
Roman Krček
4879c05a3b Add basic unittests
Some checks failed
Build Docker image / test (push) Failing after 1m6s
Build Docker image / build (push) Has been skipped
2024-10-13 11:33:29 +02:00
Roman Krček
b2254e99a2 Make testing faster by running tox in parallel 2024-10-13 11:30:33 +02:00
Roman Krček
725cf30319 Reorganize the project for better compatibility with unittests 2024-10-13 11:22:37 +02:00
Roman Krček
3c34c0f947 Prepare git for unittests 2024-10-13 11:21:31 +02:00
Roman Krček
32a423bf28 Fix code spell not being used in CI 2024-10-13 11:21:17 +02:00
13 changed files with 543 additions and 163 deletions

View File

@@ -24,7 +24,7 @@ jobs:
run: pip install tox>=4.16
- name: Run tox
run: tox
run: tox --parallel auto
build:
runs-on: ubuntu-latest

3
.gitignore vendored
View File

@@ -162,5 +162,6 @@ cython_debug/
#.idea/
.venv/
.env
.env/
.stestr/
data/

View File

@@ -1,4 +1,7 @@
import logging
import os
LOG_LEVEL = os.getenv("LOG_LEVEL")
def configure_logger(log_level: str) -> logging.Logger:
@@ -13,3 +16,6 @@ def configure_logger(log_level: str) -> logging.Logger:
)
return logging.getLogger()
log = configure_logger(LOG_LEVEL)

View File

@@ -1,165 +1,30 @@
import datetime
import integv
import os
import re
import sentry_sdk
import uvloop
from functools import wraps
from pyrogram import Client, filters
from pyrogram.types import Message
from tiktok_downloader import snaptik
from telegram_downloader_bot import logger
from telegram_downloader_bot.logger import log
from telegram_downloader_bot.telemetry import init_telemetry
from telegram_downloader_bot import utils, security
API_ID = os.getenv("API_ID") # Your API ID from my.telegram.org
API_HASH = os.getenv("API_HASH") # Your API Hash from my.telegram.org
BOT_TOKEN = os.getenv("BOT_TOKEN") # Your bot token from BotFather
STORAGE = os.getenv("STORAGE") # Storage directory for downloads
LOG_LEVEL = os.getenv("LOG_LEVEL") # Log level
# Your message ID for authorization separated by commas
msg_ids = os.getenv("MSG_IDS")
ALLOWED_IDS = set([int(i) for i in msg_ids.split(",")]) # Convert to set
log = logger.configure_logger(LOG_LEVEL)
STORAGE = os.getenv("STORAGE") # Your bot token from BotFather
uvloop.install()
sentry_sdk.init(
dsn="https://12d7a075d483fc133cde0ed82e72ac45@o4508071875313664.ingest.de.sentry.io/4508075566694480", # noqa: E501
traces_sample_rate=1.0,
profiles_sample_rate=1.0,
enable_tracing=True
)
init_telemetry()
app = Client("downloader_bot",
api_id=API_ID,
api_hash=API_HASH,
bot_token=BOT_TOKEN)
def protected(func):
@wraps(func)
async def wrapper(client, message):
if message.from_user.id not in ALLOWED_IDS:
return await message.reply_text("You are not on the list!")
return await func(client, message)
return wrapper
async def get_user_folder(message: Message) -> os.path:
# Determine folder name based on whether the message was forwarded
# and who it was forwarded from
if message.forward_from:
user = message.forward_from
# User's first and last name for folder name,
# fallback to user ID if not available
user_folder_name = (
f"{user.first_name}_{user.last_name}".strip()
if user.first_name and user.last_name
else str(user.id)
)
elif message.forward_from_chat:
user = message.forward_from_chat
# Use chat title for groups and channels
user_folder_name = "".join(
c for c in user.title if c.isalnum() or c in (" ", "_")
).rstrip()
else:
user = message.from_user
# User's first and last name for folder name,
# fallback to user ID if not available
user_folder_name = (
f"{user.first_name}_{user.last_name}".strip()
if user.first_name and user.last_name
else str(user.id)
)
# Sanitize the folder name
user_folder_name = "".join(
c for c in user_folder_name if c.isalnum() or c in (" ", "_")
).rstrip()
user_folder = os.path.join(STORAGE, "telegram", user_folder_name)
os.makedirs(user_folder, exist_ok=True)
return user_folder
async def handle_media_message_contents(client: Client, message: Message):
user_folder = get_user_folder(message)
# Handle documents
if message.document:
file_name = message.document.file_name
file_path = os.path.join(user_folder, file_name)
await client.download_media(message, file_path)
await message.reply_text(f"Document saved to {user_folder}")
# Handle single or multiple photos
elif message.photo:
file_name = f"photo_{message.photo.file_id}.jpg"
file_path = os.path.join(user_folder, file_name)
await client.download_media(message.photo, file_path)
await message.reply_text(f"Photo saved to {user_folder}")
# Handle videos
elif message.video:
file_name = f"video_{message.video.file_id}.mp4"
file_path = os.path.join(user_folder, file_name)
await client.download_media(message, file_path)
await message.reply_text(f"Video saved to {user_folder}")
# Handle GIFs
elif message.animation:
file_name = f"gif_{message.animation.file_id}.gif"
file_path = os.path.join(user_folder, file_name)
await client.download_media(message.animation, file_path)
await message.reply_text(f"GIF saved to {user_folder}")
# Handle unknown data types
else:
await message.reply_text("Unknown media type!")
def download_tt_video(url: str) -> bool:
videos = snaptik(url)
now = datetime.datetime.now()
max_tries = 5
log.debug(f"Downloading video from {url}...")
for video in videos:
video_filename = now.strftime("video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
video_filepath: os.path = os.path.join(
STORAGE, "tiktok", video_filename)
for i in range(max_tries):
video_content = video.download()
is_valid_mp4 = integv.verify(
video_content.tobytes(), file_type="mp4")
log.debug(
f"Attempt {i+1}/{max_tries} to "
"download video, video valid: {is_valid_mp4}"
)
if is_valid_mp4:
break
if not is_valid_mp4:
log.error("Downloaded video is not a valid mp4 file")
return False
with open(video_filepath, "wb") as f:
f.write(video_content)
log.debug("Video saved successfully")
return True
bot_token=BOT_TOKEN,
workers=1)
@app.on_message(filters.command("start"))
@protected
@security.protected
async def start_handler(_, message: Message):
await message.reply_text(
"This bot downloads TikTok videos to my personal server"
@@ -167,27 +32,35 @@ async def start_handler(_, message: Message):
@app.on_message(filters.command("help"))
@protected
@security.protected
async def help_handler(_, message: Message):
await message.reply_text("I won't help you!")
@app.on_message(filters.text)
@protected
@security.protected
async def message_handler(_, message: Message):
urls = re.findall(r"\bhttps?://[^\s]+", message.text)
urls = utils.extract_urls(message.text)
if not urls:
return await message.reply_text(
"No links found in the message. Nothing to download!"
)
tt_urls = utils.filter_tt_urls(urls)
if not tt_urls:
return await message.reply_text(
"No TikTok URLs found! Nothing to download!"
)
success_count = 0
for i, url in enumerate(urls):
msg = f"Downloading video {i+1}/{len(urls)}..."
log.info(msg)
await message.reply_text(msg)
outcome = download_tt_video(url)
outcome = utils.download_tt_video(STORAGE, url)
success_count += 1 if outcome else 0
await message.reply_text(f"{success_count}/{len(urls)} "
@@ -195,15 +68,14 @@ async def message_handler(_, message: Message):
@app.on_message(filters.media)
@protected
@security.protected
async def media_handler(client, message: Message):
await message.reply_text("Downloading media...")
handle_media_message_contents(client, message)
utils.handle_media_message_contents(STORAGE, client, message)
if __name__ == "__main__":
os.makedirs(os.path.join(STORAGE, "tiktok"), exist_ok=True)
os.makedirs(os.path.join(STORAGE, "telegram"), exist_ok=True)
utils.make_fs(STORAGE)
app.run()

View File

@@ -0,0 +1,18 @@
import os
from functools import wraps
# Comma-separated list of Telegram IDs that this bot will respond to,
# read from the ALLOWED_IDS environment variable (for example "123,456").
allowed_ids_raw = os.getenv("ALLOWED_IDS", "")
# Strip the whitespace around every entry and drop empty ones, so that
# "123, 456", a trailing comma or an unset variable do not leave unusable
# items such as " 456" or "" in the list.
# NOTE: the entries stay strings, exactly as they come from the environment;
# code comparing them with integer Telegram IDs has to convert first.
# The allow-list is deliberately not printed: it would end up in the
# container logs on every import.
allowed_ids = [
    entry.strip() for entry in allowed_ids_raw.split(",") if entry.strip()
]
def protected(func):
    """Restrict a message handler to the users listed in ``allowed_ids``.

    The decorated handler is only called when the ID of the message sender
    is on the allow-list. Everybody else gets the reply
    "You are not on the list!" and the handler is not invoked.

    Args:
        func: Coroutine function taking ``(client, message)``.

    Returns:
        A coroutine function with the same signature as ``func``.
    """
    @wraps(func)
    async def wrapper(client, message):
        # ``allowed_ids`` is parsed from an environment variable and holds
        # strings, while ``from_user.id`` is an integer: compare both in
        # their (whitespace-stripped) string form, otherwise nobody matches.
        permitted = {str(entry).strip() for entry in allowed_ids}
        sender = message.from_user
        # A message without a sender is rejected instead of raising
        # AttributeError on ``None.id``.
        if sender is None or str(sender.id) not in permitted:
            return await message.reply_text("You are not on the list!")
        return await func(client, message)
    return wrapper

View File

@@ -0,0 +1,10 @@
import sentry_sdk
def init_telemetry() -> None:
    """Initialise the Sentry SDK for this process.

    Tracing is enabled and both the trace and the profile sample rate are
    set to 1.0, i.e. nothing is sampled out.
    """
    sentry_options = {
        "dsn": "https://12d7a075d483fc133cde0ed82e72ac45@o4508071875313664.ingest.de.sentry.io/4508075566694480",  # noqa: E501
        "traces_sample_rate": 1.0,
        "profiles_sample_rate": 1.0,
        "enable_tracing": True,
    }
    sentry_sdk.init(**sentry_options)

View File

@@ -0,0 +1,142 @@
import integv
import os
import re
from datetime import datetime
from pyrogram import Client
from pyrogram.types import Message
from tiktok_downloader import snaptik
from telegram_downloader_bot.logger import log
async def get_user_folder(storage_path: str, message: Message) -> str:
    """Return (and create) the folder where media of ``message`` is saved.

    The folder is ``<storage_path>/telegram/<name>`` where ``<name>`` is

    * for a message forwarded from a user, or a message that was not
      forwarded: ``<first_name>_<last_name>`` when both names are set,
      otherwise the user's ID;
    * for a message forwarded from a chat: the chat title.

    Only alphanumeric characters, spaces and underscores are kept in the
    name and trailing whitespace is removed. When nothing is left after
    sanitising, the ID of the user or chat is used instead, so media never
    lands directly in the shared ``telegram`` directory.

    Args:
        storage_path: Root directory of the bot's storage.
        message: The message whose origin determines the folder name.

    Returns:
        Path of the folder; it exists when the function returns.
    """
    if message.forward_from_chat and not message.forward_from:
        # Forwarded from a group or channel: use the chat title
        source = message.forward_from_chat
        raw_name = source.title or ""
    else:
        # A forwarded message is filed under its original author,
        # anything else under the sender.
        source = message.forward_from or message.from_user
        if source.first_name and source.last_name:
            raw_name = f"{source.first_name}_{source.last_name}"
        else:
            # Fall back to the user ID when a name part is missing
            raw_name = str(source.id)

    # Sanitize the folder name
    user_folder_name = "".join(
        c for c in raw_name if c.isalnum() or c in (" ", "_")
    ).rstrip()
    if not user_folder_name:
        # Every character was filtered out (e.g. a title made of emoji)
        user_folder_name = str(source.id)

    user_folder = os.path.join(storage_path, "telegram", user_folder_name)
    os.makedirs(user_folder, exist_ok=True)
    return user_folder
async def handle_media_message_contents(storage_path: str,
                                        client: Client,
                                        message: Message):
    """Save the media attached to ``message`` into the sender's folder.

    The kind of media (document, photo, video or animation) decides the
    file name and extension. After the download a confirmation is sent as
    a reply; for every other kind of message the reply is
    "Unknown media type!" and nothing is downloaded.

    Args:
        storage_path: Root directory of the bot's storage.
        client: Client used to download the media.
        message: The message carrying the media.
    """
    # get_user_folder is a coroutine function: without ``await`` a coroutine
    # object, not a path, would be passed to os.path.join() below.
    user_folder = await get_user_folder(storage_path, message)

    # Handle documents
    if message.document:
        # The file name is chosen by the sender. Keep only its last path
        # component so that names like "../../x" cannot leave user_folder.
        file_name = os.path.basename(message.document.file_name or "")
        if file_name in ("", ".", ".."):
            # NOTE(review): assumes documents expose ``file_id`` like the
            # other media types handled below - confirm.
            file_name = f"document_{message.document.file_id}"
        file_path = os.path.join(user_folder, file_name)
        await client.download_media(message, file_path)
        await message.reply_text(f"Document saved to {user_folder}")

    # Handle single or multiple photos
    elif message.photo:
        file_name = f"photo_{message.photo.file_id}.jpg"
        file_path = os.path.join(user_folder, file_name)
        await client.download_media(message.photo, file_path)
        await message.reply_text(f"Photo saved to {user_folder}")

    # Handle videos
    elif message.video:
        file_name = f"video_{message.video.file_id}.mp4"
        file_path = os.path.join(user_folder, file_name)
        await client.download_media(message, file_path)
        await message.reply_text(f"Video saved to {user_folder}")

    # Handle GIFs
    elif message.animation:
        file_name = f"gif_{message.animation.file_id}.gif"
        file_path = os.path.join(user_folder, file_name)
        await client.download_media(message.animation, file_path)
        await message.reply_text(f"GIF saved to {user_folder}")

    # Handle unknown data types
    else:
        await message.reply_text("Unknown media type!")
def download_tt_video(storage_path: str, url: str) -> bool:
    """Download a TikTok video from ``url`` into ``<storage_path>/tiktok``.

    The first download candidate returned by snaptik is fetched up to five
    times until the content verifies as a valid MP4. The file is named
    after the current local time
    (``video-tiktok-<YYYY-MM-DD>_<HH-MM-SS>.mp4``).

    Args:
        storage_path: Root directory of the bot's storage.
        url: URL of the TikTok video.

    Returns:
        True when a valid MP4 was written to disk, False when snaptik
        returned no candidate or no attempt produced a valid MP4.
    """
    max_tries = 5
    videos = snaptik(url)
    # The module imports the class (``from datetime import datetime``), so
    # it is ``datetime.now()``; ``datetime.datetime.now()`` raises
    # AttributeError.
    video_filename = datetime.now().strftime(
        "video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
    target_dir = os.path.join(storage_path, "tiktok")
    video_filepath = os.path.join(target_dir, video_filename)

    log.debug(f"Downloading video from {url}...")
    for video in videos:
        video_content = b""
        is_valid_mp4 = False
        for i in range(max_tries):
            # bytes() accepts both a memoryview and plain bytes, so the
            # same object can be verified and written.
            video_content = bytes(video.download().getbuffer())
            is_valid_mp4 = integv.verify(video_content, file_type="mp4")
            log.debug(
                f"Attempt {i+1}/{max_tries} to "
                f"download video, video valid: {is_valid_mp4}"
            )
            if is_valid_mp4:
                break

        if not is_valid_mp4:
            log.error("Downloaded video is not a valid mp4 file")
            return False

        # Do not rely on the caller having prepared the directory layout
        os.makedirs(target_dir, exist_ok=True)
        with open(video_filepath, "wb") as f:
            f.write(video_content)

        log.debug("Video saved successfully")
        return True

    # snaptik returned nothing to download
    log.error(f"No downloadable video found for {url}")
    return False
def make_fs(storaga_path: str) -> None:
    """Create the ``tiktok`` and ``telegram`` folders below the storage root.

    Folders that already exist are left as they are.
    """
    for subfolder in ("tiktok", "telegram"):
        os.makedirs(os.path.join(storaga_path, subfolder), exist_ok=True)
def extract_urls(text: str) -> list:
    """Return all http(s) URLs found in ``text``, in order of appearance.

    A URL starts with ``http://`` or ``https://`` at a word boundary and
    runs up to the next whitespace character.
    """
    url_pattern = re.compile(r"\bhttps?://[^\s]+")
    return [found.group(0) for found in url_pattern.finditer(text)]
def filter_tt_urls(urls: list) -> list:
    """Return the URLs from ``urls`` that contain the text ``tiktok``.

    This is a plain substring check on the whole URL; the order of the
    input is kept.
    """
    return list(filter(lambda url: "tiktok" in url, urls))

View File

@@ -1,4 +1,5 @@
codespell
flake8
bandit
pytest
pytest
stestr

View File

@@ -1,6 +0,0 @@
from tiktok_downloader import snaptik
# Scratch script: ask snaptik for the entries of one TikTok short link ...
videos = snaptik("https://vm.tiktok.com/ZGde8k5P7/")
# ... and print the ``json`` attribute of every entry that came back.
for video in videos:
    print(video.json)

View File

@@ -0,0 +1 @@
# To be implemented

331
tests/test_utils.py Normal file
View File

@@ -0,0 +1,331 @@
import unittest
import os
import re
import asyncio
import tempfile
import shutil
from unittest.mock import Mock, AsyncMock, patch
from datetime import datetime
from telegram_downloader_bot.utils import (
get_user_folder,
handle_media_message_contents,
download_tt_video,
make_fs,
extract_urls,
filter_tt_urls,
)
from pyrogram.types import Message, User, Chat
class TestGetUserFolder(unittest.IsolatedAsyncioTestCase):
    """Tests for ``get_user_folder``: folder naming and folder creation."""

    def setUp(self):
        # Create a temporary directory for each test
        self.tmp_path = tempfile.mkdtemp()

    def tearDown(self):
        # Remove the directory after the test
        shutil.rmtree(self.tmp_path)

    @staticmethod
    def _make_user(first_name, last_name, user_id):
        """Build a fake user with the attributes the function reads."""
        user = Mock()
        user.first_name = first_name
        user.last_name = last_name
        user.id = user_id
        return user

    @staticmethod
    def _make_message(forward_from=None, forward_from_chat=None,
                      from_user=None):
        """Build a fake message; origins that are not given are None."""
        message = Mock()
        message.forward_from = forward_from
        message.forward_from_chat = forward_from_chat
        message.from_user = from_user
        return message

    async def _assert_folder(self, message, folder_name):
        """Check that ``message`` maps to ``telegram/<folder_name>``
        and that this folder has been created."""
        result = await get_user_folder(self.tmp_path, message)
        expected_folder = os.path.join(
            self.tmp_path, "telegram", folder_name)
        self.assertEqual(result, expected_folder)
        self.assertTrue(os.path.exists(expected_folder))

    async def test_forward_from_full_name(self):
        user = self._make_user("John", "Doe", 12345)
        message = self._make_message(forward_from=user)
        await self._assert_folder(message, "John_Doe")

    async def test_forward_from_first_name_only(self):
        # Without a last name the user ID is used
        user = self._make_user("John", None, 12345)
        message = self._make_message(forward_from=user)
        await self._assert_folder(message, "12345")

    async def test_forward_from_no_name(self):
        user = self._make_user(None, None, 12345)
        message = self._make_message(forward_from=user)
        await self._assert_folder(message, "12345")

    async def test_forward_from_chat_special_chars(self):
        chat = Mock()
        chat.title = "My *Awesome* Group/Chat!"
        message = self._make_message(forward_from_chat=chat)
        # get_user_folder keeps spaces and only drops characters that are
        # neither alphanumeric, a space nor an underscore; it never turns
        # spaces into underscores.
        await self._assert_folder(message, "My Awesome GroupChat")

    async def test_from_user_full_name(self):
        user = self._make_user("Jane", "Doe", 54321)
        message = self._make_message(from_user=user)
        await self._assert_folder(message, "Jane_Doe")

    async def test_from_user_first_name_only(self):
        user = self._make_user("Jane", None, 54321)
        message = self._make_message(from_user=user)
        await self._assert_folder(message, "54321")

    async def test_special_characters_in_name(self):
        user = self._make_user("Ja*ne", "Do/e", 54321)
        message = self._make_message(from_user=user)
        await self._assert_folder(message, "Jane_Doe")
class TestHandleMediaMessageContents(unittest.IsolatedAsyncioTestCase):
    """Tests for ``handle_media_message_contents`` with ``get_user_folder``
    replaced by a mock that returns a folder inside a temporary directory."""

    def setUp(self):
        # Temporary directory per test, removed again afterwards
        self.tmp_path = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.tmp_path)

        # Replace get_user_folder for the duration of the test
        patcher = patch("telegram_downloader_bot.utils.get_user_folder")
        mock_get_user_folder = patcher.start()
        self.addCleanup(patcher.stop)

        self.user_folder = os.path.join(self.tmp_path, "user_folder")
        mock_get_user_folder.return_value = self.user_folder
        os.makedirs(self.user_folder, exist_ok=True)

        self.client = Mock()
        self.client.download_media = AsyncMock()

    @staticmethod
    def _make_message(document=None, photo=None):
        """Build a fake message carrying at most a document or a photo."""
        message = Mock()
        message.document = document
        message.photo = photo
        message.video = None
        message.animation = None
        message.reply_text = AsyncMock()
        return message

    async def test_document(self):
        document = Mock()
        document.file_name = "test_document.pdf"
        message = self._make_message(document=document)

        await handle_media_message_contents(
            self.tmp_path, self.client, message)

        expected_file = os.path.join(self.user_folder, "test_document.pdf")
        self.client.download_media.assert_awaited_once_with(
            message, expected_file)
        message.reply_text.assert_awaited_once_with(
            f"Document saved to {self.user_folder}")

    async def test_photo(self):
        photo = Mock()
        photo.file_id = "1234567890"
        message = self._make_message(photo=photo)

        await handle_media_message_contents(
            self.tmp_path, self.client, message)

        expected_file = os.path.join(
            self.user_folder, f"photo_{message.photo.file_id}.jpg")
        self.client.download_media.assert_awaited_once_with(
            message.photo, expected_file)
        message.reply_text.assert_awaited_once_with(
            f"Photo saved to {self.user_folder}")

    async def test_unknown_media(self):
        message = self._make_message()

        await handle_media_message_contents(
            self.tmp_path, self.client, message)

        self.client.download_media.assert_not_awaited()
        message.reply_text.assert_awaited_once_with("Unknown media type!")
class TestDownloadTTVideo(unittest.TestCase):
    """Tests for ``download_tt_video`` with snaptik, the MP4 verification
    and the clock replaced by mocks."""

    def setUp(self):
        # Create a temporary directory for each test
        self.tmp_path = tempfile.mkdtemp()
        # The video is written below "<storage>/tiktok", so the storage
        # layout has to exist before a download can succeed.
        make_fs(self.tmp_path)

    def tearDown(self):
        # Remove the directory after the test
        shutil.rmtree(self.tmp_path)

    def _download(self, mock_datetime, mock_verify, mock_snaptik, valid):
        """Run one mocked download.

        Returns the result of ``download_tt_video`` and the path the
        video is expected to be written to.
        """
        mock_video = Mock()
        # The code under test calls ``.tobytes()`` on this buffer, which a
        # memoryview provides and plain ``bytes`` does not.
        mock_video.download.return_value.getbuffer.return_value = (
            memoryview(b"video_content"))
        mock_snaptik.return_value = [mock_video]
        mock_verify.return_value = valid
        mock_now = datetime(2023, 1, 1, 12, 0, 0)
        mock_datetime.now.return_value = mock_now

        result = download_tt_video(
            self.tmp_path, "http://tiktok.com/video123")

        video_filename = mock_now.strftime(
            "video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
        video_filepath = os.path.join(
            self.tmp_path, "tiktok", video_filename)
        return result, video_filepath

    @patch("telegram_downloader_bot.utils.snaptik")
    @patch("telegram_downloader_bot.utils.integv.verify")
    @patch("telegram_downloader_bot.utils.datetime")
    def test_success(self, mock_datetime, mock_verify, mock_snaptik):
        result, video_filepath = self._download(
            mock_datetime, mock_verify, mock_snaptik, valid=True)
        self.assertTrue(result)
        self.assertTrue(os.path.exists(video_filepath))

    @patch("telegram_downloader_bot.utils.snaptik")
    @patch("telegram_downloader_bot.utils.integv.verify")
    @patch("telegram_downloader_bot.utils.datetime")
    def test_failure(self, mock_datetime, mock_verify, mock_snaptik):
        result, video_filepath = self._download(
            mock_datetime, mock_verify, mock_snaptik, valid=False)
        self.assertFalse(result)
        self.assertFalse(os.path.exists(video_filepath))
class TestMakeFS(unittest.TestCase):
    """Tests for ``make_fs``."""

    def setUp(self):
        # Every test works in its own temporary directory
        self.tmp_path = tempfile.mkdtemp()

    def tearDown(self):
        # Clean the temporary directory up again
        shutil.rmtree(self.tmp_path)

    def test_make_fs(self):
        make_fs(self.tmp_path)
        tiktok_dir = os.path.join(self.tmp_path, "tiktok")
        telegram_dir = os.path.join(self.tmp_path, "telegram")
        self.assertTrue(os.path.exists(tiktok_dir))
        self.assertTrue(os.path.exists(telegram_dir))
class TestExtractURLs(unittest.TestCase):
    """Tests for ``extract_urls``."""

    def test_no_urls(self):
        text = "This is some text without any URLs."
        self.assertEqual(extract_urls(text), [])

    def test_one_url(self):
        text = "Check out this link: http://example.com"
        self.assertEqual(extract_urls(text), ["http://example.com"])

    def test_multiple_urls(self):
        text = (
            "Here are some links: http://example.com and https://test.com/page"
        )
        expected = ["http://example.com", "https://test.com/page"]
        self.assertEqual(extract_urls(text), expected)

    def test_malformed_url(self):
        # "htt://" is neither http:// nor https://
        text = "This is not a URL: htt://badurl.com"
        self.assertEqual(extract_urls(text), [])

    def test_url_at_text_boundaries(self):
        text = "http://start.com text in the middle https://end.com"
        expected = ["http://start.com", "https://end.com"]
        self.assertEqual(extract_urls(text), expected)
class TestFilterTTURLs(unittest.TestCase):
    """Tests for ``filter_tt_urls``."""

    def test_empty_list(self):
        self.assertEqual(filter_tt_urls([]), [])

    def test_no_tiktok_urls(self):
        candidates = ["http://example.com", "https://test.com/page"]
        self.assertEqual(filter_tt_urls(candidates), [])

    def test_only_tiktok_urls(self):
        candidates = [
            "http://tiktok.com/video1",
            "https://www.tiktok.com/@user/video/123",
        ]
        self.assertEqual(filter_tt_urls(candidates), candidates)

    def test_mixed_urls(self):
        tiktok_url = "https://www.tiktok.com/@user/video/123"
        candidates = ["http://example.com", tiktok_url]
        self.assertEqual(filter_tt_urls(candidates), [tiktok_url])

    def test_tiktok_in_query(self):
        # The filter is a substring match, so "tiktok" in the query
        # string is enough for a URL to be kept.
        query_url = "http://example.com?param=tiktok"
        candidates = [query_url, "https://www.other.com/path"]
        self.assertEqual(filter_tt_urls(candidates), [query_url])

10
tox.ini
View File

@@ -1,9 +1,10 @@
[tox]
envlist = py311, flake8, bandit
envlist = py311, flake8, bandit, codespell, unit
[testenv]
basepython = python3.11
deps = -r {toxinidir}/test-requirements.txt
deps = -r {toxinidir}/test-requirements.txt
-r {toxinidir}/requirements.txt
[testenv:flake8]
commands = flake8 telegram_downloader_bot/
@@ -12,4 +13,7 @@ commands = flake8 telegram_downloader_bot/
commands = bandit -r telegram_downloader_bot/
[testenv:codespell]
commands = codespell telegram_downloader_bot/
commands = codespell telegram_downloader_bot/
[testenv:unit]
commands = stestr run --test-path tests/