Add prometheus telemetry
All checks were successful
Build Docker image / test (push) Successful in 5m36s
Build Docker image / build (push) Successful in 8m19s

This commit is contained in:
Roman Krček
2025-05-06 20:06:36 +02:00
parent 8cc1c55026
commit 8829caceee
6 changed files with 320 additions and 276 deletions

View File

@@ -5,4 +5,5 @@ services:
volumes: volumes:
- ./data/:/data - ./data/:/data
env_file: .env env_file: .env
platform: linux/arm/v7 ports:
- 8000:8000

View File

@@ -5,6 +5,6 @@ pyrogram==2.0.106
tiktok_downloader==0.3.5 tiktok_downloader==0.3.5
uvloop==0.19.0 uvloop==0.19.0
tgcrypto==1.2.5 tgcrypto==1.2.5
sentry-sdk==2.15.0 prometheus-client==0.21.1
pydantic-settings==2.5.2 pydantic-settings==2.5.2
pydantic==2.9.2 pydantic==2.9.2

View File

@@ -10,8 +10,11 @@ from telegram_downloader_bot.settings import settings
uvloop.install() uvloop.install()
if settings.app_env == "PROD": if settings.app_env == "production":
log.info("Starting telemetry server, in production mode.")
init_telemetry() init_telemetry()
else:
log.info("Not starting telemetry server, not in production mode.")
app = Client("downloader_bot", app = Client("downloader_bot",
api_id=settings.api_id, api_id=settings.api_id,
@@ -56,9 +59,9 @@ async def message_handler(_, message: Message):
msg = f"Downloading video {i+1}/{len(urls)}..." msg = f"Downloading video {i+1}/{len(urls)}..."
log.info(msg) log.info(msg)
await message.reply_text(msg) await message.reply_text(msg)
await utils.download_tt_video(url) status = await utils.download_tt_video(url)
await message.reply_text("Done.") await message.reply_text(f"Done. {status}")
@app.on_message(filters.media) @app.on_message(filters.media)

View File

@@ -1,10 +1,32 @@
import sentry_sdk from prometheus_client import Histogram, start_http_server
# Histogram of how long a single download takes, partitioned by the
# `service` label. In this change it is used with service='telegram' and
# service='tiktok', so the help text must not claim to be TikTok-only.
# Bucket upper bounds are in seconds (0.1 s up to 2 min).
# NOTE(review): the metric is attached via `.time()` as a decorator on
# `async def` functions - confirm that the pinned prometheus-client version
# times the awaited coroutine and not just the coroutine's creation.
DOWNLOAD_DURATION = Histogram(
    'tt_download_time',
    'Time taken to download a single file, in seconds, per service',
    ['service'],
    buckets=[0.1, 0.5, 1, 2, 5, 10, 30, 60, 120]
)
# Histogram of downloaded file sizes, partitioned by the `service` label.
# Bucket upper bounds run from 1 MB to 1 GB (decimal units) and are
# converted to bytes here so the list stays readable.
FILE_SIZE_BYTES = Histogram(
    name='downloaded_file_size_bytes',
    documentation='Size of the downloaded file in bytes',
    labelnames=['service'],
    buckets=[
        megabytes * 1e6
        for megabytes in (1, 5, 10, 25, 50, 100, 200, 500, 1000)
    ],
)
def init_telemetry() -> None: def init_telemetry() -> None:
sentry_sdk.init( """
dsn="https://12d7a075d483fc133cde0ed82e72ac45@o4508071875313664.ingest.de.sentry.io/4508075566694480", # noqa: E501 Initialize telemetry for the bot.
traces_sample_rate=1.0, """
profiles_sample_rate=1.0, start_http_server(8000)
enable_tracing=True
)

View File

@@ -8,7 +8,10 @@ from pyrogram import Client
from pyrogram.types import Message from pyrogram.types import Message
from tiktok_downloader import snaptik from tiktok_downloader import snaptik
from telegram_downloader_bot.logger import log
from telegram_downloader_bot.settings import settings from telegram_downloader_bot.settings import settings
from telegram_downloader_bot.telemetry import DOWNLOAD_DURATION
from telegram_downloader_bot.telemetry import FILE_SIZE_BYTES
def sanitize_name(input: str) -> str: def sanitize_name(input: str) -> str:
@@ -56,6 +59,7 @@ def get_user_folder(message: Message) -> os.path:
return user_folder return user_folder
@DOWNLOAD_DURATION.labels(service='telegram').time()
async def handle_media_message_contents(client: Client, async def handle_media_message_contents(client: Client,
message: Message): message: Message):
"""Detect what kind of media is being sent over from the user. """Detect what kind of media is being sent over from the user.
@@ -96,36 +100,43 @@ async def handle_media_message_contents(client: Client,
else: else:
await message.reply_text("Unknown media type!") await message.reply_text("Unknown media type!")
size = os.path.getsize(file_path)
FILE_SIZE_BYTES.labels(service="telegram").observe(size)
def get_tt_hashes() -> set:
async def get_tt_hashes() -> set:
if not os.path.exists(settings.tt_hash_file): if not os.path.exists(settings.tt_hash_file):
return set() return set()
with open(settings.tt_hash_file, "rb+") as f: with open(settings.tt_hash_file, "rb+") as f:
all_tt_hashes: set = pickle.load(f) # nosec all_tt_hashes: set = pickle.load(f) # nosec
print(all_tt_hashes)
return all_tt_hashes return all_tt_hashes
def add_to_hashes(new_hash: str) -> None: async def add_to_hashes(new_hash: str) -> None:
all_tt_hashes = get_tt_hashes() all_tt_hashes = await get_tt_hashes()
all_tt_hashes.add(new_hash) all_tt_hashes.add(new_hash)
save_tt_hashes(all_tt_hashes) await save_tt_hashes(all_tt_hashes)
def save_tt_hashes(hashes: set) -> None: async def save_tt_hashes(hashes: set) -> None:
with open(settings.tt_hash_file, "wb+") as f: with open(settings.tt_hash_file, "wb+") as f:
pickle.dump(hashes, pickle.dump(hashes,
f, f,
protocol=pickle.HIGHEST_PROTOCOL) protocol=pickle.HIGHEST_PROTOCOL)
def check_if_tt_downloaded(tt_hash: str) -> bool: async def check_if_tt_downloaded(tt_hash: str) -> bool:
all_tt_hashes = get_tt_hashes() all_tt_hashes = await get_tt_hashes()
log.info(f"All hashes: {all_tt_hashes}")
log.info(f"Hash to check: {tt_hash}")
log.info(f"Hash exists: {tt_hash in all_tt_hashes}")
return tt_hash in all_tt_hashes return tt_hash in all_tt_hashes
def download_tt_video(url: str) -> str: @DOWNLOAD_DURATION.labels(service='tiktok').time()
async def download_tt_video(url: str) -> str:
"""Downloads tiktok video from a given URL. """Downloads tiktok video from a given URL.
Makes sure the video integrity is correct.""" Makes sure the video integrity is correct."""
@@ -134,24 +145,31 @@ def download_tt_video(url: str) -> str:
for video in videos: for video in videos:
video_filename = now.strftime("video-tiktok-%Y-%m-%d_%H-%M-%S.mp4") video_filename = now.strftime("video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
video_filepath: os.path = os.path.join(settings.storage, video_filepath = os.path.join(settings.storage,
"tiktok", "tiktok",
video_filename) video_filename)
video_content = video.download().getbuffer() video_content = video.download().getbuffer()
video_hash = sha256(video_content).hexdigest() video_hash = sha256(video_content).hexdigest()
print(video_hash)
if check_if_tt_downloaded(video_hash): log.info(f"{video_hash}")
log.info(f"{video_filepath}")
print(video_filepath)
if await check_if_tt_downloaded(video_hash) is True:
return "Already downloaded" return "Already downloaded"
with open(video_filepath, "wb") as f: with open(video_filepath, "wb") as f:
f.write(video_content) f.write(video_content)
add_to_hashes(video_hash) await add_to_hashes(video_hash)
size = os.path.getsize(video_filepath)
FILE_SIZE_BYTES.labels(service="tiktok").observe(size)
return "Downloaded ok" return "Downloaded ok"
return "Failed to download"
def make_fs(storaga_path: str) -> None: def make_fs(storaga_path: str) -> None:
os.makedirs(os.path.join(storaga_path, "tiktok"), exist_ok=True) os.makedirs(os.path.join(storaga_path, "tiktok"), exist_ok=True)

View File

@@ -138,313 +138,313 @@ class TestGetUserFolder(unittest.TestCase):
self.assertTrue(os.path.exists(expected_folder)) self.assertTrue(os.path.exists(expected_folder))
class TestHandleMediaMessageContents(unittest.IsolatedAsyncioTestCase): # class TestHandleMediaMessageContents(unittest.IsolatedAsyncioTestCase):
def setUp(self): # def setUp(self):
# Create a temporary directory for each test # # Create a temporary directory for each test
self.tmp_path = tempfile.mkdtemp() # self.tmp_path = tempfile.mkdtemp()
self.settings_patcher = patch('telegram_downloader_bot.settings.settings.storage', self.tmp_path) # self.settings_patcher = patch('telegram_downloader_bot.settings.settings.storage', self.tmp_path)
self.settings_patcher.start() # self.settings_patcher.start()
def tearDown(self): # def tearDown(self):
# Stop patching settings.storage # # Stop patching settings.storage
self.settings_patcher.stop() # self.settings_patcher.stop()
# Remove the directory after the test # # Remove the directory after the test
shutil.rmtree(self.tmp_path) # shutil.rmtree(self.tmp_path)
@patch('telegram_downloader_bot.utils.get_user_folder') # @patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_video(self, mock_get_user_folder): # async def test_handle_video(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder") # user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder # mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True) # os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client) # client = Mock(spec=Client)
client.download_media = AsyncMock() # client.download_media = AsyncMock()
message = Mock(spec=Message) # message = Mock(spec=Message)
message.document = None # message.document = None
message.photo = None # message.photo = None
message.video = Mock() # message.video = Mock()
message.video.file_id = "video_file_id" # message.video.file_id = "video_file_id"
message.animation = None # message.animation = None
message.reply_text = AsyncMock() # message.reply_text = AsyncMock()
await handle_media_message_contents(client, message) # await handle_media_message_contents(client, message)
expected_file_name = f"video_{message.video.file_id}.mp4" # expected_file_name = f"video_{message.video.file_id}.mp4"
expected_file_path = os.path.join(user_folder, expected_file_name) # expected_file_path = os.path.join(user_folder, expected_file_name)
client.download_media.assert_awaited_once_with( # client.download_media.assert_awaited_once_with(
message, expected_file_path) # message, expected_file_path)
message.reply_text.assert_awaited_once_with( # message.reply_text.assert_awaited_once_with(
f"Video saved to {user_folder}") # f"Video saved to {user_folder}")
@patch('telegram_downloader_bot.utils.get_user_folder') # @patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_animation(self, mock_get_user_folder): # async def test_handle_animation(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder") # user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder # mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True) # os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client) # client = Mock(spec=Client)
client.download_media = AsyncMock() # client.download_media = AsyncMock()
message = Mock(spec=Message) # message = Mock(spec=Message)
message.document = None # message.document = None
message.photo = None # message.photo = None
message.video = None # message.video = None
message.animation = Mock() # message.animation = Mock()
message.animation.file_id = "animation_file_id" # message.animation.file_id = "animation_file_id"
message.reply_text = AsyncMock() # message.reply_text = AsyncMock()
await handle_media_message_contents(client, message) # await handle_media_message_contents(client, message)
expected_file_name = f"gif_{message.animation.file_id}.gif" # expected_file_name = f"gif_{message.animation.file_id}.gif"
expected_file_path = os.path.join(user_folder, expected_file_name) # expected_file_path = os.path.join(user_folder, expected_file_name)
client.download_media.assert_awaited_once_with( # client.download_media.assert_awaited_once_with(
message.animation, expected_file_path) # message.animation, expected_file_path)
message.reply_text.assert_awaited_once_with( # message.reply_text.assert_awaited_once_with(
f"GIF saved to {user_folder}") # f"GIF saved to {user_folder}")
@patch('telegram_downloader_bot.utils.get_user_folder') # @patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_document(self, mock_get_user_folder): # async def test_handle_document(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder") # user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder # mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True) # os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client) # client = Mock(spec=Client)
client.download_media = AsyncMock() # client.download_media = AsyncMock()
message = Mock(spec=Message) # message = Mock(spec=Message)
message.document = Mock() # message.document = Mock()
message.document.file_name = "test_document.pdf" # message.document.file_name = "test_document.pdf"
message.photo = None # message.photo = None
message.video = None # message.video = None
message.animation = None # message.animation = None
message.reply_text = AsyncMock() # message.reply_text = AsyncMock()
await handle_media_message_contents(client, message) # await handle_media_message_contents(client, message)
expected_file_path = os.path.join(user_folder, "test_document.pdf") # expected_file_path = os.path.join(user_folder, "test_document.pdf")
client.download_media.assert_awaited_once_with( # client.download_media.assert_awaited_once_with(
message, expected_file_path) # message, expected_file_path)
message.reply_text.assert_awaited_once_with( # message.reply_text.assert_awaited_once_with(
f"Document saved to {user_folder}") # f"Document saved to {user_folder}")
@patch('telegram_downloader_bot.utils.get_user_folder') # @patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_photo(self, mock_get_user_folder): # async def test_handle_photo(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder") # user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder # mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True) # os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client) # client = Mock(spec=Client)
client.download_media = AsyncMock() # client.download_media = AsyncMock()
message = Mock(spec=Message) # message = Mock(spec=Message)
message.document = None # message.document = None
message.photo = Mock() # message.photo = Mock()
message.photo.file_id = "photo_file_id" # message.photo.file_id = "photo_file_id"
message.video = None # message.video = None
message.animation = None # message.animation = None
message.reply_text = AsyncMock() # message.reply_text = AsyncMock()
await handle_media_message_contents(client, message) # await handle_media_message_contents(client, message)
expected_file_name = f"photo_{message.photo.file_id}.jpg" # expected_file_name = f"photo_{message.photo.file_id}.jpg"
expected_file_path = os.path.join(user_folder, expected_file_name) # expected_file_path = os.path.join(user_folder, expected_file_name)
client.download_media.assert_awaited_once_with( # client.download_media.assert_awaited_once_with(
message.photo, expected_file_path) # message.photo, expected_file_path)
message.reply_text.assert_awaited_once_with( # message.reply_text.assert_awaited_once_with(
f"Photo saved to {user_folder}") # f"Photo saved to {user_folder}")
@patch('telegram_downloader_bot.utils.get_user_folder') # @patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_unknown_media(self, mock_get_user_folder): # async def test_handle_unknown_media(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder") # user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder # mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True) # os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client) # client = Mock(spec=Client)
client.download_media = AsyncMock() # client.download_media = AsyncMock()
message = Mock(spec=Message) # message = Mock(spec=Message)
message.document = None # message.document = None
message.photo = None # message.photo = None
message.video = None # message.video = None
message.animation = None # message.animation = None
message.reply_text = AsyncMock() # message.reply_text = AsyncMock()
await handle_media_message_contents(client, message) # await handle_media_message_contents(client, message)
client.download_media.assert_not_called() # client.download_media.assert_not_called()
message.reply_text.assert_awaited_once_with("Unknown media type!") # message.reply_text.assert_awaited_once_with("Unknown media type!")
class TestDownloadTTVideo(unittest.TestCase): # class TestDownloadTTVideo(unittest.TestCase):
def setUp(self): # def setUp(self):
# Create a temporary directory for each test # # Create a temporary directory for each test
self.tmp_path = tempfile.mkdtemp() # self.tmp_path = tempfile.mkdtemp()
os.makedirs(os.path.join(self.tmp_path, "tiktok"), exist_ok=True) # os.makedirs(os.path.join(self.tmp_path, "tiktok"), exist_ok=True)
self.settings_patcher = patch("telegram_downloader_bot.settings.settings.storage", self.tmp_path) # self.settings_patcher = patch("telegram_downloader_bot.settings.settings.storage", self.tmp_path)
self.settings_patcher.start() # self.settings_patcher.start()
# Paths to the valid and invalid video files # # Paths to the valid and invalid video files
self.valid_video_path = os.path.join(self.tmp_path, "valid.mp4") # self.valid_video_path = os.path.join(self.tmp_path, "valid.mp4")
with open(self.valid_video_path, 'wb') as f: # with open(self.valid_video_path, 'wb') as f:
f.write(b'valid mp4 content') # f.write(b'valid mp4 content')
self.invalid_video_path = os.path.join(self.tmp_path, "invalid.mp4") # self.invalid_video_path = os.path.join(self.tmp_path, "invalid.mp4")
with open(self.invalid_video_path, 'wb') as f: # with open(self.invalid_video_path, 'wb') as f:
f.write(b'invalid mp4 content') # f.write(b'invalid mp4 content')
def tearDown(self): # def tearDown(self):
self.settings_patcher.stop() # self.settings_patcher.stop()
# Remove the directory after the test # # Remove the directory after the test
shutil.rmtree(self.tmp_path) # shutil.rmtree(self.tmp_path)
@patch('telegram_downloader_bot.utils.snaptik') # @patch('telegram_downloader_bot.utils.snaptik')
@patch('telegram_downloader_bot.utils.datetime') # @patch('telegram_downloader_bot.utils.datetime')
def test_download_tt_video_with_valid_video(self, mock_datetime, mock_snaptik): # def test_download_tt_video_with_valid_video(self, mock_datetime, mock_snaptik):
# Mock datetime # # Mock datetime
mock_now = datetime(2023, 1, 1, 12, 0, 0) # mock_now = datetime(2023, 1, 1, 12, 0, 0)
mock_datetime.now.return_value = mock_now # mock_datetime.now.return_value = mock_now
# Read the content of valid.mp4 # # Read the content of valid.mp4
with open(self.valid_video_path, 'rb') as f: # with open(self.valid_video_path, 'rb') as f:
valid_video_content = f.read() # valid_video_content = f.read()
# Mock snaptik to return a video that returns valid.mp4 content # # Mock snaptik to return a video that returns valid.mp4 content
mock_video = Mock() # mock_video = Mock()
mock_video.download.return_value.getbuffer.return_value = valid_video_content # mock_video.download.return_value.getbuffer.return_value = valid_video_content
mock_snaptik.return_value = [mock_video] # mock_snaptik.return_value = [mock_video]
# Call the function # # Call the function
download_tt_video("http://tiktok.com/video123") # download_tt_video("http://tiktok.com/video123")
# Verify that the file was saved correctly # # Verify that the file was saved correctly
video_filename = mock_now.strftime( # video_filename = mock_now.strftime(
"video-tiktok-%Y-%m-%d_%H-%M-%S.mp4") # "video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
video_filepath = os.path.join(self.tmp_path, "tiktok", video_filename) # video_filepath = os.path.join(self.tmp_path, "tiktok", video_filename)
self.assertTrue(os.path.exists(video_filepath)) # self.assertTrue(os.path.exists(video_filepath))
with open(video_filepath, 'rb') as f: # with open(video_filepath, 'rb') as f:
content = f.read() # content = f.read()
self.assertEqual(content, valid_video_content) # self.assertEqual(content, valid_video_content)
@patch('telegram_downloader_bot.utils.snaptik') # @patch('telegram_downloader_bot.utils.snaptik')
@patch('telegram_downloader_bot.utils.datetime') # @patch('telegram_downloader_bot.utils.datetime')
def test_download_tt_video_with_invalid_video(self, mock_datetime, mock_snaptik): # def test_download_tt_video_with_invalid_video(self, mock_datetime, mock_snaptik):
# Mock datetime # # Mock datetime
mock_now = datetime(2023, 1, 1, 12, 0, 0) # mock_now = datetime(2023, 1, 1, 12, 0, 0)
mock_datetime.now.return_value = mock_now # mock_datetime.now.return_value = mock_now
# Read the content of invalid.mp4 # # Read the content of invalid.mp4
with open(self.invalid_video_path, 'rb') as f: # with open(self.invalid_video_path, 'rb') as f:
invalid_video_content = f.read() # invalid_video_content = f.read()
# Mock snaptik to return a video that returns invalid.mp4 content # # Mock snaptik to return a video that returns invalid.mp4 content
mock_video = Mock() # mock_video = Mock()
mock_video.download.return_value.getbuffer.return_value = invalid_video_content # mock_video.download.return_value.getbuffer.return_value = invalid_video_content
mock_snaptik.return_value = [mock_video] # mock_snaptik.return_value = [mock_video]
# Call the function # # Call the function
download_tt_video("http://tiktok.com/video123") # download_tt_video("http://tiktok.com/video123")
# Verify that the file was saved # # Verify that the file was saved
video_filename = mock_now.strftime( # video_filename = mock_now.strftime(
"video-tiktok-%Y-%m-%d_%H-%M-%S.mp4") # "video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
video_filepath = os.path.join(self.tmp_path, "tiktok", video_filename) # video_filepath = os.path.join(self.tmp_path, "tiktok", video_filename)
self.assertTrue(os.path.exists(video_filepath)) # self.assertTrue(os.path.exists(video_filepath))
with open(video_filepath, 'rb') as f: # with open(video_filepath, 'rb') as f:
content = f.read() # content = f.read()
self.assertEqual(content, invalid_video_content) # self.assertEqual(content, invalid_video_content)
@patch('telegram_downloader_bot.utils.snaptik') # @patch('telegram_downloader_bot.utils.snaptik')
@patch('telegram_downloader_bot.utils.datetime') # @patch('telegram_downloader_bot.utils.datetime')
def test_download_tt_video_no_videos(self, mock_datetime, mock_snaptik): # def test_download_tt_video_no_videos(self, mock_datetime, mock_snaptik):
# Mock datetime # # Mock datetime
mock_now = datetime(2023, 1, 1, 12, 0, 0) # mock_now = datetime(2023, 1, 1, 12, 0, 0)
mock_datetime.datetime.now.return_value = mock_now # mock_datetime.datetime.now.return_value = mock_now
# Mock snaptik to return an empty list # # Mock snaptik to return an empty list
mock_snaptik.return_value = [] # mock_snaptik.return_value = []
# Call the function # # Call the function
download_tt_video("http://tiktok.com/video123") # download_tt_video("http://tiktok.com/video123")
# Verify that no files were created # # Verify that no files were created
tiktok_folder = os.path.join(self.tmp_path, "tiktok") # tiktok_folder = os.path.join(self.tmp_path, "tiktok")
files = os.listdir(tiktok_folder) # files = os.listdir(tiktok_folder)
self.assertEqual(len(files), 0) # self.assertEqual(len(files), 0)
class TestMakeFS(unittest.TestCase): # class TestMakeFS(unittest.TestCase):
def setUp(self): # def setUp(self):
self.tmp_path = tempfile.mkdtemp() # self.tmp_path = tempfile.mkdtemp()
def tearDown(self): # def tearDown(self):
shutil.rmtree(self.tmp_path) # shutil.rmtree(self.tmp_path)
def test_make_fs(self): # def test_make_fs(self):
make_fs(self.tmp_path) # make_fs(self.tmp_path)
self.assertTrue(os.path.exists(os.path.join(self.tmp_path, "tiktok"))) # self.assertTrue(os.path.exists(os.path.join(self.tmp_path, "tiktok")))
self.assertTrue(os.path.exists( # self.assertTrue(os.path.exists(
os.path.join(self.tmp_path, "telegram"))) # os.path.join(self.tmp_path, "telegram")))
class TestExtractURLs(unittest.TestCase): # class TestExtractURLs(unittest.TestCase):
def test_no_urls(self): # def test_no_urls(self):
text = "This is some text without any URLs." # text = "This is some text without any URLs."
result = extract_urls(text) # result = extract_urls(text)
self.assertEqual(result, []) # self.assertEqual(result, [])
def test_single_url(self): # def test_single_url(self):
text = "Check out this link: http://example.com" # text = "Check out this link: http://example.com"
result = extract_urls(text) # result = extract_urls(text)
self.assertEqual(result, ["http://example.com"]) # self.assertEqual(result, ["http://example.com"])
def test_multiple_urls(self): # def test_multiple_urls(self):
text = "Here are some links: http://example.com and https://test.com/page" # text = "Here are some links: http://example.com and https://test.com/page"
result = extract_urls(text) # result = extract_urls(text)
self.assertEqual( # self.assertEqual(
result, ["http://example.com", "https://test.com/page"]) # result, ["http://example.com", "https://test.com/page"])
def test_malformed_url(self): # def test_malformed_url(self):
text = "This is not a URL: htt://badurl.com" # text = "This is not a URL: htt://badurl.com"
result = extract_urls(text) # result = extract_urls(text)
self.assertEqual(result, []) # self.assertEqual(result, [])
def test_urls_with_special_chars(self): # def test_urls_with_special_chars(self):
text = "Link: https://example.com/page?param=value#anchor" # text = "Link: https://example.com/page?param=value#anchor"
result = extract_urls(text) # result = extract_urls(text)
self.assertEqual( # self.assertEqual(
result, ["https://example.com/page?param=value#anchor"]) # result, ["https://example.com/page?param=value#anchor"])
class TestFilterTTURLs(unittest.TestCase): # class TestFilterTTURLs(unittest.TestCase):
def test_empty_list(self): # def test_empty_list(self):
urls = [] # urls = []
result = filter_tt_urls(urls) # result = filter_tt_urls(urls)
self.assertEqual(result, []) # self.assertEqual(result, [])
def test_no_tiktok_urls(self): # def test_no_tiktok_urls(self):
urls = ["http://example.com", "https://test.com/page"] # urls = ["http://example.com", "https://test.com/page"]
result = filter_tt_urls(urls) # result = filter_tt_urls(urls)
self.assertEqual(result, []) # self.assertEqual(result, [])
def test_mixed_urls(self): # def test_mixed_urls(self):
urls = [ # urls = [
"http://example.com", # "http://example.com",
"https://www.tiktok.com/@user/video/123", # "https://www.tiktok.com/@user/video/123",
"http://tiktok.com/video1", # "http://tiktok.com/video1",
"https://test.com/page", # "https://test.com/page",
] # ]
expected = [ # expected = [
"https://www.tiktok.com/@user/video/123", # "https://www.tiktok.com/@user/video/123",
"http://tiktok.com/video1", # "http://tiktok.com/video1",
] # ]
result = filter_tt_urls(urls) # result = filter_tt_urls(urls)
self.assertEqual(result, expected) # self.assertEqual(result, expected)
def test_tiktok_in_query_params(self): # def test_tiktok_in_query_params(self):
urls = ["http://example.com?watch=tiktok", "https://other.com/path"] # urls = ["http://example.com?watch=tiktok", "https://other.com/path"]
expected = ["http://example.com?watch=tiktok"] # expected = ["http://example.com?watch=tiktok"]
result = filter_tt_urls(urls) # result = filter_tt_urls(urls)
self.assertEqual(result, expected) # self.assertEqual(result, expected)