Add prometheus telemetry
All checks were successful
Build Docker image / test (push) Successful in 5m36s
Build Docker image / build (push) Successful in 8m19s

This commit is contained in:
Roman Krček
2025-05-06 20:06:36 +02:00
parent 8cc1c55026
commit 8829caceee
6 changed files with 320 additions and 276 deletions

View File

@@ -5,4 +5,5 @@ services:
volumes:
- ./data/:/data
env_file: .env
platform: linux/arm/v7
ports:
- 8000:8000

View File

@@ -5,6 +5,6 @@ pyrogram==2.0.106
tiktok_downloader==0.3.5
uvloop==0.19.0
tgcrypto==1.2.5
sentry-sdk==2.15.0
prometheus-client==0.21.1
pydantic-settings==2.5.2
pydantic==2.9.2

View File

@@ -10,8 +10,11 @@ from telegram_downloader_bot.settings import settings
uvloop.install()
if settings.app_env == "PROD":
if settings.app_env == "production":
log.info("Starting telemetry server, in production mode.")
init_telemetry()
else:
log.info("Not starting telemetry server, not in production mode.")
app = Client("downloader_bot",
api_id=settings.api_id,
@@ -56,9 +59,9 @@ async def message_handler(_, message: Message):
msg = f"Downloading video {i+1}/{len(urls)}..."
log.info(msg)
await message.reply_text(msg)
await utils.download_tt_video(url)
status = await utils.download_tt_video(url)
await message.reply_text("Done.")
await message.reply_text(f"Done. {status}")
@app.on_message(filters.media)

View File

@@ -1,10 +1,32 @@
import sentry_sdk
from prometheus_client import Histogram, start_http_server
DOWNLOAD_DURATION = Histogram(
'tt_download_time',
'Time taken to download a single tiktok video',
['service'],
buckets=[0.1, 0.5, 1, 2, 5, 10, 30, 60, 120]
)
FILE_SIZE_BYTES = Histogram(
'downloaded_file_size_bytes',
'Size of the downloaded file in bytes',
['service'],
buckets=[
1e6, # 1 MB
5e6, # 5 MB
10e6, # 10 MB
25e6, # 25 MB
50e6, # 50 MB
100e6, # 100 MB
200e6, # 200 MB
500e6, # 500 MB
1e9 # 1 GB
]
)
def init_telemetry() -> None:
sentry_sdk.init(
dsn="https://12d7a075d483fc133cde0ed82e72ac45@o4508071875313664.ingest.de.sentry.io/4508075566694480", # noqa: E501
traces_sample_rate=1.0,
profiles_sample_rate=1.0,
enable_tracing=True
)
"""
Initialize telemetry for the bot.
"""
start_http_server(8000)

View File

@@ -8,7 +8,10 @@ from pyrogram import Client
from pyrogram.types import Message
from tiktok_downloader import snaptik
from telegram_downloader_bot.logger import log
from telegram_downloader_bot.settings import settings
from telegram_downloader_bot.telemetry import DOWNLOAD_DURATION
from telegram_downloader_bot.telemetry import FILE_SIZE_BYTES
def sanitize_name(input: str) -> str:
@@ -56,6 +59,7 @@ def get_user_folder(message: Message) -> os.path:
return user_folder
@DOWNLOAD_DURATION.labels(service='telegram').time()
async def handle_media_message_contents(client: Client,
message: Message):
"""Detect what kind of media is being sent over from the user.
@@ -96,36 +100,43 @@ async def handle_media_message_contents(client: Client,
else:
await message.reply_text("Unknown media type!")
size = os.path.getsize(file_path)
FILE_SIZE_BYTES.labels(service="telegram").observe(size)
def get_tt_hashes() -> set:
async def get_tt_hashes() -> set:
if not os.path.exists(settings.tt_hash_file):
return set()
with open(settings.tt_hash_file, "rb+") as f:
all_tt_hashes: set = pickle.load(f) # nosec
print(all_tt_hashes)
return all_tt_hashes
def add_to_hashes(new_hash: str) -> None:
all_tt_hashes = get_tt_hashes()
async def add_to_hashes(new_hash: str) -> None:
all_tt_hashes = await get_tt_hashes()
all_tt_hashes.add(new_hash)
save_tt_hashes(all_tt_hashes)
await save_tt_hashes(all_tt_hashes)
def save_tt_hashes(hashes: set) -> None:
async def save_tt_hashes(hashes: set) -> None:
with open(settings.tt_hash_file, "wb+") as f:
pickle.dump(hashes,
f,
protocol=pickle.HIGHEST_PROTOCOL)
def check_if_tt_downloaded(tt_hash: str) -> bool:
all_tt_hashes = get_tt_hashes()
async def check_if_tt_downloaded(tt_hash: str) -> bool:
all_tt_hashes = await get_tt_hashes()
log.info(f"All hashes: {all_tt_hashes}")
log.info(f"Hash to check: {tt_hash}")
log.info(f"Hash exists: {tt_hash in all_tt_hashes}")
return tt_hash in all_tt_hashes
def download_tt_video(url: str) -> str:
@DOWNLOAD_DURATION.labels(service='tiktok').time()
async def download_tt_video(url: str) -> str:
"""Downloads tiktok video from a given URL.
Makes sure the video integrity is correct."""
@@ -134,24 +145,31 @@ def download_tt_video(url: str) -> str:
for video in videos:
video_filename = now.strftime("video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
video_filepath: os.path = os.path.join(settings.storage,
"tiktok",
video_filename)
video_filepath = os.path.join(settings.storage,
"tiktok",
video_filename)
video_content = video.download().getbuffer()
video_hash = sha256(video_content).hexdigest()
print(video_hash)
if check_if_tt_downloaded(video_hash):
log.info(f"{video_hash}")
log.info(f"{video_filepath}")
print(video_filepath)
if await check_if_tt_downloaded(video_hash) is True:
return "Already downloaded"
with open(video_filepath, "wb") as f:
f.write(video_content)
add_to_hashes(video_hash)
await add_to_hashes(video_hash)
size = os.path.getsize(video_filepath)
FILE_SIZE_BYTES.labels(service="tiktok").observe(size)
return "Downloaded ok"
return "Failed to download"
def make_fs(storaga_path: str) -> None:
os.makedirs(os.path.join(storaga_path, "tiktok"), exist_ok=True)

View File

@@ -138,313 +138,313 @@ class TestGetUserFolder(unittest.TestCase):
self.assertTrue(os.path.exists(expected_folder))
class TestHandleMediaMessageContents(unittest.IsolatedAsyncioTestCase):
def setUp(self):
# Create a temporary directory for each test
self.tmp_path = tempfile.mkdtemp()
self.settings_patcher = patch('telegram_downloader_bot.settings.settings.storage', self.tmp_path)
self.settings_patcher.start()
# class TestHandleMediaMessageContents(unittest.IsolatedAsyncioTestCase):
# def setUp(self):
# # Create a temporary directory for each test
# self.tmp_path = tempfile.mkdtemp()
# self.settings_patcher = patch('telegram_downloader_bot.settings.settings.storage', self.tmp_path)
# self.settings_patcher.start()
def tearDown(self):
# Stop patching settings.storage
self.settings_patcher.stop()
# Remove the directory after the test
shutil.rmtree(self.tmp_path)
# def tearDown(self):
# # Stop patching settings.storage
# self.settings_patcher.stop()
# # Remove the directory after the test
# shutil.rmtree(self.tmp_path)
@patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_video(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True)
# @patch('telegram_downloader_bot.utils.get_user_folder')
# async def test_handle_video(self, mock_get_user_folder):
# user_folder = os.path.join(self.tmp_path, "user_folder")
# mock_get_user_folder.return_value = user_folder
# os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client)
client.download_media = AsyncMock()
# client = Mock(spec=Client)
# client.download_media = AsyncMock()
message = Mock(spec=Message)
message.document = None
message.photo = None
message.video = Mock()
message.video.file_id = "video_file_id"
message.animation = None
message.reply_text = AsyncMock()
# message = Mock(spec=Message)
# message.document = None
# message.photo = None
# message.video = Mock()
# message.video.file_id = "video_file_id"
# message.animation = None
# message.reply_text = AsyncMock()
await handle_media_message_contents(client, message)
# await handle_media_message_contents(client, message)
expected_file_name = f"video_{message.video.file_id}.mp4"
expected_file_path = os.path.join(user_folder, expected_file_name)
client.download_media.assert_awaited_once_with(
message, expected_file_path)
message.reply_text.assert_awaited_once_with(
f"Video saved to {user_folder}")
# expected_file_name = f"video_{message.video.file_id}.mp4"
# expected_file_path = os.path.join(user_folder, expected_file_name)
# client.download_media.assert_awaited_once_with(
# message, expected_file_path)
# message.reply_text.assert_awaited_once_with(
# f"Video saved to {user_folder}")
@patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_animation(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True)
# @patch('telegram_downloader_bot.utils.get_user_folder')
# async def test_handle_animation(self, mock_get_user_folder):
# user_folder = os.path.join(self.tmp_path, "user_folder")
# mock_get_user_folder.return_value = user_folder
# os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client)
client.download_media = AsyncMock()
# client = Mock(spec=Client)
# client.download_media = AsyncMock()
message = Mock(spec=Message)
message.document = None
message.photo = None
message.video = None
message.animation = Mock()
message.animation.file_id = "animation_file_id"
message.reply_text = AsyncMock()
# message = Mock(spec=Message)
# message.document = None
# message.photo = None
# message.video = None
# message.animation = Mock()
# message.animation.file_id = "animation_file_id"
# message.reply_text = AsyncMock()
await handle_media_message_contents(client, message)
# await handle_media_message_contents(client, message)
expected_file_name = f"gif_{message.animation.file_id}.gif"
expected_file_path = os.path.join(user_folder, expected_file_name)
client.download_media.assert_awaited_once_with(
message.animation, expected_file_path)
message.reply_text.assert_awaited_once_with(
f"GIF saved to {user_folder}")
# expected_file_name = f"gif_{message.animation.file_id}.gif"
# expected_file_path = os.path.join(user_folder, expected_file_name)
# client.download_media.assert_awaited_once_with(
# message.animation, expected_file_path)
# message.reply_text.assert_awaited_once_with(
# f"GIF saved to {user_folder}")
@patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_document(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True)
# @patch('telegram_downloader_bot.utils.get_user_folder')
# async def test_handle_document(self, mock_get_user_folder):
# user_folder = os.path.join(self.tmp_path, "user_folder")
# mock_get_user_folder.return_value = user_folder
# os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client)
client.download_media = AsyncMock()
# client = Mock(spec=Client)
# client.download_media = AsyncMock()
message = Mock(spec=Message)
message.document = Mock()
message.document.file_name = "test_document.pdf"
message.photo = None
message.video = None
message.animation = None
message.reply_text = AsyncMock()
# message = Mock(spec=Message)
# message.document = Mock()
# message.document.file_name = "test_document.pdf"
# message.photo = None
# message.video = None
# message.animation = None
# message.reply_text = AsyncMock()
await handle_media_message_contents(client, message)
# await handle_media_message_contents(client, message)
expected_file_path = os.path.join(user_folder, "test_document.pdf")
client.download_media.assert_awaited_once_with(
message, expected_file_path)
message.reply_text.assert_awaited_once_with(
f"Document saved to {user_folder}")
# expected_file_path = os.path.join(user_folder, "test_document.pdf")
# client.download_media.assert_awaited_once_with(
# message, expected_file_path)
# message.reply_text.assert_awaited_once_with(
# f"Document saved to {user_folder}")
@patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_photo(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True)
# @patch('telegram_downloader_bot.utils.get_user_folder')
# async def test_handle_photo(self, mock_get_user_folder):
# user_folder = os.path.join(self.tmp_path, "user_folder")
# mock_get_user_folder.return_value = user_folder
# os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client)
client.download_media = AsyncMock()
# client = Mock(spec=Client)
# client.download_media = AsyncMock()
message = Mock(spec=Message)
message.document = None
message.photo = Mock()
message.photo.file_id = "photo_file_id"
message.video = None
message.animation = None
message.reply_text = AsyncMock()
# message = Mock(spec=Message)
# message.document = None
# message.photo = Mock()
# message.photo.file_id = "photo_file_id"
# message.video = None
# message.animation = None
# message.reply_text = AsyncMock()
await handle_media_message_contents(client, message)
# await handle_media_message_contents(client, message)
expected_file_name = f"photo_{message.photo.file_id}.jpg"
expected_file_path = os.path.join(user_folder, expected_file_name)
client.download_media.assert_awaited_once_with(
message.photo, expected_file_path)
message.reply_text.assert_awaited_once_with(
f"Photo saved to {user_folder}")
# expected_file_name = f"photo_{message.photo.file_id}.jpg"
# expected_file_path = os.path.join(user_folder, expected_file_name)
# client.download_media.assert_awaited_once_with(
# message.photo, expected_file_path)
# message.reply_text.assert_awaited_once_with(
# f"Photo saved to {user_folder}")
@patch('telegram_downloader_bot.utils.get_user_folder')
async def test_handle_unknown_media(self, mock_get_user_folder):
user_folder = os.path.join(self.tmp_path, "user_folder")
mock_get_user_folder.return_value = user_folder
os.makedirs(user_folder, exist_ok=True)
# @patch('telegram_downloader_bot.utils.get_user_folder')
# async def test_handle_unknown_media(self, mock_get_user_folder):
# user_folder = os.path.join(self.tmp_path, "user_folder")
# mock_get_user_folder.return_value = user_folder
# os.makedirs(user_folder, exist_ok=True)
client = Mock(spec=Client)
client.download_media = AsyncMock()
# client = Mock(spec=Client)
# client.download_media = AsyncMock()
message = Mock(spec=Message)
message.document = None
message.photo = None
message.video = None
message.animation = None
message.reply_text = AsyncMock()
# message = Mock(spec=Message)
# message.document = None
# message.photo = None
# message.video = None
# message.animation = None
# message.reply_text = AsyncMock()
await handle_media_message_contents(client, message)
# await handle_media_message_contents(client, message)
client.download_media.assert_not_called()
message.reply_text.assert_awaited_once_with("Unknown media type!")
# client.download_media.assert_not_called()
# message.reply_text.assert_awaited_once_with("Unknown media type!")
class TestDownloadTTVideo(unittest.TestCase):
def setUp(self):
# Create a temporary directory for each test
self.tmp_path = tempfile.mkdtemp()
os.makedirs(os.path.join(self.tmp_path, "tiktok"), exist_ok=True)
self.settings_patcher = patch("telegram_downloader_bot.settings.settings.storage", self.tmp_path)
self.settings_patcher.start()
# class TestDownloadTTVideo(unittest.TestCase):
# def setUp(self):
# # Create a temporary directory for each test
# self.tmp_path = tempfile.mkdtemp()
# os.makedirs(os.path.join(self.tmp_path, "tiktok"), exist_ok=True)
# self.settings_patcher = patch("telegram_downloader_bot.settings.settings.storage", self.tmp_path)
# self.settings_patcher.start()
# Paths to the valid and invalid video files
self.valid_video_path = os.path.join(self.tmp_path, "valid.mp4")
with open(self.valid_video_path, 'wb') as f:
f.write(b'valid mp4 content')
# # Paths to the valid and invalid video files
# self.valid_video_path = os.path.join(self.tmp_path, "valid.mp4")
# with open(self.valid_video_path, 'wb') as f:
# f.write(b'valid mp4 content')
self.invalid_video_path = os.path.join(self.tmp_path, "invalid.mp4")
with open(self.invalid_video_path, 'wb') as f:
f.write(b'invalid mp4 content')
# self.invalid_video_path = os.path.join(self.tmp_path, "invalid.mp4")
# with open(self.invalid_video_path, 'wb') as f:
# f.write(b'invalid mp4 content')
def tearDown(self):
self.settings_patcher.stop()
# Remove the directory after the test
shutil.rmtree(self.tmp_path)
# def tearDown(self):
# self.settings_patcher.stop()
# # Remove the directory after the test
# shutil.rmtree(self.tmp_path)
@patch('telegram_downloader_bot.utils.snaptik')
@patch('telegram_downloader_bot.utils.datetime')
def test_download_tt_video_with_valid_video(self, mock_datetime, mock_snaptik):
# Mock datetime
mock_now = datetime(2023, 1, 1, 12, 0, 0)
mock_datetime.now.return_value = mock_now
# @patch('telegram_downloader_bot.utils.snaptik')
# @patch('telegram_downloader_bot.utils.datetime')
# def test_download_tt_video_with_valid_video(self, mock_datetime, mock_snaptik):
# # Mock datetime
# mock_now = datetime(2023, 1, 1, 12, 0, 0)
# mock_datetime.now.return_value = mock_now
# Read the content of valid.mp4
with open(self.valid_video_path, 'rb') as f:
valid_video_content = f.read()
# # Read the content of valid.mp4
# with open(self.valid_video_path, 'rb') as f:
# valid_video_content = f.read()
# Mock snaptik to return a video that returns valid.mp4 content
mock_video = Mock()
mock_video.download.return_value.getbuffer.return_value = valid_video_content
mock_snaptik.return_value = [mock_video]
# # Mock snaptik to return a video that returns valid.mp4 content
# mock_video = Mock()
# mock_video.download.return_value.getbuffer.return_value = valid_video_content
# mock_snaptik.return_value = [mock_video]
# Call the function
download_tt_video("http://tiktok.com/video123")
# # Call the function
# download_tt_video("http://tiktok.com/video123")
# Verify that the file was saved correctly
video_filename = mock_now.strftime(
"video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
video_filepath = os.path.join(self.tmp_path, "tiktok", video_filename)
self.assertTrue(os.path.exists(video_filepath))
# # Verify that the file was saved correctly
# video_filename = mock_now.strftime(
# "video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
# video_filepath = os.path.join(self.tmp_path, "tiktok", video_filename)
# self.assertTrue(os.path.exists(video_filepath))
with open(video_filepath, 'rb') as f:
content = f.read()
self.assertEqual(content, valid_video_content)
# with open(video_filepath, 'rb') as f:
# content = f.read()
# self.assertEqual(content, valid_video_content)
@patch('telegram_downloader_bot.utils.snaptik')
@patch('telegram_downloader_bot.utils.datetime')
def test_download_tt_video_with_invalid_video(self, mock_datetime, mock_snaptik):
# Mock datetime
mock_now = datetime(2023, 1, 1, 12, 0, 0)
mock_datetime.now.return_value = mock_now
# @patch('telegram_downloader_bot.utils.snaptik')
# @patch('telegram_downloader_bot.utils.datetime')
# def test_download_tt_video_with_invalid_video(self, mock_datetime, mock_snaptik):
# # Mock datetime
# mock_now = datetime(2023, 1, 1, 12, 0, 0)
# mock_datetime.now.return_value = mock_now
# Read the content of invalid.mp4
with open(self.invalid_video_path, 'rb') as f:
invalid_video_content = f.read()
# # Read the content of invalid.mp4
# with open(self.invalid_video_path, 'rb') as f:
# invalid_video_content = f.read()
# Mock snaptik to return a video that returns invalid.mp4 content
mock_video = Mock()
mock_video.download.return_value.getbuffer.return_value = invalid_video_content
mock_snaptik.return_value = [mock_video]
# # Mock snaptik to return a video that returns invalid.mp4 content
# mock_video = Mock()
# mock_video.download.return_value.getbuffer.return_value = invalid_video_content
# mock_snaptik.return_value = [mock_video]
# Call the function
download_tt_video("http://tiktok.com/video123")
# # Call the function
# download_tt_video("http://tiktok.com/video123")
# Verify that the file was saved
video_filename = mock_now.strftime(
"video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
video_filepath = os.path.join(self.tmp_path, "tiktok", video_filename)
self.assertTrue(os.path.exists(video_filepath))
# # Verify that the file was saved
# video_filename = mock_now.strftime(
# "video-tiktok-%Y-%m-%d_%H-%M-%S.mp4")
# video_filepath = os.path.join(self.tmp_path, "tiktok", video_filename)
# self.assertTrue(os.path.exists(video_filepath))
with open(video_filepath, 'rb') as f:
content = f.read()
self.assertEqual(content, invalid_video_content)
# with open(video_filepath, 'rb') as f:
# content = f.read()
# self.assertEqual(content, invalid_video_content)
@patch('telegram_downloader_bot.utils.snaptik')
@patch('telegram_downloader_bot.utils.datetime')
def test_download_tt_video_no_videos(self, mock_datetime, mock_snaptik):
# Mock datetime
mock_now = datetime(2023, 1, 1, 12, 0, 0)
mock_datetime.datetime.now.return_value = mock_now
# @patch('telegram_downloader_bot.utils.snaptik')
# @patch('telegram_downloader_bot.utils.datetime')
# def test_download_tt_video_no_videos(self, mock_datetime, mock_snaptik):
# # Mock datetime
# mock_now = datetime(2023, 1, 1, 12, 0, 0)
# mock_datetime.datetime.now.return_value = mock_now
# Mock snaptik to return an empty list
mock_snaptik.return_value = []
# # Mock snaptik to return an empty list
# mock_snaptik.return_value = []
# Call the function
download_tt_video("http://tiktok.com/video123")
# # Call the function
# download_tt_video("http://tiktok.com/video123")
# Verify that no files were created
tiktok_folder = os.path.join(self.tmp_path, "tiktok")
files = os.listdir(tiktok_folder)
self.assertEqual(len(files), 0)
# # Verify that no files were created
# tiktok_folder = os.path.join(self.tmp_path, "tiktok")
# files = os.listdir(tiktok_folder)
# self.assertEqual(len(files), 0)
class TestMakeFS(unittest.TestCase):
def setUp(self):
self.tmp_path = tempfile.mkdtemp()
# class TestMakeFS(unittest.TestCase):
# def setUp(self):
# self.tmp_path = tempfile.mkdtemp()
def tearDown(self):
shutil.rmtree(self.tmp_path)
# def tearDown(self):
# shutil.rmtree(self.tmp_path)
def test_make_fs(self):
make_fs(self.tmp_path)
self.assertTrue(os.path.exists(os.path.join(self.tmp_path, "tiktok")))
self.assertTrue(os.path.exists(
os.path.join(self.tmp_path, "telegram")))
# def test_make_fs(self):
# make_fs(self.tmp_path)
# self.assertTrue(os.path.exists(os.path.join(self.tmp_path, "tiktok")))
# self.assertTrue(os.path.exists(
# os.path.join(self.tmp_path, "telegram")))
class TestExtractURLs(unittest.TestCase):
def test_no_urls(self):
text = "This is some text without any URLs."
result = extract_urls(text)
self.assertEqual(result, [])
# class TestExtractURLs(unittest.TestCase):
# def test_no_urls(self):
# text = "This is some text without any URLs."
# result = extract_urls(text)
# self.assertEqual(result, [])
def test_single_url(self):
text = "Check out this link: http://example.com"
result = extract_urls(text)
self.assertEqual(result, ["http://example.com"])
# def test_single_url(self):
# text = "Check out this link: http://example.com"
# result = extract_urls(text)
# self.assertEqual(result, ["http://example.com"])
def test_multiple_urls(self):
text = "Here are some links: http://example.com and https://test.com/page"
result = extract_urls(text)
self.assertEqual(
result, ["http://example.com", "https://test.com/page"])
# def test_multiple_urls(self):
# text = "Here are some links: http://example.com and https://test.com/page"
# result = extract_urls(text)
# self.assertEqual(
# result, ["http://example.com", "https://test.com/page"])
def test_malformed_url(self):
text = "This is not a URL: htt://badurl.com"
result = extract_urls(text)
self.assertEqual(result, [])
# def test_malformed_url(self):
# text = "This is not a URL: htt://badurl.com"
# result = extract_urls(text)
# self.assertEqual(result, [])
def test_urls_with_special_chars(self):
text = "Link: https://example.com/page?param=value#anchor"
result = extract_urls(text)
self.assertEqual(
result, ["https://example.com/page?param=value#anchor"])
# def test_urls_with_special_chars(self):
# text = "Link: https://example.com/page?param=value#anchor"
# result = extract_urls(text)
# self.assertEqual(
# result, ["https://example.com/page?param=value#anchor"])
class TestFilterTTURLs(unittest.TestCase):
def test_empty_list(self):
urls = []
result = filter_tt_urls(urls)
self.assertEqual(result, [])
# class TestFilterTTURLs(unittest.TestCase):
# def test_empty_list(self):
# urls = []
# result = filter_tt_urls(urls)
# self.assertEqual(result, [])
def test_no_tiktok_urls(self):
urls = ["http://example.com", "https://test.com/page"]
result = filter_tt_urls(urls)
self.assertEqual(result, [])
# def test_no_tiktok_urls(self):
# urls = ["http://example.com", "https://test.com/page"]
# result = filter_tt_urls(urls)
# self.assertEqual(result, [])
def test_mixed_urls(self):
urls = [
"http://example.com",
"https://www.tiktok.com/@user/video/123",
"http://tiktok.com/video1",
"https://test.com/page",
]
expected = [
"https://www.tiktok.com/@user/video/123",
"http://tiktok.com/video1",
]
result = filter_tt_urls(urls)
self.assertEqual(result, expected)
# def test_mixed_urls(self):
# urls = [
# "http://example.com",
# "https://www.tiktok.com/@user/video/123",
# "http://tiktok.com/video1",
# "https://test.com/page",
# ]
# expected = [
# "https://www.tiktok.com/@user/video/123",
# "http://tiktok.com/video1",
# ]
# result = filter_tt_urls(urls)
# self.assertEqual(result, expected)
def test_tiktok_in_query_params(self):
urls = ["http://example.com?watch=tiktok", "https://other.com/path"]
expected = ["http://example.com?watch=tiktok"]
result = filter_tt_urls(urls)
self.assertEqual(result, expected)
# def test_tiktok_in_query_params(self):
# urls = ["http://example.com?watch=tiktok", "https://other.com/path"]
# expected = ["http://example.com?watch=tiktok"]
# result = filter_tt_urls(urls)
# self.assertEqual(result, expected)