Skip to content

Commit

Permalink
Merge pull request #4 from Demmenie/v3.2.0
Browse files Browse the repository at this point in the history
V3.2.0
  • Loading branch information
Demmenie authored Jul 11, 2024
2 parents 5a74ec2 + ea75078 commit f712256
Show file tree
Hide file tree
Showing 11 changed files with 279 additions and 180 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_mac_os.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
runs-on: macos-latest
strategy:
matrix:
python-version: ['3.9']
python-version: ['3.11']
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
runs-on: windows-latest
strategy:
matrix:
python-version: ['3.9']
python-version: ['3.11']
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.9'
python-version: '3.11'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
4 changes: 3 additions & 1 deletion DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ submitting a pull request.
### Testing

Before submitting a pull request, make sure the code passes all the tests and is
formatted by black:
formatted by black. We recommend testing this package in an isolated
environment, preferably a VM. In particular, avoid testing in WSL, as it can
lead to files on your machine being erroneously removed or corrupted.

```bash
# Inside the project root (directory containing this file)
Expand Down
19 changes: 17 additions & 2 deletions tests/test_videoduration.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os

import pytest

from videohash2.exceptions import DidNotSupplyPathOrUrl
from videohash2.videoduration import video_duration
from videohash2.utils import create_and_return_temporary_directory

this_dir = os.path.dirname(os.path.realpath(__file__))

Expand All @@ -19,4 +20,18 @@ def test_video_duration():
+ "rocket.mkv"
)

assert (video_duration(video_path) - 52.08) < 0.1
assert (video_duration(path=video_path) - 52.08) < 0.1

url = "https://raw.githubusercontent.com/demmenie/videohash2/main/assets/rocket.mkv"

assert (video_duration(url=url) - 52.08) < 0.1

with pytest.raises(DidNotSupplyPathOrUrl):
video_duration(url=None, path=None)

with pytest.raises(ValueError):
storage_path = os.path.join(
create_and_return_temporary_directory(),
("thisdirdoesnotexist" + os.path.sep),
)
video_duration(url="https://example.com", path=storage_path)
10 changes: 7 additions & 3 deletions tests/test_videohash.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
def test_all():

source1 = (
"https://raw.githubusercontent.com/akamhy/videohash/main/assets/rocket.mkv"
"https://raw.githubusercontent.com/demmenie/videohash2/main/assets/rocket.mkv"
)
videohash1 = VideoHash(url=source1, frame_interval=3)
videohash1.delete_storage_path()
hash1 = videohash1.hash
hash_hex1 = videohash1.hash_hex
assert hash1 == "0b1010100110101001111111111111101101011110101100010000001100000011"
Expand Down Expand Up @@ -56,13 +55,15 @@ def test_all():
+ os.path.sep
+ "rocket.mkv"
)

videohash2 = VideoHash(path=source2, frame_interval=3)
hash2 = videohash2.hash
hash_hex2 = videohash2.hash_hex
assert hash2 == "0b1010100110101001111111111111101101011110101100010000001100000011"
assert hash_hex2 == "0xa9a9fffb5eb10303"

source3 = "https://www.youtube.com/watch?v=PapBjpzRhnA"

videohash3 = VideoHash(url=source3)
hash3 = videohash3.hash
hash_hex3 = videohash3.hash_hex
Expand Down Expand Up @@ -90,7 +91,7 @@ def test_all():
assert videohash1 != videohash4
assert videohash2 != videohash4
assert videohash3 != videohash4
assert videohash3.is_diffrent(videohash4)
assert videohash3.is_different(videohash4)

with pytest.raises(ValueError):
# not padded with 0x
Expand Down Expand Up @@ -142,3 +143,6 @@ def __init__(self, hash=None):
create_and_return_temporary_directory(), "file_extension_less_video"
)
VideoHash(path=path)

# Allow this test module to be executed directly as a script, in which case
# the single test function is invoked without going through the pytest runner.
if __name__ == "__main__":
    test_all()
26 changes: 24 additions & 2 deletions videohash2/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import tempfile
from pathlib import Path
import random
from typing import List
from pathlib import Path


def get_list_of_all_files_in_dir(directory: str) -> List[str]:
Expand Down Expand Up @@ -34,7 +35,6 @@ def does_path_exists(path: str) -> bool:
# it's file
return False


def create_and_return_temporary_directory() -> str:
"""
create a temporary directory where we can store the video, frames and the
Expand All @@ -47,3 +47,25 @@ def create_and_return_temporary_directory() -> str:
path = os.path.join(tempfile.mkdtemp(), ("temp_storage_dir" + os.path.sep))
Path(path).mkdir(parents=True, exist_ok=True)
return path

def _get_task_uid() -> str:
    """
    Build a random task id for the current instance.

    The task id differentiates the files belonging to this instance from
    unrelated files, so that no other process or user deletes or edits
    them by accident while they are still being processed.

    :return: 20 characters drawn from the ASCII letters and digits.

    :rtype: str
    """
    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"

    # SystemRandom draws from the operating system's entropy source.
    rng = random.SystemRandom()

    uid_chars = []
    for _ in range(20):
        uid_chars.append(rng.choice(alphabet))

    return "".join(uid_chars)
156 changes: 156 additions & 0 deletions videohash2/videocopy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import os
import re
import shutil
from pathlib import Path
from typing import Optional
from .exceptions import DidNotSupplyPathOrUrl, StoragePathDoesNotExist
from .downloader import Download
from .utils import (get_list_of_all_files_in_dir,
does_path_exists,
create_and_return_temporary_directory,
_get_task_uid)

def _place_video(source: str, destination: str, do_not_copy: Optional[bool]) -> None:
    """Make ``source`` available at ``destination`` by symlink or by copy."""
    if do_not_copy:
        # A relative symlink target is resolved against the directory that
        # contains the link, not the current working directory, so the
        # target is made absolute to keep the link from dangling.
        os.symlink(os.path.abspath(source), destination)
    else:
        shutil.copyfile(source, destination)


def _copy_video_to_video_dir(
    video_dir: str,
    video_download_dir: str,
    do_not_copy: Optional[bool] = True,
    download_worst: bool = False,
    url: Optional[str] = None,
    path: Optional[str] = None,
) -> str:
    """
    Place the video inside the video directory and return its new path.

    Working on a private copy (or link) avoids issues such as the user
    or some other process deleting the instance files while we are still
    processing.

    If a URL was specified instead of a path, the video is first
    downloaded into ``video_download_dir`` and then placed in the video
    directory.

    :param video_dir: Directory that receives ``video.<extension>``.

    :param video_download_dir: Directory handed to the downloader when a
                               URL is used.

    :param do_not_copy: When truthy, symlink to the source file instead
                        of copying it. Defaults to True.

    :param download_worst: Forwarded to the downloader as ``worst``.

    :param url: URL of the video to download, if any.

    :param path: Path of a local video file, if any.

    :return: Path of the video inside ``video_dir``; an empty string if
             neither ``path`` nor ``url`` was given.

    :rtype: str

    :raises ValueError: If the path supplied by the end user
                        lacks an extension. E.g. webm, mkv and mp4.
    """
    video_path: str = ""

    if path:
        # splitext only inspects the final path component, so a dot in a
        # parent directory name is never mistaken for an extension.
        extension = os.path.splitext(path)[1][1:]

        if not extension:
            raise ValueError("File name (path) does not have an extension.")

        video_path = os.path.join(video_dir, f"video.{extension}")
        _place_video(path, video_path, do_not_copy)

    if url:
        Download(
            url,
            video_download_dir,
            worst=download_worst,
        )

        # NOTE(review): takes the first listed file; presumably the download
        # directory holds only the file that was just fetched - confirm.
        downloaded_file = get_list_of_all_files_in_dir(video_download_dir)[0]

        # Fall back to "mkv" when the downloaded file has no extension.
        extension = os.path.splitext(downloaded_file)[1][1:] or "mkv"

        # Joined the same way as in the path branch, so the result does not
        # depend on video_dir ending with a path separator.
        video_path = os.path.join(video_dir, f"video.{extension}")
        _place_video(downloaded_file, video_path, do_not_copy)

    return video_path

def _create_required_dirs_and_check_for_errors(
    url: Optional[str] = None,
    path: Optional[str] = None,
    storage_path: Optional[str] = None,
) -> tuple:
    """
    Validate the arguments and create the directories used for processing.

    The instance files are stored in these directories, which live under
    a freshly named task directory inside ``storage_path``, so there is
    no need to worry about the end user or some other process
    interfering with the instance generated files.

    :param url: URL of the video, if any.

    :param path: Path of the video file, if any.

    :param storage_path: Existing directory under which the task
                         directory is created. A temporary directory is
                         created when this is not given.

    :raises DidNotSupplyPathOrUrl: If the user specified neither the
                                   path nor the url. One of them must be
                                   specified.

    :raises ValueError: If the user passed both path and url. Only pass
                        one of them; if the file is available on both,
                        pass the path only.

    :raises StoragePathDoesNotExist: If the storage path specified by the
                                     user does not exist.

    :return: ``(video_dir, video_download_dir, frames_dir, tiles_dir,
             collage_dir, horizontally_concatenated_image_dir)``. Every
             entry ends with the path separator.

    :rtype: tuple
    """
    if not path and not url:
        raise DidNotSupplyPathOrUrl(
            "You must specify either a path or an URL of the video."
        )

    if path and url:
        raise ValueError("Specify either a path or an URL and NOT both.")

    if not storage_path:
        storage_path = create_and_return_temporary_directory()
    if not does_path_exists(storage_path):
        raise StoragePathDoesNotExist(
            f"Storage path '{storage_path}' does not exist."
        )

    os_path_sep = os.path.sep

    # Everything for this call lives under its own uniquely named directory.
    task_root = os.path.join(storage_path, f"{_get_task_uid()}{os_path_sep}")

    # Order matters: it is the order of the returned tuple.
    subdir_names = (
        "video",
        "downloadedvideo",
        "frames",
        "tiles",
        "collage",
        "horizontally_concatenated_image",
    )

    created_dirs = []
    for subdir_name in subdir_names:
        # The trailing separator is kept on every returned directory.
        directory = os.path.join(task_root, f"{subdir_name}{os_path_sep}")
        Path(directory).mkdir(parents=True, exist_ok=True)
        created_dirs.append(directory)

    return tuple(created_dirs)
43 changes: 40 additions & 3 deletions videohash2/videoduration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,33 @@
from shutil import which
from subprocess import PIPE, Popen
from typing import Optional
from .exceptions import DidNotSupplyPathOrUrl
from .videocopy import (_create_required_dirs_and_check_for_errors,
_copy_video_to_video_dir)

# Module to determine the length of a video.
# The length is found by FFmpeg; the output of video_duration is in seconds.


def video_duration(video_path: str, ffmpeg_path: Optional[str] = None) -> float:
def video_duration(url: Optional[str] = None,
path: Optional[str] = None,
storage_path: Optional[str] = None,
do_not_copy: Optional[bool] = True,
ffmpeg_path: Optional[str] = None
) -> float:

"""
Retrieve the exact video duration as echoed by the FFmpeg and return
the duration in seconds. Maximum duration supported is 999 hours, above
which the regex is doomed to fail(no match).

:param video_path: Absolute path of the video file.
:param url: A URL that leads to a video.

:param path: Absolute path of the video file.

:param storage_path: Optional, path to where you want to store the video.

:param do_not_copy: Used when you want to save the video, defaults to True.

:param ffmpeg_path: Path of the FFmpeg software if not in path.

Expand All @@ -22,10 +37,32 @@ def video_duration(video_path: str, ffmpeg_path: Optional[str] = None) -> float:
:rtype: float
"""

if not path and not url:
raise DidNotSupplyPathOrUrl(
"You must specify either a path or an URL of the video."
)

if path and url:
raise ValueError("Specify either a path or an URL and NOT both.")

if not ffmpeg_path:
ffmpeg_path = str(which("ffmpeg"))

command = f'"{ffmpeg_path}" -i "{video_path}"'
if url:
video_dir, video_download_dir = _create_required_dirs_and_check_for_errors(
url=url,
storage_path=storage_path
)[0:2]

path = _copy_video_to_video_dir(
video_dir,
video_download_dir,
do_not_copy=do_not_copy,
download_worst=True,
url=url
)

command = f'"{ffmpeg_path}" -i "{path}"'
process = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
output, error = process.communicate()

Expand Down
Loading

0 comments on commit f712256

Please sign in to comment.