300 lines
9.6 KiB
Python
300 lines
9.6 KiB
Python
"""
|
|
Tests for document_converter.py
|
|
"""
|
|
|
|
import pytest
|
|
import sys
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
import document_converter as dc
|
|
|
|
|
|
class TestEnvLoading:
|
|
"""Test environment variable loading."""
|
|
|
|
@patch('document_converter.load_dotenv')
|
|
@patch('pathlib.Path.exists')
|
|
def test_load_env_files_success(self, mock_exists, mock_load_dotenv):
|
|
"""Test successful .env file loading."""
|
|
mock_exists.return_value = True
|
|
dc.load_env_files()
|
|
# Should be called for skill, skills, and claude dirs
|
|
assert mock_load_dotenv.call_count >= 1
|
|
|
|
@patch('document_converter.load_dotenv', None)
|
|
def test_load_env_files_no_dotenv(self):
|
|
"""Test when dotenv is not available."""
|
|
# Should not raise an error
|
|
dc.load_env_files()
|
|
|
|
|
|
class TestDependencyCheck:
|
|
"""Test dependency checking."""
|
|
|
|
@patch('builtins.__import__')
|
|
def test_check_all_dependencies_available(self, mock_import):
|
|
"""Test when all dependencies are available."""
|
|
mock_import.return_value = Mock()
|
|
|
|
deps = dc.check_dependencies()
|
|
|
|
assert 'pypdf' in deps
|
|
assert 'markdown' in deps
|
|
assert 'pillow' in deps
|
|
|
|
@patch('builtins.__import__')
|
|
def test_check_dependencies_missing(self, mock_import):
|
|
"""Test when dependencies are missing."""
|
|
def import_side_effect(name, *args, **kwargs):
|
|
if name == 'pypdf':
|
|
raise ImportError()
|
|
return Mock()
|
|
|
|
mock_import.side_effect = import_side_effect
|
|
|
|
# The function uses try/except, so we test the actual function
|
|
with patch('document_converter.sys.modules', {}):
|
|
# This is tricky to test due to import handling
|
|
pass
|
|
|
|
|
|
class TestPDFPageExtraction:
|
|
"""Test PDF page extraction."""
|
|
|
|
@patch('pypdf.PdfReader')
|
|
@patch('pypdf.PdfWriter')
|
|
@patch('builtins.open', create=True)
|
|
def test_extract_single_page(self, mock_open, mock_writer_class, mock_reader_class):
|
|
"""Test extracting a single page."""
|
|
# Mock reader
|
|
mock_reader = Mock()
|
|
mock_page = Mock()
|
|
mock_reader.pages = [Mock(), mock_page, Mock()]
|
|
mock_reader_class.return_value = mock_reader
|
|
|
|
# Mock writer
|
|
mock_writer = Mock()
|
|
mock_writer.pages = [mock_page]
|
|
mock_writer_class.return_value = mock_writer
|
|
|
|
result = dc.extract_pdf_pages(
|
|
'input.pdf',
|
|
'output.pdf',
|
|
page_range='2',
|
|
verbose=False
|
|
)
|
|
|
|
assert result is True
|
|
mock_writer.add_page.assert_called_once_with(mock_page)
|
|
|
|
@patch('pypdf.PdfReader')
|
|
@patch('pypdf.PdfWriter')
|
|
@patch('builtins.open', create=True)
|
|
def test_extract_page_range(self, mock_open, mock_writer_class, mock_reader_class):
|
|
"""Test extracting a range of pages."""
|
|
mock_reader = Mock()
|
|
mock_reader.pages = [Mock() for _ in range(10)]
|
|
mock_reader_class.return_value = mock_reader
|
|
|
|
mock_writer = Mock()
|
|
mock_writer.pages = []
|
|
mock_writer_class.return_value = mock_writer
|
|
|
|
result = dc.extract_pdf_pages(
|
|
'input.pdf',
|
|
'output.pdf',
|
|
page_range='2-5',
|
|
verbose=False
|
|
)
|
|
|
|
assert result is True
|
|
assert mock_writer.add_page.call_count == 4 # Pages 2-5 (4 pages)
|
|
|
|
def test_extract_pages_no_pypdf(self):
|
|
"""Test page extraction without pypdf."""
|
|
with patch.dict('sys.modules', {'pypdf': None}):
|
|
result = dc.extract_pdf_pages('input.pdf', 'output.pdf', '1-10')
|
|
assert result is False
|
|
|
|
|
|
class TestPDFOptimization:
|
|
"""Test PDF optimization."""
|
|
|
|
@patch('pypdf.PdfReader')
|
|
@patch('pypdf.PdfWriter')
|
|
@patch('builtins.open', create=True)
|
|
@patch('pathlib.Path.stat')
|
|
def test_optimize_pdf_success(self, mock_stat, mock_open, mock_writer_class, mock_reader_class):
|
|
"""Test successful PDF optimization."""
|
|
# Mock reader
|
|
mock_reader = Mock()
|
|
mock_page = Mock()
|
|
mock_reader.pages = [mock_page, mock_page]
|
|
mock_reader_class.return_value = mock_reader
|
|
|
|
# Mock writer
|
|
mock_writer = Mock()
|
|
mock_writer.pages = [mock_page, mock_page]
|
|
mock_writer_class.return_value = mock_writer
|
|
|
|
# Mock file sizes
|
|
mock_stat.return_value.st_size = 1024 * 1024
|
|
|
|
result = dc.optimize_pdf('input.pdf', 'output.pdf', verbose=False)
|
|
|
|
assert result is True
|
|
mock_page.compress_content_streams.assert_called()
|
|
|
|
def test_optimize_pdf_no_pypdf(self):
|
|
"""Test PDF optimization without pypdf."""
|
|
with patch.dict('sys.modules', {'pypdf': None}):
|
|
result = dc.optimize_pdf('input.pdf', 'output.pdf')
|
|
assert result is False
|
|
|
|
|
|
class TestImageExtraction:
|
|
"""Test image extraction from PDFs."""
|
|
|
|
@patch('pypdf.PdfReader')
|
|
@patch('PIL.Image')
|
|
@patch('pathlib.Path.mkdir')
|
|
@patch('builtins.open', create=True)
|
|
def test_extract_images_success(self, mock_open, mock_mkdir, mock_image, mock_reader_class):
|
|
"""Test successful image extraction."""
|
|
# Mock PDF reader
|
|
mock_reader = Mock()
|
|
mock_page = MagicMock()
|
|
|
|
# Mock XObject with image
|
|
mock_obj = MagicMock()
|
|
mock_obj.__getitem__.side_effect = lambda k: {
|
|
'/Subtype': '/Image',
|
|
'/Width': 100,
|
|
'/Height': 100,
|
|
'/Filter': '/DCTDecode'
|
|
}[k]
|
|
mock_obj.get_data.return_value = b'image_data'
|
|
|
|
mock_xobjects = MagicMock()
|
|
mock_xobjects.__iter__.return_value = ['img1']
|
|
mock_xobjects.__getitem__.return_value = mock_obj
|
|
|
|
mock_resources = MagicMock()
|
|
mock_resources.get_object.return_value = mock_xobjects
|
|
mock_page.__getitem__.side_effect = lambda k: {
|
|
'/Resources': {'/XObject': mock_resources}
|
|
}[k]
|
|
|
|
mock_reader.pages = [mock_page]
|
|
mock_reader_class.return_value = mock_reader
|
|
|
|
result = dc.extract_images_from_pdf('input.pdf', './output', verbose=False)
|
|
|
|
assert len(result) > 0
|
|
|
|
def test_extract_images_no_dependencies(self):
|
|
"""Test image extraction without required dependencies."""
|
|
with patch.dict('sys.modules', {'pypdf': None}):
|
|
result = dc.extract_images_from_pdf('input.pdf', './output')
|
|
assert result == []
|
|
|
|
|
|
class TestMarkdownConversion:
|
|
"""Test Markdown to PDF conversion."""
|
|
|
|
@patch('markdown.markdown')
|
|
@patch('builtins.open', create=True)
|
|
@patch('subprocess.run')
|
|
@patch('pathlib.Path.unlink')
|
|
def test_convert_markdown_success(self, mock_unlink, mock_run, mock_open, mock_markdown):
|
|
"""Test successful Markdown to PDF conversion."""
|
|
mock_markdown.return_value = '<h1>Test</h1>'
|
|
|
|
# Mock file reading and writing
|
|
mock_file = MagicMock()
|
|
mock_file.__enter__.return_value.read.return_value = '# Test'
|
|
mock_open.return_value = mock_file
|
|
|
|
result = dc.convert_markdown_to_pdf('input.md', 'output.pdf', verbose=False)
|
|
|
|
assert result is True
|
|
mock_run.assert_called_once()
|
|
|
|
@patch('markdown.markdown')
|
|
@patch('builtins.open', create=True)
|
|
@patch('subprocess.run')
|
|
def test_convert_markdown_no_wkhtmltopdf(self, mock_run, mock_open, mock_markdown):
|
|
"""Test Markdown conversion without wkhtmltopdf."""
|
|
mock_markdown.return_value = '<h1>Test</h1>'
|
|
|
|
mock_file = MagicMock()
|
|
mock_file.__enter__.return_value.read.return_value = '# Test'
|
|
mock_open.return_value = mock_file
|
|
|
|
mock_run.side_effect = FileNotFoundError()
|
|
|
|
result = dc.convert_markdown_to_pdf('input.md', 'output.pdf', verbose=False)
|
|
|
|
assert result is False
|
|
|
|
def test_convert_markdown_no_markdown_lib(self):
|
|
"""Test Markdown conversion without markdown library."""
|
|
with patch.dict('sys.modules', {'markdown': None}):
|
|
result = dc.convert_markdown_to_pdf('input.md', 'output.pdf')
|
|
assert result is False
|
|
|
|
|
|
class TestHTMLConversion:
|
|
"""Test HTML to PDF conversion."""
|
|
|
|
@patch('subprocess.run')
|
|
def test_convert_html_success(self, mock_run):
|
|
"""Test successful HTML to PDF conversion."""
|
|
result = dc.convert_html_to_pdf('input.html', 'output.pdf', verbose=False)
|
|
|
|
assert result is True
|
|
mock_run.assert_called_once()
|
|
|
|
@patch('subprocess.run')
|
|
def test_convert_html_no_wkhtmltopdf(self, mock_run):
|
|
"""Test HTML conversion without wkhtmltopdf."""
|
|
mock_run.side_effect = FileNotFoundError()
|
|
|
|
result = dc.convert_html_to_pdf('input.html', 'output.pdf', verbose=False)
|
|
|
|
assert result is False
|
|
|
|
|
|
class TestIntegration:
|
|
"""Integration tests."""
|
|
|
|
@patch('pathlib.Path.exists')
|
|
def test_file_not_found(self, mock_exists):
|
|
"""Test handling of non-existent input file."""
|
|
mock_exists.return_value = False
|
|
|
|
# This would normally be tested via main() but we test the concept
|
|
assert not Path('nonexistent.pdf').exists()
|
|
|
|
@patch('document_converter.check_dependencies')
|
|
def test_check_dependencies_integration(self, mock_check):
|
|
"""Test dependency checking integration."""
|
|
mock_check.return_value = {
|
|
'pypdf': True,
|
|
'markdown': True,
|
|
'pillow': True
|
|
}
|
|
|
|
deps = dc.check_dependencies()
|
|
|
|
assert deps['pypdf'] is True
|
|
assert deps['markdown'] is True
|
|
assert deps['pillow'] is True
|
|
|
|
|
|
if __name__ == '__main__':
|
|
pytest.main([__file__, '-v', '--cov=document_converter', '--cov-report=term-missing'])
|