pdf module¶
PDF conversion utilities for autorpt.
convert_all_reports(reports_dir='reports', output_dir=None)
¶
Convert all Word reports in the reports directory to PDF
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reports_dir | str | Directory containing Word reports | 'reports' |
output_dir | str | Directory to save PDFs. If None, saves in same directory as Word files | None |
Returns:
Type | Description |
---|---|
dict | Summary of conversion results |
Source code in autorpt/pdf.py
def convert_all_reports(reports_dir="reports", output_dir=None):
    """Convert all Word reports in the reports directory to PDF.

    Every ``*.docx`` file directly inside ``reports_dir`` is handed to
    ``convert_to_pdf`` in sorted order. Progress and a final summary are
    printed to stdout.

    Args:
        reports_dir (str): Directory containing Word reports.
        output_dir (str, optional): Directory to save PDFs. If None, saves in
            same directory as Word files.

    Returns:
        dict: Summary of conversion results with the keys ``"success"``
        (number of converted files), ``"failed"`` (number of files that could
        not be converted) and ``"errors"`` (list of error strings).
    """
    reports_path = Path(reports_dir)
    # is_dir() rather than exists(): a regular file passed as reports_dir is
    # not a usable reports directory either.
    if not reports_path.is_dir():
        print(f"â Reports directory not found: {reports_dir}")
        return {"success": 0, "failed": 0, "errors": [f"Directory not found: {reports_dir}"]}

    # Find all .docx files. Names starting with "~$" are the owner/lock files
    # Word keeps next to a document that is currently open; they match the
    # pattern but are not documents, so they are skipped. Sorting makes the
    # processing order reproducible (glob order is unspecified).
    word_files = sorted(
        candidate
        for candidate in reports_path.glob("*.docx")
        if candidate.is_file() and not candidate.name.startswith("~$")
    )
    if not word_files:
        print(f"âšī¸ No Word documents found in {reports_dir}")
        return {"success": 0, "failed": 0, "errors": []}

    total = len(word_files)
    print(f"đ Found {total} Word document(s) to convert")
    results = {"success": 0, "failed": 0, "errors": []}

    for i, word_file in enumerate(word_files):
        print(f"\nđ Processing ({i+1}/{total}): {word_file.name}")
        # Add delay between conversions to let Word close properly
        if i > 0:
            print("âąī¸ Waiting for Word to close properly...")
            time.sleep(3)
        success, result_msg = convert_to_pdf(str(word_file), output_dir)
        if success:
            results["success"] += 1
        else:
            results["failed"] += 1
            results["errors"].append(f"{word_file.name}: {result_msg}")

    # Print summary
    print("\nđ Conversion Summary:")
    print(f" â Successful: {results['success']}")
    print(f" â Failed: {results['failed']}")
    if results["errors"]:
        print(" đ Errors:")
        for error in results["errors"]:
            print(f" - {error}")
    return results
convert_to_pdf(word_file, output_dir=None, max_retries=2)
¶
Convert a Word document to PDF with the same name
Parameters:
Name | Type | Description | Default |
---|---|---|---|
word_file | str | Path to the Word document to convert | required |
output_dir | str | Directory to save PDF. If None, saves in same directory as Word file | None |
max_retries | int | Maximum number of retry attempts (default: 2) | 2 |
Returns:
Type | Description |
---|---|
tuple | (bool, str) - (success status, pdf_path or error message) |
Source code in autorpt/pdf.py
def convert_to_pdf(word_file, output_dir=None, max_retries=2):
    """Convert a Word document to PDF with the same name.

    The PDF keeps the Word file's base name with a ``.pdf`` suffix. The
    conversion is attempted once and then retried up to ``max_retries`` times,
    pausing two seconds before each retry.

    Args:
        word_file (str): Path to the Word document to convert.
        output_dir (str, optional): Directory to save PDF (created if
            missing). If None, saves in same directory as Word file.
        max_retries (int): Maximum number of retry attempts (default: 2).
            Negative values are treated as 0, so one attempt is always made.

    Returns:
        tuple: (bool, str) - (success status, pdf_path or error message)
    """
    if not PDF_AVAILABLE:
        error_msg = "â PDF conversion not available. Please install docx2pdf: pip install docx2pdf"
        print(error_msg)
        return False, error_msg

    # Convert to Path object for easier manipulation
    word_path = Path(word_file)

    # Check if Word file exists. is_file() also rejects a directory, which
    # exists() would let through to the converter.
    if not word_path.is_file():
        error_msg = f"â Word file not found: {word_file}"
        print(error_msg)
        return False, error_msg

    # Determine output directory
    if output_dir:
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        pdf_path = output_path / word_path.with_suffix('.pdf').name
    else:
        pdf_path = word_path.with_suffix('.pdf')

    # A negative retry count would otherwise skip the loop entirely and
    # report a failure without ever attempting the conversion.
    max_retries = max(max_retries, 0)

    # Try conversion with retries
    for attempt in range(max_retries + 1):
        try:
            if attempt > 0:
                print(f"đ Retry attempt {attempt} for {word_path.name}...")
                time.sleep(2)  # Wait 2 seconds between retries
            else:
                print(f"đ Converting {word_path.name} to PDF...")
            convert(str(word_path), str(pdf_path))

            # Verify PDF was created.
            # NOTE(review): a PDF left over from an earlier run at the same
            # path also satisfies this check - confirm whether that matters.
            if pdf_path.exists():
                file_size = pdf_path.stat().st_size / 1024  # KB
                print(f"â PDF created successfully: {pdf_path}")
                print(f"đ PDF file size: {file_size:.1f} KB")
                return True, str(pdf_path)

            error_msg = f"â PDF conversion failed: {pdf_path} was not created"
            if attempt < max_retries:
                print(f"â ī¸ {error_msg} - retrying...")
                continue
            print(error_msg)
            return False, error_msg
        except Exception as e:
            # Deliberately broad: any converter failure is reported through
            # the (False, message) return value instead of propagating.
            error_msg = f"â Error converting to PDF: {e}"
            if attempt < max_retries:
                print(f"â ī¸ {error_msg} - retrying...")
                # Try to kill any hanging Word processes before retry
                _cleanup_word_processes()
                continue
            print(error_msg)
            print("đĄ Tips:")
            print(" - Make sure Microsoft Word is installed")
            print(" - Close the Word document if it's open")
            print(" - Try closing all Word applications and retry")
            return False, error_msg

    # Should not reach here, but just in case
    return False, "Conversion failed after all retries"
main()
¶
Main function for PDF conversion script
Source code in autorpt/pdf.py
def main():
    """Command-line entry point for the PDF conversion script.

    Parses the command line, then converts either one Word document
    (``--file``) or every document in a directory (``--all``).

    Returns:
        int: 0 when every requested conversion succeeded, 1 otherwise
        (including when PDF conversion is not available).
    """
    usage_examples = """
Examples:
python pdf.py -f report.docx # Convert single file
python pdf.py -a # Convert all reports in reports/ folder
python pdf.py -a -d reports -o pdfs # Convert all from reports/ to pdfs/ folder
python pdf.py -f report.docx -o output # Convert single file to output/ folder
"""
    cli = argparse.ArgumentParser(
        description="Convert Word reports to PDF",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # Exactly one of --file / --all must be supplied.
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "--file", "-f",
        help="Convert a specific Word document to PDF",
    )
    mode.add_argument(
        "--all", "-a",
        action="store_true",
        help="Convert all Word documents in reports directory",
    )
    cli.add_argument(
        "--dir", "-d",
        default="reports",
        help="Input directory for --all option (default: reports)",
    )
    cli.add_argument(
        "--output", "-o",
        help="Output directory for PDF files (default: same as input)",
    )
    cli.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose output",
    )
    options = cli.parse_args()

    if not PDF_AVAILABLE:
        print("â PDF conversion not available. Please install docx2pdf:")
        print(" pip install docx2pdf")
        return 1

    if options.verbose:
        print("đ§ Verbose mode enabled")
        if not options.file:
            print(f"đ Converting all files from: {options.dir}")
        else:
            print(f"đ Converting file: {options.file}")
        if options.output:
            print(f"đ Output directory: {options.output}")

    # Single-file mode
    if options.file:
        converted, outcome = convert_to_pdf(options.file, options.output)
        if not converted:
            print(f"â Conversion failed: {outcome}")
        return 0 if converted else 1

    # Whole-directory mode: succeed only when nothing failed
    summary = convert_all_reports(options.dir, options.output)
    return 0 if summary["failed"] == 0 else 1