Automating Image Optimization in Hugo with GitHub Actions
Why Automate Image Optimization?
When managing a Hugo site, keeping image sizes optimized and references up-to-date can be a challenge. Hosting platforms like Cloudflare provide tools to optimize images, but enabling such optimizations for every site rebuild can add unnecessary complexity and delay deployments. For a leaner, more manageable solution, I wanted a workflow that could pre-optimize my images before deployment.
Additionally, I needed a way to manage new content—including images—entirely from my phone. This meant finding a solution that required minimal manual intervention and could automatically optimize images, commit changes, and create pull requests without needing to switch devices. To achieve this, I built a custom continuous integration (CI) pipeline in GitHub Actions.
This workflow ensures that:
- My site’s images are always optimized before deployment.
- I can focus on writing content while the pipeline handles the heavy lifting.
- Changes are versioned through pull requests for transparency and control.
Let’s dive into how the workflow works and how you can set it up for your own projects.
The Pipeline
The CI pipeline performs the following tasks:
Optimize images
Caps image width at 800px (1920px for files whose names start with "featured_"), never upscaling smaller images, and converts them to WebP format.
import os
from pathlib import Path
from PIL import Image
def process_images(content_dir):
    """Walk ``content_dir`` recursively and pass every image file to ``process_image``.

    A file counts as an image when its extension, compared case-insensitively,
    is ``.jpeg``, ``.jpg``, ``.png`` or ``.webp``.
    """
    image_suffixes = ('.jpeg', '.jpg', '.png', '.webp')
    for folder, _subdirs, names in os.walk(content_dir):
        folder_path = Path(folder)
        for name in names:
            candidate = folder_path / name
            if candidate.suffix.lower() not in image_suffixes:
                continue
            process_image(candidate)
def process_image(file_path):
    """Resize one image to its width cap and make sure a WebP version exists.

    Files whose name starts with ``featured_`` are capped at 1920px wide, all
    other files at 800px. Images narrower than the cap are never upscaled.
    Every file that is not already ``.webp`` additionally gets a ``.webp``
    sibling written next to it.

    Args:
        file_path: ``pathlib.Path`` of the image to process.

    Returns:
        None. Any error is printed and swallowed so that one unreadable file
        does not abort the rest of the batch.
    """
    try:
        with Image.open(file_path) as img:
            # Determine the width cap based on the filename
            max_width = 1920 if file_path.name.startswith("featured_") else 800

            # From here on work with the *resized* pixels. Converting the
            # untouched ``img`` would produce a full-size WebP even though the
            # source file on disk had just been shrunk.
            current = img
            if img.width > max_width:
                current = resize_image(img, file_path, max_width)

            # Convert to WebP if not already
            if file_path.suffix.lower() != '.webp':
                convert_to_webp(current, file_path)
    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch job.
        print(f"Error processing {file_path}: {e}")


def resize_image(img, file_path, new_width):
    """Resize ``img`` to ``new_width``, keep the aspect ratio, and overwrite ``file_path``.

    Args:
        img: Open image to scale.
        file_path: Path the resized image is saved to (format follows the
            file extension).
        new_width: Target width in pixels.

    Returns:
        The resized image, so callers can keep processing the scaled pixels
        instead of the original ones.
    """
    # Truncate like before, but never let an extreme aspect ratio yield a
    # zero-pixel height, which cannot be resized to.
    new_height = max(1, int((new_width / img.width) * img.height))
    resized_img = img.resize((new_width, new_height), Image.LANCZOS)
    resized_img.save(file_path, quality=85)
    print(f"Resized {file_path} to {new_width}x{new_height}")
    return resized_img
def convert_to_webp(img, file_path):
    """Save ``img`` beside ``file_path`` as WebP (same stem, ``.webp`` suffix, quality 85)."""
    target = file_path.with_suffix('.webp')
    save_options = {'format': 'WEBP', 'quality': 85}
    img.save(target, **save_options)
    print(f"Converted {file_path} to WebP as {target}")
def main():
    """Optimize every image under ``content`` (relative to the working directory)."""
    content_dir = Path("content")
    if content_dir.exists():
        process_images(content_dir)
        return
    print(f"Content directory '{content_dir}' does not exist.")


if __name__ == "__main__":
    main()
Update references
Adjusts Markdown files to reference the optimized images.
import os
import re
import yaml
# Set the path to your content directory
content_dir = 'content'

# Define the possible original image extensions
old_extensions = ['.jpeg', '.jpg', '.png']
new_extension = '.webp'

# Markdown image tags: ![alt](target)
_IMAGE_TAG_RE = re.compile(r'!\[.*?\]\((.*?)\)')
# YAML front matter delimited by --- lines at the very start of the file
_FRONT_MATTER_RE = re.compile(r'---\n(.*?)\n---', re.DOTALL)


def _webp_reference(image_path):
    """Return ``image_path`` with its trailing image extension swapped for ``new_extension``.

    Only the suffix is replaced (matched case-insensitively), so a path such
    as ``my.jpg.shot.jpg`` keeps its inner ``.jpg``. Returns ``None`` when the
    path does not end in one of ``old_extensions``.
    """
    lowered = image_path.lower()
    for ext in old_extensions:
        if lowered.endswith(ext):
            return image_path[:-len(ext)] + new_extension
    return None


def _update_body_references(content, dirpath):
    """Point markdown image tags at their ``.webp`` file when that file exists in ``dirpath``."""
    for image_path in _IMAGE_TAG_RE.findall(content):
        new_image_path = _webp_reference(image_path)
        if new_image_path is None:
            continue
        if os.path.exists(os.path.join(dirpath, new_image_path)):
            print(f"Updating reference: {image_path} -> {new_image_path}")
            content = content.replace(image_path, new_image_path)
        else:
            print(f"Skipping (no .webp file found): {image_path}")
    return content


def _front_matter_image_values(fm_data):
    """Yield ``(label, value)`` for the ``featured_image`` and ``recipe.image`` fields."""
    if not isinstance(fm_data, dict):
        # Empty front matter parses to None; nothing to inspect.
        return
    yield 'featured_image', fm_data.get('featured_image')
    recipe = fm_data.get('recipe')
    if isinstance(recipe, dict):
        yield 'recipe.image', recipe.get('image')


def _update_front_matter(content, dirpath, file_path):
    """Update image fields in the YAML front matter; return ``(content, changed)``."""
    match = _FRONT_MATTER_RE.match(content)
    if not match:
        return content, False

    front_matter = match.group(1)
    try:
        fm_data = yaml.safe_load(front_matter)
    except yaml.YAMLError as e:
        print(f"Error processing front matter in {file_path}: {e}")
        return content, False

    updated_front_matter = front_matter
    for label, value in _front_matter_image_values(fm_data):
        if not isinstance(value, str):
            continue
        new_value = _webp_reference(value)
        if new_value is None:
            continue
        if not os.path.exists(os.path.join(dirpath, new_value)):
            continue
        if value not in updated_front_matter:
            # The parsed value is not spelled verbatim in the YAML text
            # (e.g. escaped characters), so it cannot be substituted safely.
            print(f"Skipping {label} (value not found verbatim in front matter): {value}")
            continue
        print(f"Updating {label}: {value} -> {new_value}")
        # Substitute the value in the original text instead of re-dumping the
        # parsed YAML: a dump would reorder keys, drop comments and change
        # formatting of unrelated fields.
        updated_front_matter = updated_front_matter.replace(value, new_value)

    if updated_front_matter == front_matter:
        return content, False
    start, end = match.span(1)
    return content[:start] + updated_front_matter + content[end:], True


# Function to update image references in markdown files
def update_image_references(root=None):
    """Rewrite image references in markdown files to their ``.webp`` versions.

    For every ``.md`` file below the content directory, markdown image tags
    and the ``featured_image`` / ``recipe.image`` front matter fields that
    point at a ``.jpeg``/``.jpg``/``.png`` file are switched to ``.webp``,
    but only when the ``.webp`` file exists relative to the markdown file's
    directory. A file is written back whenever its body or its front matter
    changed.

    Args:
        root: Directory to scan. Defaults to the module-level ``content_dir``.
    """
    base_dir = content_dir if root is None else root
    for dirpath, _, files in os.walk(base_dir):
        for file in files:
            if not file.endswith('.md'):  # Only process markdown files
                continue
            file_path = os.path.join(dirpath, file)
            print(f"Processing file: {file_path}")

            with open(file_path, 'r', encoding='utf-8') as f:
                original_content = f.read()

            # Step 1: regular markdown image tags
            content = _update_body_references(original_content, dirpath)
            # Step 2: front matter fields
            content, front_matter_changed = _update_front_matter(content, dirpath, file_path)

            # Persist body-only changes too; they used to be dropped unless
            # the front matter happened to change as well.
            if content != original_content:
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)
            if front_matter_changed:
                print(f"Updated front matter for file: {file_path}")
# Run the update function, then report completion. These statements are not
# behind an ``if __name__ == "__main__":`` guard, so they execute whenever
# this script's code is run.
update_image_references()
print("Image references update complete.")
Clean up
Removes original images that are no longer referenced by any Markdown file and that already have a WebP counterpart.
import os
from pathlib import Path
import frontmatter
def _iter_front_matter_strings(value):
    """Yield every string found in ``value``, descending into dicts and lists."""
    if isinstance(value, str):
        yield value
    elif isinstance(value, dict):
        for nested in value.values():
            yield from _iter_front_matter_strings(nested)
    elif isinstance(value, (list, tuple)):
        for nested in value:
            yield from _iter_front_matter_strings(nested)


def _iter_markdown_image_targets(body):
    """Yield the target of every ``![alt](target)`` image tag in ``body``.

    NOTE(review): the original statement that located the image target was
    garbled in this copy of the script; this line-by-line scan is a
    reconstruction of its intent and should be confirmed against the
    original source.
    """
    for line in body.splitlines():
        rest = line
        while True:
            _, marker, rest = rest.partition("![")
            if not marker:
                break
            _, separator, rest = rest.partition("](")
            if not separator:
                break
            target, closing, rest = rest.partition(")")
            if not closing:
                break
            # Drop an optional title: ![alt](path "title")
            parts = target.split(maxsplit=1)
            if parts:
                yield parts[0]


def get_referenced_images(content_dir):
    """Collect the original-format images referenced by markdown files.

    Scans every ``.md`` file below ``content_dir``. A reference is any front
    matter string (at any nesting depth) or markdown image target ending in
    ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive).

    Args:
        content_dir: Root directory to scan.

    Returns:
        A set of ``Path`` objects, each built as the markdown file's directory
        joined with the reference as written.
    """
    original_suffixes = (".jpg", ".jpeg", ".png")
    referenced_images = set()
    for root, _, files in os.walk(content_dir):
        for file in files:
            if not file.endswith(".md"):
                continue
            file_path = Path(root) / file
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = frontmatter.load(f)
                candidates = list(_iter_front_matter_strings(content.metadata))
                candidates.extend(_iter_markdown_image_targets(content.content))
            except Exception as e:
                # Best-effort per file. NOTE: references of a file that fails
                # to parse are not collected, so its images look unreferenced
                # to the caller.
                print(f"Error processing {file_path}: {e}")
                continue
            for candidate in candidates:
                if candidate.lower().endswith(original_suffixes):
                    referenced_images.add(Path(root) / candidate)
    return referenced_images
def cleanup_unused_images(content_dir):
    """Delete ``.jpg``/``.jpeg``/``.png`` files that no markdown file references.

    A file is only removed when a ``.webp`` file with the same stem already
    exists beside it.
    """
    still_referenced = get_referenced_images(content_dir)
    original_suffixes = (".jpg", ".jpeg", ".png")
    for folder, _subdirs, names in os.walk(content_dir):
        for name in names:
            candidate = Path(folder) / name
            if candidate.suffix.lower() not in original_suffixes:
                continue
            if candidate in still_referenced:
                continue
            if not candidate.with_suffix(".webp").exists():
                continue
            print(f"Deleting unused file: {candidate}")
            candidate.unlink()
def main():
    """Remove unreferenced original images below ``content`` (relative to the working directory)."""
    content_dir = Path("content")
    if content_dir.exists():
        cleanup_unused_images(content_dir)
    else:
        print(f"Content directory '{content_dir}' does not exist.")


if __name__ == "__main__":
    main()
These steps are handled by three Python scripts:
- scripts/image_efficiency.py
- scripts/image_migrate.py
- scripts/cleanup_unused_images.py
Each script performs its task independently, ensuring modularity and simplicity.
Setting Up the CI Workflow
Step 1: Write Your Workflow File
Create a .github/workflows/optimize-images.yml file:
# Optimizes images on every push to main and proposes the result as a pull request.
name: Optimize Images

on:
  push:
    branches:
      - main

# The job pushes a branch and opens a pull request, so the token needs write
# access to both; the default GITHUB_TOKEN may be read-only.
permissions:
  contents: write
  pull-requests: write

jobs:
  optimize-images:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.x"

      - name: Install dependencies
        run: pip install -r scripts/requirements.txt

      - name: Run image optimization
        run: python scripts/image_efficiency.py

      - name: Update references
        run: python scripts/image_migrate.py

      - name: Cleanup unused images
        run: python scripts/cleanup_unused_images.py

      - name: Commit and create pull request
        id: commit
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add .
          if git diff-index --quiet HEAD; then
            echo "No changes to commit."
            # "exit 0" only ends this step, so record the outcome for the
            # pull request step to check.
            echo "changed=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          git commit -m "Optimize images via CI"
          git branch -M optimize-images-changes
          # The branch may still exist from an earlier run; it is owned by
          # this workflow, so overwrite it instead of failing the push.
          git push --force -u origin optimize-images-changes
          echo "changed=true" >> "$GITHUB_OUTPUT"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Create pull request
        # Skip when the previous step found nothing to commit.
        if: steps.commit.outputs.changed == 'true'
        run: |
          # A pull request from an earlier run may still be open; the push
          # above already updated it, so only create one when none exists.
          if [ "$(gh pr list --head optimize-images-changes --state open --json number --jq 'length')" = "0" ]; then
            gh pr create --base main --title "Optimize images" --body "This pull request contains automated image optimizations." --head optimize-images-changes
          else
            echo "Pull request already open for optimize-images-changes."
          fi
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}