sudo apt install tesseract-ocr -y
nano batch_ocr.sh
#!/bin/bash
#
# batch_ocr.sh - run Tesseract OCR on every .jpg, .png and .pdf file found
# directly inside a source directory.
#
# Usage: batch_ocr.sh [source_dir [output_dir]]
#   source_dir  directory to scan (default: the path configured below)
#   output_dir  directory that receives the results (default: configured below)
#
# For each input NAME.EXT, tesseract is given the output base
# "$output_dir/NAME" (it appends its own extension, e.g. ".txt").
# Inputs that share a stem (a.jpg and a.png) therefore map to the same
# output file, and the later one overwrites the earlier one.
#
# Exit status: 0 when every file was processed successfully,
#              1 on a setup error or if OCR failed for at least one file.

set -u

# Set the source and output directories (positional arguments override them).
source_dir="${1:-/path/to/source/folder}"
output_dir="${2:-/path/to/output/folder}"

if ! command -v tesseract >/dev/null 2>&1; then
  printf 'batch_ocr: tesseract is not installed or not in PATH\n' >&2
  exit 1
fi

if [[ ! -d "$source_dir" ]]; then
  printf 'batch_ocr: source directory not found: %s\n' "$source_dir" >&2
  exit 1
fi

# tesseract does not create missing parent directories for its output base.
if ! mkdir -p -- "$output_dir"; then
  printf 'batch_ocr: cannot create output directory: %s\n' "$output_dir" >&2
  exit 1
fi

# Without nullglob an unmatched pattern (e.g. no *.png present) would stay in
# the list as a literal string and be handed to tesseract as a bogus filename.
shopt -s nullglob

failures=0

# NOTE(review): Tesseract's documentation says it does not read PDF input, so
# the *.pdf matches are expected to fail unless the PDFs are converted to
# images first (e.g. with pdftoppm) -- confirm against the installed version.
# Such failures are reported per file below instead of being silently lost.
for file in "$source_dir"/*.jpg "$source_dir"/*.png "$source_dir"/*.pdf; do
  # Skip directories or other non-regular entries that happen to match.
  [[ -f "$file" ]] || continue

  filename="${file##*/}"   # strip the directory part (no basename subprocess)
  base="${filename%.*}"    # strip the last extension

  # Perform OCR using Tesseract; keep going if a single file fails.
  if ! tesseract "$file" "$output_dir/$base" -l eng; then
    printf 'batch_ocr: OCR failed for %s\n' "$file" >&2
    failures=$((failures + 1))
  fi
done

shopt -u nullglob

if ((failures > 0)); then
  printf 'batch_ocr: %d file(s) could not be processed\n' "$failures" >&2
  exit 1
fi
chmod +x batch_ocr.sh
https://linuxhint.com/install-tesseract-ocr-linux/
No comments:
Post a Comment