225 lines
7.9 KiB
Python

#!/usr/bin/env python3
"""
Robust HTTP file server with authentication and automatic file archiving.
Serves files from a source directory and moves them to an archive directory after serving.
"""
import hmac
import io
import json
import logging
import os
import shutil
import zipfile
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path
from threading import Lock
# Configuration
# The shared secret can be supplied through the FILE_SERVER_AUTH_TOKEN
# environment variable so deployments do not have to keep it in source
# control. The literal is retained only as a backward-compatible fallback
# (also used when the variable is set but empty, so the token is never "").
AUTH_TOKEN = os.environ.get("FILE_SERVER_AUTH_TOKEN") or "chai7pu5oosigh4Ahzajoocheich9hio"
# Directory scanned for files to hand out.
SOURCE_DIR = Path("/data/books/ingest")
# Directory files are moved into once they have been served.
ARCHIVE_DIR = Path("/data/books/served")
# Bind address ("0.0.0.0" listens on every interface) and TCP port.
HOST = "0.0.0.0"
PORT = 18000
# Logging setup: records at INFO and above are written both to
# 'file_server.log' (a path relative to the working directory) and to a
# stream handler.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_log_handlers = [
    logging.FileHandler('file_server.log'),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)
logger = logging.getLogger(__name__)

# Held by the request handler around its file operations so that two
# requests never work on the same files at once.
file_lock = Lock()
class FileServerHandler(BaseHTTPRequestHandler):
    """HTTP request handler for authenticated file serving.

    Endpoints:
        POST /get    -- requires the auth token; responds with a zip archive
                        of every regular file in SOURCE_DIR and then moves
                        those files into ARCHIVE_DIR.
        GET  /health -- unauthenticated check that returns {"status": "ok"}.
    """

    def log_message(self, format, *args):
        """Send the base class's access-log lines to the module logger."""
        logger.info("%s - %s", self.get_client_ip(), format % args)

    def get_client_ip(self):
        """Return the client IP, preferring reverse-proxy headers.

        X-Forwarded-For and X-Real-IP are taken at face value, so the result
        is only trustworthy when a proxy in front of this server overwrites
        them; a directly connected client can send any value.
        """
        # The base class logs some errors (e.g. an over-long request line)
        # before it has parsed any headers, so the attribute may be missing.
        headers = getattr(self, 'headers', None)
        if headers is not None:
            forwarded_for = headers.get('X-Forwarded-For')
            if forwarded_for:
                # May be a comma-separated chain; the first entry is the
                # originating client.
                first_hop = forwarded_for.split(',')[0].strip()
                if first_hop:
                    return first_hop
            real_ip = headers.get('X-Real-IP')
            if real_ip:
                return real_ip.strip()
        # Fall back to the address of the direct TCP peer.
        return self.address_string()

    def _send_json(self, code, payload):
        """Send *payload* as a JSON document followed by a newline."""
        body = (json.dumps(payload) + "\n").encode("utf-8")
        self.send_response(code)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_error_response(self, code, message):
        """Send ``{"error": message}`` with HTTP status *code*.

        The body is produced by the json module so that quotes, backslashes
        or control characters in *message* cannot yield invalid JSON.
        """
        self._send_json(code, {"error": message})

    def _check_auth(self):
        """Return True if the Authorization header carries the auth token.

        Both "Bearer TOKEN" and a bare "TOKEN" are accepted.
        """
        auth_header = self.headers.get('Authorization', '')
        prefix = 'Bearer '
        if auth_header.startswith(prefix):
            token = auth_header[len(prefix):]
        else:
            token = auth_header
        if not AUTH_TOKEN:
            # An empty configured token would otherwise match a request that
            # sends no Authorization header at all.
            return False
        # Constant-time comparison; bytes are used because compare_digest
        # rejects non-ASCII str arguments.
        return hmac.compare_digest(token.encode('utf-8'), AUTH_TOKEN.encode('utf-8'))

    def _get_files_to_serve(self):
        """Return the regular files in SOURCE_DIR, sorted by path.

        Returns an empty list when the directory is missing or unreadable.
        """
        try:
            if not SOURCE_DIR.exists():
                logger.error(f"Source directory does not exist: {SOURCE_DIR}")
                return []
            # Sorted so the archive contents have a deterministic order.
            files = sorted(f for f in SOURCE_DIR.iterdir() if f.is_file())
        except OSError as e:
            logger.error(f"Error reading source directory: {e}")
            return []
        logger.info(f"Found {len(files)} files to serve")
        return files

    def _create_zip_archive(self, files):
        """Build an in-memory zip of *files* and return it as bytes.

        Entries are stored under their bare file names. Any failure to add a
        file is logged with the file name and re-raised.
        """
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for file_path in files:
                try:
                    zip_file.write(file_path, file_path.name)
                except Exception as e:
                    logger.error(f"Failed to add {file_path.name} to archive: {e}")
                    raise
                logger.info(f"Added {file_path.name} to archive")
        # Read the buffer only after the ZipFile is closed, which is when the
        # central directory gets written.
        return zip_buffer.getvalue()

    @staticmethod
    def _unique_destination(directory, name):
        """Return a path for *name* inside *directory* that does not exist yet.

        On a clash, "_1", "_2", ... is appended to the stem until a free
        name is found.
        """
        candidate = directory / name
        if not candidate.exists():
            return candidate
        stem, suffix = candidate.stem, candidate.suffix
        counter = 1
        while True:
            candidate = directory / f"{stem}_{counter}{suffix}"
            if not candidate.exists():
                return candidate
            counter += 1

    def _move_files_to_archive(self, files):
        """Move *files* into ARCHIVE_DIR.

        Returns:
            (moved_count, failed_files): the number of files moved and the
            names of the files that could not be moved.

        Raises:
            OSError: if ARCHIVE_DIR itself cannot be created.
        """
        ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
        moved_count = 0
        failed_files = []
        for file_path in files:
            try:
                dest_path = self._unique_destination(ARCHIVE_DIR, file_path.name)
                shutil.move(str(file_path), str(dest_path))
            except Exception as e:
                # Best effort: one failure must not stop the remaining moves.
                logger.error(f"Failed to move {file_path.name}: {e}")
                failed_files.append(file_path.name)
            else:
                logger.info(f"Moved {file_path.name} to {dest_path}")
                moved_count += 1
        if failed_files:
            logger.warning(f"Failed to move {len(failed_files)} files: {failed_files}")
        return moved_count, failed_files

    def do_POST(self):
        """Handle POST /get: send all pending files as a zip, then archive them."""
        if self.path != '/get':
            self._send_error_response(404, "Endpoint not found")
            return
        if not self._check_auth():
            logger.warning(f"Unauthorized access attempt from {self.get_client_ip()}")
            self._send_error_response(401, "Unauthorized - Invalid token")
            return

        # Set as soon as any response bytes may have gone out; after that a
        # second (500) response would only corrupt the stream.
        response_started = False
        # The lock covers listing, zipping, sending and archiving so that the
        # same files are never handed out twice.
        with file_lock:
            try:
                files = self._get_files_to_serve()
                if not files:
                    response_started = True
                    self._send_error_response(404, "No files available to serve")
                    return

                logger.info(f"Creating archive of {len(files)} files")
                zip_data = self._create_zip_archive(files)

                response_started = True
                self.send_response(200)
                self.send_header('Content-Type', 'application/zip')
                self.send_header('Content-Disposition', 'attachment; filename="files.zip"')
                self.send_header('Content-Length', str(len(zip_data)))
                self.end_headers()
                self.wfile.write(zip_data)
                self.wfile.flush()
                logger.info(f"Successfully sent {len(zip_data)} bytes to {self.get_client_ip()}")

                # Archive only after the body was written without error, so a
                # failed transfer leaves the files available for a retry.
                moved, failed = self._move_files_to_archive(files)
                logger.info(f"Archived {moved}/{len(files)} files")
            except Exception as e:
                logger.error(f"Error processing request: {e}", exc_info=True)
                if response_started:
                    # Headers (and possibly the body) are already out; all
                    # that is left to do is drop the connection.
                    self.close_connection = True
                else:
                    self._send_error_response(500, "Internal server error")

    def do_GET(self):
        """Handle GET requests: /health is the only supported path."""
        if self.path == '/health':
            self._send_json(200, {"status": "ok"})
        else:
            self._send_error_response(404, "Endpoint not found. Use POST /get")
def run_server():
    """Create the working directories, then serve requests until interrupted.

    Ctrl+C (KeyboardInterrupt) stops the loop; the listening socket is closed
    on every exit path.
    """
    # Both directories must exist before the first request arrives.
    for directory in (SOURCE_DIR, ARCHIVE_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    logger.info(f"Starting server on {HOST}:{PORT}")
    logger.info(f"Source directory: {SOURCE_DIR}")
    logger.info(f"Archive directory: {ARCHIVE_DIR}")

    httpd = HTTPServer((HOST, PORT), FileServerHandler)
    try:
        logger.info("Server is running. Press Ctrl+C to stop.")
        httpd.serve_forever()
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    finally:
        httpd.server_close()
        logger.info("Server closed")


if __name__ == '__main__':
    run_server()