#!/usr/bin/env python3
"""
Robust HTTP file server with authentication and automatic file archiving.

Serves files from a source directory and moves them to an archive directory
after serving.

Endpoints:
    POST /get    -- authenticated; returns a zip of every regular file in
                    SOURCE_DIR, then moves those files to ARCHIVE_DIR.
    GET  /health -- unauthenticated liveness check.
"""

import hmac
import io
import json
import logging
import os
import shutil
import zipfile
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from threading import Lock

# Configuration
# NOTE(security): a secret committed to source control should be rotated and
# supplied via the environment. The literal is kept only as a
# backward-compatible fallback; an empty/unset variable falls back to it so
# that an empty Authorization header can never match.
AUTH_TOKEN = (
    os.environ.get("FILE_SERVER_AUTH_TOKEN")
    or "chai7pu5oosigh4Ahzajoocheich9hio"
)
SOURCE_DIR = Path("/data/books/ingest")
ARCHIVE_DIR = Path("/data/books/served")
HOST = "0.0.0.0"
PORT = 18000

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('file_server.log'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

# Thread-safe file operation lock
file_lock = Lock()


class FileServerHandler(BaseHTTPRequestHandler):
    """HTTP request handler for authenticated file serving."""

    def log_message(self, format, *args):
        """Override to use our logger instead of stderr."""
        logger.info("%s - %s" % (self.get_client_ip(), format % args))

    def get_client_ip(self):
        """Get the real client IP, considering reverse proxy headers.

        The forwarded headers are client-controlled unless a trusted proxy
        overwrites them, so the result is used for logging only.

        Returns:
            The first X-Forwarded-For entry, else X-Real-IP, else the
            address of the direct TCP peer.
        """
        # The base class logs parse errors (malformed or over-long request
        # lines) before ``self.headers`` has been assigned; fall back to the
        # socket address instead of raising AttributeError while logging.
        headers = getattr(self, 'headers', None)
        if headers is not None:
            # Check X-Forwarded-For header (set by nginx)
            forwarded_for = headers.get('X-Forwarded-For')
            if forwarded_for:
                # X-Forwarded-For can be a comma-separated list; the first
                # entry is the originating client.
                return forwarded_for.split(',')[0].strip()

            # Check X-Real-IP header (alternative nginx header)
            real_ip = headers.get('X-Real-IP')
            if real_ip:
                return real_ip.strip()

        # Fallback to direct connection IP
        return self.address_string()

    def _send_error_response(self, code, message):
        """Send a JSON error response.

        Args:
            code: HTTP status code to send.
            message: Human-readable text placed under the "error" key.
        """
        # json.dumps escapes quotes/backslashes/control characters so the
        # body is always valid JSON, whatever the message contains.
        body = (json.dumps({"error": message}) + "\n").encode()
        self.send_response(code)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _check_auth(self):
        """Verify the authentication token.

        Returns:
            True when the Authorization header carries AUTH_TOKEN, either as
            "Bearer TOKEN" or as the bare token; False otherwise.
        """
        auth_header = self.headers.get('Authorization', '') or ''

        # Support both "Bearer TOKEN" and just "TOKEN" formats
        if auth_header.startswith('Bearer '):
            token = auth_header[7:]
        else:
            token = auth_header

        # Constant-time comparison avoids leaking the token through response
        # timing. Compare bytes: compare_digest rejects non-ASCII str input.
        return hmac.compare_digest(
            token.encode('utf-8'), AUTH_TOKEN.encode('utf-8')
        )

    def _get_files_to_serve(self):
        """Get list of files in the source directory.

        Returns:
            Regular files directly inside SOURCE_DIR, sorted by path so the
            archive layout is deterministic. An empty list when the directory
            is missing or unreadable (the error is logged).
        """
        try:
            if not SOURCE_DIR.exists():
                logger.error(f"Source directory does not exist: {SOURCE_DIR}")
                return []

            files = sorted(f for f in SOURCE_DIR.iterdir() if f.is_file())
            logger.info(f"Found {len(files)} files to serve")
            return files
        except Exception as e:
            logger.error(f"Error reading source directory: {e}")
            return []

    def _create_zip_archive(self, files):
        """Create a zip archive of the files in memory.

        Args:
            files: Iterable of paths; each is stored under its base name.

        Returns:
            The archive as bytes.

        Raises:
            Exception: whatever zipfile or the filesystem raised; a partial
                archive is never returned.
        """
        zip_buffer = io.BytesIO()

        try:
            with zipfile.ZipFile(
                zip_buffer, 'w', zipfile.ZIP_DEFLATED
            ) as zip_file:
                for file_path in files:
                    try:
                        zip_file.write(file_path, file_path.name)
                        logger.info(f"Added {file_path.name} to archive")
                    except Exception as e:
                        logger.error(
                            f"Failed to add {file_path.name} to archive: {e}"
                        )
                        raise

            # Read the buffer only after the ZipFile is closed, so the
            # central directory has been written.
            return zip_buffer.getvalue()
        except Exception as e:
            logger.error(f"Error creating zip archive: {e}")
            raise

    def _move_files_to_archive(self, files):
        """Move served files to the archive directory.

        Args:
            files: Paths of the files that were just served.

        Returns:
            A ``(moved_count, failed_files)`` tuple, where ``failed_files``
            lists the names of files that could not be moved.
        """
        # Ensure archive directory exists
        ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)

        moved_count = 0
        failed_files = []

        for file_path in files:
            try:
                dest_path = ARCHIVE_DIR / file_path.name

                # Handle duplicate filenames by appending _1, _2, ... to the
                # stem until an unused name is found.
                if dest_path.exists():
                    base = dest_path.stem
                    suffix = dest_path.suffix
                    counter = 1
                    while dest_path.exists():
                        dest_path = ARCHIVE_DIR / f"{base}_{counter}{suffix}"
                        counter += 1

                shutil.move(str(file_path), str(dest_path))
                logger.info(f"Moved {file_path.name} to {dest_path}")
                moved_count += 1
            except Exception as e:
                # Best effort: one failure must not block the remaining files.
                logger.error(f"Failed to move {file_path.name}: {e}")
                failed_files.append(file_path.name)

        if failed_files:
            logger.warning(
                f"Failed to move {len(failed_files)} files: {failed_files}"
            )

        return moved_count, failed_files

    def do_POST(self):
        """Handle POST requests to /get endpoint.

        Work is split into three phases so an error response is only ever
        sent before any part of the 200 response has been written:
        prepare the archive (failure -> 500), send it (failure -> log only,
        files stay in SOURCE_DIR for a retry), archive the served files
        (failure -> log only).
        """
        if self.path != '/get':
            self._send_error_response(404, "Endpoint not found")
            return

        # Check authentication
        if not self._check_auth():
            logger.warning(
                f"Unauthorized access attempt from {self.get_client_ip()}"
            )
            self._send_error_response(401, "Unauthorized - Invalid token")
            return

        # Use lock to prevent concurrent file operations
        with file_lock:
            # Phase 1: build the payload. Nothing has been sent yet, so a
            # 500 is still a valid response here.
            try:
                files = self._get_files_to_serve()
                zip_data = None
                if files:
                    logger.info(f"Creating archive of {len(files)} files")
                    zip_data = self._create_zip_archive(files)
            except Exception as e:
                logger.error(f"Error processing request: {e}", exc_info=True)
                self._send_error_response(500, "Internal server error")
                return

            if not files:
                self._send_error_response(404, "No files available to serve")
                return

            # Phase 2: send the zip file. If the client goes away mid-write
            # the status line is already on the wire, so only log; the files
            # are intentionally left in place.
            try:
                self.send_response(200)
                self.send_header('Content-Type', 'application/zip')
                self.send_header(
                    'Content-Disposition', 'attachment; filename="files.zip"'
                )
                self.send_header('Content-Length', str(len(zip_data)))
                self.end_headers()
                self.wfile.write(zip_data)
            except OSError as e:
                logger.error(f"Error processing request: {e}", exc_info=True)
                return

            logger.info(
                f"Successfully sent {len(zip_data)} bytes "
                f"to {self.get_client_ip()}"
            )

            # Phase 3: move files to archive directory. The response is
            # complete, so failures here can only be logged.
            try:
                moved, _failed = self._move_files_to_archive(files)
                logger.info(f"Archived {moved}/{len(files)} files")
            except Exception as e:
                logger.error(f"Error processing request: {e}", exc_info=True)

    def do_GET(self):
        """Handle GET requests (health check)."""
        if self.path == '/health':
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(b'{"status": "ok"}\n')
        else:
            self._send_error_response(404, "Endpoint not found. Use POST /get")


def run_server():
    """Start the HTTP server and block until interrupted with Ctrl+C."""
    # Ensure directories exist
    SOURCE_DIR.mkdir(parents=True, exist_ok=True)
    ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)

    logger.info(f"Starting server on {HOST}:{PORT}")
    logger.info(f"Source directory: {SOURCE_DIR}")
    logger.info(f"Archive directory: {ARCHIVE_DIR}")

    server = HTTPServer((HOST, PORT), FileServerHandler)

    try:
        logger.info("Server is running. Press Ctrl+C to stop.")
        server.serve_forever()
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    finally:
        server.server_close()
        logger.info("Server closed")


if __name__ == '__main__':
    run_server()