#!/usr/bin/env python3
"""
Robust HTTP file server with authentication and automatic file archiving.

Serves files from a source directory and moves them to an archive directory after serving.
"""

import hmac
import io
import json
import logging
import os
import shutil
import zipfile
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path
from threading import Lock

# Configuration
#
# The shared secret can be supplied through the FILE_SERVER_AUTH_TOKEN
# environment variable so deployments do not have to rely on the value that
# is committed to source control. An unset *or empty* variable falls back to
# the built-in token, which keeps existing deployments working and guarantees
# the token is never the empty string.
# NOTE(review): the fallback literal is a credential stored in the repository;
# rotate it and drop the fallback once every deployment sets the variable.
AUTH_TOKEN = os.environ.get("FILE_SERVER_AUTH_TOKEN") or "chai7pu5oosigh4Ahzajoocheich9hio"
SOURCE_DIR = Path("/data/books/ingest")    # files waiting to be served
ARCHIVE_DIR = Path("/data/books/served")   # files are moved here once served
HOST = "0.0.0.0"                           # listen on all interfaces
PORT = 18000

# Setup logging: every record goes both to ``file_server.log`` (relative to
# the current working directory) and to the console stream handler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('file_server.log'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

# Lock held around the list -> zip -> send -> move sequence so that two
# requests can never pick up (and archive) the same files.
file_lock = Lock()


class FileServerHandler(BaseHTTPRequestHandler):
    """HTTP request handler for authenticated file serving.

    ``POST /get`` with a valid ``Authorization`` token returns every regular
    file found in ``SOURCE_DIR`` as a single zip archive and afterwards moves
    those files into ``ARCHIVE_DIR``. ``GET /health`` returns a small JSON
    status document. Any other path is answered with a JSON 404.
    """

    def log_message(self, format, *args):
        """Send the base class's log lines to the module logger, not stderr.

        The signature (including the ``format`` parameter name) mirrors
        ``BaseHTTPRequestHandler.log_message``.
        """
        logger.info("%s - %s" % (self.get_client_ip(), format % args))

    def get_client_ip(self):
        """Return the client IP, preferring reverse-proxy headers.

        Lookup order: first entry of ``X-Forwarded-For``, then ``X-Real-IP``,
        then the peer address of the socket. The header values are whatever
        the request carried, so treat the result as informational (logging).

        Returns:
            The client address as a string.
        """
        # ``self.headers`` only exists once a request has been parsed. The
        # base class can log an error (and therefore call log_message ->
        # get_client_ip) for a malformed request line before that point, so
        # the attribute must not be assumed.
        headers = getattr(self, 'headers', None)
        if headers is not None:
            # X-Forwarded-For can be a comma-separated list; the first entry
            # is the originating client.
            forwarded_for = headers.get('X-Forwarded-For')
            if forwarded_for:
                first_hop = forwarded_for.split(',')[0].strip()
                if first_hop:
                    return first_hop

            # Alternative single-value header set by nginx.
            real_ip = headers.get('X-Real-IP')
            if real_ip and real_ip.strip():
                return real_ip.strip()

        # Fallback to the direct connection IP.
        return self.address_string()

    def _send_error_response(self, code, message):
        """Send ``{"error": message}`` as a JSON body with status ``code``.

        The body is produced by ``json.dumps`` so quotes, backslashes and
        control characters in ``message`` cannot yield invalid JSON, and a
        ``Content-Length`` header is sent so clients know where it ends.
        """
        body = (json.dumps({"error": message}) + "\n").encode()
        self.send_response(code)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _check_auth(self):
        """Return True when the request carries the expected token.

        Both ``Authorization: Bearer TOKEN`` and ``Authorization: TOKEN`` are
        accepted. A missing or empty token is always rejected.
        """
        auth_header = self.headers.get('Authorization', '')

        # Support both "Bearer TOKEN" and just "TOKEN" formats.
        if auth_header.startswith('Bearer '):
            token = auth_header[7:]
        else:
            token = auth_header

        if not token:
            return False

        # Constant-time comparison avoids leaking how many leading characters
        # matched. Compare bytes: compare_digest rejects non-ASCII ``str``.
        return hmac.compare_digest(token.encode('utf-8'), AUTH_TOKEN.encode('utf-8'))

    def _get_files_to_serve(self):
        """Return the regular files in ``SOURCE_DIR``, sorted by path.

        Returns an empty list (after logging the reason) when the directory
        is missing or cannot be read.
        """
        try:
            if not SOURCE_DIR.exists():
                logger.error(f"Source directory does not exist: {SOURCE_DIR}")
                return []

            # Sorted so the archive layout is deterministic between runs.
            files = sorted(f for f in SOURCE_DIR.iterdir() if f.is_file())
            logger.info(f"Found {len(files)} files to serve")
            return files
        except OSError as e:
            logger.error(f"Error reading source directory: {e}")
            return []

    def _create_zip_archive(self, files):
        """Build an in-memory zip of ``files`` and return it as ``bytes``.

        Each file is stored under its bare file name. Any failure to add a
        file is logged with that file's name and re-raised, so a partial
        archive is never returned.
        """
        zip_buffer = io.BytesIO()

        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for file_path in files:
                try:
                    zip_file.write(file_path, file_path.name)
                except Exception as e:
                    logger.error(f"Failed to add {file_path.name} to archive: {e}")
                    raise
                logger.info(f"Added {file_path.name} to archive")

        # The ZipFile must be closed (central directory written) before the
        # buffer content is complete.
        return zip_buffer.getvalue()

    def _move_files_to_archive(self, files):
        """Move served files into ``ARCHIVE_DIR``.

        A name that already exists in the archive gets a ``_N`` suffix before
        the extension. Moving is best-effort per file: a failure is logged
        and recorded, and the remaining files are still processed.

        Returns:
            ``(moved_count, failed_files)`` where ``failed_files`` is a list
            of the file names that could not be moved.
        """
        # Ensure archive directory exists.
        ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)

        moved_count = 0
        failed_files = []

        for file_path in files:
            try:
                dest_path = ARCHIVE_DIR / file_path.name

                # Handle duplicate filenames: name.ext -> name_1.ext, name_2.ext, ...
                if dest_path.exists():
                    base = dest_path.stem
                    suffix = dest_path.suffix
                    counter = 1
                    while dest_path.exists():
                        dest_path = ARCHIVE_DIR / f"{base}_{counter}{suffix}"
                        counter += 1

                shutil.move(str(file_path), str(dest_path))
                logger.info(f"Moved {file_path.name} to {dest_path}")
                moved_count += 1
            except Exception as e:
                logger.error(f"Failed to move {file_path.name}: {e}")
                failed_files.append(file_path.name)

        if failed_files:
            logger.warning(f"Failed to move {len(failed_files)} files: {failed_files}")

        return moved_count, failed_files

    def do_POST(self):
        """Handle ``POST /get``: send all pending files as a zip, then archive them.

        Responses: 404 for any other path or when there is nothing to serve,
        401 for a bad token, 200 with the zip body on success, and 500 when
        processing fails before any response has been started.
        """
        if self.path != '/get':
            self._send_error_response(404, "Endpoint not found")
            return

        # Check authentication.
        if not self._check_auth():
            logger.warning(f"Unauthorized access attempt from {self.get_client_ip()}")
            self._send_error_response(401, "Unauthorized - Invalid token")
            return

        # Once any part of a response has been written, a second status line
        # would corrupt the stream, so remember whether we got that far.
        response_started = False

        # Use lock to prevent concurrent file operations.
        with file_lock:
            try:
                files = self._get_files_to_serve()

                if not files:
                    response_started = True
                    self._send_error_response(404, "No files available to serve")
                    return

                logger.info(f"Creating archive of {len(files)} files")
                zip_data = self._create_zip_archive(files)

                # Send the zip file.
                response_started = True
                self.send_response(200)
                self.send_header('Content-Type', 'application/zip')
                self.send_header('Content-Disposition', 'attachment; filename="files.zip"')
                self.send_header('Content-Length', str(len(zip_data)))
                self.end_headers()
                self.wfile.write(zip_data)

                logger.info(f"Successfully sent {len(zip_data)} bytes to {self.get_client_ip()}")

                # Only archive after the body was written without error, so a
                # failed transfer leaves the files available for a retry.
                moved, _failed = self._move_files_to_archive(files)
                logger.info(f"Archived {moved}/{len(files)} files")

            except Exception as e:
                logger.error(f"Error processing request: {e}", exc_info=True)
                if not response_started:
                    self._send_error_response(500, "Internal server error")

    def do_GET(self):
        """Handle GET requests: ``/health`` is a health check, anything else is 404."""
        if self.path == '/health':
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(b'{"status": "ok"}\n')
        else:
            self._send_error_response(404, "Endpoint not found. Use POST /get")


def run_server():
    """Create the working directories and serve requests until interrupted.

    Binds an ``HTTPServer`` to ``(HOST, PORT)`` using ``FileServerHandler``
    and blocks in ``serve_forever()``. Ctrl+C is logged as a normal stop, and
    the listening socket is closed on the way out in every case.
    """
    # Both directories must exist before the first request arrives.
    for directory in (SOURCE_DIR, ARCHIVE_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    startup_lines = (
        f"Starting server on {HOST}:{PORT}",
        f"Source directory: {SOURCE_DIR}",
        f"Archive directory: {ARCHIVE_DIR}",
    )
    for line in startup_lines:
        logger.info(line)

    httpd = HTTPServer((HOST, PORT), FileServerHandler)

    try:
        logger.info("Server is running. Press Ctrl+C to stop.")
        httpd.serve_forever()
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    finally:
        # Release the listening socket whether we stopped cleanly or not.
        httpd.server_close()
        logger.info("Server closed")


# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_server()