Flask - 監控與健康檢查
目標
- 添加健康檢查端點
- 集成 Prometheus 監控
- 收集基本指標並測試
步驟
準備環境
- 繼續使用
flask_api/
項目結構,激活虛擬環境:
# Windows:
flask_api_env\Scripts\activate
# macOS/Linux:
source flask_api_env/bin/activate
- 安裝 Prometheus 客戶端:
1
pip install prometheus-client
- 繼續使用
添加健康檢查端點
修改 app/__init__.py,添加健康檢查路由:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
"""Application factory, shared extension instances and auth decorators."""
import logging
import os
from functools import wraps
from logging.handlers import RotatingFileHandler

import jwt
from flask import Flask, abort, current_app, g, jsonify, request
from flask_bcrypt import Bcrypt
from flask_caching import Cache
from flask_cors import CORS
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
from flask_marshmallow import Marshmallow
from flask_restx import Api
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import text

from .routes.v1.todos import todos_bp as todos_v1_bp
from .routes.v1.users import users_bp as users_v1_bp
from .routes.v1.posts import posts_bp as posts_v1_bp
from .routes.v2.todos import todos_bp as todos_v2_bp
from .routes.v2.posts import posts_bp as posts_v2_bp
from .config import config_map
from .celery_config import make_celery

# Extension instances are created unbound and attached to the app in create_app().
db = SQLAlchemy()
ma = Marshmallow()
bcrypt = Bcrypt()
cache = Cache()
limiter = Limiter(key_func=get_remote_address)

# Populated by create_app(); stays None until the factory has run.
celery = None


def setup_logging(app):
    """Attach log handlers to ``app.logger``.

    Outside debug mode a rotating file handler (``app.log``, 10000 bytes,
    3 backups, INFO and above) is added. A console handler at DEBUG level is
    always added, and the logger itself is set to DEBUG.

    Args:
        app: The Flask application whose logger is configured.
    """
    if not app.debug:
        handler = RotatingFileHandler('app.log', maxBytes=10000, backupCount=3)
        handler.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]'
        )
        handler.setFormatter(formatter)
        app.logger.addHandler(handler)

    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
    )
    app.logger.addHandler(console_handler)
    app.logger.setLevel(logging.DEBUG)


def create_app():
    """Build and configure the Flask application.

    Loads the config class selected by ``FLASK_ENV`` (default
    ``development``), binds the extensions, registers the v1/v2 blueprints,
    the ``/health`` endpoint and the JSON error handlers, and creates the
    database tables.

    Returns:
        The configured Flask application.
    """
    global celery

    app = Flask(__name__)
    env = os.getenv('FLASK_ENV', 'development')
    app.config.from_object(config_map[env])

    database_url = os.getenv('DATABASE_URL')
    if database_url:
        # SQLAlchemy only accepts the "postgresql://" scheme, so rewrite the
        # legacy "postgres://" form if the environment supplies it.
        app.config['SQLALCHEMY_DATABASE_URI'] = database_url.replace(
            'postgres://', 'postgresql://'
        )

    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    app.static_folder = app.root_path

    db.init_app(app)
    ma.init_app(app)
    bcrypt.init_app(app)
    cache.init_app(app)
    limiter.init_app(app)
    setup_logging(app)
    CORS(app, resources={r"/api/*": {"origins": "*"}})

    api = Api(
        app,
        title='Blog API',
        version='1.0',
        description='A simple blog API with user and post management',
        doc='/api/docs/',
        authorizations={
            'jwt': {
                'type': 'apiKey',
                'in': 'header',
                'name': 'Authorization',
                'description': 'Enter "Bearer <token>"',
            }
        },
    )

    celery = make_celery(app)
    redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
    celery.conf.broker_url = redis_url
    celery.conf.result_backend = redis_url

    app.register_blueprint(todos_v1_bp, url_prefix='/api/v1')
    app.register_blueprint(users_v1_bp, url_prefix='/api/v1')
    app.register_blueprint(posts_v1_bp, url_prefix='/api/v1')
    app.register_blueprint(todos_v2_bp, url_prefix='/api/v2')
    app.register_blueprint(posts_v2_bp, url_prefix='/api/v2')

    @app.route('/health')
    def health_check():
        """Return 200 ``healthy`` if a trivial DB query succeeds, else 503."""
        try:
            # SQLAlchemy 2.x rejects bare SQL strings; without text() the
            # query always raised and this endpoint always reported 503.
            db.session.execute(text('SELECT 1'))
        except Exception as e:
            # Deliberately broad: any failure to reach the database means
            # the service is unhealthy, whatever the driver-specific error.
            app.logger.error(f'Health check failed: {str(e)}')
            return jsonify({'status': 'unhealthy'}), 503
        return jsonify({'status': 'healthy'})

    @app.errorhandler(404)
    def not_found(error):
        app.logger.error(f'404 error: {str(error)}')
        return jsonify({'error': 'Not Found', 'message': str(error)}), 404

    @app.errorhandler(400)
    def bad_request(error):
        app.logger.warning(f'400 error: {str(error)}')
        return jsonify({'error': 'Bad Request', 'message': str(error)}), 400

    @app.errorhandler(500)
    def internal_error(error):
        app.logger.critical(f'500 error: {str(error)}')
        return jsonify({
            'error': 'Internal Server Error',
            'message': 'Something went wrong on our end',
        }), 500

    with app.app_context():
        db.create_all()

    return app


def login_required(f):
    """Require a valid JWT in the ``Authorization`` header.

    Accepts the token with or without a ``Bearer `` prefix, decodes it with
    the app's ``SECRET_KEY`` (HS256) and stores the matching user on
    ``g.current_user``. Aborts with 401 when the token is missing, expired,
    invalid, or refers to no user.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        # Imported lazily, as in the original listing.
        from .models import User

        token = request.headers.get('Authorization')
        if not token:
            abort(401, description='Missing token')
        try:
            if token.startswith('Bearer '):
                token = token[7:]
            # The factory's `app` is not in scope at module level, so the
            # active application is reached through current_app.
            data = jwt.decode(
                token, current_app.config['SECRET_KEY'], algorithms=['HS256']
            )
            user = User.query.get(data['user_id'])
            if not user:
                abort(401, description='Invalid token')
            g.current_user = user
        except jwt.ExpiredSignatureError:
            abort(401, description='Token has expired')
        except jwt.InvalidTokenError:
            abort(401, description='Invalid token')
        return f(*args, **kwargs)
    return decorated_function


def admin_required(f):
    """Require an authenticated user whose ``is_admin`` flag is truthy.

    Runs the ``login_required`` checks first, then aborts with 403 for
    non-admin users.
    """
    @wraps(f)
    @login_required
    def decorated_function(*args, **kwargs):
        if not g.current_user.is_admin:
            abort(403, description='Admin access required')
        return f(*args, **kwargs)
    return decorated_function
集成 Prometheus 監控
新建 app/monitoring.py:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
"""Prometheus instrumentation: request counter, latency histogram, /metrics."""
import time

from flask import Response, request
from prometheus_client import Counter, Histogram, generate_latest

# Metric definitions
REQUEST_COUNT = Counter(
    'blog_api_requests_total',
    'Total number of requests',
    ['method', 'endpoint', 'status'],
)
REQUEST_LATENCY = Histogram(
    'blog_api_request_latency_seconds',
    'Request latency in seconds',
    ['method', 'endpoint'],
)


def _endpoint_label():
    """Return a bounded label value for the current request's route.

    Uses the matched URL rule (e.g. ``/api/v1/posts/<int:id>``) rather than
    the raw path, so ids and random 404 paths do not each create a new time
    series. Requests that matched no rule share one placeholder.
    """
    rule = request.url_rule
    return rule.rule if rule is not None else 'unmatched'


def setup_metrics(app):
    """Register the ``/metrics`` endpoint and request hooks on ``app``.

    Args:
        app: The Flask application to instrument.
    """
    @app.route('/metrics')
    def metrics():
        """Expose Prometheus metrics"""
        return Response(generate_latest(), mimetype='text/plain')

    @app.before_request
    def before_request():
        # Record the start time on the request so after_request can read it.
        request.start_time = time.time()

    @app.after_request
    def after_request(response):
        endpoint = _endpoint_label()
        # start_time is absent if before_request did not run for this request.
        if hasattr(request, 'start_time'):
            latency = time.time() - request.start_time
            REQUEST_LATENCY.labels(request.method, endpoint).observe(latency)
        REQUEST_COUNT.labels(
            request.method, endpoint, response.status_code
        ).inc()
        return response
修改 app/__init__.py,導入並設置監控:
1 2 3 4 5 6 7 8 9
# Add at the top of app/__init__.py.
# NOTE: `time` is used inside app/monitoring.py, so it must be imported in
# that module; importing it here does not make it visible there.
import time
from .monitoring import setup_metrics


# In create_app, after the CORS initialisation, add:
def create_app():
    # ... previous code ...
    setup_metrics(app)  # register /metrics and the request hooks
    # ... remaining code ...
運行應用
- 運行:
1
python run.py
- 運行:
測試健康檢查和監控
- 健康檢查:
  - 訪問 http://localhost:5000/health:
    - 正常:{"status": "healthy"}(200)
    - 斷開數據庫(如修改 DATABASE_URL 為無效值):{"status": "unhealthy"}(503)
- Prometheus 指標:
- 訪問
http://localhost:5000/metrics
,應看到類似輸出:
# HELP blog_api_requests_total Total number of requests
# TYPE blog_api_requests_total counter
blog_api_requests_total{method="GET",endpoint="/api/v1/posts",status="200"} 3.0
# HELP blog_api_request_latency_seconds Request latency in seconds
# TYPE blog_api_request_latency_seconds histogram
blog_api_request_latency_seconds_bucket{method="GET",endpoint="/api/v1/posts",le="0.005"} 1.0
- 使用 Postman 多次請求
/api/v1/posts
,觀察指標變化。
- 訪問
- 健康檢查:
部署更新
- 更新
requirements.txt
:1
pip freeze > requirements.txt
- 提交並推送:
git add .
git commit -m "Add health check and Prometheus monitoring"
git push heroku main
- 測試線上:
https://my-blog-api.herokuapp.com/health
https://my-blog-api.herokuapp.com/metrics
- 更新
作業
- 添加一個自定義指標,例如跟踪用戶登錄次數(提示:在 login 路由中增加 Counter)。
- 配置本地 Prometheus 服務器收集指標(參考官方文檔)。
注意事項
- 健康檢查應根據應用需求擴展(例如檢查 Redis 連接)。
- Prometheus 指標在生產環境需配合 Grafana 可視化。
- Heroku 的動態端口可能影響本地測試,確保監控路徑公開。
本文章以 CC BY 4.0 授權