Add server metrics graphing feature
All checks were successful
Docker Build and Push (Multi-architecture) / build-and-push (push) Successful in 5m25s

This commit is contained in:
2025-09-28 16:11:43 +00:00
parent cbb951d121
commit 4b400fea1f
3 changed files with 478 additions and 12 deletions

View File

@@ -30,7 +30,13 @@ import configparser
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from pathlib import Path
import generate_config
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend for server environments
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from collections import deque
import io
from server_metrics_graphs import ServerMetricsGraphs, ServerMetricsManager
# ==============================================
# LOGGING SETUP
@@ -549,10 +555,11 @@ class PterodactylBot(commands.Bot):
self.server_cache: Dict[str, dict] = {} # Cache of server data from Pterodactyl
self.embed_locations: Dict[str, Dict[str, int]] = {} # Tracks where embeds are posted
self.update_lock = asyncio.Lock() # Prevents concurrent updates
self.embed_storage_path = Path(EMBED_LOCATIONS_FILE) # File to store embed locations
self.embed_storage_path = Path(EMBED_LOCATIONS_FILE) # File to store embed
self.metrics_manager = ServerMetricsManager() # Data manager for metrics graphing system
# Track previous server states and CPU usage to detect changes
# Format: {server_id: (state, cpu_usage, last_force_update)}
self.previous_states: Dict[str, Tuple[str, float, Optional[float]]] = {}
self.previous_states: Dict[str, Tuple[str, float, Optional[float]]] = {}
logger.info("Initialized PterodactylBot instance with state tracking")
async def setup_hook(self):
@@ -753,9 +760,9 @@ class PterodactylBot(commands.Bot):
embed.add_field(name="🆔 Server ID", value=f"`{identifier}`", inline=True)
if is_suspended:
embed.add_field(name="📊 Status", value="⛔ Suspended", inline=True)
embed.add_field(name=" Status", value="⛔ Suspended", inline=True)
else:
embed.add_field(name="📊 Status", value="✅ Active", inline=True)
embed.add_field(name=" Status", value="✅ Active", inline=True)
# Add resource usage if server is running
if current_state.lower() == "running":
@@ -806,22 +813,37 @@ class PterodactylBot(commands.Bot):
usage_text = (
f"```properties\n"
f"CPU: {cpu_usage:>8} / {format_limit(cpu_limit, ' %')}\n"
f"Memory: {memory_usage:>8} / {format_limit(memory_limit, ' MB')}\n"
f"Disk: {disk_usage:>8} / {format_limit(disk_limit, ' MB')}\n"
f"Memory: {memory_usage:>8} / {format_limit(memory_limit, ' MiB')}\n"
f"Disk: {disk_usage:>8} / {format_limit(disk_limit, ' MiB')}\n"
f"```"
)
embed.add_field(
name="📈 Resource Usage",
name="📊 Resource Usage",
value=usage_text,
inline=False
)
embed.add_field(
name="🌐 Network",
value=f"⬇️ {network_rx} MB / ⬆️ {network_tx} MB",
value=f"⬇️ {network_rx} MiB / ⬆️ {network_tx} MiB",
inline=False
)
# Add graph images if available
server_graphs = self.metrics_manager.get_server_graphs(identifier)
if server_graphs and server_graphs.has_sufficient_data:
summary = server_graphs.get_data_summary()
# Add a field explaining the graphs
embed.add_field(
name="📈 Usage Trends (Last Minute)",
value=f"Data points: {summary['point_count']}/6 • CPU trend: {summary['cpu_trend']} • Memory trend: {summary['memory_trend']}",
inline=False
)
# Set graph images (these will be attached as files in the update_status method)
embed.set_image(url=f"attachment://metrics_graph_{identifier}.png")
embed.set_footer(text="Last updated")
@@ -860,6 +882,10 @@ class PterodactylBot(commands.Bot):
# Update our local cache with fresh server data
self.server_cache = {server['attributes']['identifier']: server for server in servers}
logger.debug(f"Updated server cache with {len(servers)} servers")
# Clean up metrics for servers that no longer exist
active_server_ids = list(self.server_cache.keys())
self.metrics_manager.cleanup_old_servers(active_server_ids)
# Variables to track our update statistics
update_count = 0 # Successful updates
@@ -885,7 +911,13 @@ class PterodactylBot(commands.Bot):
resources = await self.pterodactyl_api.get_server_resources(server_id)
current_state = resources.get('attributes', {}).get('current_state', 'offline')
cpu_usage = round(resources.get('attributes', {}).get('resources', {}).get('cpu_absolute', 0), 2)
# Collect metrics data for running servers
if current_state == 'running':
memory_usage = round(resources.get('attributes', {}).get('resources', {}).get('memory_bytes', 0) / (1024 ** 2), 2)
self.metrics_manager.add_server_data(server_id, server_name, cpu_usage, memory_usage)
logger.debug(f"Added metrics data for {server_name}: CPU={cpu_usage}%, Memory={memory_usage}MB")
# Retrieve previous recorded state, CPU usage, and last force update time
prev_state, prev_cpu, last_force_update = self.previous_states.get(server_id, (None, 0, None))
@@ -929,7 +961,20 @@ class PterodactylBot(commands.Bot):
# Fetch and update the existing message
message = await channel.fetch_message(int(location['message_id']))
await message.edit(embed=embed, view=view)
# Generate and attach graph images if available
files = []
server_graphs = self.metrics_manager.get_server_graphs(server_id)
if server_graphs and server_graphs.has_sufficient_data:
# Generate the combined CPU and memory graph
combined_graph = server_graphs.generate_combined_graph()
if combined_graph:
files.append(discord.File(combined_graph, filename=f"metrics_graph_{server_id}.png"))
# Update message with embed, view, and graph files
if files:
await message.edit(embed=embed, view=view, attachments=files)
else:
await message.edit(embed=embed, view=view)
update_count += 1
logger.debug(f"Updated status for {server_name}")

View File

@@ -1,4 +1,5 @@
discord.py>=2.3.0
aiohttp>=3.8.0
configparser>=5.3.0
python-dotenv
python-dotenv
matplotlib

420
server_metrics_graphs.py Normal file
View File

@@ -0,0 +1,420 @@
"""
Server Metrics Graphs Module for Pterodactyl Discord Bot
This module provides graphing capabilities for server CPU and memory usage.
Generates line graphs as PNG images for embedding in Discord messages.
"""
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend for server environments
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from collections import deque
from datetime import datetime, timedelta
from typing import Dict, Tuple, Optional
import io
import logging
# Get the logger from the main bot module
logger = logging.getLogger('pterodisbot')
class ServerMetricsGraphs:
    """
    Track recent CPU and memory samples for one server and render them as PNG graphs.

    Features:
    - Stores the most recent samples in a bounded FIFO queue (6 by default,
      which is about one minute of history when sampled every 10 seconds)
    - Generates PNG images of line graphs for Discord embedding
    - Separate tracking for CPU percentage and memory MB usage
    - Graph styling tuned for the Discord dark theme

    Rendering is best-effort: every ``generate_*`` method returns ``None``
    (and logs the error) instead of raising when a graph cannot be produced.
    """

    # Colour palette shared by every graph (Discord dark theme).
    FIGURE_BG = '#2f3136'
    AXES_BG = '#36393f'
    TEXT_COLOR = '#ffffff'
    CPU_COLOR = '#7289da'
    MEMORY_COLOR = '#43b581'

    # Sampling / analysis parameters.
    DEFAULT_MAX_POINTS = 6          # size of the rolling window
    MIN_POINTS_FOR_GRAPH = 2        # a line needs at least two points
    SAMPLE_INTERVAL_SECONDS = 10    # nominal spacing used for time_span_minutes
    CPU_TREND_THRESHOLD = 5         # percentage points
    MEMORY_TREND_THRESHOLD_MB = 50  # megabytes

    def __init__(self, server_id: str, server_name: str, max_points: int = DEFAULT_MAX_POINTS):
        """
        Initialize metrics tracking for a server.

        Args:
            server_id: Pterodactyl server identifier
            server_name: Human-readable server name
            max_points: Maximum number of samples kept; older samples are
                discarded first. Defaults to 6.

        Raises:
            ValueError: If ``max_points`` is too small to ever draw a graph.
        """
        if max_points < self.MIN_POINTS_FOR_GRAPH:
            raise ValueError(
                f"max_points must be at least {self.MIN_POINTS_FOR_GRAPH}, got {max_points}"
            )

        self.server_id = server_id
        self.server_name = server_name

        # Bounded deque gives automatic FIFO rotation.
        # Each entry is a tuple: (timestamp, cpu_percent, memory_mb)
        self.data_points = deque(maxlen=max_points)

        # True once enough samples exist for a meaningful graph.
        self.has_sufficient_data = False

        logger.debug(f"Initialized metrics tracking for server {server_name} ({server_id})")

    def add_data_point(self, cpu_percent: float, memory_mb: float, timestamp: Optional[datetime] = None):
        """
        Add a new data point to the metrics history.

        Args:
            cpu_percent: Current CPU usage percentage
            memory_mb: Current memory usage in megabytes
            timestamp: Optional timestamp, defaults to current time
        """
        if timestamp is None:
            timestamp = datetime.now()

        # The deque drops the oldest sample by itself once it is full.
        self.data_points.append((timestamp, cpu_percent, memory_mb))
        self.has_sufficient_data = len(self.data_points) >= self.MIN_POINTS_FOR_GRAPH

        logger.debug(f"Added metrics data point for {self.server_name}: CPU={cpu_percent}%, Memory={memory_mb}MB")

    # ------------------------------------------------------------------
    # Rendering helpers
    # ------------------------------------------------------------------

    def _series(self) -> Tuple[list, list, list]:
        """Return (timestamps, cpu_values, memory_values) as parallel lists."""
        samples = list(self.data_points)
        return (
            [sample[0] for sample in samples],
            [sample[1] for sample in samples],
            [sample[2] for sample in samples],
        )

    @staticmethod
    def _padded_upper(values, floor=0.0):
        """Return a y-axis upper bound: 10% above the peak, at least ``floor``, never zero."""
        upper = max(max(values) * 1.1, floor)
        # An all-zero series would otherwise give identical lower/upper limits,
        # which matplotlib treats as a singular axis.
        return upper if upper > 0 else 1.0

    def _plot_series(self, ax, timestamps, values, color, ylabel, upper):
        """Draw one filled line series on ``ax`` and label/scale its y-axis."""
        ax.set_facecolor(self.AXES_BG)
        ax.plot(timestamps, values, color=color, linewidth=2.5, marker='o', markersize=4)
        ax.fill_between(timestamps, values, alpha=0.3, color=color)
        ax.set_ylabel(ylabel, color=self.TEXT_COLOR, fontsize=10)
        ax.set_ylim(0, upper)

    def _format_time_axis(self, ax):
        """Show HH:MM:SS tick labels every 20 seconds, rotated for readability."""
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
        ax.xaxis.set_major_locator(mdates.SecondLocator(interval=20))
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha='right',
                 color=self.TEXT_COLOR, fontsize=8)

    def _style_axis(self, ax):
        """Apply the shared tick, grid and spine styling to ``ax``."""
        ax.tick_params(colors=self.TEXT_COLOR, labelsize=8)
        ax.grid(True, alpha=0.3, color=self.TEXT_COLOR)
        ax.spines['bottom'].set_color(self.TEXT_COLOR)
        ax.spines['left'].set_color(self.TEXT_COLOR)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

    def _figure_to_png(self, fig) -> io.BytesIO:
        """Serialize ``fig`` to an in-memory PNG, rewound to the start."""
        fig.tight_layout()  # prevent label cutoff
        buffer = io.BytesIO()
        fig.savefig(buffer, format='png', facecolor=self.FIGURE_BG, edgecolor='none',
                    bbox_inches='tight', dpi=100)
        buffer.seek(0)
        return buffer

    def _generate_single_graph(self, kind, title, value_index, color, ylabel, floor) -> Optional[io.BytesIO]:
        """Render one metric (selected by ``value_index``) as a standalone graph."""
        if not self.has_sufficient_data:
            logger.debug(f"Insufficient data for {kind} graph generation: {self.server_name}")
            return None

        fig = None
        try:
            series = self._series()
            timestamps, values = series[0], series[value_index]

            # style.context restores rcParams afterwards, so rendering does not
            # leak the dark theme into any other matplotlib user in the process.
            with plt.style.context('dark_background'):
                fig, ax = plt.subplots(figsize=(8, 4), dpi=100)
                fig.patch.set_facecolor(self.FIGURE_BG)

                self._plot_series(ax, timestamps, values, color, ylabel,
                                  self._padded_upper(values, floor))
                self._format_time_axis(ax)
                self._style_axis(ax)
                ax.set_title(f'{self.server_name} - {title}', color=self.TEXT_COLOR,
                             fontsize=12, pad=20)

                image = self._figure_to_png(fig)

            logger.debug(f"Generated {kind} graph for {self.server_name}")
            return image
        except Exception as e:
            # Best-effort: a broken graph must not break the status update.
            logger.error(f"Failed to generate {kind} graph for {self.server_name}: {str(e)}")
            return None
        finally:
            # Close only our own figure, on success and on failure alike.
            if fig is not None:
                plt.close(fig)

    # ------------------------------------------------------------------
    # Public graph generators
    # ------------------------------------------------------------------

    def generate_cpu_graph(self) -> Optional[io.BytesIO]:
        """
        Generate a CPU usage line graph as a PNG image.

        The y-axis always spans at least 0-100% and grows when usage exceeds it.

        Returns:
            BytesIO object containing PNG image data, or None if there is
            insufficient data or rendering failed
        """
        return self._generate_single_graph(
            kind='CPU', title='CPU Usage', value_index=1,
            color=self.CPU_COLOR, ylabel='CPU Usage (%)', floor=100,
        )

    def generate_memory_graph(self) -> Optional[io.BytesIO]:
        """
        Generate a memory usage line graph as a PNG image.

        The y-axis is scaled to the peak value plus 10% padding.

        Returns:
            BytesIO object containing PNG image data, or None if there is
            insufficient data or rendering failed
        """
        return self._generate_single_graph(
            kind='memory', title='Memory Usage', value_index=2,
            color=self.MEMORY_COLOR, ylabel='Memory Usage (MB)', floor=0.0,
        )

    def generate_combined_graph(self) -> Optional[io.BytesIO]:
        """
        Generate a combined CPU and memory usage graph as a PNG image.

        CPU is drawn in the upper subplot and memory in the lower one; both
        share the time axis, which is labelled on the lower subplot only.

        Returns:
            BytesIO object containing PNG image data, or None if there is
            insufficient data or rendering failed
        """
        if not self.has_sufficient_data:
            logger.debug(f"Insufficient data for combined graph generation: {self.server_name}")
            return None

        fig = None
        try:
            timestamps, cpu_values, memory_values = self._series()

            with plt.style.context('dark_background'):
                fig, (cpu_ax, mem_ax) = plt.subplots(2, 1, figsize=(8, 6), dpi=100, sharex=True)
                fig.patch.set_facecolor(self.FIGURE_BG)

                self._plot_series(cpu_ax, timestamps, cpu_values, self.CPU_COLOR,
                                  'CPU Usage (%)', self._padded_upper(cpu_values, 100))
                cpu_ax.set_title(f'{self.server_name} - Resource Usage',
                                 color=self.TEXT_COLOR, fontsize=12)

                self._plot_series(mem_ax, timestamps, memory_values, self.MEMORY_COLOR,
                                  'Memory (MB)', self._padded_upper(memory_values))

                # Time labels only on the bottom subplot (x-axis is shared).
                self._format_time_axis(mem_ax)

                for ax in (cpu_ax, mem_ax):
                    self._style_axis(ax)

                image = self._figure_to_png(fig)

            logger.debug(f"Generated combined graph for {self.server_name}")
            return image
        except Exception as e:
            # Best-effort: a broken graph must not break the status update.
            logger.error(f"Failed to generate combined graph for {self.server_name}: {str(e)}")
            return None
        finally:
            if fig is not None:
                plt.close(fig)

    def get_data_summary(self) -> Dict[str, object]:
        """
        Get summary statistics for the current data points.

        Trends compare the newest sample with the oldest one in the window:
        a CPU change above 5 percentage points or a memory change above 50 MB
        is reported as 'increasing' / 'decreasing', anything else as 'stable'.

        Returns:
            Dictionary with the keys 'point_count', 'has_data', 'latest_cpu',
            'latest_memory', 'cpu_trend', 'memory_trend' and
            'time_span_minutes'. The same keys are present when no data has
            been recorded yet, so callers can index them unconditionally.
        """
        if not self.data_points:
            return {
                'point_count': 0,
                'has_data': False,
                'latest_cpu': 0,
                'latest_memory': 0,
                'cpu_trend': 'stable',
                'memory_trend': 'stable',
                'time_span_minutes': 0.0,
            }

        _, latest_cpu, latest_memory = self.data_points[-1]

        cpu_trend = 'stable'
        memory_trend = 'stable'
        if len(self.data_points) >= 2:
            _, first_cpu, first_memory = self.data_points[0]
            cpu_change = latest_cpu - first_cpu
            memory_change = latest_memory - first_memory

            if abs(cpu_change) > self.CPU_TREND_THRESHOLD:
                cpu_trend = 'increasing' if cpu_change > 0 else 'decreasing'
            if abs(memory_change) > self.MEMORY_TREND_THRESHOLD_MB:
                memory_trend = 'increasing' if memory_change > 0 else 'decreasing'

        point_count = len(self.data_points)
        return {
            'point_count': point_count,
            'has_data': self.has_sufficient_data,
            'latest_cpu': latest_cpu,
            'latest_memory': latest_memory,
            'cpu_trend': cpu_trend,
            'memory_trend': memory_trend,
            # Nominal span: sample count times the sampling interval, in minutes.
            'time_span_minutes': point_count * self.SAMPLE_INTERVAL_SECONDS / 60,
        }
class ServerMetricsManager:
    """
    Global manager for all server metrics graphs.

    Handles:
    - Creation and cleanup of ServerMetricsGraphs instances
    - Bulk operations across all tracked servers
    - Dropping trackers for servers that no longer exist
    """

    def __init__(self):
        """Initialize the metrics manager with no tracked servers."""
        # Maps Pterodactyl server identifier -> its metrics tracker.
        self.server_graphs: Dict[str, "ServerMetricsGraphs"] = {}
        logger.info("Initialized ServerMetricsManager")

    def get_or_create_server_graphs(self, server_id: str, server_name: str) -> "ServerMetricsGraphs":
        """
        Get existing ServerMetricsGraphs instance or create a new one.

        Args:
            server_id: Pterodactyl server identifier
            server_name: Human-readable server name (used only when a new
                tracker has to be created)

        Returns:
            ServerMetricsGraphs instance for the specified server
        """
        if server_id not in self.server_graphs:
            self.server_graphs[server_id] = ServerMetricsGraphs(server_id, server_name)
            logger.debug(f"Created new metrics graphs for server {server_name}")
        return self.server_graphs[server_id]

    def add_server_data(self, server_id: str, server_name: str, cpu_percent: float, memory_mb: float):
        """
        Add data point to a server's metrics tracking.

        A tracker is created on first use. If the server has been renamed
        since its tracker was created, the stored name is refreshed so graph
        titles show the current name.

        Args:
            server_id: Pterodactyl server identifier
            server_name: Human-readable server name
            cpu_percent: Current CPU usage percentage
            memory_mb: Current memory usage in megabytes
        """
        graphs = self.get_or_create_server_graphs(server_id, server_name)
        if graphs.server_name != server_name:
            graphs.server_name = server_name
        graphs.add_data_point(cpu_percent, memory_mb)

    def remove_server(self, server_id: str):
        """
        Remove a server from metrics tracking.

        Unknown identifiers are ignored.

        Args:
            server_id: Pterodactyl server identifier to remove
        """
        if server_id in self.server_graphs:
            del self.server_graphs[server_id]
            logger.debug(f"Removed metrics tracking for server {server_id}")

    def get_server_graphs(self, server_id: str) -> Optional["ServerMetricsGraphs"]:
        """
        Get ServerMetricsGraphs instance for a specific server.

        Args:
            server_id: Pterodactyl server identifier

        Returns:
            ServerMetricsGraphs instance or None if not found
        """
        return self.server_graphs.get(server_id)

    def cleanup_old_servers(self, active_server_ids: list):
        """
        Remove tracking for servers that no longer exist.

        Args:
            active_server_ids: Currently active server IDs (a list, or any
                other iterable of identifiers)
        """
        # Build the lookup set once so each membership test is O(1).
        active = set(active_server_ids)

        # Collect first: the dict must not change size while being iterated.
        stale_ids = [server_id for server_id in self.server_graphs if server_id not in active]
        for server_id in stale_ids:
            self.remove_server(server_id)

        if stale_ids:
            logger.info(f"Cleaned up metrics for {len(stale_ids)} inactive servers")

    def get_summary(self) -> Dict[str, int]:
        """
        Get summary of all tracked servers.

        Returns:
            Dictionary with 'total_servers', 'servers_with_data' (trackers
            holding enough samples to graph) and 'total_data_points'
        """
        trackers = list(self.server_graphs.values())
        return {
            'total_servers': len(trackers),
            'servers_with_data': sum(1 for graphs in trackers if graphs.has_sufficient_data),
            'total_data_points': sum(len(graphs.data_points) for graphs in trackers),
        }