LLM Analysis Assistant
Comprehensive MCP client and monitoring framework with multi-transport support, request logging, and LLM API simulation capabilities
LLM Analysis Assistant is a streamlined MCP client framework for monitoring, testing, and analyzing Model Context Protocol interactions. It provides multi-transport support, detailed request logging, and simulation of LLM APIs including OpenAI and Ollama.
Features
- Multi-Transport Support: Comprehensive support for stdio, SSE, and streamable HTTP transports
- Request Monitoring: Real-time monitoring and logging of all MCP interactions
- Web Dashboard: Intuitive web interface for viewing request/response logs
- API Simulation: Built-in simulation for OpenAI and Ollama interfaces
- Performance Analysis: Detailed performance metrics and latency analysis
- Debug Tools: Advanced debugging capabilities for MCP development
- Export Functionality: Export logs and analysis data in various formats
- Custom Filters: Filter and search through request logs with advanced queries (a sketch combining filtering and export follows this list)
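To make the last two features concrete, here is a minimal sketch that filters the request log and exports the matches. RequestLogger.query appears later in this document; the export method and its arguments are assumptions for illustration, not the confirmed API.
from llm_analysis_assistant import RequestLogger
logger = RequestLogger({'storage': 'sqlite', 'database_url': 'sqlite:///mcp_logs.db'})
# Filter with an advanced query (query() is documented under Request Logger below)
failed_calls = logger.query({'success': False, 'limit': 100})
# Export the matches; export() and its signature are hypothetical
logger.export(failed_calls, format='csv', path='./failed_requests.csv')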
Installation
Python Package
pip install llm-analysis-assistant
From Source
git clone https://github.com/xuzexin-hz/llm-analysis-assistant
cd llm-analysis-assistant
pip install -e .
Docker
docker pull llm-analysis-assistant:latest
docker run -p 8080:8080 llm-analysis-assistant
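For a longer-lived setup, you might mount a volume for the log database and pass configuration through environment variables. The LAA_* names are listed under Configuration below; the /data volume path is an assumption about the image layout.
# Persist logs and configure via LAA_* environment variables
docker run -p 8080:8080 \
  -e LAA_LOG_LEVEL=INFO \
  -e LAA_DB_URL=sqlite:////data/mcp_logs.db \
  -v $(pwd)/data:/data \
  llm-analysis-assistant:latest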
Getting Started
Basic Usage
from llm_analysis_assistant import MCPAnalyzer
# Create analyzer instance
analyzer = MCPAnalyzer({
    'transport': 'stdio',
    'server_path': '/path/to/mcp/server',
    'logging': {
        'enabled': True,
        'level': 'DEBUG',
        'output': 'web'
    }
})
# Start monitoring
analyzer.start_monitoring()
# Analyze MCP server
results = analyzer.analyze_server()
print(f"Server capabilities: {results['capabilities']}")
print(f"Available tools: {results['tools']}")
Web Interface
from llm_analysis_assistant import WebInterface
# Launch web dashboard
web_app = WebInterface({
    'port': 8080,
    'host': '0.0.0.0',
    'debug': True
})
web_app.run()
# Access dashboard at http://localhost:8080
Transport Support
Stdio Transport
from llm_analysis_assistant import StdioClient
# Connect to stdio MCP server
client = StdioClient({
    'command': ['python', '-m', 'my_mcp_server'],
    'cwd': '/path/to/server',
    'env': {'API_KEY': 'your-key'},
    'timeout': 30
})
# Initialize connection
await client.initialize()
# List available tools
tools = await client.list_tools()
print(f"Available tools: {[tool['name'] for tool in tools]}")
# Call a tool
result = await client.call_tool('data_processor', {
    'input': 'sample data',
    'format': 'json'
})
print(f"Result: {result}")
SSE Transport
from llm_analysis_assistant import SSEClient
# Connect to SSE MCP server
client = SSEClient({
    'url': 'http://localhost:3000/mcp/sse',
    'headers': {
        'Authorization': 'Bearer your-token',
        'X-API-Key': 'your-key'
    },
    'retry_attempts': 3,
    'reconnect_interval': 5
})
# Initialize connection
await client.connect()
# Subscribe to server events
client.on('tool_result', lambda data: print(f"Tool result: {data}"))
client.on('resource_updated', lambda data: print(f"Resource updated: {data}"))
# Call tools through SSE
result = await client.call_tool('async_processor', {'data': 'test'})
Streamable HTTP Transport
from llm_analysis_assistant import StreamableHTTPClient
# Connect to streamable HTTP MCP server
client = StreamableHTTPClient({
    'base_url': 'http://localhost:3000/mcp',
    'stream': True,
    'chunk_size': 1024,
    'compression': 'gzip'
})
# Stream large responses
async for chunk in client.stream_tool_call('large_data_processor', {
    'dataset': 'large_file.csv',
    'operations': ['filter', 'aggregate', 'export']
}):
    print(f"Received chunk: {len(chunk)} bytes")
    # Process chunk incrementally
Monitoring and Logging
Request Logger
from llm_analysis_assistant import RequestLogger
# Configure logging
logger = RequestLogger({
    'storage': 'sqlite',  # 'sqlite', 'postgresql', 'mongodb'
    'database_url': 'sqlite:///mcp_logs.db',
    'log_level': 'DEBUG',
    'filters': {
        'exclude_tools': ['health_check', 'ping'],
        'include_only': ['data_*', 'analysis_*']
    },
    'retention': {
        'max_entries': 10000,
        'max_age_days': 30
    }
})
# Log MCP interactions
logger.log_request({
    'timestamp': '2024-01-15T10:30:00Z',
    'transport': 'stdio',
    'tool_name': 'data_processor',
    'parameters': {'input': 'test'},
    'execution_time': 1.5,
    'success': True,
    'response_size': 1024
})
# Query logs
recent_logs = logger.query({
    'tool_name': 'data_processor',
    'success': True,
    'limit': 100
})
Performance Metrics
from llm_analysis_assistant import PerformanceMonitor
# Monitor performance
monitor = PerformanceMonitor({
    'metrics': {
        'response_time': True,
        'memory_usage': True,
        'cpu_usage': True,
        'network_io': True
    },
    'sampling_interval': 1.0,
    'alert_thresholds': {
        'response_time': 5.0,  # seconds
        'memory_usage': 80,    # percentage
        'error_rate': 10       # percentage
    }
})
# Get performance report
report = monitor.get_report(time_range='1h')
print(f"Average response time: {report['avg_response_time']:.2f}s")
print(f"Memory usage: {report['memory_usage']:.1f}%")
print(f"Error rate: {report['error_rate']:.1f}%")
API Simulation
OpenAI API Simulation
from llm_analysis_assistant import OpenAISimulator
# Create OpenAI API simulator
simulator = OpenAISimulator({
    'mcp_server_url': 'http://localhost:3000/mcp',
    'model_mapping': {
        'gpt-4': 'mcp_reasoning_tool',
        'gpt-3.5-turbo': 'mcp_chat_tool',
        'text-davinci-003': 'mcp_completion_tool'
    },
    'rate_limits': {
        'requests_per_minute': 60,
        'tokens_per_minute': 10000
    }
})
# Start simulation server
simulator.start(port=8081)
# Use with the legacy (pre-1.0) openai client, which supports api_base overrides
import openai
openai.api_base = 'http://localhost:8081/v1'
response = openai.ChatCompletion.create(
    model='gpt-4',
    messages=[{'role': 'user', 'content': 'Analyze this data'}],
    stream=True
)
for chunk in response:
    print(chunk.choices[0].delta.get('content', ''), end='')
Ollama Simulation
from llm_analysis_assistant import OllamaSimulator
# Create Ollama simulator
simulator = OllamaSimulator({
    'mcp_server_url': 'http://localhost:3000/mcp',
    'models': {
        'llama2': {
            'mcp_tool': 'llama_processor',
            'context_length': 4096,
            'parameters': '7B'
        },
        'codellama': {
            'mcp_tool': 'code_analyzer',
            'context_length': 8192,
            'parameters': '13B'
        }
    }
})
# Start Ollama-compatible API
simulator.start(port=11434)
# Use with Ollama client
import ollama
response = ollama.generate(
    model='llama2',
    prompt='Explain quantum computing',
    stream=True
)
for chunk in response:
    print(chunk['response'], end='')
Web Dashboard
Features Overview
- Real-time Monitoring: Live view of MCP interactions
- Log Explorer: Search and filter through historical logs
- Performance Dashboard: Visual metrics and charts
- Tool Inspector: Detailed analysis of individual tools
- Request Replay: Replay previous requests for testing (see the sketch after this list)
- Export Tools: Download logs and reports in various formats
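Request replay can be approximated with the pieces already shown: read a logged call back with RequestLogger.query and re-issue it through a client. This is a sketch of the idea, assuming log entries carry the tool_name and parameters fields used in the Request Logger example; the dashboard's built-in replay may work differently.
from llm_analysis_assistant import RequestLogger, StdioClient
logger = RequestLogger({'storage': 'sqlite', 'database_url': 'sqlite:///mcp_logs.db'})
# Fetch the most recent logged call to a tool
entries = logger.query({'tool_name': 'data_processor', 'limit': 1})
# Re-issue it through a live client to reproduce the interaction
client = StdioClient({'command': ['python', '-m', 'my_mcp_server']})
await client.initialize()
result = await client.call_tool(entries[0]['tool_name'], entries[0]['parameters'])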
Dashboard Configuration
from llm_analysis_assistant import Dashboard
dashboard = Dashboard({
    'theme': 'dark',  # 'light', 'dark', 'auto'
    'refresh_interval': 5,  # seconds
    'max_log_entries': 1000,
    'charts': {
        'response_time': True,
        'request_volume': True,
        'error_rate': True,
        'tool_usage': True
    },
    'filters': {
        'default_time_range': '1h',
        'quick_filters': [
            'errors_only',
            'slow_requests',
            'recent_activity'
        ]
    }
})
# Customize dashboard
dashboard.add_widget('custom_metric', {
'title': 'Custom Metric',
'query': 'SELECT COUNT(*) FROM logs WHERE tool_name LIKE "data_%"',
'chart_type': 'line'
})
dashboard.run(host='0.0.0.0', port=8080)
Advanced Features
Load Testing
from llm_analysis_assistant import LoadTester
# Configure load test
tester = LoadTester({
    'target_url': 'http://localhost:3000/mcp',
    'test_scenarios': [
        {
            'name': 'tool_stress_test',
            'tool': 'data_processor',
            'parameters': {'size': 'large'},
            'concurrent_users': 10,
            'duration': 60  # seconds
        },
        {
            'name': 'resource_load_test',
            'resource': 'large_dataset',
            'concurrent_requests': 20,
            'duration': 120
        }
    ]
})
# Run load test
results = await tester.run_test()
print(f"Average response time: {results['avg_response_time']:.2f}s")
print(f"Throughput: {results['requests_per_second']:.1f} req/s")
print(f"Error rate: {results['error_rate']:.1f}%")
Automated Testing
from llm_analysis_assistant import TestSuite
# Create test suite
suite = TestSuite({
    'mcp_server_url': 'http://localhost:3000/mcp',
    'test_data_path': './test_data',
    'output_format': 'junit'
})
# Add test cases
suite.add_test('test_tool_availability', {
    'expected_tools': ['data_processor', 'analyzer', 'formatter'],
    'timeout': 5
})
suite.add_test('test_tool_functionality', {
    'tool': 'data_processor',
    'test_cases': [
        {'input': {'data': 'test'}, 'expect': 'success'},
        {'input': {'data': ''}, 'expect': 'error'}
    ]
})
# Run tests
results = await suite.run()
print(f"Tests passed: {results['passed']}/{results['total']}")
Configuration
Configuration File
# analysis-config.yaml
server:
  host: '0.0.0.0'
  port: 8080
  debug: true
mcp:
  transport: 'stdio'
  server_command: ['python', '-m', 'my_server']
  timeout: 30
logging:
  level: 'INFO'
  storage: 'sqlite'
  database_url: 'sqlite:///mcp_logs.db'
  retention_days: 30
monitoring:
  performance_tracking: true
  alert_thresholds:
    response_time: 5.0
    error_rate: 10.0
    memory_usage: 80.0
dashboard:
  theme: 'dark'
  refresh_interval: 5
  max_entries: 1000
simulation:
  openai:
    enabled: true
    port: 8081
  ollama:
    enabled: true
    port: 11434
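One way to wire such a file into code is to load it with PyYAML and pass the relevant sections to the classes shown earlier. Whether the package ships its own loader is not covered here, so treat this as a sketch.
import yaml
from llm_analysis_assistant import MCPAnalyzer
with open('analysis-config.yaml') as f:
    config = yaml.safe_load(f)
# Reuse the constructor shape from Basic Usage; key names follow the YAML above
analyzer = MCPAnalyzer({
    'transport': config['mcp']['transport'],
    'logging': {'enabled': True, 'level': config['logging']['level'], 'output': 'web'}
})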
Environment Variables
# Server configuration
LAA_HOST=0.0.0.0
LAA_PORT=8080
LAA_DEBUG=true
# MCP configuration
LAA_MCP_TRANSPORT=stdio
LAA_MCP_SERVER_PATH=/path/to/server
LAA_MCP_TIMEOUT=30
# Database
LAA_DB_URL=sqlite:///mcp_logs.db
LAA_LOG_LEVEL=INFO
# API keys
LAA_OPENAI_API_KEY=your-openai-key
LAA_ANTHROPIC_API_KEY=your-anthropic-key
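The document does not state how these variables interact with the YAML file, but reading them from Python with the defaults above as fallbacks is straightforward:
import os
# Fall back to the defaults shown above when a variable is unset
host = os.environ.get('LAA_HOST', '0.0.0.0')
port = int(os.environ.get('LAA_PORT', '8080'))
debug = os.environ.get('LAA_DEBUG', 'false').lower() == 'true'
db_url = os.environ.get('LAA_DB_URL', 'sqlite:///mcp_logs.db')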
Use Cases
- MCP Development: Debug and test MCP servers during development
- Performance Optimization: Identify bottlenecks and optimize server performance
- Integration Testing: Test MCP integrations with various clients and transports
- API Migration: Gradually migrate from proprietary APIs to MCP
- Load Testing: Validate MCP server performance under load
- Monitoring Production: Monitor MCP servers in production environments
Best Practices
Monitoring Setup
# Production monitoring configuration
monitor_config = {
    'sampling_rate': 0.1,  # Sample 10% of requests
    'sensitive_data_filter': True,
    'alert_channels': ['email', 'slack'],
    'retention_policy': {
        'detailed_logs': '7d',
        'aggregated_metrics': '90d',
        'error_logs': '30d'
    }
}
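Assuming PerformanceMonitor accepts these production-oriented keys alongside the metrics and thresholds shown under Performance Metrics, applying the configuration is a one-liner:
from llm_analysis_assistant import PerformanceMonitor
# Assumption: the constructor accepts the production keys above
monitor = PerformanceMonitor(monitor_config)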
Security Considerations
# Secure configuration
security_config = {
    'authentication': {
        'required': True,
        'method': 'jwt',
        'secret_key': 'your-secret-key'
    },
    'data_privacy': {
        'mask_sensitive_fields': True,
        'fields_to_mask': ['password', 'api_key', 'token']
    },
    'rate_limiting': {
        'enabled': True,
        'requests_per_minute': 100
    }
}