Vendor Galaxy Roles and Collections
This commit is contained in:
parent
c1e1897cda
commit
2aed20393f
3553 changed files with 387444 additions and 2 deletions
|
|
@ -0,0 +1,10 @@
|
|||
---
# Minimal example: apply the grafana_agent role to every inventory host.
# No role variables are overridden in this play.
- name: Install Grafana Agent
  hosts: all
  become: true
  # pre_tasks run before any role is applied
  pre_tasks: []
  # roles run after pre_tasks
  roles:
    - grafana_agent
  # tasks run after roles
  tasks: []
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
---
# Example: include the grafana_agent role with grafana_agent_mode set to flow.
- name: Install Grafana Agent in flow mode
  hosts: all
  tasks:
    - name: Install Grafana Agent
      ansible.builtin.include_role:
        name: grafana.grafana.grafana_agent
      vars:
        grafana_agent_mode: flow
        # Change config file on the host to .river
        grafana_agent_config_filename: config.river
        # Override the default extra flags; only the HTTP listen address is set
        grafana_agent_flags_extra:
          server.http.listen-addr: '0.0.0.0:12345'
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
---
# Example: apply the grafana_agent role with explicit metrics, logs and
# integrations configuration supplied as play variables.
- name: Install Grafana Agent with metrics, logs and integrations
  hosts: all
  become: true
  vars:
    grafana_agent_metrics_config:
      global:
        external_labels:
          datacenter: primary
          cluster: my-cluster
          instance: "{{ ansible_host }}"
        remote_write:
          - url: https://prometheus-<your region>.grafana.net/api/prom/push
            basic_auth:
              username: "1234567" # your username / instanceID
              password: "..." # your grafana.com token
      configs:
        - name: local
          scrape_configs:
            # scrape an application on the localhost
            - job_name: my-app
              metrics_path: /metrics
              static_configs:
                - targets:
                    - localhost:8080
              relabel_configs: []
              metric_relabel_configs: []

    grafana_agent_logs_config:
      global:
        clients:
          - url: https://logs-<your region>.grafana.net/loki/api/v1/push
            basic_auth:
              username: "1234567" # your username / instanceID
              password: "..." # your grafana.com token
      configs:
        - name: local
          positions:
            filename: /tmp/positions.yaml
          target_config:
            sync_period: 10s
          scrape_configs:
            # scrape all of the log files in /var/log on the localhost
            - job_name: log-files
              static_configs:
                - targets:
                    - localhost
                  labels:
                    job: var-logs
                    instance: "{{ ansible_host }}"
                    __path__: /var/log/*.log
            # scrape all of the journal logs on localhost
            - job_name: systemd-journal
              journal:
                max_age: 12h
                labels:
                  job: systemd-journal
              # copy selected journal fields onto plain label names
              relabel_configs:
                - source_labels:
                    - __journal__systemd_unit
                  target_label: systemd_unit
                - source_labels:
                    - __journal__hostname
                  target_label: hostname
                - source_labels:
                    - __journal_syslog_identifier
                  target_label: syslog_identifier
                - source_labels:
                    - __journal__pid
                  target_label: pid
                - source_labels:
                    - __journal__uid
                  target_label: uid
                - source_labels:
                    - __journal__transport
                  target_label: transport
    grafana_agent_integrations_config:
      scrape_integrations: true
      # get metrics about the agent
      agent:
        enabled: true
        relabel_configs: []
        metric_relabel_configs: []
      # get node exporter metrics
      node_exporter:
        enabled: true
        relabel_configs: []
        metric_relabel_configs: []

  # pre_tasks run before any role is applied
  pre_tasks: []
  # roles run after pre_tasks
  roles:
    - grafana_agent
  # tasks run after roles
  tasks: []
|
||||
17
ansible_collections/grafana/grafana/examples/alloy.yaml
Normal file
17
ansible_collections/grafana/grafana/examples/alloy.yaml
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
---
# Example: apply the alloy role with an inline Alloy configuration that
# scrapes 127.0.0.1:12345 and remote-writes to the "prom" endpoint.
- name: Deploy alloy
  become: true
  hosts: all
  roles:
    - role: grafana.grafana.alloy
      vars:
        # Literal block: passed to the role verbatim as the Alloy config text.
        alloy_config: |
          prometheus.scrape "default" {
            targets = [{"__address__" = "127.0.0.1:12345"}]
            forward_to = [prometheus.remote_write.prom.receiver]
          }
          prometheus.remote_write "prom" {
            endpoint {
              url = "http://mimir:9009/api/v1/push"
            }
          }
|
||||
32
ansible_collections/grafana/grafana/examples/ansible.cfg
Normal file
32
ansible_collections/grafana/grafana/examples/ansible.cfg
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
[defaults]
# (string) Macro for the 'ansible_managed' variable available to
# :ref:`ansible_collections.ansible.builtin.template_module` and
# :ref:`ansible_collections.ansible.windows.win_template_module`.
# Only relevant for those two modules.
ansible_managed = "Ansible managed file. Be wary of possible overwrites."

# (boolean) Whether deprecation warnings are shown
deprecation_warnings = False

# (boolean) "False" skips host key checking in the underlying tools Ansible
# uses to connect to the host
host_key_checking = False

# (pathlist) Comma separated list of Ansible inventory sources
inventory = hosts

# (pathspec) Colon separated paths searched for Modules
library = ../plugins/modules

# (path) Controller-side log file. When empty logging is disabled.
log_path = ./ansible.log

# (pathspec) Colon separated paths searched for Roles
roles_path = ../roles

[ssh_connection]

# ssh arguments to use
# Leaving off ControlPersist will result in poor performance, so use
# paramiko on older platforms rather than removing it
ssh_args = -o ControlMaster=auto -o ControlPersist=60s

# if True, make ansible use scp if the connection type is ssh
# (default is sftp)
scp_if_ssh = True
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
---
# Example: apply the loki role without overriding any role variables.
- name: Deploy Loki using the default configuration
  become: true
  hosts: all
  roles:
    - grafana.grafana.loki
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
---
# Example: apply the loki role with every storage path placed under
# loki_working_path and a single in-memory ring member.
- name: Deploy Loki using the local filesystem
  become: true
  hosts: all
  roles:
    - role: grafana.grafana.loki
      vars:
        # Querier settings
        loki_querier:
          max_concurrent: 16
          engine:
            max_look_back_period: 8760h

        # Index and chunk locations
        loki_storage_config:
          tsdb_shipper:
            active_index_directory: "{{ loki_working_path }}/tsdb-index"
            cache_location: "{{ loki_working_path }}/tsdb-cache"
          filesystem:
            directory: "{{ loki_working_path }}/chunks"

        # Ingester settings
        loki_ingester:
          wal:
            enabled: true
            dir: "{{ loki_working_path }}/wal"
          lifecycler:
            address: 127.0.0.1
            ring:
              kvstore:
                store: inmemory
              replication_factor: 1
            final_sleep: 0s
          chunk_idle_period: 1h
          max_chunk_age: 2h
          chunk_target_size: 1048576
          query_store_max_look_back_period: 8760h

        # Limits
        loki_limits_config:
          split_queries_by_interval: 0
          reject_old_samples: true
          reject_old_samples_max_age: 168h
          max_query_length: 0
          max_query_series: 50000
          retention_period: 8760h
          allow_structured_metadata: false
          max_query_lookback: 8760h

        # Compactor settings
        loki_compactor:
          working_directory: "{{ loki_working_path }}/compactor"
          compaction_interval: 10m
          retention_enabled: true
          retention_delete_delay: 2h
          retention_delete_worker_count: 150
          delete_request_store: filesystem

        # Common section
        loki_common:
          path_prefix: "{{ loki_working_path }}"
          storage:
            filesystem:
              rules_directory: "{{ loki_working_path }}/rules"
          replication_factor: 1
          ring:
            instance_addr: 127.0.0.1
            kvstore:
              store: inmemory

        # Ruler alerts. The annotations are wrapped in raw/endraw so the
        # {{ $labels... }} / {{ $value }} placeholders are not evaluated by Jinja.
        loki_ruler_alerts:
          - name: Logs.sshd
            rules:
              - alert: SshLoginFailed
                expr: |
                  count_over_time({job=~"secure"} |="sshd[" |~": Failed|: Invalid|: Connection closed by authenticating user" | __error__="" [15m]) > 6
                for: 0m
                labels:
                  severity: critical
                annotations:
                  summary: "{% raw %}SSH authentication failure (instance {{ $labels.instance }}).{% endraw %}"
                  description: "{% raw %}Increase of SSH authentication failures in last 15 minutes\\n VALUE = {{ $value }}{% endraw %}"
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
---
# Example: include the mimir role on three hosts that share one S3 bucket
# and find each other through memberlist.
- name: Install mimir
  hosts: [mimir-1, mimir-2, mimir-3]
  become: true

  tasks:
    - name: Install mimir
      ansible.builtin.include_role:
        name: grafana.grafana.mimir
      vars:
        # Run against a MinIO-backed blob store; see the readme for local setup
        # or the Mimir docs for Azure, AWS, etc.
        mimir_storage:
          storage:
            backend: s3
            s3:
              endpoint: localhost:9000
              access_key_id: testtest
              secret_access_key: testtest
              insecure: true
              bucket_name: mimir

        # Blocks storage requires a prefix when using a common object storage bucket.
        mimir_blocks_storage:
          storage_prefix: blocks
          tsdb:
            dir: "{{ mimir_working_path }}/ingester"

        # Use memberlist, a gossip-based protocol, to enable the 3 Mimir replicas to communicate
        mimir_memberlist:
          join_members:
            - mimir-1:7946
            - mimir-2:7946
            - mimir-3:7946
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
---
# Example: include the mimir role on a single host, with S3 credentials
# supplied through variables (s3_endpoint, vault_s3_access, vault_s3_secret).
- name: Install mimir
  hosts: monitoring-node
  become: true

  tasks:
    - name: Install mimir
      ansible.builtin.include_role:
        name: grafana.grafana.mimir
      vars:
        mimir_storage:
          storage:
            backend: s3
            s3:
              endpoint: "{{ s3_endpoint }}"
              access_key_id: "{{ vault_s3_access }}"
              secret_access_key: "{{ vault_s3_secret }}"
              bucket_name: your-mimir-bucket

        # Blocks storage requires a prefix when using a common object storage bucket.
        mimir_blocks_storage:
          storage_prefix: blocks
          tsdb:
            dir: "{{ mimir_working_path }}/ingester"

        mimir_limits:
          # set metrics retention to 30d
          compactor_blocks_retention_period: 30d
          max_label_names_per_series: 100

        # this setting is required to prevent mimir from attempting
        # to make quorum
        mimir_ingester:
          ring:
            replication_factor: 1
|
||||
|
|
@ -0,0 +1,193 @@
|
|||
# Scaling OpenTelemetry Collector Deployments Using Grafana Ansible Collection
|
||||
This guide is focused on scaling the OpenTelemetry Collector deployment across various Linux hosts by leveraging Ansible, to function both as gateways and agents within your observability architecture. Utilizing the OpenTelemetry Collector in this dual capacity enables a robust collection and forwarding of metrics, traces, and logs to analysis and visualization platforms, such as Grafana Cloud.
|
||||
|
||||
Here, we outline a strategy for deploying and managing the OpenTelemetry Collector's scalable instances throughout your infrastructure with Ansible, enhancing your overall monitoring strategy and data visualization capabilities in Grafana Cloud.
|
||||
|
||||
## Before You Begin
|
||||
|
||||
To follow this guide, ensure you have:
|
||||
|
||||
- Linux hosts.
|
||||
- SSH access to each of these Linux hosts.
|
||||
- Account permissions to install and configure the OpenTelemetry Collector on these hosts.
|
||||
|
||||
## Install the Grafana Ansible collection
|
||||
|
||||
The [OpenTelemetry Collector role](https://github.com/grafana/grafana-ansible-collection/tree/main/roles/opentelemetry_collector) is available in the Grafana Ansible collection.
|
||||
|
||||
To install the Grafana Ansible collection, run this command:
|
||||
|
||||
```
|
||||
ansible-galaxy collection install grafana.grafana
|
||||
```
|
||||
|
||||
## Create an Ansible inventory file
|
||||
|
||||
Next, you will set up your hosts and create an inventory file.
|
||||
|
||||
1. Create your hosts and add public SSH keys to them.
|
||||
|
||||
This example uses eight Linux hosts: two Ubuntu hosts, two CentOS hosts, two Fedora hosts, and two Debian hosts.
|
||||
|
||||
1. Create an Ansible inventory file.
|
||||
|
||||
The Ansible inventory, which resides in a file named `inventory`, looks similar to this:
|
||||
|
||||
```
|
||||
146.190.208.216 # hostname = ubuntu-01
|
||||
146.190.208.190 # hostname = ubuntu-02
|
||||
137.184.155.128 # hostname = centos-01
|
||||
146.190.216.129 # hostname = centos-02
|
||||
198.199.82.174 # hostname = debian-01
|
||||
198.199.77.93 # hostname = debian-02
|
||||
143.198.182.156 # hostname = fedora-01
|
||||
143.244.174.246 # hostname = fedora-02
|
||||
```
|
||||
|
||||
> **Note**: If you are copying the above file, remove the comments (#).
|
||||
|
||||
1. Create an `ansible.cfg` file within the same directory as `inventory`, with the following values:
|
||||
```
|
||||
[defaults]
|
||||
inventory = inventory # Path to the inventory file
|
||||
private_key_file = ~/.ssh/id_rsa # Path to my private SSH Key
|
||||
remote_user=root
|
||||
```
|
||||
|
||||
## Use the OpenTelemetry Collector Ansible Role
|
||||
|
||||
Next, you'll define an Ansible playbook to apply your chosen or created OpenTelemetry Collector role across your hosts.
|
||||
|
||||
Create a file named `deploy-opentelemetry.yml` in the same directory as your `ansible.cfg` and `inventory`.
|
||||
|
||||
```yaml
|
||||
# Installs the OpenTelemetry Collector on every inventory host and wires
# traces, metrics and logs pipelines to Grafana Cloud. Replace each <...>
# placeholder in the play vars before running.
- name: Install OpenTelemetry Collector
  hosts: all
  become: true

  vars:
    grafana_cloud_api_key: <Your Grafana.com API Key> # Example - eyJrIjoiYjI3NjI5MGQxZTcyOTIxYTc0MDgzMGVhNDhlODNhYzA5OTk2Y2U5YiIsIm4iOiJhbnNpYmxldGVzdCIsImlkIjo2NTI5
    metrics_username: <prometheus-username> # Example - 825019
    logs_username: <loki-username> # Example - 411478
    prometheus_url: <prometheus-push-url> # Example - https://prometheus-us-central1.grafana.net/api/prom/push
    loki_url: <loki-push-url> # Example - https://logs-prod-017.grafana.net/loki/api/v1/push
    tempo_url: <tempo-push-url> # Example - tempo-prod-04-prod-us-east-0.grafana.net:443
    traces_username: <tempo-username> # Example - 411478

  tasks:
    - name: Install OpenTelemetry Collector
      ansible.builtin.include_role:
        name: grafana.grafana.opentelemetry_collector
      vars:
        # One basicauth extension per backend: each pairs that backend's
        # username with the shared API key.
        otel_collector_extensions:
          basicauth/grafana_cloud_tempo:
            # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/basicauthextension
            client_auth:
              username: "{{ traces_username }}"
              password: "{{ grafana_cloud_api_key }}"
          basicauth/grafana_cloud_prometheus:
            client_auth:
              # the Prometheus username, not the push URL
              username: "{{ metrics_username }}"
              password: "{{ grafana_cloud_api_key }}"
          basicauth/grafana_cloud_loki:
            client_auth:
              username: "{{ logs_username }}"
              password: "{{ grafana_cloud_api_key }}"


        otel_collector_receivers:
          otlp:
            # https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver/otlpreceiver
            protocols:
              grpc:
              http:
          hostmetrics:
            # Optional. Host Metrics Receiver added as an example of Infra Monitoring capabilities of the OpenTelemetry Collector
            # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/hostmetricsreceiver
            scrapers:
              load:
              memory:

        otel_collector_processors:
          batch:
            # https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor/batchprocessor
          resourcedetection:
            # Enriches telemetry data with resource information from the host
            # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor
            detectors: ["env", "system"]
            override: false
          transform/add_resource_attributes_as_metric_attributes:
            # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor
            error_mode: ignore
            metric_statements:
              - context: datapoint
                statements:
                  - set(attributes["deployment.environment"], resource.attributes["deployment.environment"])
                  - set(attributes["service.version"], resource.attributes["service.version"])

        # Each exporter references the basicauth extension defined above
        # for its backend.
        otel_collector_exporters:
          otlp/grafana_cloud_traces:
            # https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlpexporter
            endpoint: "{{ tempo_url }}"
            auth:
              authenticator: basicauth/grafana_cloud_tempo

          loki/grafana_cloud_logs:
            # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/lokiexporter
            endpoint: "{{ loki_url }}"
            auth:
              authenticator: basicauth/grafana_cloud_loki

          prometheusremotewrite/grafana_cloud_metrics:
            # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/prometheusremotewriteexporter
            endpoint: "{{ prometheus_url }}"
            add_metric_suffixes: false
            auth:
              authenticator: basicauth/grafana_cloud_prometheus


        otel_collector_service:
          extensions: [basicauth/grafana_cloud_tempo, basicauth/grafana_cloud_prometheus, basicauth/grafana_cloud_loki]
          pipelines:
            traces:
              receivers: [otlp]
              processors: [resourcedetection, batch]
              exporters: [otlp/grafana_cloud_traces]
            metrics:
              receivers: [otlp, hostmetrics]
              processors: [resourcedetection, transform/add_resource_attributes_as_metric_attributes, batch]
              exporters: [prometheusremotewrite/grafana_cloud_metrics]
            logs:
              receivers: [otlp]
              processors: [resourcedetection, batch]
              exporters: [loki/grafana_cloud_logs]
|
||||
```
|
||||
|
||||
> **Note:** You'll need to adjust the configuration to match the specific telemetry data you intend to collect and where you plan to forward it. The configuration snippet above is a basic example designed for traces, logs and metrics collection via OTLP and forwarding to Grafana Cloud.
|
||||
|
||||
|
||||
## Running the Ansible Playbook
|
||||
|
||||
Deploy the OpenTelemetry Collector across your hosts by executing:
|
||||
|
||||
```sh
|
||||
ansible-playbook deploy-opentelemetry.yml
|
||||
```
|
||||
|
||||
## Verifying Data Ingestion into Grafana Cloud
|
||||
|
||||
Once you've deployed the OpenTelemetry Collector and configured it to forward data to Grafana Cloud, you can verify the ingestion:
|
||||
|
||||
- Log into your Grafana Cloud instance.
|
||||
- Navigate to the **Explore** section.
|
||||
- Select your Grafana Cloud Prometheus data source from the dropdown menu.
|
||||
- Execute a query to confirm the reception of metrics, e.g., `{instance="ubuntu-01"}` for a specific host's metrics.
|
||||
|
||||
## Visualizing Metrics and Logs in Grafana
|
||||
|
||||
With data successfully ingested into Grafana Cloud, you can create custom dashboards to visualize the metrics, logs and traces received from your OpenTelemetry Collector. Utilize Grafana's powerful query builder and visualization tools to derive insights from your data effectively.
|
||||
|
||||
- Consider creating dashboards that offer a comprehensive overview of your infrastructure's health and performance.
|
||||
- Utilize Grafana's alerting features to proactively manage and respond to issues identified through the OpenTelemetry data.
|
||||
|
||||
This guide simplifies the deployment of the OpenTelemetry Collector across multiple Linux hosts using Ansible and illustrates how to visualize collected telemetry data in Grafana Cloud. Tailor the Ansible roles, OpenTelemetry Collector configurations, and Grafana dashboards to suit your specific monitoring and observability requirements.
|
||||
|
|
@ -0,0 +1,184 @@
|
|||
# Monitoring multiple Linux hosts with Grafana Agent Role
|
||||
|
||||
Monitoring with Grafana Agents across multiple Linux hosts can be difficult.
|
||||
To make it easier, you can use the Grafana Agent role with the Grafana Ansible collection.
|
||||
This guide shows how to use the `grafana_agent` Ansible role to deploy and manage Grafana Agents across multiple Linux hosts so you can monitor them in Grafana.
|
||||
|
||||
## Before you begin
|
||||
|
||||
Before you begin, you should have:
|
||||
|
||||
- Linux hosts
|
||||
- SSH access to the Linux hosts
|
||||
- Account permissions sufficient to install and use Grafana Agent on the Linux hosts
|
||||
|
||||
## Install the Grafana Ansible collection
|
||||
|
||||
The [Grafana Agent role](https://github.com/grafana/grafana-ansible-collection/tree/main/roles/grafana_agent) is available in the Grafana Ansible collection as of the 1.1.0 release.
|
||||
|
||||
To install the Grafana Ansible collection, run this command:
|
||||
|
||||
```
|
||||
ansible-galaxy collection install grafana.grafana:2.0.0
|
||||
```
|
||||
|
||||
## Create an Ansible inventory file
|
||||
|
||||
Next, you will set up your hosts and create an inventory file.
|
||||
|
||||
1. Create your hosts and add public SSH keys to them.
|
||||
|
||||
This example uses eight Linux hosts: two Ubuntu hosts, two CentOS hosts, two Fedora hosts, and two Debian hosts.
|
||||
|
||||
1. Create an Ansible inventory file.
|
||||
|
||||
The Ansible inventory, which resides in a file named `inventory`, looks similar to this:
|
||||
|
||||
```
|
||||
146.190.208.216 # hostname = ubuntu-01
|
||||
146.190.208.190 # hostname = ubuntu-02
|
||||
137.184.155.128 # hostname = centos-01
|
||||
146.190.216.129 # hostname = centos-02
|
||||
198.199.82.174 # hostname = debian-01
|
||||
198.199.77.93 # hostname = debian-02
|
||||
143.198.182.156 # hostname = fedora-01
|
||||
143.244.174.246 # hostname = fedora-02
|
||||
```
|
||||
|
||||
> **Note**: If you are copying the above file, remove the comments (#).
|
||||
|
||||
1. Create an `ansible.cfg` file within the same directory as `inventory`, with the following values:
|
||||
```
|
||||
[defaults]
|
||||
inventory = inventory # Path to the inventory file
|
||||
private_key_file = ~/.ssh/id_rsa # Path to my private SSH Key
|
||||
remote_user=root
|
||||
```
|
||||
|
||||
## Use the Grafana Agent Ansible role
|
||||
|
||||
Next you will create an Ansible playbook that calls the `grafana_agent` role from the `grafana.grafana` Ansible collection.
|
||||
|
||||
To use the Grafana Agent Ansible role:
|
||||
|
||||
1. Create a file named `deploy-agent.yml` in the same directory as `ansible.cfg` and `inventory` and add the configuration below.
|
||||
|
||||
```yaml
|
||||
# Installs Grafana Agent on every inventory host; metrics and logs are sent
# to the endpoints given in the play vars. Replace each <...> placeholder
# before running.
- name: Install Grafana Agent
  become: true
  hosts: all

  vars:
    grafana_cloud_api_key: <Your Grafana.com API Key> # Example - eyJrIjoiYjI3NjI5MGQxZTcyOTIxYTc0MDgzMGVhNDhlODNhYzA5OTk2Y2U5YiIsIm4iOiJhbnNpYmxldGVzdCIsImlkIjo2NTI5
    metrics_username: <prometheus-username> # Example - 825019
    logs_username: <loki-username> # Example - 411478
    prometheus_url: <prometheus-push-url> # Example - https://prometheus-us-central1.grafana.net/api/prom/push
    loki_url: <loki-push-url> # Example - https://logs-prod-017.grafana.net/loki/api/v1/push
  tasks:
    - name: Install Grafana Agent
      ansible.builtin.include_role:
        name: grafana.grafana.grafana_agent
      vars:
        # Metrics: one "integrations" config with a single remote_write target
        grafana_agent_metrics_config:
          global:
            scrape_interval: 60s
          wal_directory: /tmp/grafana-agent-wal
          configs:
            - name: integrations
              remote_write:
                - url: "{{ prometheus_url }}"
                  basic_auth:
                    username: "{{ metrics_username }}"
                    password: "{{ grafana_cloud_api_key }}"

        # Logs: tail files matching /var/log/*log and push them to loki_url
        grafana_agent_logs_config:
          configs:
            - name: default
              clients:
                - url: "{{ loki_url }}"
                  basic_auth:
                    username: "{{ logs_username }}"
                    password: "{{ grafana_cloud_api_key }}"
              positions:
                filename: /tmp/positions.yaml
              target_config:
                sync_period: 10s
              scrape_configs:
                - job_name: varlogs
                  static_configs:
                    - targets:
                        - localhost
                      labels:
                        job: varlogs
                        instance: ${HOSTNAME:-default}
                        __path__: /var/log/*log

        # Integrations: node_exporter, with its own remote_write target
        grafana_agent_integrations_config:
          node_exporter:
            enabled: true
            instance: ${HOSTNAME:-default}
          prometheus_remote_write:
            - url: "{{ prometheus_url }}"
              basic_auth:
                username: "{{ metrics_username }}"
                password: "{{ grafana_cloud_api_key }}"
|
||||
```
|
||||
|
||||
The playbook calls the `grafana_agent` role from the `grafana.grafana` Ansible collection.
|
||||
The Agent configuration in this playbook sends metrics and logs from the Linux hosts to your Prometheus and Loki data sources.
|
||||
|
||||
Refer to the [Grafana Ansible documentation](https://github.com/grafana/grafana-ansible-collection/tree/main/roles/grafana_agent#role-variables) to understand the other variables you can pass to the `grafana_agent` role.
|
||||
|
||||
When deploying the Agent across multiple instances to monitor them, it is essential that the Agent can auto-detect the hostname, which makes monitoring easier.
|
||||
Notice that the label `instance` has been set to the value `${HOSTNAME:-default}`, which is substituted by the value of the HOSTNAME environment variable in the Linux host.
|
||||
To read more about the variable substitution, refer to the Grafana Agent [node_exporter_config](https://grafana.com/docs/agent/latest/configuration/integrations/node-exporter-config/) documentation.
|
||||
|
||||
1. To run the playbook, run this command:
|
||||
|
||||
```
|
||||
ansible-playbook deploy-agent.yml
|
||||
```
|
||||
|
||||
> **Note:** You can place the `deploy-agent.yml`, `ansible.cfg` and `inventory` files in different directories based on your needs.
|
||||
|
||||
## Check that logs and metrics are being ingested into Prometheus and Loki
|
||||
|
||||
Logs and metrics will soon be available in Grafana if your Prometheus and Loki data sources are added.
|
||||
To test this, use the Explore feature.
|
||||
Click the Explore icon (compass icon) in the vertical navigation bar.
|
||||
|
||||
### Check logs
|
||||
|
||||
To check logs:
|
||||
|
||||
1. Use the dropdown menu at the top of the page to select your Loki logs data source.
|
||||
|
||||
1. In the log browser, run the query `{instance="centos-01"}` where centos-01 is the hostname of one of the Linux hosts.
|
||||
|
||||
If you see log lines (shown in the example below), logs are being received.
|
||||
|
||||

|
||||
|
||||
If no log lines appear, logs are not being collected.
|
||||
|
||||
### Check metrics
|
||||
|
||||
To check metrics:
|
||||
|
||||
1. Use the dropdown menu at the top of the page to select your Prometheus data source.
|
||||
|
||||
1. Run the query `{instance="centos-01"}` where centos-01 is the hostname of one of the Linux hosts.
|
||||
|
||||
If you see a metrics graph and table (shown in the example below), metrics are being received.
|
||||
|
||||

|
||||
|
||||
If no metrics appear, metrics are not being collected.
|
||||
|
||||
### View dashboards
|
||||
|
||||
Now that you have logs and metrics in Grafana, you can use dashboards to view them.
|
||||
Here's an example of one of the prebuilt dashboards included with the Linux integration in Grafana Cloud:
|
||||
|
||||

|
||||
|
||||
Using the **Instance** dropdown in the dashboard, you can select from the hostnames where you deployed Grafana Agent and start monitoring them.
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
# Example: include the opentelemetry_collector role with an OTLP receiver,
# a batch processor and an OTLP exporter shared by all three pipelines.
- name: Install OpenTelemetry Collector
  become: true
  hosts: all

  tasks:
    - name: Install OpenTelemetry Collector
      ansible.builtin.include_role:
        name: grafana.grafana.opentelemetry_collector
      vars:
        # OTLP over gRPC (4317) and HTTP (4318), listening on 0.0.0.0
        otel_collector_receivers:
          otlp:
            protocols:
              grpc:
                endpoint: 0.0.0.0:4317
              http:
                endpoint: 0.0.0.0:4318
        # Empty value: no batch settings are given here
        otel_collector_processors:
          batch:

        otel_collector_exporters:
          otlp:
            endpoint: otelcol:4317

        # Extensions are declared with empty values, then enabled under service
        otel_collector_extensions:
          health_check:
          pprof:
          zpages:

        otel_collector_service:
          extensions:
            - health_check
            - pprof
            - zpages
          pipelines:
            traces:
              receivers:
                - otlp
              processors:
                - batch
              exporters:
                - otlp
            metrics:
              receivers:
                - otlp
              processors:
                - batch
              exporters:
                - otlp
            logs:
              receivers:
                - otlp
              processors:
                - batch
              exporters:
                - otlp
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
---
# Example: apply the promtail role, pushing to a Loki endpoint on localhost
# and tailing /var/log/messages plus the nginx logs.
- name: Deploy Promtail to ship logs to the local Loki instance
  become: true
  hosts: all
  roles:
    - role: grafana.grafana.promtail
      vars:
        promtail_clients:
          - url: http://localhost:3100/loki/api/v1/push
        promtail_scrape_configs:
          # A single job with two static targets, one per log source
          - job_name: system
            static_configs:
              - targets: [localhost]
                labels:
                  job: messages
                  instance: "{{ ansible_facts['fqdn'] }}"
                  __path__: /var/log/messages
              - targets: [localhost]
                labels:
                  job: nginx
                  instance: "{{ ansible_facts['fqdn'] }}"
                  __path__: /var/log/nginx/*.log
|
||||
Loading…
Add table
Add a link
Reference in a new issue