Getting Started with rmet

Brazil’s National Institute of Meteorology (INMET) maintains a network of automatic weather stations that record hourly observations across the country. The raw data are distributed as annual ZIP archives containing CSV files, one per station, with formatting inconsistencies that vary across years (e.g., different date formats, trailing semicolons, phantom columns, and mixed encodings). rmet automates the full pipeline: downloading those archives, resolving their structural quirks, and returning a clean, analysis-ready data.frame.

Installation

# Install rmet from its GitHub repository, then attach it.
# Uncomment the next line first if devtools itself is not installed yet.
# install.packages("devtools")
devtools::install_github("rodrigosqrt3/rmet")
library(rmet)

Step 1 — Browse the station catalogue

The package ships with a bundled catalogue of all automatic INMET stations. inmet_stations() lets you filter by state, so you can confirm station codes before downloading anything.

# Restrict the bundled station catalogue to the state of Rio Grande do Sul ("RS").
stations <- inmet_stations(state = "RS")
# Preview the first rows, keeping only the identifying and location columns.
head(stations[, c("code", "name", "latitude", "longitude", "elevation")])

Station A801 is the Porto Alegre automatic station, located at roughly 30.05° S, 51.17° W (latitude −30.05, longitude −51.17), 46.97 m above sea level.

Step 2 — Download annual archives

inmet_download() fetches the annual ZIP files from INMET’s servers and saves them to a persistent local cache. Downloads are resumable: if a connection drops, re-running the same call picks up where it left off.

# Downloads the 2023 archive (~70 MB) to the default cache directory.
# Safe to re-run — skips files that are already complete, and an interrupted
# download is resumed rather than restarted.
inmet_download(2023)

You can inspect what is already cached at any time:

# Report which annual archives are already present in the local cache.
inmet_cache_status()

Step 3 — Read data into R

inmet_read() parses the ZIP archives directly — no manual extraction needed. It normalises column names, converts all measurement columns to numeric, parses timestamps, and shifts UTC to local Brazilian time.

# Read station A801 for the whole of 2023 directly from the cached ZIP archive.
# start_date/end_date narrow the result to the given window at read time.
df <- inmet_read(
  years      = 2023,
  stations   = "A801",
  start_date = "2023-01-01",
  end_date   = "2023-12-31"
)

The examples below use the package’s built-in sample dataset, which contains one year of hourly observations from station A801 (Porto Alegre, 2023).

# Use the bundled sample data so the remaining examples run without a download.
df <- rmet_example
# Inspect the structure of the main columns; the expected output is shown below.
# NOTE(review): the first timestamp is 2022-12-31 21:00 — presumably 00:00 UTC
# on 2023-01-01 after the shift to local time; confirm against inmet_read().
str(df[, c("datetime", "station_code", "state",
           "temp_dry_c", "precip_mm", "humid_rel_pct",
           "wind_speed_ms", "pressure_station_hpa")])
#> 'data.frame':    8663 obs. of  8 variables:
#>  $ datetime            : POSIXct, format: "2022-12-31 21:00:00" "2022-12-31 22:00:00" ...
#>  $ station_code        : chr  "A801" "A801" "A801" "A801" ...
#>  $ state               : chr  "RS" "RS" "RS" "RS" ...
#>  $ temp_dry_c          : num  26.5 26.8 26.3 25.2 24.1 23.1 22 21.5 21 21.1 ...
#>  $ precip_mm           : num  0 0 0 0 0 0 0 0 0 0 ...
#>  $ humid_rel_pct       : int  55 49 50 59 65 73 81 89 91 91 ...
#>  $ wind_speed_ms       : num  2 1.9 1.5 1.4 1.1 1.1 1.1 1.2 0.9 1.2 ...
#>  $ pressure_station_hpa: num  1006 1007 1007 1006 1006 ...

Exploring the data

Daily aggregation

Hourly data are often most useful after aggregating to daily summaries.

# Calendar day of every hourly observation.
# NOTE(review): as.Date() on a POSIXct defaults to tz = "UTC", so these may be
# UTC days rather than local days — confirm this is the intended grouping.
df$date <- as.Date(df$datetime)

# Daily means of the four core variables. The formula interface drops any row
# with an NA in one of the listed columns (na.action = na.omit) before FUN is
# applied, so a day appears here only if it has at least one complete row.
daily <- aggregate(
  cbind(temp_dry_c, precip_mm, humid_rel_pct, wind_speed_ms) ~ date,
  data = df,
  FUN  = function(x) mean(x, na.rm = TRUE)
)

# Single-variable daily summary, returned in the row order of `daily`.
# A one-variable aggregate only drops rows where that variable is NA, so it can
# contain more dates than `daily`. Aligning on `date` with match() instead of
# assigning by position keeps the columns in step (and avoids a length-mismatch
# error) when the two sets of dates differ.
daily_stat <- function(fml, fun) {
  agg <- aggregate(fml, data = df, FUN = fun, na.rm = TRUE)
  agg[[2]][match(daily$date, agg$date)]
}

daily$temp_max <- daily_stat(temp_dry_c ~ date, max)
daily$temp_min <- daily_stat(temp_dry_c ~ date, min)
daily$precip   <- daily_stat(precip_mm  ~ date, sum)
daily$month    <- as.integer(format(daily$date, "%m"))

Temperature series

# Palette: red for daily maxima, blue for daily minima, pale red for the band
# between them.
col_max  <- "#E74C3C"
col_min  <- "#3498DB"
col_band <- "#F1948A"

# Set plot margins and fonts; the previous settings are kept in `oldpar` so
# they can be restored once the figure is finished.
oldpar <- par(
  mar      = c(4, 4.5, 3, 1),
  family   = "sans",
  cex.lab  = 0.95,
  cex.axis = 0.85
)

# Empty canvas whose y-range spans both series; axes are drawn by hand below.
temp_limits <- range(c(daily$temp_min, daily$temp_max), na.rm = TRUE)
plot(
  x    = daily$date,
  y    = daily$temp_max,
  type = "n",
  ylim = temp_limits,
  main = "Daily Temperature Range \u2014 Porto Alegre (A801), 2023",
  xlab = "",
  ylab = "Temperature (\u00b0C)",
  axes = FALSE
)

# Shaded band: walk forward along the maxima, then back along the minima.
band_x <- c(daily$date, rev(daily$date))
band_y <- c(daily$temp_max, rev(daily$temp_min))
polygon(
  x      = band_x,
  y      = band_y,
  border = NA,
  col    = adjustcolor(col_band, alpha.f = 0.35)
)

# Outline the band with the two series.
lines(x = daily$date, y = daily$temp_max, lwd = 1.2, col = col_max)
lines(x = daily$date, y = daily$temp_min, lwd = 1.2, col = col_min)

# x axis: one tick every second month, labelled with the abbreviated month.
tick_dates <- seq(from = min(daily$date), to = max(daily$date), by = "2 months")
axis(side = 1, at = tick_dates, labels = format(tick_dates, "%b"), las = 1)
axis(side = 2, las = 1)
box(col = "grey80")

legend(
  x      = "topright",
  legend = c("Daily max", "Daily min"),
  col    = c(col_max, col_min),
  lwd    = 2,
  cex    = 0.85,
  bty    = "n"
)
Daily temperature range at Porto Alegre (A801) — 2023.


# Restore the graphical parameters saved in `oldpar` before the temperature plot.
par(oldpar)

Precipitation

col_prec <- "#3498DB"

# Set plot margins and fonts; the previous settings are kept in `oldpar` so
# they can be restored once the figure is finished.
oldpar <- par(mar = c(4, 4.5, 3, 1), family = "sans", cex.lab = 0.95, cex.axis = 0.85)

# One bar per day with no gaps. barplot() returns the x-coordinate of each
# bar's midpoint; keep it so the month labels can be placed on the bars
# themselves rather than on an assumed 1, 2, 3, ... grid.
bar_mid <- barplot(
  daily$precip,
  col    = adjustcolor(col_prec, alpha.f = 0.75),
  border = NA,
  space  = 0,
  xlab   = "",
  ylab   = "Precipitation (mm)",
  main   = "Daily Precipitation \u2014 Porto Alegre (A801), 2023",
  axes   = FALSE
)

axis(2, las = 1)

# Month labels on x axis: index of the first day of each calendar month.
# The key includes the year so the same month in two different years is not
# collapsed into a single label.
month_starts <- which(!duplicated(format(daily$date, "%Y-%m")))
axis(1,
     at     = bar_mid[month_starts],
     labels = format(daily$date[month_starts], "%b"),
     tick   = FALSE)

box(col = "grey80")
Daily accumulated precipitation at Porto Alegre (A801) — 2023.


# Restore the graphical parameters saved in `oldpar` before the precipitation plot.
par(oldpar)

Monthly temperature boxplots

col_box <- "#E74C3C"

# Month number (1-12) of every hourly observation, taken from the timestamp.
df$month <- as.integer(format(df$datetime, "%m"))
month_labels <- c("Jan","Feb","Mar","Apr","May","Jun",
                  "Jul","Aug","Sep","Oct","Nov","Dec")

# Set plot margins and fonts; the previous settings are kept in `oldpar` so
# they can be restored once the figure is finished.
oldpar <- par(mar = c(4, 4.5, 3, 1), family = "sans", cex.lab = 0.95, cex.axis = 0.85)

# Group on a factor with all twelve levels so there is always one slot per
# month, in calendar order. Grouping on the raw integer would produce fewer
# than twelve boxes whenever a month has no data, which breaks the 12-element
# `names` vector and shifts the boxes away from the 1:12 axis labels below.
boxplot(
  temp_dry_c ~ factor(month, levels = 1:12),
  data     = df,
  col      = adjustcolor(col_box, alpha.f = 0.40),
  border   = col_box,
  names    = month_labels,
  xlab     = "Month",
  ylab     = "Dry-bulb temperature (\u00b0C)",
  main     = "Temperature Seasonality \u2014 Porto Alegre (A801), 2023",
  outline  = FALSE,
  whisklty = 1,
  medlwd   = 2,
  axes     = FALSE
)

# Axes are drawn by hand because the plot was created with axes = FALSE.
axis(1, at = 1:12, labels = month_labels, las = 1)
axis(2, las = 1)
box(col = "grey80")
Seasonal cycle of hourly dry-bulb temperature at Porto Alegre (A801) — 2023.


# Restore the graphical parameters saved in `oldpar` before the boxplot.
par(oldpar)

Wind speed distribution

col_wind <- "#2C3E50"

# Keep only non-negative wind speeds; which() also discards missing values.
ws <- df$wind_speed_ms[which(df$wind_speed_ms >= 0)]

# Set plot margins and fonts; the previous settings are kept in `oldpar` so
# they can be restored once the figure is finished.
oldpar <- par(
  mar      = c(4, 4.5, 3, 1),
  family   = "sans",
  cex.lab  = 0.95,
  cex.axis = 0.85
)

# Bin the data first without drawing, then plot the histogram object so the
# axes can be added by hand.
h <- hist(ws, breaks = 30, plot = FALSE)
plot(
  h,
  main   = "Wind Speed Distribution \u2014 Porto Alegre (A801), 2023",
  xlab   = "Wind speed (m/s)",
  ylab   = "Frequency",
  col    = adjustcolor(col_wind, alpha.f = 0.55),
  border = "white",
  axes   = FALSE
)

# Dashed vertical reference line at the mean wind speed.
ws_mean <- mean(ws)
abline(v = ws_mean, col = "#E74C3C", lty = 2, lwd = 2)

axis(side = 1, las = 1)
axis(side = 2, las = 1)
box(col = "grey80")

# Legend entry matches the reference line and reports the mean to one decimal.
legend(
  x      = "topright",
  legend = paste0("Mean: ", round(ws_mean, 1), " m/s"),
  col    = "#E74C3C",
  lty    = 2,
  lwd    = 2,
  cex    = 0.85,
  bty    = "n"
)
Distribution of hourly wind speed at Porto Alegre (A801) — 2023.


# Restore the graphical parameters saved in `oldpar` before the histogram.
par(oldpar)

Filtering and subsetting

inmet_read() supports filtering at read time, which avoids loading unnecessary data into memory.

# December 2023 only (the first month of the December–February summer),
# keeping just temperature and humidity. The date window below does not
# include January or February.
summer <- inmet_read(
  years      = 2023,
  stations   = "A801",
  start_date = "2023-12-01",
  end_date   = "2023-12-31",
  variables  = c("temp_dry_c", "humid_rel_pct")
)

# Multiple stations across Rio Grande do Sul, keeping temperature and
# precipitation; no start_date/end_date is given for this call.
rs_stations <- inmet_read(
  years    = 2023,
  stations = c("A801", "A802", "A827"),
  variables = c("temp_dry_c", "precip_mm")
)

Extracting CSV files

If you need the raw CSV files for use in other tools, inmet_extract() copies them out of the ZIP archives to a directory of your choice.

# Copy the 2023 CSV files out of the cached archive into an "inmet_csv" folder
# under the R session's temporary directory; point out_dir elsewhere to keep them.
inmet_extract(
  years   = 2023,
  out_dir = file.path(tempdir(), "inmet_csv")
)

Cache management

# See what is cached and how large each file is
inmet_cache_status()

# Remove all cached archives to free disk space
# (run inmet_download() again to fetch them back)
inmet_cache_clear()

Variable reference

The table below lists all meteorological columns returned by inmet_read().

| Column | Description | Unit |
|---|---|---|
| temp_dry_c | Dry-bulb (air) temperature | °C |
| temp_max_c | Maximum temperature (previous hour) | °C |
| temp_min_c | Minimum temperature (previous hour) | °C |
| temp_dew_c | Dew-point temperature | °C |
| precip_mm | Total precipitation | mm |
| pressure_station_hpa | Atmospheric pressure at station level | hPa |
| pressure_max_hpa | Maximum pressure (previous hour) | hPa |
| pressure_min_hpa | Minimum pressure (previous hour) | hPa |
| humid_rel_pct | Relative humidity | % |
| humid_rel_max_pct | Maximum relative humidity (previous hour) | % |
| humid_rel_min_pct | Minimum relative humidity (previous hour) | % |
| wind_speed_ms | Wind speed | m/s |
| wind_dir_deg | Wind direction | degrees |
| wind_gust_ms | Wind gust speed | m/s |
| radiation_kjm2 | Global solar radiation | kJ/m² |

Data source

All data are provided by INMET and are freely available. The package does not redistribute raw data; it only automates retrieval and parsing of files hosted by INMET.