library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(purrr)
library(ggplot2)
The data on GISCO is divided into topics.
get_topics()
#> # A tibble: 7 × 2
#> title titleMultilinual
#> <chr> <named list>
#> 1 Coastal lines <named list [3]>
#> 2 Communes <named list [3]>
#> 3 Countries <named list [3]>
#> 4 Local Administrative Units <named list [3]>
#> 5 NUTS <named list [3]>
#> 6 Postal codes <named list [3]>
#> 7 Urban Audit <named list [3]>
Select the topics that you are interested in. Within each topic there are numerous files. These may differ in the year they are associated with, spatial resolution, coordinate reference system, and data format, among other things.
This package provides easy access to the latest files. The example below selects the highest-resolution files that use the usual lat/long coordinate system.
# Open the NUTS topic and keep the latest GeoPackage entries whose names
# match the highest-resolution, lat/long file pattern.
api <- get_topic("NUTS")
file_list <- grep(
  "01M_.*_4326_",
  get_latest_files(api)$gpkg,
  value = TRUE
)
file_list
#> NUTS_BN_01M_2024_4326_LEVL_0.gpkg
#> "gpkg/NUTS_BN_01M_2024_4326_LEVL_0.gpkg"
#> NUTS_BN_01M_2024_4326_LEVL_1.gpkg
#> "gpkg/NUTS_BN_01M_2024_4326_LEVL_1.gpkg"
#> NUTS_BN_01M_2024_4326_LEVL_2.gpkg
#> "gpkg/NUTS_BN_01M_2024_4326_LEVL_2.gpkg"
#> NUTS_BN_01M_2024_4326_LEVL_3.gpkg
#> "gpkg/NUTS_BN_01M_2024_4326_LEVL_3.gpkg"
#> NUTS_RG_01M_2024_4326_LEVL_0.gpkg
#> "gpkg/NUTS_RG_01M_2024_4326_LEVL_0.gpkg"
#> NUTS_RG_01M_2024_4326_LEVL_1.gpkg
#> "gpkg/NUTS_RG_01M_2024_4326_LEVL_1.gpkg"
#> NUTS_RG_01M_2024_4326_LEVL_2.gpkg
#> "gpkg/NUTS_RG_01M_2024_4326_LEVL_2.gpkg"
#> NUTS_RG_01M_2024_4326_LEVL_3.gpkg
#> "gpkg/NUTS_RG_01M_2024_4326_LEVL_3.gpkg"
Be aware that these files can be huge. The `get_content_length` function
returns the size of a file without downloading it. It is not vectorized,
so you have to use a `map`-like construct if you have a list of files.
# Turn a named vector into a two-column tibble: `names` holds the element
# names of `x`, and a second column, named by `column_name`, holds the values.
to_tibble <- function(x, column_name = "value") {
  element_names <- names(x)
  tibble::tibble(names = element_names, !!column_name := x)
}
# Query the size of every file one by one (get_content_length is not
# vectorized) and collect the results in a names/size table.
file_sizes <- file_list |>
  map_int(get_content_length, api = api) |>
  to_tibble(column_name = "size")
# tibble::as_tibble_col()
knitr::kable(
  file_sizes,
  format.args = list(big.mark = "_", scientific = FALSE)
)
names | size |
---|---|
NUTS_BN_01M_2024_4326_LEVL_0.gpkg | 4_108_288 |
NUTS_BN_01M_2024_4326_LEVL_1.gpkg | 1_036_288 |
NUTS_BN_01M_2024_4326_LEVL_2.gpkg | 1_171_456 |
NUTS_BN_01M_2024_4326_LEVL_3.gpkg | 2_957_312 |
NUTS_RG_01M_2024_4326_LEVL_0.gpkg | 4_096_000 |
NUTS_RG_01M_2024_4326_LEVL_1.gpkg | 5_357_568 |
NUTS_RG_01M_2024_4326_LEVL_2.gpkg | 7_012_352 |
NUTS_RG_01M_2024_4326_LEVL_3.gpkg | 11_915_264 |
Suppose you have selected a file to download. You can then save it to a
local file using the `get_content` function. It also saves a
copy in a cache under your cache folder. The location of this folder is
OS-dependent; use `rappdirs::user_cache_dir("eudata")` to
locate it.
If you do not specify a dest
file, the data will be
downloaded into a temporary file. The path to this file is the
body
element of the result of the call.
# Keep the entries of file_list whose name matches "RG.*LEVL_3".
file_to_download <- file_list |>
  grep(pattern = "RG.*LEVL_3", value = TRUE)
file_to_download
#> NUTS_RG_01M_2024_4326_LEVL_3.gpkg
#> "gpkg/NUTS_RG_01M_2024_4326_LEVL_3.gpkg"
# Download the selected file and keep the response object for later use.
result <- get_content(api, file_to_download, save_to_file = TRUE)
result
#> <httr2_response>
#> GET
#> https://gisco-services.ec.europa.eu/distribution/v2/nuts/gpkg/NUTS_RG_01M_2024_4326_LEVL_3.gpkg
#> Status: 304 Not Modified
#> Content-Type: application/geopackage+sqlite3
#> Body: On disk
#> '/tmp/RtmpMiPjmW/Rbuild7cc628a5e9c3/eudata/vignettes/NUTS_RG_01M_2024_4326_LEVL_3.gpkg'
#> (11915264 bytes)
The selected data format, `gpkg`, can be read into memory
with the `sf` package. First, only the first five records are
shown.
# The body of the response is the downloaded file; list the layers it holds.
db_file <- result[["body"]]
layer <- sf::st_layers(db_file)
layer
#> Driver: GPKG
#> Available layers:
#> layer_name geometry_type features fields crs_name
#> 1 NUTS_RG_01M_2024_4326_LEVL_3.gpkg Multi Polygon 1345 8 WGS 84
# Read only the first five records of the layer.
# `layer` is the full object returned by sf::st_layers(), so its `name`
# element is interpolated explicitly; interpolating the whole object would
# coerce every element of the list to text instead of just the layer name.
sample <- sf::st_read(
  db_file,
  query = glue::glue("select * from \"{layer$name[1]}\" limit 5")
)
#> Reading query `select * from "NUTS_RG_01M_2024_4326_LEVL_3.gpkg" limit 5'
#> from data source `/tmp/RtmpMiPjmW/Rbuild7cc628a5e9c3/eudata/vignettes/NUTS_RG_01M_2024_4326_LEVL_3.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 5 features and 8 fields
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 19.3143 ymin: 40.36304 xmax: 20.62595 ymax: 42.55856
#> Geodetic CRS: WGS 84
sample
#> Simple feature collection with 5 features and 8 fields
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 19.3143 ymin: 40.36304 xmax: 20.62595 ymax: 42.55856
#> Geodetic CRS: WGS 84
#> NUTS_ID LEVL_CODE CNTR_CODE NAME_LATN NUTS_NAME MOUNT_TYPE URBN_TYPE
#> 1 AL011 3 AL Dibër Dibër NA NA
#> 2 AL012 3 AL Durrës Durrës NA NA
#> 3 AL013 3 AL Kukës Kukës NA NA
#> 4 AL031 3 AL Berat Berat NA NA
#> 5 AL032 3 AL Fier Fier NA NA
#> COAST_TYPE geom
#> 1 NA POLYGON ((20.3461 41.8761, ...
#> 2 NA POLYGON ((19.8072 41.5662, ...
#> 3 NA POLYGON ((20.11943 42.52485...
#> 4 NA POLYGON ((19.96849 40.86741...
#> 5 NA POLYGON ((19.62572 41.06464...
Once you have the structure of the database, it is easy to filter, for example, for Hungarian data only.
# Read only the Hungarian records of the layer.
# The layer name is an identifier and stays in double quotes; the country
# code is a string literal and therefore uses single quotes, as SQL requires.
# `layer$name[1]` picks the layer name out of the sf::st_layers() result
# instead of interpolating the whole object.
hu_data <- sf::st_read(
  db_file,
  query = glue::glue(
    "select * from \"{layer$name[1]}\" where CNTR_CODE = 'HU'"
  )
)
#> Reading query `select * from "NUTS_RG_01M_2024_4326_LEVL_3.gpkg" where CNTR_CODE = 'HU''
#> from data source `/tmp/RtmpMiPjmW/Rbuild7cc628a5e9c3/eudata/vignettes/NUTS_RG_01M_2024_4326_LEVL_3.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 20 features and 8 fields
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: 16.11385 ymin: 45.73807 xmax: 22.89627 ymax: 48.58464
#> Geodetic CRS: WGS 84
# Render the Hungarian records as a table.
knitr::kable(hu_data)
NUTS_ID | LEVL_CODE | CNTR_CODE | NAME_LATN | NUTS_NAME | MOUNT_TYPE | URBN_TYPE | COAST_TYPE | geom |
---|---|---|---|---|---|---|---|---|
HU213 | 3 | HU | Veszprém | Veszprém | NA | 2 | NA | POLYGON ((17.44675 47.44017… |
HU221 | 3 | HU | Győr-Moson-Sopron | Győr-Moson-Sopron | NA | 2 | NA | POLYGON ((17.24179 48.02247… |
HU222 | 3 | HU | Vas | Vas | NA | 2 | NA | POLYGON ((17.04417 47.39973… |
HU223 | 3 | HU | Zala | Zala | NA | 3 | NA | POLYGON ((17.07991 47.04601… |
HU233 | 3 | HU | Tolna | Tolna | NA | 3 | NA | POLYGON ((18.99883 46.6876,… |
HU231 | 3 | HU | Baranya | Baranya | NA | 2 | NA | POLYGON ((18.32133 46.41398… |
HU232 | 3 | HU | Somogy | Somogy | NA | 3 | NA | POLYGON ((18.20145 46.88144… |
HU110 | 3 | HU | Budapest | Budapest | NA | 1 | NA | POLYGON ((19.14045 47.34944… |
HU120 | 3 | HU | Pest | Pest | NA | 2 | NA | POLYGON ((18.89655 48.05398… |
HU211 | 3 | HU | Fejér | Fejér | NA | 2 | NA | POLYGON ((18.68845 47.57705… |
HU212 | 3 | HU | Komárom-Esztergom | Komárom-Esztergom | NA | 2 | NA | POLYGON ((18.92296 47.75898… |
HU311 | 3 | HU | Borsod-Abaúj-Zemplén | Borsod-Abaúj-Zemplén | NA | 2 | NA | POLYGON ((21.46748 48.57032… |
HU312 | 3 | HU | Heves | Heves | NA | 3 | NA | POLYGON ((20.40755 48.15312… |
HU313 | 3 | HU | Nógrád | Nógrád | NA | 3 | NA | POLYGON ((19.63712 48.24557… |
HU321 | 3 | HU | Hajdú-Bihar | Hajdú-Bihar | NA | 2 | NA | POLYGON ((21.19624 47.95697… |
HU322 | 3 | HU | Jász-Nagykun-Szolnok | Jász-Nagykun-Szolnok | NA | 2 | NA | POLYGON ((20.82797 47.65902… |
HU323 | 3 | HU | Szabolcs-Szatmár-Bereg | Szabolcs-Szatmár-Bereg | NA | 3 | NA | POLYGON ((22.31386 48.32577… |
HU331 | 3 | HU | Bács-Kiskun | Bács-Kiskun | NA | 2 | NA | POLYGON ((19.26903 47.13413… |
HU332 | 3 | HU | Békés | Békés | NA | 2 | NA | POLYGON ((21.01366 47.22622… |
HU333 | 3 | HU | Csongrád-Csanád | Csongrád-Csanád | NA | 2 | NA | POLYGON ((20.50795 46.77311… |
A map with `ggplot2`.
Another example, now for postal codes.
# Same workflow for the postal codes topic: pick the latest GeoPackage
# entries whose name contains "_4326" and download them.
api <- get_topic("Postal")
file_to_download <- get_latest_files(api)$gpkg |>
  grep(pattern = "_4326", value = TRUE)
result <- get_content(
  api = api,
  end_point = file_to_download,
  save_to_file = TRUE
)
result
#> <httr2_response>
#> GET
#> https://gisco-services.ec.europa.eu/distribution/v2/pcode/gpkg/PCODE_PT_2024_4326.gpkg
#> Status: 304 Not Modified
#> Content-Type: application/geopackage+sqlite3
#> Body: On disk
#> '/tmp/RtmpMiPjmW/Rbuild7cc628a5e9c3/eudata/vignettes/PCODE_PT_2024_4326.gpkg'
#> (145338368 bytes)
# The body of the response is the downloaded file; list the layers it holds.
db_file <- result[["body"]]
layer <- sf::st_layers(db_file)
layer
#> Driver: GPKG
#> Available layers:
#> layer_name geometry_type features fields crs_name
#> 1 PCODE_PT_2024_4326.gpkg Point 820156 14 WGS 84
# Read only the first five records of the layer.
# `layer` is the full object returned by sf::st_layers(), so its `name`
# element is interpolated explicitly; interpolating the whole object would
# coerce every element of the list to text instead of just the layer name.
sample <- sf::st_read(
  db_file,
  query = glue::glue("select * from \"{layer$name[1]}\" limit 5")
)
#> Reading query `select * from "PCODE_PT_2024_4326.gpkg" limit 5'
#> from data source `/tmp/RtmpMiPjmW/Rbuild7cc628a5e9c3/eudata/vignettes/PCODE_PT_2024_4326.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 5 features and 14 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 21.7416 ymin: 62.1132 xmax: 23.1452 ymax: 62.8429
#> Geodetic CRS: WGS 84
sample
#> Simple feature collection with 5 features and 14 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 21.7416 ymin: 62.1132 xmax: 23.1452 ymax: 62.8429
#> Geodetic CRS: WGS 84
#> POSTCODE CNTR_ID PC_CNTR NUTS3_2024 CODE GISCO_ID_2021 NSI_CODE_2021
#> 1 64701 FI FI_64701 'FI199' '64701' FI_846 846
#> 2 64901 FI FI_64901 'FI199' '64901' FI_151 151
#> 3 60060 FI FI_60060 'FI199' '60060' FI_743 743
#> 4 60101 FI FI_60101 'FI199' '60101' FI_743 743
#> 5 60110 FI FI_60110 'FI199' '60110' FI_743 743
#> LAU_NAT LAU_LATIN COASTAL CITY_ID GREATER_CITY_ID FUA_ID DGURBA
#> 1 Teuva <NA> <NA> <NA> <NA> <NA> 3
#> 2 Isojoki <NA> <NA> <NA> <NA> <NA> 3
#> 3 Seinäjoki <NA> <NA> <NA> <NA> <NA> 2
#> 4 Seinäjoki <NA> <NA> <NA> <NA> <NA> 2
#> 5 Seinäjoki <NA> <NA> <NA> <NA> <NA> 2
#> geom
#> 1 POINT (21.7416 62.4819)
#> 2 POINT (21.9588 62.1132)
#> 3 POINT (22.954 62.8429)
#> 4 POINT (22.8636 62.7652)
#> 5 POINT (23.1452 62.5424)
# Read only the Hungarian postal code records of the layer.
# The layer name is an identifier and stays in double quotes; the country
# code is a string literal and therefore uses single quotes, as SQL requires.
# `layer$name[1]` picks the layer name out of the sf::st_layers() result
# instead of interpolating the whole object.
hu_data <- sf::st_read(
  db_file,
  query = glue::glue(
    "select * from \"{layer$name[1]}\" where CNTR_ID = 'HU'"
  )
)
#> Reading query `select * from "PCODE_PT_2024_4326.gpkg" where CNTR_ID = 'HU''
#> from data source `/tmp/RtmpMiPjmW/Rbuild7cc628a5e9c3/eudata/vignettes/PCODE_PT_2024_4326.gpkg'
#> using driver `GPKG'
#> Simple feature collection with 3155 features and 14 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 16.16701 ymin: 45.77374 xmax: 22.86451 ymax: 48.56046
#> Geodetic CRS: WGS 84
# Show only the postal code and the settlement name columns.
select(hu_data, POSTCODE, LAU_NAT)
#> Simple feature collection with 3155 features and 2 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 16.16701 ymin: 45.77374 xmax: 22.86451 ymax: 48.56046
#> Geodetic CRS: WGS 84
#> First 10 features:
#> POSTCODE LAU_NAT geom
#> 1 2181 Iklad POINT (19.43659 47.6654)
#> 2 2182 Domony POINT (19.41373 47.64118)
#> 3 2183 Galgamácsa POINT (19.39007 47.69573)
#> 4 2184 Vácegres POINT (19.36604 47.67584)
#> 5 2185 Váckisújfalu POINT (19.34847 47.70234)
#> 6 2191 Bag POINT (19.48168 47.63521)
#> 7 2192 Hévízgyörk POINT (19.52681 47.6225)
#> 8 2193 Galgahévíz POINT (19.54952 47.61575)
#> 9 2194 Tura POINT (19.59443 47.6041)
#> 10 2200 Monor POINT (19.44099 47.34713)
# Plot the postal code points whose settlement name is exactly "Gyöngyös",
# using EPSG:23700 both as the map projection and as the graticule datum.
EOV <- "EPSG:23700"
ggplot(data = filter(hu_data, grepl("^Gyöngyös$", LAU_NAT))) +
  geom_sf() +
  coord_sf(crs = EOV, datum = EOV)
Cities with the highest number of associated postal codes
# Count postal codes per settlement, keep the settlements that have more
# than one, and list them from the most to the fewest postal codes.
hu_data |>
  sf::st_drop_geometry() |>
  count(LAU_NAT) |>
  filter(n > 1) |>
  arrange(desc(n))
#> LAU_NAT n
#> 1 Budapest 184
#> 2 Miskolc 24
#> 3 Pécs 20
#> 4 Debrecen 19
#> 5 Szeged 17
#> 6 Győr 13
#> 7 Nyíregyháza 11
#> 8 Orosháza 8
#> 9 Ózd 8
#> 10 Ajka 5
#> 11 Nagykanizsa 5
#> 12 Salgótarján 5
#> 13 Gyöngyös 4
#> 14 Hajdúböszörmény 4
#> 15 Sopron 4
#> 16 Sárbogárd 4
#> 17 Sátoraljaújhely 4
#> 18 Zalaszentgrót 4
#> 19 Badacsonytomaj 3
#> 20 Bátonyterenye 3
#> 21 Békéscsaba 3
#> 22 Dabas 3
#> 23 Dunaújváros 3
#> 24 Encs 3
#> 25 Ercsi 3
#> 26 Esztergom 3
#> 27 Gárdony 3
#> 28 Hódmezővásárhely 3
#> 29 Jászberény 3
#> 30 Kaposvár 3
#> 31 Kecskemét 3
#> 32 Komárom 3
#> 33 Lenti 3
#> 34 Lőrinci 3
#> 35 Marcali 3
#> 36 Mohács 3
#> 37 Nyíradony 3
#> 38 Polgárdi 3
#> 39 Pápa 3
#> 40 Pásztó 3
#> 41 Sarród 3
#> 42 Szentgotthárd 3
#> 43 Szigetszentmiklós 3
#> 44 Szolnok 3
#> 45 Szombathely 3
#> 46 Sárvár 3
#> 47 Tiszafüred 3
#> 48 Veszprém 3
#> 49 Vásárosnamény 3
#> 50 Aba 2
#> 51 Almásfüzitő 2
#> 52 Alsóörs 2
#> 53 Badacsonytördemic 2
#> 54 Baja 2
#> 55 Bakonypéterd 2
#> 56 Baktakék 2
#> 57 Balatonalmádi 2
#> 58 Balatonboglár 2
#> 59 Balatonfűzfő 2
#> 60 Balinka 2
#> 61 Baracs 2
#> 62 Barcs 2
#> 63 Berhida 2
#> 64 Bonyhád 2
#> 65 Borsodnádasd 2
#> 66 Bük 2
#> 67 Cegléd 2
#> 68 Celldömölk 2
#> 69 Csehimindszent 2
#> 70 Csikvánd 2
#> 71 Csoma 2
#> 72 Csongrád 2
#> 73 Csákvár 2
#> 74 Dabronc 2
#> 75 Edelény 2
#> 76 Egercsehi 2
#> 77 Egyek 2
#> 78 Egyházasharaszti 2
#> 79 Enying 2
#> 80 Fegyvernek 2
#> 81 Fertőd 2
#> 82 Fityeház 2
#> 83 Fonyód 2
#> 84 Fülöpjakab 2
#> 85 Gesztely 2
#> 86 Gyarmat 2
#> 87 Gyomaendrőd 2
#> 88 Gyula 2
#> 89 Gávavencsellő 2
#> 90 Göd 2
#> 91 Gödre 2
#> 92 Hajdúnánás 2
#> 93 Harta 2
#> 94 Hosszúpályi 2
#> 95 Jánossomorja 2
#> 96 Kapuvár 2
#> 97 Kazár 2
#> 98 Kehidakustány 2
#> 99 Kenderes 2
#> 100 Kerepes 2
#> 101 Kisbér 2
#> 102 Kondó 2
#> 103 Kunszentmiklós 2
#> 104 Kunszentmárton 2
#> 105 Kákics 2
#> 106 Kám 2
#> 107 Körmend 2
#> 108 Kövegy 2
#> 109 Lázi 2
#> 110 Létavértes 2
#> 111 Magyarszecsőd 2
#> 112 Makó 2
#> 113 Medgyesbodzás 2
#> 114 Medgyesegyháza 2
#> 115 Mezőkeresztes 2
#> 116 Mosonszentmiklós 2
#> 117 Mosonszolnok 2
#> 118 Mánfa 2
#> 119 Mátranovák 2
#> 120 Mátraszentimre 2
#> 121 Mátraterenye 2
#> 122 Mérk 2
#> 123 Nemesgörzsöny 2
#> 124 Nyírkércs 2
#> 125 Nádudvar 2
#> 126 Olaszliszka 2
#> 127 Paks 2
#> 128 Parád 2
#> 129 Pilis 2
#> 130 Pilisszentkereszt 2
#> 131 Pér 2
#> 132 Sarkad 2
#> 133 Siófok 2
#> 134 Sumony 2
#> 135 Szerecseny 2
#> 136 Szerencs 2
#> 137 Szákszend 2
#> 138 Székesfehérvár 2
#> 139 Sántos 2
#> 140 Sülysáp 2
#> 141 Tahitótfalu 2
#> 142 Takácsi 2
#> 143 Tapolca 2
#> 144 Tata 2
#> 145 Tiszaalpár 2
#> 146 Tiszaeszlár 2
#> 147 Tiszaföldvár 2
#> 148 Tiszakécske 2
#> 149 Tiszakürt 2
#> 150 Tiszalök 2
#> 151 Tiszanagyfalu 2
#> 152 Tiszaszentimre 2
#> 153 Tiszatelek 2
#> 154 Tiszaug 2
#> 155 Tiszavárkony 2
#> 156 Tolna 2
#> 157 Töltéstava 2
#> 158 Törökszentmiklós 2
#> 159 Túrkeve 2
#> 160 Verpelét 2
#> 161 Visegrád 2
#> 162 Visonta 2
#> 163 Vámosszabadi 2
#> 164 Zalaegerszeg 2
#> 165 Zalakomár 2
#> 166 Zalaszentlászló 2
#> 167 Zsurk 2
#> 168 Öregcsertő 2
#> 169 Öreglak 2