Stable version from CRAN
install.packages("solrium")
Or the development version from GitHub
remotes::install_github("ropensci/solrium")
Load
library("solrium")
Initialize connection. By default, you connect to http://localhost:8983
(conn <- SolrClient$new())
#> <Solr Client>
#> host: 127.0.0.1
#> path:
#> port: 8983
#> scheme: http
#> errors: simple
#> proxy:
For now, only lists and data.frames are supported.
if (!collection_exists(conn, "books")) {
collection_create(conn, name = "books", numShards = 1)
}
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 6513
#>
#>
#> $success
#> $success$`172.20.0.5:8983_solr`
#> $success$`172.20.0.5:8983_solr`$responseHeader
#> $success$`172.20.0.5:8983_solr`$responseHeader$status
#> [1] 0
#>
#> $success$`172.20.0.5:8983_solr`$responseHeader$QTime
#> [1] 5024
#>
#>
#> $success$`172.20.0.5:8983_solr`$core
#> [1] "books_shard1_replica_n1"
#>
#>
#>
#> $warning
#> [1] "Using _default configset. Data driven schema functionality is enabled by default, which is NOT RECOMMENDED for production use. To turn it off: curl http://{host:port}/solr/books/config -d '{\"set-user-property\": {\"update.autoCreateFields\":\"false\"}}'"
df <- data.frame(id = c(67, 68), price = c(1000, 500000000))
conn$add(df, "books")
#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#>
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 987
ss <- list(list(id = 1, price = 100), list(id = 2, price = 500))
conn$add(ss, "books")
#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#>
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 61
Create collection if it doesn’t exist yet
if (!collection_exists(conn, "gettingstarted")) {
collection_create(conn, name = "gettingstarted", numShards = 1)
}
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 6446
#>
#>
#> $success
#> $success$`172.20.0.7:8983_solr`
#> $success$`172.20.0.7:8983_solr`$responseHeader
#> $success$`172.20.0.7:8983_solr`$responseHeader$status
#> [1] 0
#>
#> $success$`172.20.0.7:8983_solr`$responseHeader$QTime
#> [1] 5112
#>
#>
#> $success$`172.20.0.7:8983_solr`$core
#> [1] "gettingstarted_shard1_replica_n1"
#>
#>
#>
#> $warning
#> [1] "Using _default configset. Data driven schema functionality is enabled by default, which is NOT RECOMMENDED for production use. To turn it off: curl http://{host:port}/solr/gettingstarted/config -d '{\"set-user-property\": {\"update.autoCreateFields\":\"false\"}}'"
Add some documents first
docs <- list(list(id = 1, price = 100, name = "brown"),
             list(id = 2, price = 500, name = "blue"),
             list(id = 3, price = 2000L, name = "pink"))
conn$add(docs, "gettingstarted")
#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#>
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 1108
And the documents are now in your Solr database
conn$search(name = "gettingstarted", params = list(q = "*:*", rows = 3))
#> # A tibble: 3 x 4
#> id price name `_version_`
#> <chr> <int> <chr> <dbl>
#> 1 1 100 brown 1.66e18
#> 2 2 500 blue 1.66e18
#> 3 3 2000 pink 1.66e18
Now delete those documents just added
conn$delete_by_id(ids = c(1, 2, 3), "gettingstarted")
#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#>
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 48
And now they are gone
conn$search("gettingstarted", params = list(q = "*:*", rows = 4))
#> # A tibble: 0 x 0
Add some documents first
conn$add(docs, "gettingstarted")
#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#>
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 72
And the documents are now in your Solr database
conn$search("gettingstarted", params = list(q = "*:*", rows = 5))
#> # A tibble: 3 x 4
#> id price name `_version_`
#> <chr> <int> <chr> <dbl>
#> 1 1 100 brown 1.66e18
#> 2 2 500 blue 1.66e18
#> 3 3 2000 pink 1.66e18
Now delete those documents just added
conn$delete_by_query(query = "(name:blue OR name:pink)", "gettingstarted")
#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#>
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 122
And now they are gone
conn$search("gettingstarted", params = list(q = "*:*", rows = 5))
#> # A tibble: 1 x 4
#> id price name `_version_`
#> <chr> <int> <chr> <dbl>
#> 1 1 100 brown 1.66e18
This approach is best if you have many different things you want to do at once, e.g., delete and add files and set any additional options. The functions are:
update_xml()
update_json()
update_csv()
There are separate functions for each of the data types as they take slightly different parameters - and to make it more clear that those are the three input options for data types.
file <- system.file("examples", "books.json", package = "solrium")
conn$update_json(file, "books")
#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#>
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 782
Add a document first, that we can later delete
ss <- list(list(id = 456, name = "cat"))
conn$add(ss, "books")
#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#>
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 100
Now add a new document, and delete the one we just made
file <- system.file("examples", "add_delete.xml", package = "solrium")
cat(readLines(file), sep = "\n")
#> <update>
#> <add>
#> <doc>
#> <field name="id">978-0641723445</field>
#> <field name="cat">book,hardcover</field>
#> <field name="name">The Lightning Thief</field>
#> <field name="author">Rick Riordan</field>
#> <field name="series_t">Percy Jackson and the Olympians</field>
#> <field name="sequence_i">1</field>
#> <field name="genre_s">fantasy</field>
#> <field name="inStock">TRUE</field>
#> <field name="pages_i">384</field>
#> </doc>
#> </add>
#> <delete>
#> <id>456</id>
#> </delete>
#> </update>
conn$update_xml(file, "books")
#> $responseHeader
#> $responseHeader$rf
#> [1] 1
#>
#> $responseHeader$status
#> [1] 0
#>
#> $responseHeader$QTime
#> [1] 279
Note that update_xml() and update_json() have exactly the same parameters, but simply use different data input formats. update_csv() is different in that you can’t provide document or field level boosts or other modifications. In addition, update_csv() can accept not just csv, but tsv and other types of separators.