% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PsychWordVec.R
\name{most_similar}
\alias{most_similar}
\title{Find the Top-N most similar words}
\usage{
most_similar(data, x, topn = 10, keep = FALSE, above = NULL, verbose = TRUE)
}
\arguments{
\item{data}{A \code{data.table} (of new class \code{wordvec})
loaded by \code{\link{data_wordvec_load}}.}

\item{x}{Can be one of the following:
\itemize{
  \item{a single word:

  \code{"China"}}

  \item{a character vector of words:

  \code{c("king", "queen")}

  \code{cc(" king , queen ; man | woman")}}

  \item{an R formula (\code{~ xxx}) specifying
  words that positively and negatively
  contribute to the similarity (for word analogy):

  \code{~ boy - he + she}

  \code{~ king - man + woman}

  \code{~ Beijing - China + Japan}}
}}

\item{topn}{Top-N most similar words. Defaults to \code{10}.}

\item{keep}{Keep words specified in \code{x} in results?
Defaults to \code{FALSE}.}

\item{above}{Defaults to \code{NULL}. Can be one of the following:
\itemize{
  \item{a threshold value to find all words with cosine similarities
  higher than this value}
  \item{a critical word to find all words with cosine similarities
  higher than that with this critical word}
}
If both \code{topn} and \code{above} are specified, \code{above} wins.}

\item{verbose}{Print information to the console? Defaults to \code{TRUE}.}
}
\value{
A \code{data.table} with the most similar words and their cosine similarities.
The row number of each word in the raw data is also returned,
which may help determine the relative word frequency in some cases.

Two attributes are appended to the returned \code{data.table} (see examples):
\code{wordvec} and \code{wordvec.formula}.
Users may extract them for further use.
}
\description{
Find the Top-N most similar words, which replicates the results produced
by the \code{most_similar()} function of the Python \code{gensim} module.
(Exact replication of \code{gensim} requires the same word vectors data,
not the \code{demodata} used here in examples.)
}
\section{Download}{

Download pre-trained word vectors data (\code{.RData}):
\url{https://psychbruce.github.io/WordVector_RData.pdf}
}

\examples{
d = data_wordvec_normalize(demodata)

most_similar(d, "China")
most_similar(d, c("king", "queen"))
most_similar(d, cc(" king , queen ; man | woman "))

\donttest{# the same as above:
most_similar(d, ~ China)
most_similar(d, ~ king + queen)
most_similar(d, ~ king + queen + man + woman)

most_similar(d, ~ boy - he + she)
most_similar(d, ~ Jack - he + she)
most_similar(d, ~ Rose - she + he)

most_similar(d, ~ king - man + woman)
most_similar(d, ~ Tokyo - Japan + China)
most_similar(d, ~ Beijing - China + Japan)

most_similar(d, "China", above=0.7)
most_similar(d, "China", above="Shanghai")

# automatically normalized for more accurate results
ms = most_similar(demodata, ~ king - man + woman)
ms
str(ms)
attr(ms, "dims")
attr(ms, "normalized")
attr(ms, "wordvec.formula")
attr(ms, "wordvec")
# final word vector computed according to the formula
}
}
\seealso{
\code{\link{cosine_similarity}}

\code{\link{pair_similarity}}

\code{\link{tab_similarity}}
}
