% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kanjistat-package.R
\docType{data}
\encoding{UTF-8}
\name{distdata}
\alias{distdata}
\alias{dstrokedit}
\alias{dyehli}
\title{Precomputed kanji distances}
\format{
Symmetric sparse matrices containing distances between a key kanji, its ten nearest neighbors and
possibly some other close kanji.\if{html}{\out{<br>}}
For \code{dstrokedit}, these are the stroke edit distances according to Yencken and Baldwin (2008).\if{html}{\out{<br>}}
For \code{dyehli}, these are the bag-of-radicals distances according to Yeh and Li (2002).\if{html}{\out{<br>}}
Both are instances of the S4 class \code{dsCMatrix} (symmetric sparse matrices in \emph{column}-compressed format)
with 2133 rows and 2133 columns.

All kanji that belong to both the pre-2010 and the post-2010
jouyou kanji are included. The indices are those from \code{\link{kbase}}.
}
\source{
Datasets from \url{https://lars.yencken.org/datasets}, made available under the
Creative Commons Attribution 3.0 Unported licence.

Computed as part of \emph{Yencken, Lars (2010)
\href{https://lars.yencken.org/papers/phd-thesis.pdf}{Orthographic support for passing the reading hurdle in Japanese}.
PhD Thesis, University of Melbourne, Melbourne, Australia}.
}
\usage{
dstrokedit

dyehli
}
\description{
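Sparse matrices of precomputed distances between kanji: stroke edit distances
(\code{dstrokedit}) and bag-of-radicals distances (\code{dyehli}).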
}
\examples{
# Position of the kanji \u90e8 in kbase (used as row/column index below)
bu_index <- match("\u90e8", kbase$kanji)

# Stroke edit distances for \u90e8: keep the non-zero entries of its row,
# label them with the corresponding kanji and list them in increasing order.
sed_idx <- which(dstrokedit[bu_index, ] != 0)
sed <- setNames(dstrokedit[sed_idx, bu_index], kbase$kanji[sed_idx])
sort(sed)

# Same lookup for the bag-of-radicals distances for \u90e8.
bord_idx <- which(dyehli[bu_index, ] != 0)
bord <- setNames(dyehli[bord_idx, bu_index], kbase$kanji[bord_idx])
sort(bord)

}
\references{
Yeh, Su-Ling
and Li, Jing-Ling (2002). Role of structure and component in judgements of
and Li, Jing-Ling (2002). Role of structure and component in judgments of
Human Perception and Performance}, \strong{28}(4), 933–947.

Yencken, Lars
and Baldwin, Timothy (2008). Measuring and predicting orthographic associations:
Modelling the similarity of Japanese kanji. In: \emph{Proceedings of the 22nd International Conference on Computational
Linguistics (Coling 2008)}, pp. 1041–1048.
}
\keyword{datasets}
