-
© .. © ..
. .. !""
,
#
apanovich@iis.nsk.su mag@iis.nsk.su
" $ %$
& '", (' ) *+
+$$( $( + LOD +$$( $( +
$( +". - + +/$( ( *+
$( $( $(, $ / *& )&, '&'& )& . $( $( *+ * $
%" SpringerLink $
$ ( - !#.
1
- + , ' $(
!"" ( - !#) -$ ( -
!# [8] $ -$$( +$(
2$( (Linked Open Data, LOD) [10]. 2 ) +$( 2$( / * +$
$ (1), $ " "
(2), '" (3)
*) $( (4). [1]
$ $ / ' +$(
++), / $ $, +&' '"
RDF-$( RKBExplorer.com. 9$
+, +$$(
+$$( + +", " *"
*(
*$( , , )
*( +$( $ & "
. - + +/$( (
& %" $
*+ $( $(
$(, $ / *&
)&, '&'& )&
. " $
%$ ) '"
%" SpringerLink (http://link.springer.com/) -$ ( -
!#.
2
/$ % " +$ +"
' " % '", *, )
" owl:sameAs.
( * * ,
$ -$ ( - !#
)& % $, +& + ( ( . / , -$ ( *+&
+$$ , * , $ $ , *+&
+$$ ( / $( . <, / +* , $ *+ * +$$
$(, $ " %"
elibrary.ru? = ) & )&
" , , /&, + %" %"
+* , ( $ -$$ ( - !#.
% elibrary.ru / $* * +"
) ( )" + 10-15 , +" + )" ( , " . #
* /* elibrary.ru ) ( &", .. , 9.. @(, .. < ( ( ", +/ ( $ " ""
.
= SpringerLink $ $ % * . - $(, )+ $( ,
" , *
XVII !
DAMDID/RCDL’2015 «
" », #, 13-16 2015
/& ( - !#. - $(, / $ $ PDF ( )". / $ $ )" $, SpringerLink /
& - & )&
+, $ ( + *), ) " . *(, %"
+ , *+$ WorldCat.org [4], – +& * +$
, A&' *$(
, ( 9 < G, *( %$( . WorldCat.org ( -$$( +$(
2$(. 2$ WorldCat.org $ RDF- , + "
*+ schema.org + BiblioGraph (http://BiblioGraph.net).
=$ WorldCat.org +, % , / $ , ) +$$(
. #, '" WorldCat Identities, (http://www.worldcat.org/wcidentities/lccn-n80162678), /' $&'( ", $ " +*, ' &
. - / )& (
*( ..
) .. ( ) + # . % * , 1989 2012 , $ $ «$ + .. » ( ..
1988 ).
$$( , ) '" &
+$ $ « $ "$», + $ & %. ' ) $( " +$(
" OCLC
" < G (viaf.org), , , ++$ ) ( ( + (, +)(, ( + (. # , ) *+& * $
$, +*$ *$. @, , , "
, ) http://viaf.org/viaf/5347110, ,
$ $(, , $(
" * $ , $ $'$ )", ,
$ . "
$, * $
(http://viaf.org/viaf/196995053), /
$ $ )"
, $ % («! ( …»), $ $ $ / .
@ *, +& ), " " /
$ & $ *( +$(
, " $ $
& +$ .
+ " " ) '" +"
-$$( +$( $(
SILK[6]. = , *+$ VIAF DBLP,
$ ( [5, 7]. / &
/ $ &
+$$( ( +". [12, 13].
Q' " + ) '"
$(, + +$(
+" . ( +$$(
( / +$ $( , $ )". Q %" $ / * VIAF, ( %$ +$ &, , - T, ' * +/$ $ ). " $, + ) +$( , +$( $ (/
/ +, + (* +) ). % $ ) ( % ", ) ( , ('(
$( $(, +$
. # $ ) & $ )", + $( +/, ) $ +$
$. Q *)$ &
+$( ( ( / +*
' . ' ' &
$ $ ) , &&' + ), , , -+
[9, 14].
+$$( +$$( %
$ / $ (', , +$ $ / , , +$
+" " . % $
*+ ) '"
$" (, &'"
)"
)". ( % $ ' %"
SpringerLink. ! * * ),
$( + +$ * ( * , +$ « )"» +" $).
-' ( $ ) '" &'" :
x +$ (, ) $ % ( -
!# ( , " ),
& +/$ $ +$
'
" %"
SpringerLink.
x * ( - !# *+&
+$$ + +)",
%" SpringerLink + & +$$ + , ' + +) ' Google (Google translator.com). X /" ""
* SpringerLink + $ + ' $, +$ "
$ ( - !#.
' Jaro-Winkler [3]. *, ) + " +)"
/& , , + "
" / +) , &
+$ $ '". $(
*( $ +$ ', +$ (, -
!#).
x 2 ) * $ +"
+), + $ + ( -
!#.
x "$ * + &
$ ) $ $. @$
", $( $ +, & *, +'$ . '" ( +/: ' tf-idf [16] "
+, / LDA (Latent Dirichlet Allocation) [2].
LDA, / $ ' ) <* — Z".
$ ( , (
& ' ) [15].
x 2 ( ", $( + (/ + / ' &'(
, + +
NewgroupN, N-% $"
* + " $.
x 2 /" $ " + +, + + + $(
$ + . (
/ *, /&
, ++.
<) , & $, $ ), / ) + , " (W) ( / . ( / $ , / % +. + + . #, ) + 30 0.05.
$" +/ '
$ , (/
$ & / . / + $& &' . / = Temperature * SpringForce(d) * W * SpringForceK;
= Temperature * ElectricForce(d) / W * ElectricForceK;
SpringForce(d) = 2 * log(d);
ElectricForce(d) = 1 / d^2, -% , W– ( / .
*+ * +/* *
* (" ), + &'" " $, * ) , " $, ' $.
3 $ " %
) '"
* ( .. , )" % ( .. , )(
- !#, &'( $" .
!+*$ $ $ %" $ * %"
" elibrary.ru.
Q $( SpringerLink, - $( * +*$" + A " ) )( (
$ +) *( )), ' * ).
@/ +*$ $ "
$ + 100 ( 3000 )") +, 80% , )( $ )
$, * * )$. $ $ +$ 70% .
# !. 1 + $ $, ''" + $ “.. ” ) %"
SpringerLink.com. $ "
( - !# $ +$
+$$ $ % ,
+$ (" . "
+$ +$$ $ $ +" $. ), + , +/&'" ), $ &
. $ ) & ), $(
$ +. $ / ) & ), $(
$ +. + “.. ” SpringerLink.com $ " 91 ). +
( 5 " / Andrei P. Ershov, * – Andrei Ershov, 84 - A.P. Ershov,
* – A.P. Yershov. # +(
$ + $ ( "$( ". + %( 91 )" . . * / 21 ), *$ * / ' * +$ ..
$.
!. 1. ), ) $ /' .. . *,
$( $ +, +$ $ ) .
" * ) 19 )" , 66 )"
*$( .. $(. +*
$( % * * * ) * *& * )" . . , /*
SpringerLink * )"
. . , /$(
% ( .. .
# " " $, ( ' " $ $(, "
$, )" + (& *, +&
93%.
4 &'(
" $ $ + (, *" $ ) $ $ )" ' + %" ).
* $ $ . =$ +,
$ $, * /* , &
$, % ) /* ( - !#. %( (, , $" + +$ ( )" + ( +$( , ) %(
( A*. ' %$ $ ('(
$( %) , + &'( A*
)& +*$
. @/ %$
$& + +
$( ' , / ), *+ +$(
" ) -) .
)
! $ " /
!TT ( _ 14-07-00386).
*
[1] Apanovich Z.V., Marchuk A.G. Experiments on using the LOD cloud datasets to enrich the content of a scientific knowledge base, P.Klinov and D.Mouromtsev (Eds.) KESW 2013, CCIS 394, Springer Verlag Berlin Heidelberg 2013, pp. 1-14.
[2] Blei D. M., Ng A., Jordan M. Latent Dirichlet allocation. Journal of Machine Learning Research (3) 2003, pp. 993-1022.
[3] Cohen W. W., Ravikumar P. D., Fienberg S. E.: A Comparison of String Distance Metrics for Name-Matching Tasks. IIWeb 2003, pp. 73-78.
[4] Godby C. J., Denenberg R. Common Ground:
Exploring Compatibilities Between the Linked Data Models of the Library of Congress and OCLC.
http://www.oclc.org/research/publications/2015/oclcresearch-loc-linked-data-2015.html.
[5] Hickey, T. B., Toves J. A. 2014. "Managing Ambiguity In VIAF". D-Lib Magazine 20 (July/August). doi:10.1045/july2014-hickey.
http://www.dlib.org/dlib/july14/hickey/07hickey.html.
[6] Isele R., Jentzsch A., Bizer Ch. Silk Server - Adding missing Links while consuming Linked Data// 1st International Workshop on Consuming Linked Data (COLD 2010), Shanghai, November 2010.
[7] Ley M.: DBLP - Some Lessons Learned. PVLDB 2(2), 2009, pp. 1493-1500.
[8] Marchuk A.G., Marchuk P.A. Specific features of digital libraries construction with linked content.
Proc. of the RCDL’2010 Conf.– 2010. – P. 19–23.
(In Russian).
[9] Rogov A.A., Sidorov Yu. Vl. Statistical and Information-calculating Support of the Authorship Attribution of the Literary Works. Computer Data Analysis and Modeling: Robustness and Computer Intensive Methods: Proc. of the Sixth International Conference (September 10-14, 2001, Minsk).
Vol.2: K-S/ Edited by Prof. Dr. S. Aivazian, Prof.
Dr. Yu. Kharin and Prof. Dr. H. Rieder. Minsk:
BSU, 2001. – P. 187-192.
[10] Schultz A. et al. How to integrate LINKED DATA into your application //Semantic technology &
Business Conference, San Francisco, June 5, 2012.
http://mes-semantics.com/wp-content/uploads/2012/09/Becker-etal-LDIF-SemTechSanFrancisco.pdf.
[11] Steyvers M., Griffiths T. Probabilistic Topic Models. In: Handbook of Latent Semantic Analysis.
2007.
[12] 9( . 9., #( . ., T . . - + $ ( +) $( / // . #X. .:
. (. – 2008. – @. 6, $. 1. – . 3–9.
[13] <+ . . +$
/ . . <+ , .
`.@ ", -. . < //
=$ : $
$ (, %$ )
(RCDL’2012) : . XIV. . .,
*-", 15–18 . 2012 . – *-" : +- «X », 2012. – . 360–369.
[14] jq 2.. Z +: !+
*+ )" ..
. X, .9: , N2, 2000, .115-126.
[15] http://snowball.tartarus.org/
[16] http://www.codeproject.com/Articles/12098/Term-frequency-Inverse-document-frequency-implemen
A Combined Approach to Cross-Language Identity Resolution
Zinaida V. Apanovich, Alexander G. Marchuk
This paper describes experiments on the cross-language identity resolution problem that arises when English-language LOD datasets are used to populate the content of a Russian scholarly knowledge base. One possible approach is the combined use of structured and text data, which contain additional information and facilitate identity resolution. The dataset of the Open Archive of the Russian Academy of Sciences and the SpringerLink e-library are used as test examples.