利用者:CES1596/OCR0.js
注意: 保存後、変更を確認するにはブラウザーのキャッシュを消去する必要がある場合があります。
- Firefox / Safari: Shift を押しながら 再読み込み をクリックするか、Ctrl-F5 または Ctrl-R を押してください (Mac では ⌘-R)
- Google Chrome: Ctrl-Shift-R を押してください (Mac では ⌘-Shift-R)
- Internet Explorer / Microsoft Edge: Ctrl を押しながら 最新の情報に更新 をクリックするか、Ctrl-F5 を押してください
- Opera: Ctrl-F5 を押してください
//OCR tool based on wikisource.org's MediaWiki:OCR.js, MediaWiki:GoogleOCR.js and User:Putnik/TesseractOCR.js
// Language code sent as the `lang` query parameter to the remote OCR services
// (phetools hOCR/OCR and ws-google-ocr) by the do_* functions in this file.
var lang = 'ja';
// Language name handed to the Tesseract worker's recognize() call in do_tocr().
var language = 'jpn';
// Alternative value for `language`, left disabled
// (presumably the vertical-writing Japanese model; confirm before enabling).
//var language = 'jpn_vert';
// On DOM ready, add one tab per OCR engine to the "#p-namespaces" tab list.
// The tabs are only added in namespace 250 (presumably the ProofreadPage
// "Page" namespace; confirm against the wiki configuration).
$(function () {
  if (mw.config.get('wgNamespaceNumber') !== 250) {
    return;
  }

  // One entry per tab, in display order. `run` is a wrapper so that the engine
  // function is looked up when the tab is clicked, not when the table is built.
  var tabs = [
    { id: 'hocr0_btn', label: 'HOCR', title: 'do HOCR', logName: 'do_hocr', run: function () { do_hocr(); } },
    { id: 'ocr0_btn', label: 'OCR', title: 'do OCR', logName: 'do_ocr', run: function () { do_ocr(); } },
    { id: 'tocr0_btn', label: 'TOCR', title: 'do Tesseract OCR', logName: 'do_tocr', run: function () { do_tocr(); } },
    { id: 'gocr0_btn', label: 'GOCR', title: 'do Google OCR', logName: 'do_gocr', run: function () { do_gocr(); } }
  ];

  tabs.forEach(function (tab) {
    // Blue, clickable list item that carries the tab id and tooltip.
    var item = $("<li><span><a>" + tab.label + "</a></span></li>")
      .attr({'id': tab.id, 'title': tab.title})
      .css({'color': '#0000ff', 'cursor': 'pointer'});
    $("#p-namespaces ul").append(item);

    $('#' + tab.id).click(function () {
      tab.run();
      // The engines are asynchronous: this only records that the request was started.
      console.log(tab.logName + ' done');
      // Turn the tab black to show that it has already been used.
      $('#' + tab.id + ', #' + tab.id + ' span a').css({'color': '#000000'});
    });
  });
});
/**
 * Handle the JSON reply of the hOCR request issued by do_hocr().
 *
 * When the reply reports an error, the plain OCR service is tried instead via
 * do_ocr(). Otherwise the markup in `data.text` is reduced to its text content
 * and written into the edit box (#wpTextbox1).
 *
 * @param {Object} data Reply object; only `error` and `text` are read here.
 */
function hocr_callback(data) {
  if (data.error) {
    console.log('hocr data error');
    do_ocr();
    return;
  }

  var tb = document.getElementById("wpTextbox1");
  if (!tb) {
    // No edit box on this view, so there is nowhere to put the text.
    console.log('hocr textbox not found');
    return;
  }

  // $(string) interprets a string that does not start with "<" as a selector.
  // $.parseHTML() always treats its argument as markup and, by default, drops
  // script elements, which is the safer way to handle text from a remote service.
  tb.value = $($.parseHTML(data.text)).text();
  console.log('hocr callback done');
}
/**
 * Handle the JSON reply of the plain OCR request issued by do_ocr().
 *
 * On a reported error, `data.text` is shown in an alert box. Otherwise
 * `data.text` is written into the edit box (#wpTextbox1).
 *
 * @param {Object} data Reply object; only `error` and `text` are read here.
 */
function ocr_callback(data) {
  if (data.error) {
    alert(data.text);
    console.log('ocr data error');
    return;
  }

  var tb = document.getElementById("wpTextbox1");
  if (!tb) {
    // No edit box on this view; without this guard the assignment below
    // would throw a TypeError inside the asynchronous callback.
    console.log('ocr textbox not found');
    return;
  }

  tb.value = data.text;
  console.log('ocr callback done');
}
/**
 * Request hOCR text for the current page from the phetools service and hand
 * the JSON reply to hocr_callback().
 *
 * The page title, the configured `lang` code and the current user name are
 * sent as query parameters. The request is asynchronous, so this function
 * returns before any reply has arrived.
 */
function do_hocr() {
  // Every value is URL-encoded: titles and user names may contain spaces,
  // "&" or non-ASCII characters that would otherwise corrupt the query string.
  var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr&book='
    + encodeURIComponent(mw.config.get('wgTitle'))
    + '&lang=' + encodeURIComponent(lang)
    + '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
  console.log('hocr url defined');

  $.getJSON(request_url)
    .done(hocr_callback)
    .fail(function () {
      // Transport or parse failure: report it instead of failing silently.
      mw.notify('hocr request failed');
      console.log('hocr request failed');
    });
  console.log('hocr getJSON done');
}
/**
 * Send the page scan (the image inside ".prp-page-image") to the phetools OCR
 * service and hand the JSON reply to ocr_callback().
 *
 * Nothing is requested when no scan image is present. The request is
 * asynchronous, so this function returns before any reply has arrived.
 */
function do_ocr() {
  var $image = $( '.prp-page-image img' );
  var src = $image.attr('src');
  if ($image.length === 0 || !src) {
    mw.notify('ocr image not found');
    console.log('ocr image not found');
    return;
  }

  // Only a protocol-relative src ("//host/...") needs a scheme prepended;
  // an src that already has one must be left as it is.
  var url_image = src.indexOf('//') === 0 ? 'https:' + src : src;

  // The image URL contains "/", ":" and possibly "?", "&" or "%", so it has to
  // be encoded to survive as a single query value; the same goes for user names.
  var request_url = "//tools.wmflabs.org/phetools/ocr.php?cmd=ocr&url=" + encodeURIComponent(url_image)
    + "&lang=" + encodeURIComponent(lang)
    + "&user=" + encodeURIComponent(mw.config.get('wgUserName'));
  console.log('ocr url defined');

  $.getJSON( request_url )
    .done( ocr_callback )
    .fail(function () {
      // Transport or parse failure: report it instead of failing silently.
      mw.notify('ocr request failed');
      console.log('ocr request failed');
    });
  console.log('ocr getJSON done');
}
/**
 * Run Tesseract OCR in the browser on the page scan (the image inside
 * ".prp-page-image") and put the recognised text into the edit box.
 *
 * The tesseract.js library is loaded on demand. Loading and recognition are
 * asynchronous, so this function returns before any text is available.
 */
function do_tocr() {
  var $image = $( '.prp-page-image img' );
  var src = $image.attr( 'src' );
  if ( $image.length === 0 || !src ) {
    mw.notify( 'tocr image not found' );
    console.log('tocr image not found');
    // Without an image there is nothing to recognise.
    return;
  }

  // Only a protocol-relative src ("//host/...") needs a scheme prepended.
  var imageUrl = src.indexOf( '//' ) === 0 ? 'https:' + src : src;

  $.getScript( 'https://tools-static.wmflabs.org/cdnjs/ajax/libs/tesseract.js/2.0.0-alpha.2/tesseract.min.js', function() {
    var TesseractWorker = Tesseract.TesseractWorker;
    var worker = new TesseractWorker({
      workerPath: 'https://tools-static.wmflabs.org/cdnjs/ajax/libs/tesseract.js/2.0.0-alpha.2/worker.min.js',
      langPath: 'https://tools.wmflabs.org/tessdata/4.0.0',
      corePath: 'https://tools.wmflabs.org/tessdata/core/tesseract-core.wasm.js'
    });
    worker
      .recognize( imageUrl, language )
      .then( processTesseractResult, function( err ) {
        // Recognition failed (e.g. the worker or language data could not load).
        mw.notify( 'tocr failed' );
        console.log('tocr failed', err);
      } );
  } ).fail( function() {
    mw.notify( 'tocr script load failed' );
    console.log('tocr script load failed');
  } );
  console.log('tocr getScript done');
}

/**
 * Private helper of do_tocr(): write the text recognised by Tesseract into the
 * edit box, or report that nothing was recognised.
 *
 * It has its own name on purpose: this file declares another function called
 * processOcrResult further down, and a later function declaration with the
 * same name replaces an earlier one.
 *
 * @param {Object} result Recognition result; only `text` is read here.
 */
function processTesseractResult( result ) {
  var text = result ? result.text : undefined;
  if ( text === undefined || text === null || text.length === 0 ) {
    mw.notify( 'tocr no text' );
    console.log('tocr no text');
    return;
  }
  $( '#wpTextbox1' ).val( text );
  console.log('tocr callback done');
}
/**
 * Send the page scan (the image inside ".prp-page-image") to the
 * ws-google-ocr tool and hand the reply to processOcrResult().
 *
 * processOcrResult() is registered for both success and failure of the
 * request. The request is asynchronous, so this function returns before any
 * reply has arrived.
 */
function do_gocr() {
  var $image = $( '.prp-page-image img' );
  var src = $image.attr('src');
  if ( $image.length === 0 || !src ) {
    mw.notify( mw.msg( 'google-ocr-image-not-found' ) );
    console.log('gocr image not found');
    // Without an image there is nothing to send.
    return;
  }

  // Only a protocol-relative src ("//host/...") needs a scheme prepended.
  var imageUrl = src.indexOf( '//' ) === 0 ? 'https:' + src : src;
  var toolUrl = "//tools.wmflabs.org/ws-google-ocr/api.php";
  // The image URL has to be encoded to survive as a single query value.
  var requestUrl = toolUrl + "?image=" + encodeURIComponent( imageUrl ) + "&lang=" + encodeURIComponent( lang );

  $.getJSON( requestUrl )
    .done( processOcrResult )
    .fail( processOcrResult );
  console.log('gocr getJSON done');
}
/**
 * Result handler that do_gocr() registers for both the success and the
 * failure of its request.
 *
 * - If the argument carries `responseJSON.error`, the error code and message
 *   are shown as a notification.
 * - If it has no (or empty) `text`, a "no text" notification is shown.
 * - Otherwise `text` is written into the edit box (#wpTextbox1).
 *
 * @param {Object} response Reply object; `responseJSON` and `text` are read here.
 */
function processOcrResult( response ) {
  var payload = response.responseJSON;
  var hasError = payload !== undefined && payload.error;

  if ( hasError ) {
    var failure = payload.error;
    mw.notify( mw.msg( 'error' ) + ' ' + failure.code + ' ' + failure.message );
    console.log('gocr error');
    return;
  }

  var text = response.text;
  var isEmpty = text === undefined || text.length === 0;

  if ( isEmpty ) {
    mw.notify( mw.msg( 'google-ocr-no-text' ) );
    console.log('gocr no text');
    return;
  }

  $( '#wpTextbox1' ).val( text );
  console.log('gocr callback done');
}