@@ -14,10 +14,53 @@ def test_ingest_on_svg(self):
1414 self .assertIn ("TEST" , entity .first ("bodyText" ))
1515 self .assertEqual (entity .first ("processingStatus" ), self .manager .STATUS_SUCCESS )
1616
17- def test_ingest_on_jpeg (self ):
18- fixture_path , entity = self .fixture ("jpegtest.jpg" )
19- self .manager .ingest (fixture_path , entity )
20- self .assertIn ("Debian" , entity .first ("bodyText" ))
21- self .assertEqual (entity .first ("mimeType" ), "image/jpeg" )
17+ def test_tesseract_ocr_regression (self ):
18+ """This test is meant to catch a regression in the OCR behaviour
19+ described in this PR: https://github.com/alephdata/ingest-file/pull/585"""
2220
23- self .assertEqual (entity .first ("processingStatus" ), self .manager .STATUS_SUCCESS )
21+ test_data = {
22+ "jpeg" : {
23+ "file" : "regression_jpg.jpg" ,
24+ "content" : "Debian -- Packages" ,
25+ "mime_type" : "image/jpeg" ,
26+ },
27+ "gif" : {
28+ "file" : "regression_gif.gif" ,
29+ "content" : "This is text inside a GIF image" ,
30+ "mime_type" : "image/gif" ,
31+ },
32+ # "tiff": {
33+ # "file": "regression_tiff.tiff",
34+ # "content": "Debian -- Packages",
35+ # "mime_type": "image/tiff",
36+ # },
37+ "webp" : {
38+ "file" : "regression_webp.webp" ,
39+ "content" : "Debian -- Packages" ,
40+ "mime_type" : "image/webp" ,
41+ },
42+ "openjpeg" : {
43+ "file" : "regression_openjpeg.jp2" ,
44+ "content" : "Debian -- Packages" ,
45+ "mime_type" : "image/jp2" ,
46+ },
47+ }
48+
49+ for test_image_type in test_data :
50+ fixture_path , entity = self .fixture (test_data [test_image_type ]["file" ])
51+ self .manager .ingest (fixture_path , entity )
52+ self .assertIn (
53+ test_data [test_image_type ]["content" ],
54+ entity .first ("bodyText" ),
55+ f"Test failed for { test_data [test_image_type ]['file' ]} " ,
56+ )
57+ self .assertEqual (
58+ entity .first ("mimeType" ),
59+ test_data [test_image_type ]["mime_type" ],
60+ f"Test failed for { test_data [test_image_type ]['file' ]} " ,
61+ )
62+ self .assertEqual (
63+ entity .first ("processingStatus" ),
64+ self .manager .STATUS_SUCCESS ,
65+ f"Test failed for { test_data [test_image_type ]['file' ]} " ,
66+ )
0 commit comments