comparison mupdf-source/source/fitz/stext-device.c @ 40:aa33339d6b8a upstream

ADD: MuPDF v1.26.10: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.5.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 11:31:38 +0200
parents b50eed0cc0ef
children
comparison
equal deleted inserted replaced
39:a6bc019ac0b2 40:aa33339d6b8a
980 int unicode; 980 int unicode;
981 int i; 981 int i;
982 982
983 for (i = start; i < end; i++) 983 for (i = start; i < end; i++)
984 { 984 {
985 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT))
986 {
987 fz_rect r = current_clip(ctx, dev);
988 if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r))
989 {
990 dev->last.clipped = 1;
991 continue;
992 }
993 }
994 dev->last.clipped = 0;
995
985 /* Calculate new pen location and delta */ 996 /* Calculate new pen location and delta */
986 tm.e = span->items[i].x; 997 tm.e = span->items[i].x;
987 tm.f = span->items[i].y; 998 tm.f = span->items[i].y;
988 dev->last.trm = fz_concat(tm, ctm); 999 dev->last.trm = fz_concat(tm, ctm);
989 dev->last.bidi_level = span->bidi_level; 1000 dev->last.bidi_level = span->bidi_level;
993 fz_drop_font(ctx, dev->last.font); 1004 fz_drop_font(ctx, dev->last.font);
994 dev->last.font = fz_keep_font(ctx, font); 1005 dev->last.font = fz_keep_font(ctx, font);
995 } 1006 }
996 dev->last.valid = 1; 1007 dev->last.valid = 1;
997 dev->last.flags = flags; 1008 dev->last.flags = flags;
998
999 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT))
1000 {
1001 fz_rect r = current_clip(ctx, dev);
1002 if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r))
1003 {
1004 dev->last.clipped = 1;
1005 continue;
1006 }
1007 }
1008 dev->last.clipped = 0;
1009 1009
1010 /* Calculate bounding box and new pen position based on font metrics */ 1010 /* Calculate bounding box and new pen position based on font metrics */
1011 if (span->items[i].gid >= 0) 1011 if (span->items[i].gid >= 0)
1012 adv = span->items[i].adv; 1012 adv = span->items[i].adv;
1013 else 1013 else
1057 1057
1058 return rune; 1058 return rune;
1059 } 1059 }
1060 1060
1061 static void 1061 static void
1062 flush_actualtext(fz_context *ctx, fz_stext_device *dev, const char *actualtext, int i) 1062 flush_actualtext(fz_context *ctx, fz_stext_device *dev, const char *actualtext, int i, int end)
1063 { 1063 {
1064 if (*actualtext == 0) 1064 if (*actualtext == 0)
1065 return; 1065 return;
1066 1066
1067 while (1) 1067 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT))
1068 if (dev->last.clipped)
1069 return;
1070
1071 while (end < 0 || (end >= 0 && i < end))
1068 { 1072 {
1069 int rune; 1073 int rune;
1070 actualtext += fz_chartorune(&rune, actualtext); 1074 actualtext += fz_chartorune(&rune, actualtext);
1071 1075
1072 if (rune == 0) 1076 if (rune == 0)
1073 break; 1077 break;
1074
1075 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT))
1076 if (dev->last.clipped)
1077 continue;
1078 1078
1079 fz_add_stext_char(ctx, dev, dev->last.font, 1079 fz_add_stext_char(ctx, dev, dev->last.font,
1080 rune, 1080 rune,
1081 -1, 1081 -1,
1082 dev->last.trm, 1082 dev->last.trm,
1119 /* Spot a matching prefix and send it. */ 1119 /* Spot a matching prefix and send it. */
1120 for (start = 0; start < span->len; start++) 1120 for (start = 0; start < span->len; start++)
1121 { 1121 {
1122 int rune; 1122 int rune;
1123 int len = fz_chartorune(&rune, actualtext); 1123 int len = fz_chartorune(&rune, actualtext);
1124 if (span->items[start].gid != rune || rune == 0) 1124 if (span->items[start].ucs != rune || rune == 0)
1125 break; 1125 break;
1126 actualtext += len; z--; 1126 actualtext += len; z--;
1127 } 1127 }
1128 if (start != 0) 1128 if (start != 0)
1129 do_extract(ctx, dev, span, ctm, 0, start, flags); 1129 do_extract(ctx, dev, span, ctm, 0, start, flags);
1144 /* Spot a matching postfix. Can't send it til the end. */ 1144 /* Spot a matching postfix. Can't send it til the end. */
1145 for (end = span->len; end > start; end--) 1145 for (end = span->len; end > start; end--)
1146 { 1146 {
1147 /* Nasty n^2 algo here, cos backtracking through utf8 is not trivial. It'll do. */ 1147 /* Nasty n^2 algo here, cos backtracking through utf8 is not trivial. It'll do. */
1148 int rune = rune_index(actualtext, z-1); 1148 int rune = rune_index(actualtext, z-1);
1149 if (span->items[end-1].gid != rune) 1149 if (span->items[end-1].ucs != rune)
1150 break; 1150 break;
1151 z--; 1151 z--;
1152 } 1152 }
1153 /* So we can send end -> span->len at the end. */ 1153 /* So we can send end -> span->len at the end. */
1154 1154
1157 /* items[start..end] have to be sent with actualtext[start..z] */ 1157 /* items[start..end] have to be sent with actualtext[start..z] */
1158 for (i = start; i < end; i++) 1158 for (i = start; i < end; i++)
1159 { 1159 {
1160 fz_text_item *item = &span->items[i]; 1160 fz_text_item *item = &span->items[i];
1161 int rune = -1; 1161 int rune = -1;
1162
1163 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT))
1164 {
1165 fz_rect r = current_clip(ctx, dev);
1166 if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r))
1167 {
1168 dev->last.clipped = 1;
1169 continue;
1170 }
1171 }
1172 dev->last.clipped = 0;
1162 1173
1163 if ((size_t)i < z) 1174 if ((size_t)i < z)
1164 actualtext += fz_chartorune(&rune, actualtext); 1175 actualtext += fz_chartorune(&rune, actualtext);
1165 1176
1166 /* Calculate new pen location and delta */ 1177 /* Calculate new pen location and delta */
1173 { 1184 {
1174 fz_drop_font(ctx, dev->last.font); 1185 fz_drop_font(ctx, dev->last.font);
1175 dev->last.font = fz_keep_font(ctx, font); 1186 dev->last.font = fz_keep_font(ctx, font);
1176 } 1187 }
1177 dev->last.valid = 1; 1188 dev->last.valid = 1;
1178 1189 dev->last.flags = flags;
1179 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT))
1180 {
1181 fz_rect r = current_clip(ctx, dev);
1182 if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r))
1183 {
1184 dev->last.clipped = 1;
1185 continue;
1186 }
1187 }
1188 dev->last.clipped = 0;
1189 1190
1190 /* Calculate bounding box and new pen position based on font metrics */ 1191 /* Calculate bounding box and new pen position based on font metrics */
1191 if (item->gid >= 0) 1192 if (item->gid >= 0)
1192 adv = item->adv; 1193 adv = item->adv;
1193 else 1194 else
1215 return; 1216 return;
1216 } 1217 }
1217 1218
1218 /* We found a matching postfix. It seems likely that this is going to be the only 1219 /* We found a matching postfix. It seems likely that this is going to be the only
1219 * text object we get, so send any remaining actualtext now. */ 1220 * text object we get, so send any remaining actualtext now. */
1220 flush_actualtext(ctx, dev, actualtext, i); 1221 flush_actualtext(ctx, dev, actualtext, i, i + strlen(actualtext) - (span->len - end));
1221 1222
1222 /* Send the postfix */ 1223 /* Send the postfix */
1223 if (end != span->len) 1224 if (end != span->len)
1224 do_extract(ctx, dev, span, ctm, end, span->len, flags); 1225 do_extract(ctx, dev, span, ctm, end, span->len, flags);
1225 1226
1335 1336
1336 static void 1337 static void
1337 fz_stext_begin_metatext(fz_context *ctx, fz_device *dev, fz_metatext meta, const char *text) 1338 fz_stext_begin_metatext(fz_context *ctx, fz_device *dev, fz_metatext meta, const char *text)
1338 { 1339 {
1339 fz_stext_device *tdev = (fz_stext_device*)dev; 1340 fz_stext_device *tdev = (fz_stext_device*)dev;
1340 metatext_t *mt = fz_malloc_struct(ctx, metatext_t); 1341 metatext_t *mt = find_actualtext(tdev);
1342
1343 if (mt != NULL && meta == FZ_METATEXT_ACTUALTEXT)
1344 flush_actualtext(ctx, tdev, mt->text, 0, -1);
1345
1346 if (meta == FZ_METATEXT_ACTUALTEXT)
1347 tdev->last.valid = 0;
1348
1349 mt = fz_malloc_struct(ctx, metatext_t);
1341 1350
1342 mt->prev = tdev->metatext; 1351 mt->prev = tdev->metatext;
1343 tdev->metatext = mt; 1352 tdev->metatext = mt;
1344 mt->type = meta; 1353 mt->type = meta;
1345 mt->text = text ? fz_strdup(ctx, text) : NULL; 1354 mt->text = text ? fz_strdup(ctx, text) : NULL;
1382 } 1391 }
1383 1392
1384 /* If we have a 'last' text position, send the content after that. */ 1393 /* If we have a 'last' text position, send the content after that. */
1385 if (tdev->last.valid) 1394 if (tdev->last.valid)
1386 { 1395 {
1387 flush_actualtext(ctx, tdev, tdev->metatext->text, 0); 1396 flush_actualtext(ctx, tdev, tdev->metatext->text, 0, -1);
1388 pop_metatext(ctx, tdev); 1397 pop_metatext(ctx, tdev);
1398 tdev->last.valid = 0;
1389 return; 1399 return;
1390 } 1400 }
1391 1401
1392 /* If we have collected a rectangle for content that encloses the actual text, 1402 /* If we have collected a rectangle for content that encloses the actual text,
1393 * send the content there. */ 1403 * send the content there. */
1399 tdev->last.trm.d = tdev->metatext->bounds.y1 - tdev->metatext->bounds.y0; 1409 tdev->last.trm.d = tdev->metatext->bounds.y1 - tdev->metatext->bounds.y0;
1400 tdev->last.trm.e = tdev->metatext->bounds.x0; 1410 tdev->last.trm.e = tdev->metatext->bounds.x0;
1401 tdev->last.trm.f = tdev->metatext->bounds.y0; 1411 tdev->last.trm.f = tdev->metatext->bounds.y0;
1402 } 1412 }
1403 else 1413 else
1404 fz_warn(ctx, "Actualtext with no position. Text may be lost or mispositioned."); 1414 {
1415 if ((dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) == 0)
1416 fz_warn(ctx, "Actualtext with no position. Text may be lost or mispositioned.");
1417 pop_metatext(ctx, tdev);
1418 return;
1419 }
1405 1420
1406 fz_var(myfont); 1421 fz_var(myfont);
1407 1422
1408 fz_try(ctx) 1423 fz_try(ctx)
1409 { 1424 {
1410 if (tdev->last.font == NULL) 1425 if (tdev->last.font == NULL)
1411 { 1426 {
1412 myfont = fz_new_base14_font(ctx, "Helvetica"); 1427 myfont = fz_new_base14_font(ctx, "Helvetica");
1413 tdev->last.font = myfont; 1428 tdev->last.font = myfont;
1414 } 1429 }
1415 flush_actualtext(ctx, tdev, tdev->metatext->text, 0); 1430 flush_actualtext(ctx, tdev, tdev->metatext->text, 0, -1);
1416 pop_metatext(ctx, tdev); 1431 pop_metatext(ctx, tdev);
1417 } 1432 }
1418 fz_always(ctx) 1433 fz_always(ctx)
1419 { 1434 {
1420 if (myfont) 1435 if (myfont)