Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/stext-device.c @ 40:aa33339d6b8a upstream
ADD: MuPDF v1.26.10: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.5.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 11:31:38 +0200 |
| parents | b50eed0cc0ef |
| children |
comparison
equal
deleted
inserted
replaced
| 39:a6bc019ac0b2 | 40:aa33339d6b8a |
|---|---|
| 980 int unicode; | 980 int unicode; |
| 981 int i; | 981 int i; |
| 982 | 982 |
| 983 for (i = start; i < end; i++) | 983 for (i = start; i < end; i++) |
| 984 { | 984 { |
| 985 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) | |
| 986 { | |
| 987 fz_rect r = current_clip(ctx, dev); | |
| 988 if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r)) | |
| 989 { | |
| 990 dev->last.clipped = 1; | |
| 991 continue; | |
| 992 } | |
| 993 } | |
| 994 dev->last.clipped = 0; | |
| 995 | |
| 985 /* Calculate new pen location and delta */ | 996 /* Calculate new pen location and delta */ |
| 986 tm.e = span->items[i].x; | 997 tm.e = span->items[i].x; |
| 987 tm.f = span->items[i].y; | 998 tm.f = span->items[i].y; |
| 988 dev->last.trm = fz_concat(tm, ctm); | 999 dev->last.trm = fz_concat(tm, ctm); |
| 989 dev->last.bidi_level = span->bidi_level; | 1000 dev->last.bidi_level = span->bidi_level; |
| 993 fz_drop_font(ctx, dev->last.font); | 1004 fz_drop_font(ctx, dev->last.font); |
| 994 dev->last.font = fz_keep_font(ctx, font); | 1005 dev->last.font = fz_keep_font(ctx, font); |
| 995 } | 1006 } |
| 996 dev->last.valid = 1; | 1007 dev->last.valid = 1; |
| 997 dev->last.flags = flags; | 1008 dev->last.flags = flags; |
| 998 | |
| 999 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) | |
| 1000 { | |
| 1001 fz_rect r = current_clip(ctx, dev); | |
| 1002 if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r)) | |
| 1003 { | |
| 1004 dev->last.clipped = 1; | |
| 1005 continue; | |
| 1006 } | |
| 1007 } | |
| 1008 dev->last.clipped = 0; | |
| 1009 | 1009 |
| 1010 /* Calculate bounding box and new pen position based on font metrics */ | 1010 /* Calculate bounding box and new pen position based on font metrics */ |
| 1011 if (span->items[i].gid >= 0) | 1011 if (span->items[i].gid >= 0) |
| 1012 adv = span->items[i].adv; | 1012 adv = span->items[i].adv; |
| 1013 else | 1013 else |
| 1057 | 1057 |
| 1058 return rune; | 1058 return rune; |
| 1059 } | 1059 } |
| 1060 | 1060 |
| 1061 static void | 1061 static void |
| 1062 flush_actualtext(fz_context *ctx, fz_stext_device *dev, const char *actualtext, int i) | 1062 flush_actualtext(fz_context *ctx, fz_stext_device *dev, const char *actualtext, int i, int end) |
| 1063 { | 1063 { |
| 1064 if (*actualtext == 0) | 1064 if (*actualtext == 0) |
| 1065 return; | 1065 return; |
| 1066 | 1066 |
| 1067 while (1) | 1067 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) |
| 1068 if (dev->last.clipped) | |
| 1069 return; | |
| 1070 | |
| 1071 while (end < 0 || (end >= 0 && i < end)) | |
| 1068 { | 1072 { |
| 1069 int rune; | 1073 int rune; |
| 1070 actualtext += fz_chartorune(&rune, actualtext); | 1074 actualtext += fz_chartorune(&rune, actualtext); |
| 1071 | 1075 |
| 1072 if (rune == 0) | 1076 if (rune == 0) |
| 1073 break; | 1077 break; |
| 1074 | |
| 1075 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) | |
| 1076 if (dev->last.clipped) | |
| 1077 continue; | |
| 1078 | 1078 |
| 1079 fz_add_stext_char(ctx, dev, dev->last.font, | 1079 fz_add_stext_char(ctx, dev, dev->last.font, |
| 1080 rune, | 1080 rune, |
| 1081 -1, | 1081 -1, |
| 1082 dev->last.trm, | 1082 dev->last.trm, |
| 1119 /* Spot a matching prefix and send it. */ | 1119 /* Spot a matching prefix and send it. */ |
| 1120 for (start = 0; start < span->len; start++) | 1120 for (start = 0; start < span->len; start++) |
| 1121 { | 1121 { |
| 1122 int rune; | 1122 int rune; |
| 1123 int len = fz_chartorune(&rune, actualtext); | 1123 int len = fz_chartorune(&rune, actualtext); |
| 1124 if (span->items[start].gid != rune || rune == 0) | 1124 if (span->items[start].ucs != rune || rune == 0) |
| 1125 break; | 1125 break; |
| 1126 actualtext += len; z--; | 1126 actualtext += len; z--; |
| 1127 } | 1127 } |
| 1128 if (start != 0) | 1128 if (start != 0) |
| 1129 do_extract(ctx, dev, span, ctm, 0, start, flags); | 1129 do_extract(ctx, dev, span, ctm, 0, start, flags); |
| 1144 /* Spot a matching postfix. Can't send it til the end. */ | 1144 /* Spot a matching postfix. Can't send it til the end. */ |
| 1145 for (end = span->len; end > start; end--) | 1145 for (end = span->len; end > start; end--) |
| 1146 { | 1146 { |
| 1147 /* Nasty n^2 algo here, cos backtracking through utf8 is not trivial. It'll do. */ | 1147 /* Nasty n^2 algo here, cos backtracking through utf8 is not trivial. It'll do. */ |
| 1148 int rune = rune_index(actualtext, z-1); | 1148 int rune = rune_index(actualtext, z-1); |
| 1149 if (span->items[end-1].gid != rune) | 1149 if (span->items[end-1].ucs != rune) |
| 1150 break; | 1150 break; |
| 1151 z--; | 1151 z--; |
| 1152 } | 1152 } |
| 1153 /* So we can send end -> span->len at the end. */ | 1153 /* So we can send end -> span->len at the end. */ |
| 1154 | 1154 |
| 1157 /* items[start..end] have to be sent with actualtext[start..z] */ | 1157 /* items[start..end] have to be sent with actualtext[start..z] */ |
| 1158 for (i = start; i < end; i++) | 1158 for (i = start; i < end; i++) |
| 1159 { | 1159 { |
| 1160 fz_text_item *item = &span->items[i]; | 1160 fz_text_item *item = &span->items[i]; |
| 1161 int rune = -1; | 1161 int rune = -1; |
| 1162 | |
| 1163 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) | |
| 1164 { | |
| 1165 fz_rect r = current_clip(ctx, dev); | |
| 1166 if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r)) | |
| 1167 { | |
| 1168 dev->last.clipped = 1; | |
| 1169 continue; | |
| 1170 } | |
| 1171 } | |
| 1172 dev->last.clipped = 0; | |
| 1162 | 1173 |
| 1163 if ((size_t)i < z) | 1174 if ((size_t)i < z) |
| 1164 actualtext += fz_chartorune(&rune, actualtext); | 1175 actualtext += fz_chartorune(&rune, actualtext); |
| 1165 | 1176 |
| 1166 /* Calculate new pen location and delta */ | 1177 /* Calculate new pen location and delta */ |
| 1173 { | 1184 { |
| 1174 fz_drop_font(ctx, dev->last.font); | 1185 fz_drop_font(ctx, dev->last.font); |
| 1175 dev->last.font = fz_keep_font(ctx, font); | 1186 dev->last.font = fz_keep_font(ctx, font); |
| 1176 } | 1187 } |
| 1177 dev->last.valid = 1; | 1188 dev->last.valid = 1; |
| 1178 | 1189 dev->last.flags = flags; |
| 1179 if (dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) | |
| 1180 { | |
| 1181 fz_rect r = current_clip(ctx, dev); | |
| 1182 if (fz_glyph_entirely_outside_box(ctx, &ctm, span, &span->items[i], &r)) | |
| 1183 { | |
| 1184 dev->last.clipped = 1; | |
| 1185 continue; | |
| 1186 } | |
| 1187 } | |
| 1188 dev->last.clipped = 0; | |
| 1189 | 1190 |
| 1190 /* Calculate bounding box and new pen position based on font metrics */ | 1191 /* Calculate bounding box and new pen position based on font metrics */ |
| 1191 if (item->gid >= 0) | 1192 if (item->gid >= 0) |
| 1192 adv = item->adv; | 1193 adv = item->adv; |
| 1193 else | 1194 else |
| 1215 return; | 1216 return; |
| 1216 } | 1217 } |
| 1217 | 1218 |
| 1218 /* We found a matching postfix. It seems likely that this is going to be the only | 1219 /* We found a matching postfix. It seems likely that this is going to be the only |
| 1219 * text object we get, so send any remaining actualtext now. */ | 1220 * text object we get, so send any remaining actualtext now. */ |
| 1220 flush_actualtext(ctx, dev, actualtext, i); | 1221 flush_actualtext(ctx, dev, actualtext, i, i + strlen(actualtext) - (span->len - end)); |
| 1221 | 1222 |
| 1222 /* Send the postfix */ | 1223 /* Send the postfix */ |
| 1223 if (end != span->len) | 1224 if (end != span->len) |
| 1224 do_extract(ctx, dev, span, ctm, end, span->len, flags); | 1225 do_extract(ctx, dev, span, ctm, end, span->len, flags); |
| 1225 | 1226 |
| 1335 | 1336 |
| 1336 static void | 1337 static void |
| 1337 fz_stext_begin_metatext(fz_context *ctx, fz_device *dev, fz_metatext meta, const char *text) | 1338 fz_stext_begin_metatext(fz_context *ctx, fz_device *dev, fz_metatext meta, const char *text) |
| 1338 { | 1339 { |
| 1339 fz_stext_device *tdev = (fz_stext_device*)dev; | 1340 fz_stext_device *tdev = (fz_stext_device*)dev; |
| 1340 metatext_t *mt = fz_malloc_struct(ctx, metatext_t); | 1341 metatext_t *mt = find_actualtext(tdev); |
| 1342 | |
| 1343 if (mt != NULL && meta == FZ_METATEXT_ACTUALTEXT) | |
| 1344 flush_actualtext(ctx, tdev, mt->text, 0, -1); | |
| 1345 | |
| 1346 if (meta == FZ_METATEXT_ACTUALTEXT) | |
| 1347 tdev->last.valid = 0; | |
| 1348 | |
| 1349 mt = fz_malloc_struct(ctx, metatext_t); | |
| 1341 | 1350 |
| 1342 mt->prev = tdev->metatext; | 1351 mt->prev = tdev->metatext; |
| 1343 tdev->metatext = mt; | 1352 tdev->metatext = mt; |
| 1344 mt->type = meta; | 1353 mt->type = meta; |
| 1345 mt->text = text ? fz_strdup(ctx, text) : NULL; | 1354 mt->text = text ? fz_strdup(ctx, text) : NULL; |
| 1382 } | 1391 } |
| 1383 | 1392 |
| 1384 /* If we have a 'last' text position, send the content after that. */ | 1393 /* If we have a 'last' text position, send the content after that. */ |
| 1385 if (tdev->last.valid) | 1394 if (tdev->last.valid) |
| 1386 { | 1395 { |
| 1387 flush_actualtext(ctx, tdev, tdev->metatext->text, 0); | 1396 flush_actualtext(ctx, tdev, tdev->metatext->text, 0, -1); |
| 1388 pop_metatext(ctx, tdev); | 1397 pop_metatext(ctx, tdev); |
| 1398 tdev->last.valid = 0; | |
| 1389 return; | 1399 return; |
| 1390 } | 1400 } |
| 1391 | 1401 |
| 1392 /* If we have collected a rectangle for content that encloses the actual text, | 1402 /* If we have collected a rectangle for content that encloses the actual text, |
| 1393 * send the content there. */ | 1403 * send the content there. */ |
| 1399 tdev->last.trm.d = tdev->metatext->bounds.y1 - tdev->metatext->bounds.y0; | 1409 tdev->last.trm.d = tdev->metatext->bounds.y1 - tdev->metatext->bounds.y0; |
| 1400 tdev->last.trm.e = tdev->metatext->bounds.x0; | 1410 tdev->last.trm.e = tdev->metatext->bounds.x0; |
| 1401 tdev->last.trm.f = tdev->metatext->bounds.y0; | 1411 tdev->last.trm.f = tdev->metatext->bounds.y0; |
| 1402 } | 1412 } |
| 1403 else | 1413 else |
| 1404 fz_warn(ctx, "Actualtext with no position. Text may be lost or mispositioned."); | 1414 { |
| 1415 if ((dev->flags & (FZ_STEXT_CLIP | FZ_STEXT_CLIP_RECT)) == 0) | |
| 1416 fz_warn(ctx, "Actualtext with no position. Text may be lost or mispositioned."); | |
| 1417 pop_metatext(ctx, tdev); | |
| 1418 return; | |
| 1419 } | |
| 1405 | 1420 |
| 1406 fz_var(myfont); | 1421 fz_var(myfont); |
| 1407 | 1422 |
| 1408 fz_try(ctx) | 1423 fz_try(ctx) |
| 1409 { | 1424 { |
| 1410 if (tdev->last.font == NULL) | 1425 if (tdev->last.font == NULL) |
| 1411 { | 1426 { |
| 1412 myfont = fz_new_base14_font(ctx, "Helvetica"); | 1427 myfont = fz_new_base14_font(ctx, "Helvetica"); |
| 1413 tdev->last.font = myfont; | 1428 tdev->last.font = myfont; |
| 1414 } | 1429 } |
| 1415 flush_actualtext(ctx, tdev, tdev->metatext->text, 0); | 1430 flush_actualtext(ctx, tdev, tdev->metatext->text, 0, -1); |
| 1416 pop_metatext(ctx, tdev); | 1431 pop_metatext(ctx, tdev); |
| 1417 } | 1432 } |
| 1418 fz_always(ctx) | 1433 fz_always(ctx) |
| 1419 { | 1434 { |
| 1420 if (myfont) | 1435 if (myfont) |
