<!--
    Bean definition with id "extracter.pdf", implemented by
    es.org.extractor.PDFCustomExtractor and inheriting its remaining settings
    from the "baseMetadataExtracter" parent bean.
    NOTE(review): the id presumably replaces a stock PDF extracter bean of the
    same name; confirm against the host application's context files.
-->
<bean id="extracter.pdf"
      class="es.org.extractor.PDFCustomExtractor"
      parent="baseMetadataExtracter">
    <property name="inheritDefaultMapping" value="true" />
    <!-- Mapping is loaded from a properties file on the classpath. -->
    <property name="mappingProperties">
        <bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
            <property name="location"
                      value="classpath:alfresco/extension/custom-pdf-extractor-mappings.properties" />
        </bean>
    </property>
</bean>
/**
 * Metadata extracter for PDF documents.
 *
 * <p>Besides the author, title, subject and creation date of the document
 * information dictionary, the keywords field is interpreted as a
 * semicolon-separated list of {@code KEY=value} pairs; every well-formed pair
 * is published as an additional raw property named {@code KEY}. The cleaned
 * keywords string itself is published under {@code "keywords"}.
 *
 * <p>Encrypted documents are not inspected: an empty raw map is returned.
 */
public class PDFCustomExtractor extends AbstractMappingMetadataExtracter
{
    private static final String KEY_KEYWORDS = "keywords";
    public static String[] SUPPORTED_MIMETYPES = { "application/pdf" };
    private static final Log log = LogFactory.getLog(PDFCustomExtractor.class);

    /** Creates an extracter that declares {@link #SUPPORTED_MIMETYPES} as supported. */
    public PDFCustomExtractor()
    {
        super(new HashSet<String>(Arrays.asList(SUPPORTED_MIMETYPES)));
    }

    /**
     * Reads the raw metadata of the PDF supplied by {@code reader}.
     *
     * @param reader source of the PDF content
     * @return the raw properties found; empty when the document is encrypted
     * @throws Throwable if the content cannot be opened or parsed as a PDF
     */
    public Map<String, Serializable> extractRaw(ContentReader reader)
        throws Throwable
    {
        Map<String, Serializable> rawProperties = newRawMap();
        PDDocument pdf = null;
        InputStream is = null;
        try
        {
            is = reader.getContentInputStream();
            pdf = PDDocument.load(is);
            if (pdf.isEncrypted())
            {
                // The metadata of an encrypted document is not read.
                log.info("PDF is encrypted; metadata extraction skipped.");
            }
            else
            {
                extractDocumentInformation(pdf.getDocumentInformation(), rawProperties);
            }
        }
        finally
        {
            if (is != null)
            {
                try
                {
                    is.close();
                }
                catch (IOException e)
                {
                    // Best effort: a failed close must not mask the extraction outcome.
                    log.debug("Could not close the PDF content stream", e);
                }
            }
            if (pdf != null)
            {
                try
                {
                    pdf.close();
                }
                catch (Throwable e)
                {
                    // Best effort, as above.
                    log.warn("Could not close the PDF document", e);
                }
            }
        }
        log.info("\n\nPropiedades de vuelta:" + rawProperties.toString() + "\n");
        return rawProperties;
    }

    /** Copies the standard fields, the keyword pairs and the creation date into {@code rawProperties}. */
    private void extractDocumentInformation(PDDocumentInformation docInfo,
                                            Map<String, Serializable> rawProperties)
    {
        putRawValue("author", docInfo.getAuthor(), rawProperties);
        putRawValue("title", docInfo.getTitle(), rawProperties);
        putRawValue("subject", docInfo.getSubject(), rawProperties);

        extractKeywords(docInfo.getKeywords(), rawProperties);

        try
        {
            Calendar created = docInfo.getCreationDate();
            if (created != null)
            {
                putRawValue("created", created.getTime(), rawProperties);
            }
        }
        catch (IOException e)
        {
            // An unreadable creation date only costs that single property.
            log.warn("Could not read the PDF creation date", e);
        }
    }

    /**
     * Publishes every {@code KEY=value} pair of the keywords field and then the
     * cleaned keywords string itself. Does nothing but log when
     * {@code keywords} is {@code null}.
     */
    private void extractKeywords(String keywords, Map<String, Serializable> rawProperties)
    {
        if (keywords == null)
        {
            log.info("\n\nKeywords es null.\n");
            return;
        }

        String cleaned = stripEnclosingQuotes(keywords.trim()).trim();

        StringTokenizer pairs = new StringTokenizer(cleaned, ";");
        while (pairs.hasMoreTokens())
        {
            String pair = pairs.nextToken();
            StringTokenizer parts = new StringTokenizer(pair, "=");
            if (parts.countTokens() < 2)
            {
                // A token without both a key and a value is skipped on its own,
                // so the pairs that follow it are still processed.
                log.info("Ignoring malformed keyword entry: " + pair);
                continue;
            }
            putRawValue(parts.nextToken(), parts.nextToken(), rawProperties);
        }

        log.info("\n\nKeywords es:" + cleaned + ".\n");
        putRawValue(KEY_KEYWORDS, cleaned, rawProperties);
    }

    /** Removes all leading, then all trailing, double-quote characters; safe on an empty string. */
    private static String stripEnclosingQuotes(String text)
    {
        String result = text;
        while (result.startsWith("\""))
        {
            result = result.substring(1);
        }
        while (result.endsWith("\""))
        {
            result = result.substring(0, result.length() - 1);
        }
        return result;
    }
}
# Declares the "dm" prefix that the mapping targets below use.
namespace.prefix.dm=extension.miModelo
# Each entry maps a raw key (left) to a "dm"-prefixed property (right).
# NOTE(review): presumably the raw keys are the KEY parts of the "KEY=value"
# pairs that the extracter reads from the PDF keywords field - confirm.
CODIGO=dm:codigo
FACTURA=dm:factura
NOMBRE=dm:nombre
This archive contains content from before 2016 and from language groups that have been closed.
Content is read-only.
By using this site, you are agreeing to allow us to collect and use cookies as outlined in Alfresco’s Cookie Statement and Terms of Use (and you have a legitimate interest in Alfresco and our products, authorizing us to contact you in such methods). If you are not ok with these terms, please do not use this website.