AnsweredAssumed Answered

[Enterprise 2.2] Creation Date custom metadata extraction

Question asked by jayjayecl on Jun 27, 2008
Hello,

I am having trouble with custom metadata extraction on Alfresco Enterprise 2.2.

The problem is that the "modified date" and "created date" of each content item added to the repository are set to the time it was added. I would like them to be set to the document's actual creation and "last modified" dates.

Before moving to an Alfresco Enterprise 2.2 environment, I had this working on Alfresco Community 2.1 with:
- custom-metadata-extrators.xml in tomcat/shared /…/extensions
- a content rule that triggers the metadata extraction process whenever content is added

I read a lot of topics here dealing with this problem, but the answers could not help me ("go see http://wiki.alfresco.com/wiki/Metadata_Extraction …", "overwrite policy …", etc ).

I enabled debug logging but could not pinpoint the cause of this problem.

ABOUT ALFRESCO COMMUNITY 2.1 :

the custom-metadata-extraction-context.xml :


<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<beans>
   
   <bean id="extracter.Office" class="org.alfresco.repo.content.metadata.OfficeMetadataExtracter" parent="baseMetadataExtracter" >
      <property name="inheritDefaultMapping">
         <value>true</value>
      </property>
      <property name="overwritePolicy">
         <value>EAGER</value>
      </property>
      <property name="mappingProperties">
         <props>
            <prop key="namespace.prefix.cm">http://www.alfresco.org/model/content/1.0</prop>
            <prop key="author">cm:author</prop>
            <prop key="title">cm:title</prop>
            <prop key="subject">cm:description</prop>
            <prop key="createDateTime">cm:created</prop>
            <prop key="lastSaveDateTime">cm:modified</prop>
         </props>
      </property>
    </bean>

</beans>


the content-services-context.xml :


<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>

<beans>
  
   <bean id="fileContentStore" class="org.alfresco.repo.content.filestore.FileContentStore">
      <constructor-arg>
         <value>${dir.contentstore}</value>
      </constructor-arg>
   </bean>
  
   <!– deleted content will get pushed into this store, where it can be cleaned up at will –>
   <bean id="deletedContentStore" class="org.alfresco.repo.content.filestore.FileContentStore">
      <constructor-arg>
         <value>${dir.contentstore.deleted}</value>
      </constructor-arg>
   </bean>
   <!– bean to move deleted content into the the backup store –>
   <bean id="deletedContentBackupListener" class="org.alfresco.repo.content.cleanup.DeletedContentBackupCleanerListener" >
      <property name="store">
         <ref bean="deletedContentStore" />
      </property>
   </bean>
   <!– Performs the content cleanup –>
   <bean id="contentStoreCleaner" class="org.alfresco.repo.content.cleanup.ContentStoreCleaner" >
      <property name="dictionaryService">
         <ref bean="dictionaryService" />
      </property>
      <property name="nodeDaoService" >
         <ref bean="nodeDaoService" />
      </property>
      <property name="avmNodeDAO">
            <ref bean="avmNodeDAO"/>
      </property>
      <property name="transactionService" >
         <ref bean="transactionService" />
      </property>
      <property name="protectDays" >
         <value>14</value>
      </property>
      <property name="stores" >
         <list>
            <ref bean="fileContentStore" />
         </list>
      </property>
      <property name="listeners" >
         <list>
            <ref bean="deletedContentBackupListener" />
         </list>
      </property>
   </bean>

   <bean id="contentService" class="org.alfresco.repo.content.RoutingContentService" init-method="init">
      <property name="transactionService">
          <ref bean="transactionService" />
      </property>
      <property name="retryingTransactionHelper">
          <ref bean="retryingTransactionHelper"/>
      </property>
      <property name="dictionaryService">
          <ref bean="dictionaryService" />
      </property>
      <property name="nodeService">
          <ref bean="nodeService" />
      </property>
      <property name="transformerRegistry">
          <ref bean="contentTransformerRegistry" />
      </property>
      <property name="store">
          <ref bean="fileContentStore" />
      </property>
      <property name="policyComponent">
          <ref bean="policyComponent" />
      </property>
      <property name="avmService">
          <ref bean="avmService"/>
      </property>
      <property name="imageMagickContentTransformer">
         <ref bean="transformer.ImageMagick" />
      </property>
   </bean>
   
    <bean id="mimetypeConfigService" class="org.alfresco.config.xml.XMLConfigService" init-method="init">
        <constructor-arg>
            <bean class="org.alfresco.config.source.UrlConfigSource">
                <constructor-arg>
                    <list>
                        <value>classpath:alfresco/mimetype/mimetype-map.xml</value>
                        <value>classpath:alfresco/mimetype/mimetype-map-openoffice.xml</value>
                    </list>
                </constructor-arg>
            </bean>
        </constructor-arg>
    </bean>

   <bean id="mimetypeService" class="org.alfresco.repo.content.MimetypeMap" init-method="init" >
      <property name="configService">
         <ref bean="mimetypeConfigService" />
      </property>
      <property name="contentCharsetFinder">
         <ref bean="charset.finder"/>
      </property>
   </bean>
  
   <bean id="contentFilterLanguagesConfigService" class="org.alfresco.config.xml.XMLConfigService" init-method="init">
      <constructor-arg>
         <bean class="org.alfresco.config.source.UrlConfigSource">
            <constructor-arg>
               <list>
                  <value>classpath:alfresco/ml/content-filter-lang.xml</value>
               </list>
            </constructor-arg>
         </bean>
      </constructor-arg>
   </bean>

   <bean id="contentFilterLanguagesService" class="org.alfresco.repo.model.ml.ContentFilterLanguagesMap" init-method="init" >
      <property name="configService">
         <ref bean="contentFilterLanguagesConfigService" />
      </property>
   </bean>
  
   <bean id="openOfficeConnection" class="net.sf.jooreports.openoffice.connection.SocketOpenOfficeConnection" />
   <bean id="openOfficeConnectionTester" class="org.alfresco.util.OpenOfficeConnectionTester" init-method="checkConnection" >
      <property name="connection">
         <ref bean="openOfficeConnection" />
      </property>
      <property name="strict">
         <value>false</value>
      </property>
   </bean>
  
   <!– Metadata Extraction Regisitry –>
   <bean id="metadataExtracterRegistry" class="org.alfresco.repo.content.metadata.MetadataExtracterRegistry" />
  
   <!– Abstract bean definition defining base definition for all metadata extracters –>
   <bean id="baseMetadataExtracter"
         class="org.alfresco.repo.content.metadata.AbstractMetadataExtracter"
         abstract="true"
         init-method="register">
      <property name="registry">
         <ref bean="metadataExtracterRegistry" />
      </property>
      <property name="mimetypeService">
         <ref bean="mimetypeService" />
      </property>
   </bean>
  
   <!– Content Metadata Extracters –>
   <bean id="extracter.PDFBox" class="org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter" parent="baseMetadataExtracter" />
   <bean id="extracter.Office" class="org.alfresco.repo.content.metadata.OfficeMetadataExtracter" parent="baseMetadataExtracter" />
   <bean id="extracter.Mail" class="org.alfresco.repo.content.metadata.MailMetadataExtracter" parent="baseMetadataExtracter" />
   <bean id="extracter.Html" class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter" parent="baseMetadataExtracter" />
   <bean id="extracter.MP3" class="org.alfresco.repo.content.metadata.MP3MetadataExtracter" parent="baseMetadataExtracter" />
   <bean id="extracter.OpenDocument" class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter" parent="baseMetadataExtracter" />
   <bean id="extracter.OpenOffice" class="org.alfresco.repo.content.metadata.OpenOfficeMetadataExtracter" parent="baseMetadataExtracter" >
      <property name="connection">
         <ref bean="openOfficeConnection" />
      </property>
   </bean>
  

   <!– Content Transformation Regisitry –>
   <bean id="contentTransformerRegistry" class="org.alfresco.repo.content.transform.ContentTransformerRegistry" />

   <!– Abstract bean definition defining base definition for all transformers –>
   <bean id="baseContentTransformer"
         class="org.alfresco.repo.content.transform.AbstractContentTransformer"
         abstract="true"
         init-method="register">
      <property name="mimetypeService">
         <ref bean="mimetypeService" />
      </property>
      <property name="registry">
         <ref bean="contentTransformerRegistry" />
      </property>
   </bean>
  
   <!– Content Transformations –>
   <bean id="transformer.StringExtracter"
         class="org.alfresco.repo.content.transform.StringExtractingContentTransformer"
         parent="baseContentTransformer" />

   <bean id="transformer.BinaryPassThrough"
         class="org.alfresco.repo.content.transform.BinaryPassThroughContentTransformer"
         parent="baseContentTransformer" />

   <bean id="transformer.PdfBox"
         class="org.alfresco.repo.content.transform.PdfBoxContentTransformer"
         parent="baseContentTransformer" >
      <property name="explicitTransformations">
         <list>
            <bean class="org.alfresco.repo.content.transform.ContentTransformerRegistry$TransformationKey" >
                <constructor-arg><value>application/pdf</value></constructor-arg>
                <constructor-arg><value>text/plain</value></constructor-arg>
            </bean>
         </list>
      </property>
   </bean>

   <bean id="transformer.Poi"
         class="org.alfresco.repo.content.transform.PoiHssfContentTransformer"
         parent="baseContentTransformer" />

   <bean id="transformer.TextMining"
         class="org.alfresco.repo.content.transform.TextMiningContentTransformer"
         parent="baseContentTransformer" >
      <property name="explicitTransformations">
         <list>
            <bean class="org.alfresco.repo.content.transform.ContentTransformerRegistry$TransformationKey" >
                <constructor-arg><value>application/msword</value></constructor-arg>
                <constructor-arg><value>text/plain</value></constructor-arg>
            </bean>
         </list>
      </property>
   </bean>

   <bean id="transformer.HtmlParser"
         class="org.alfresco.repo.content.transform.HtmlParserContentTransformer"
         parent="baseContentTransformer" />

   <bean id="transformer.OpenOffice"
         class="org.alfresco.repo.content.transform.OpenOfficeContentTransformer"
         parent="baseContentTransformer" >
      <property name="connection">
         <ref bean="openOfficeConnection" />
      </property>
      <property name="documentFormatsConfiguration">
         <value>classpath:alfresco/mimetype/openoffice-document-formats.xml</value>
      </property>
   </bean>

   <bean id="transformer.complex.OpenOffice.PdfBox"
        class="org.alfresco.repo.content.transform.ComplexContentTransformer"
        parent="baseContentTransformer" >
      <property name="transformers">
         <list>
            <ref bean="transformer.OpenOffice" />
            <ref bean="transformer.PdfBox" />
         </list>
      </property>
      <property name="intermediateMimetypes">
         <list>
            <value>application/pdf</value>
         </list>
      </property>
   </bean>
  
   <bean id="transformer.OutlookMsg"
         class="org.alfresco.repo.content.transform.MailContentTransformer"
         parent="baseContentTransformer" />

   <!–
   <bean id="transformer.JMagick" class="org.alfresco.repo.content.transform.magick.JMagickContentTransformer" init-method="init" />
   </bean>
   –>

   <bean id="transformer.ImageMagick"
        class="org.alfresco.repo.content.transform.magick.ImageMagickContentTransformer"
        parent="baseContentTransformer"
        init-method="init">
      <property name="executer">
         <bean name="transformer.ImageMagick.Command" class="org.alfresco.util.exec.RuntimeExec">
            <property name="commandMap">
                <map>
                    <entry key="Windows.*">
                        <value>imconvert "${source}" ${options} "${target}"</value>
                    </entry>
                    <entry key=".*">
                        <value>convert ${source} ${options} ${target}</value>
                    </entry>
                </map>
            </property>
            <property name="defaultProperties">
                <props>
                    <prop key="options"></prop>
                </props>
            </property>
         </bean>
      </property>
   </bean>

</beans>



I ran a test by adding an MS Office Excel document (creation date 11/04/2004, i.e. 4 November 2004).
The logs:



14:37:17,823 DEBUG [content.metadata.MetadataExtracterRegistry] Finding best extracter for application/vnd.excel
14:37:17,827 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Starting metadata extraction:
   reader: ContentAccessor[
            contentUrl=store:///opt/alfresco/tomcat/temp/Alfresco/alfresco14991.upload,
            mimetype=application/vnd.excel,
            size=15360,
            encoding=UTF-8,
            locale=en_US
           ]

   extracter: org.alfresco.repo.content.metadata.OfficeMetadataExtracter@9dc852


14:37:17,960 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Converted extracted raw values to system values:
   Raw Properties:    {
            pageCount=0,
            createDateTime=Thu Nov 04 15:09:41 CET 2004,
            osVersion=131333,
            lastPrinted=Wed Feb 02 14:52:51 CET 2005,
            format=0,
            wordCount=0,
            author=NCASMT,
            lastAuthor=******,
            lastSaveDateTime=Wed Oct 19 15:17:26 CEST 2005
         }

   System Properties: {
            {http://www.alfresco.org/model/content/1.0}modified=Wed Oct 19 15:17:26 CEST 2005,
            {http://www.alfresco.org/model/content/1.0}author=NCASMT,
            {http://www.alfresco.org/model/content/1.0}created=Thu Nov 04 15:09:41 CET 2004
         }


14:37:17,961 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Completed metadata extraction:
   reader:    ContentAccessor[
            contentUrl=store:///opt/alfresco/tomcat/temp/Alfresco/alfresco14991.upload,
            mimetype=application/vnd.excel,
            size=15360,
            encoding=UTF-8,
            locale=en_US
              ]

   extracter: org.alfresco.repo.content.metadata.OfficeMetadataExtracter@9dc852

   changed:   {
            {http://www.alfresco.org/model/content/1.0}created=Thu Nov 04 15:09:41 CET 2004,
            {http://www.alfresco.org/model/content/1.0}author=NCASMT,
            {http://www.alfresco.org/model/content/1.0}modified=Wed Oct 19 15:17:26 CEST 2005
      }


14:37:18,233 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Starting metadata extraction:
   reader: ContentAccessor[
            contentUrl=store://2008/6/27/14/37/c76a7e4d-4445-11dd-b6b2-176d7b65f74c.bin,
            mimetype=application/vnd.excel,
            size=15360,
            encoding=UTF-8,
            locale=en_US
            ]

   extracter: org.alfresco.repo.content.metadata.OfficeMetadataExtracter@9dc852

14:37:18,234 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Converted extracted raw values to system values:
   Raw Properties:    {
            pageCount=0,
            createDateTime=Thu Nov 04 15:09:41 CET 2004,
            osVersion=131333,
            lastPrinted=Wed Feb 02 14:52:51 CET 2005,
            format=0,
            wordCount=0,
            author=NCASMT,
            lastAuthor=******,
            lastSaveDateTime=Wed Oct 19 15:17:26 CEST 2005
         }

   System Properties: {
            {http://www.alfresco.org/model/content/1.0}modified=Wed Oct 19 15:17:26 CEST 2005,
            {http://www.alfresco.org/model/content/1.0}author=NCASMT,
            {http://www.alfresco.org/model/content/1.0}created=Thu Nov 04 15:09:41 CET 2004
         }

14:37:18,234 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Completed metadata extraction:
   reader:    ContentAccessor[
            contentUrl=store://2008/6/27/14/37/c76a7e4d-4445-11dd-b6b2-176d7b65f74c.bin,
            mimetype=application/vnd.excel,
            size=15360,
            encoding=UTF-8,
            locale=en_US
               ]

   extracter: org.alfresco.repo.content.metadata.OfficeMetadataExtracter@9dc852
  
   changed:   {
            {http://www.alfresco.org/model/content/1.0}created=Thu Nov 04 15:09:41 CET 2004,
            {http://www.alfresco.org/model/content/1.0}author=NCASMT,
            {http://www.alfresco.org/model/content/1.0}modified=Wed Oct 19 15:17:26 CEST 2005
      }


Metadata extraction works as expected on this Alfresco Community version.




ABOUT ALFRESCO ENTERPRISE 2.2 :

the custom-metadata-extraction-context.xml :


<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>

<beans>

   <bean id="extracter.Office" class="org.alfresco.repo.content.metadata.OfficeMetadataExtracter" parent="baseMetadataExtracter" >
      <property name="inheritDefaultMapping">
         <value>true</value>
      </property>
      <property name="overwritePolicy">
         <value>EAGER</value>
      </property>
      <property name="mappingProperties">
         <props>
            <prop key="namespace.prefix.cm">http://www.alfresco.org/model/content/1.0</prop>
            <prop key="author">cm:author</prop>
            <prop key="title">cm:title</prop>
            <prop key="subject">cm:description</prop>
            <prop key="createDateTime">cm:created</prop>
            <prop key="lastSaveDateTime">cm:modified</prop>
         </props>
      </property>
    </bean>

</beans>


the content-services-context.xml :


<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>

<beans>
  
   <bean id="fileContentStore" class="org.alfresco.repo.content.filestore.FileContentStore">
      <constructor-arg>
         <value>${dir.contentstore}</value>
      </constructor-arg>
   </bean>
  
   <!– deleted content will get pushed into this store, where it can be cleaned up at will –>
   <bean id="deletedContentStore" class="org.alfresco.repo.content.filestore.FileContentStore">
      <constructor-arg>
         <value>${dir.contentstore.deleted}</value>
      </constructor-arg>
   </bean>
   <!– bean to move deleted content into the the backup store –>
   <bean id="deletedContentBackupListener" class="org.alfresco.repo.content.cleanup.DeletedContentBackupCleanerListener" >
      <property name="store">
         <ref bean="deletedContentStore" />
      </property>
   </bean>
   <!– Performs the content cleanup –>
   <bean id="contentStoreCleaner" class="org.alfresco.repo.content.cleanup.ContentStoreCleaner" >
      <property name="dictionaryService">
         <ref bean="dictionaryService" />
      </property>
      <property name="nodeDaoService" >
         <ref bean="nodeDaoService" />
      </property>
      <property name="avmNodeDAO">
            <ref bean="avmNodeDAO"/>
      </property>
      <property name="contentUrlDAO">
           <ref bean="contentUrlDAO"/>
      </property>
      <property name="transactionService" >
         <ref bean="transactionService" />
      </property>
      <property name="protectDays" >
         <value>14</value>
      </property>
      <property name="stores" >
         <list>
            <ref bean="fileContentStore" />
         </list>
      </property>
      <property name="listeners" >
         <list>
            <ref bean="deletedContentBackupListener" />
         </list>
      </property>
   </bean>

   <bean id="contentService" class="org.alfresco.repo.content.RoutingContentService" init-method="init">
      <property name="transactionService">
          <ref bean="transactionService" />
      </property>
      <property name="retryingTransactionHelper">
          <ref bean="retryingTransactionHelper"/>
      </property>
      <property name="dictionaryService">
          <ref bean="dictionaryService" />
      </property>
      <property name="nodeService">
          <ref bean="nodeService" />
      </property>
      <property name="transformerRegistry">
          <ref bean="contentTransformerRegistry" />
      </property>
      <property name="store">
          <ref bean="fileContentStore" />
      </property>
      <property name="policyComponent">
          <ref bean="policyComponent" />
      </property>
      <property name="avmService">
          <ref bean="avmService"/>
      </property>
      <property name="imageMagickContentTransformer">
         <ref bean="transformer.ImageMagick" />
      </property>
   </bean>
   
    <bean id="mimetypeConfigService" class="org.alfresco.config.xml.XMLConfigService" init-method="init">
        <constructor-arg>
            <bean class="org.alfresco.config.source.UrlConfigSource">
                <constructor-arg>
                    <list>
                        <value>classpath:alfresco/mimetype/mimetype-map.xml</value>
                        <value>classpath:alfresco/mimetype/mimetype-map-openoffice.xml</value>
                    </list>
                </constructor-arg>
            </bean>
        </constructor-arg>
    </bean>

   <bean id="mimetypeService" class="org.alfresco.repo.content.MimetypeMap" init-method="init" >
      <property name="configService">
         <ref bean="mimetypeConfigService" />
      </property>
      <property name="contentCharsetFinder">
         <ref bean="charset.finder"/>
      </property>
   </bean>
  
   <bean id="contentFilterLanguagesConfigService" class="org.alfresco.config.xml.XMLConfigService" init-method="init">
      <constructor-arg>
         <bean class="org.alfresco.config.source.UrlConfigSource">
            <constructor-arg>
               <list>
                  <value>classpath:alfresco/ml/content-filter-lang.xml</value>
               </list>
            </constructor-arg>
         </bean>
      </constructor-arg>
   </bean>

   <bean id="contentFilterLanguagesService" class="org.alfresco.repo.model.ml.ContentFilterLanguagesMap" init-method="init" >
      <property name="configService">
         <ref bean="contentFilterLanguagesConfigService" />
      </property>
   </bean>
  
   <bean id="openOfficeConnection" class="net.sf.jooreports.openoffice.connection.SocketOpenOfficeConnection" />
  
   <!– Metadata Extraction Regisitry –>
   <bean id="metadataExtracterRegistry" class="org.alfresco.repo.content.metadata.MetadataExtracterRegistry" />
  
   <!– Abstract bean definition defining base definition for all metadata extracters –>
   <bean id="baseMetadataExtracter"
         class="org.alfresco.repo.content.metadata.AbstractMetadataExtracter"
         abstract="true"
         init-method="register">
      <property name="registry">
         <ref bean="metadataExtracterRegistry" />
      </property>
      <property name="mimetypeService">
         <ref bean="mimetypeService" />
      </property>
      <property name="dictionaryService">
         <ref bean="dictionaryService" />
      </property>
   </bean>
  
   <!– Content Metadata Extracters –>
   <bean id="extracter.PDFBox"        class="org.alfresco.repo.content.metadata.PdfBoxMetadataExtracter"        parent="baseMetadataExtracter" />
   <bean id="extracter.Office"        class="org.alfresco.repo.content.metadata.OfficeMetadataExtracter"        parent="baseMetadataExtracter" />
   <bean id="extracter.Mail"          class="org.alfresco.repo.content.metadata.MailMetadataExtracter"          parent="baseMetadataExtracter" />
   <bean id="extracter.Html"          class="org.alfresco.repo.content.metadata.HtmlMetadataExtracter"          parent="baseMetadataExtracter" />
   <bean id="extracter.MP3"           class="org.alfresco.repo.content.metadata.MP3MetadataExtracter"           parent="baseMetadataExtracter" />
   <bean id="extracter.OpenDocument"  class="org.alfresco.repo.content.metadata.OpenDocumentMetadataExtracter"  parent="baseMetadataExtracter" />
   <bean id="extracter.OpenOffice"    class="org.alfresco.repo.content.metadata.OpenOfficeMetadataExtracter"    parent="baseMetadataExtracter" >
      <property name="connection">
         <ref bean="openOfficeConnection" />
      </property>
   </bean>
  

   <!– Content Transformation Regisitry –>
   <bean id="contentTransformerRegistry" class="org.alfresco.repo.content.transform.ContentTransformerRegistry" />

   <!– Abstract bean definition defining base definition for all transformers –>
   <bean id="baseContentTransformer"
         class="org.alfresco.repo.content.transform.AbstractContentTransformer"
         abstract="true"
         init-method="register">
      <property name="mimetypeService">
         <ref bean="mimetypeService" />
      </property>
      <property name="registry">
         <ref bean="contentTransformerRegistry" />
      </property>
   </bean>
  
   <!– Content Transformations –>
   <bean id="transformer.StringExtracter"
         class="org.alfresco.repo.content.transform.StringExtractingContentTransformer"
         parent="baseContentTransformer" />

   <bean id="transformer.BinaryPassThrough"
         class="org.alfresco.repo.content.transform.BinaryPassThroughContentTransformer"
         parent="baseContentTransformer" />

   <bean id="transformer.PdfBox"
         class="org.alfresco.repo.content.transform.PdfBoxContentTransformer"
         parent="baseContentTransformer" >
      <property name="explicitTransformations">
         <list>
            <bean class="org.alfresco.repo.content.transform.ContentTransformerRegistry$TransformationKey" >
                <constructor-arg><value>application/pdf</value></constructor-arg>
                <constructor-arg><value>text/plain</value></constructor-arg>
            </bean>
         </list>
      </property>
   </bean>

   <bean id="transformer.PdfBox.TextToPdf"
         class="org.alfresco.repo.content.transform.TextToPdfContentTransformer"
         parent="baseContentTransformer" >
      <property name="explicitTransformations">
         <list>
            <bean class="org.alfresco.repo.content.transform.ContentTransformerRegistry$TransformationKey" >
                <constructor-arg><value>text/plain</value></constructor-arg>
                <constructor-arg><value>application/pdf</value></constructor-arg>
            </bean>
         </list>
      </property>
   </bean>

   <bean id="transformer.Poi"
         class="org.alfresco.repo.content.transform.PoiHssfContentTransformer"
         parent="baseContentTransformer" />

   <bean id="transformer.TextMining"
         class="org.alfresco.repo.content.transform.TextMiningContentTransformer"
         parent="baseContentTransformer" >
      <property name="explicitTransformations">
         <list>
            <bean class="org.alfresco.repo.content.transform.ContentTransformerRegistry$TransformationKey" >
                <constructor-arg><value>application/msword</value></constructor-arg>
                <constructor-arg><value>text/plain</value></constructor-arg>
            </bean>
         </list>
      </property>
   </bean>

   <bean id="transformer.HtmlParser"
         class="org.alfresco.repo.content.transform.HtmlParserContentTransformer"
         parent="baseContentTransformer" />

   <bean id="transformer.OpenOffice"
         class="org.alfresco.repo.content.transform.OpenOfficeContentTransformer"
         parent="baseContentTransformer" >
      <property name="connection">
         <ref bean="openOfficeConnection" />
      </property>
      <property name="documentFormatsConfiguration">
         <value>classpath:alfresco/mimetype/openoffice-document-formats.xml</value>
      </property>
   </bean>

   <bean id="transformer.complex.OpenOffice.PdfBox"
        class="org.alfresco.repo.content.transform.ComplexContentTransformer"
        parent="baseContentTransformer" >
      <property name="transformers">
         <list>
            <ref bean="transformer.OpenOffice" />
            <ref bean="transformer.PdfBox" />
         </list>
      </property>
      <property name="intermediateMimetypes">
         <list>
            <value>application/pdf</value>
         </list>
      </property>
   </bean>
  
   <bean id="transformer.OutlookMsg"
         class="org.alfresco.repo.content.transform.MailContentTransformer"
         parent="baseContentTransformer" />

   <!–
   <bean id="transformer.JMagick" class="org.alfresco.repo.content.transform.magick.JMagickContentTransformer" init-method="init" />
   </bean>
   –>

   <bean id="transformer.ImageMagick"
        class="org.alfresco.repo.content.transform.magick.ImageMagickContentTransformer"
        parent="baseContentTransformer"
        init-method="init">
      <property name="executer">
         <bean name="transformer.ImageMagick.Command" class="org.alfresco.util.exec.RuntimeExec">
            <property name="commandMap">
                <map>
                    <entry key="Windows.*">
                        <value>imconvert "${source}" ${options} "${target}"</value>
                    </entry>
                    <entry key=".*">
                        <value>convert ${source} ${options} ${target}</value>
                    </entry>
                </map>
            </property>
            <property name="defaultProperties">
                <props>
                    <prop key="options"></prop>
                </props>
            </property>
         </bean>
      </property>
   </bean>

</beans>



And the logs, with the same test-case :


14:21:30,831 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Starting metadata extraction:
   reader: ContentAccessor[
            contentUrl=store:///opt/alfresco/tomcat/temp/Alfresco/alfresco62331.upload,
            mimetype=application/vnd.excel,
            size=15360,
            encoding=UTF-8,
            locale=en_US
            ]

   extracter: org.alfresco.repo.content.metadata.OfficeMetadataExtracter@be8464


14:21:30,839 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Converted extracted raw values to system values:
   Raw Properties:    {
            pageCount=0,
            createDateTime=Thu Nov 04 15:09:41 CET 2004,
            osVersion=131333,
            lastPrinted=Wed Feb 02 14:52:51 CET 2005,
            format=0,
            wordCount=0,
            author=NCASMT,
            lastAuthor=******,
            editTime=0,
            lastSaveDateTime=Wed Oct 19 15:17:26 CEST 2005
         }

   System Properties: {
            {http://www.alfresco.org/model/content/1.0}modified=Wed Oct 19 15:17:26 CEST 2005,
            {http://www.alfresco.org/model/content/1.0}author=NCASMT,
            {http://www.alfresco.org/model/content/1.0}created=Thu Nov 04 15:09:41 CET 2004
         }


14:21:30,840 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Completed metadata extraction:
   reader:    ContentAccessor[
            contentUrl=store:///opt/alfresco/tomcat/temp/Alfresco/alfresco62331.upload,
            mimetype=application/vnd.excel,
            size=15360,
            encoding=UTF-8,
            locale=en_US
              ]

   extracter: org.alfresco.repo.content.metadata.OfficeMetadataExtracter@be8464

   changed:   {{http://www.alfresco.org/model/content/1.0}author=NCASMT}


14:21:31,312 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Starting metadata extraction:
   reader: ContentAccessor[
            contentUrl=store://2008/6/27/14/21/92edd339-4443-11dd-b28b-692bcf67ff2a.bin,
            mimetype=application/vnd.excel,
            size=15360,
            encoding=UTF-8,
            locale=en_US
            ]

   extracter: org.alfresco.repo.content.metadata.OfficeMetadataExtracter@be8464


14:21:31,314 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Converted extracted raw values to system values:
   Raw Properties:    {
            pageCount=0,
            createDateTime=Thu Nov 04 15:09:41 CET 2004,
            osVersion=131333,
            lastPrinted=Wed Feb 02 14:52:51 CET 2005,
            format=0,
            wordCount=0,
            author=NCASMT,
            lastAuthor=******,
            editTime=0,
            lastSaveDateTime=Wed Oct 19 15:17:26 CEST 2005
         }

   System Properties: {
            {http://www.alfresco.org/model/content/1.0}modified=Wed Oct 19 15:17:26 CEST 2005,
            {http://www.alfresco.org/model/content/1.0}author=NCASMT,
            {http://www.alfresco.org/model/content/1.0}created=Thu Nov 04 15:09:41 CET 2004
         }

14:21:31,315 DEBUG [content.metadata.AbstractMappingMetadataExtracter] Completed metadata extraction:
   reader:    ContentAccessor[
            contentUrl=store://2008/6/27/14/21/92edd339-4443-11dd-b28b-692bcf67ff2a.bin,
            mimetype=application/vnd.excel,
            size=15360,
            encoding=UTF-8,
            locale=en_US
              ]

   extracter: org.alfresco.repo.content.metadata.OfficeMetadataExtracter@be8464
   changed:   {{http://www.alfresco.org/model/content/1.0}author=NCASMT}



As the final "changed" entry of this log shows, only the author property was actually changed: cm:created and cm:modified were extracted and converted, but not applied.

Outcomes