AnsweredAssumed Answered

XML Data Extraction

Question asked by tonyfoo on Feb 10, 2010
Hi All,

I am currently trying to get a custom xml extraction extension to work. However, it does not seem to be firing at all. Please see my context file below as well as my two properties files:

<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>

<!--
   Sample configuration of an XmlMetadataExtracter in use within the WCM environment.

   This shows how XML metadata extraction can be set up to extract metadata from different
   formats of XML.  It also shows how metadata can be extracted in WCM projects.

-->
<beans>
  
   <!–
      In order to limit the number of extractors active for Web Content Management, a separate registry
      must be created for the extractors.
   –>
   <bean id="avmMetadataExtracterRegistry" class="org.alfresco.repo.content.metadata.MetadataExtracterRegistry" />
  
   <!--
      Configure the AVM services to broadcast the content update notifications.

   <bean id="avmNodeService" class="org.alfresco.repo.avm.AVMNodeService" init-method="init">
      <property name="dictionaryService">
         <ref bean="dictionaryService"/>
      </property>
      <property name="avmService">
         <ref bean="avmLockingAwareService"/>
      </property>
      <property name="policyComponent">
         <ref bean="policyComponent"/>
      </property>
      <property name="invokePolicies">
         <value>true</value>
      </property>
   </bean>
   <bean id="avmMetadataExtracter" class="org.alfresco.repo.avm.AvmMetadataExtracter" init-method="init">
      <property name="policyComponent">
         <ref bean="policyComponent"/>
      </property>
      <property name="extracterAction">
         <bean class="org.alfresco.repo.action.executer.ContentMetadataExtracter" >
            <property name="dictionaryService">
               <ref bean="dictionaryService"/>
            </property>
            <property name="nodeService">
               <ref bean="avmNodeService" />
            </property>
            <property name="contentService">
               <ref bean="contentService" />
            </property>
            <property name="metadataExtracterRegistry">
               <ref bean="avmMetadataExtracterRegistry" />
            </property>
            <property name="carryAspectProperties">
               <value>true</value>
            </property>
         </bean>
      </property>
   </bean>
   -->
   <!–
      Configure an extractor that targets Alfresco Model XML files.
      Although this inherits from the base extracter bean, the use of the 'init' method
      means that it isn't automatically registered.
   –>
   <bean id="extracter.xml.transcript.AlfrescoModelMetadataExtracter" class="org.alfresco.repo.content.metadata.xml.XPathMetadataExtracter" parent="baseMetadataExtracter" init-method="init" >
      <property name="mappingProperties">
         <!–
            The properties can also be specified using a properties file on the classpath, e.g.:
            <bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
               <property name="location">
                  <value>classpath:alfresco/extension/xml-metadata/AlfrescoModel-xpath-mappings.properties</value>
               </property>
            </bean>
         –>
         <bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
            <property name="properties">
               <value>alfresco/messages/extraction/transcript-xml-metadata-extractor-messages.properties</value>
            </property>
         </bean>
      </property>
      <property name="xpathMappingProperties">
         <bean class="org.springframework.beans.factory.config.PropertiesFactoryBean">
            <property name="properties">
            <value>alfresco/messages/extraction/transcript-xml-metadata-extractor-xpath-messages.properties</value>
            </property>
         </bean>
      </property>
   </bean>
  
   <!–
      This selector examines the XML documents, executing the given XPath statements until a
      match is made.
   –>
   <bean id="extracter.xml.transcript.selector.XPathSelector" class="org.alfresco.repo.content.selector.XPathContentWorkerSelector" init-method="init">
      <property name="workers">
         <map>
            <entry key="/my:test">
               <null />
            </entry>
            <entry key="/model">
               <ref bean="extracter.xml.transcript.AlfrescoModelMetadataExtracter" />
            </entry>
         </map>
      </property>
   </bean>
  
   <!–
      This is the face of the XML metadata extraction.  If passes the XML document to each of
      the selectors, until one of them gives back a MetadataExtracter (via the selectors),
      which is then used as normal to extract the values.
      Note the use of the AVM-specific registry.
      The overwrite policy of the embedded extracters has no effect.  It is only this extractor's
      policy that is used.
   –>
   <bean id="extracter.xml.transcript.XMLMetadataExtracter" class="org.alfresco.repo.content.metadata.xml.XmlMetadataExtracter" parent="baseMetadataExtracter">
      <property name="registry">
         <ref bean="avmMetadataExtracterRegistry" />
      </property>
      <property name="overwritePolicy">
         <value>EAGER</value>
      </property>
      <property name="selectors">
         <list>
            <ref bean="extracter.xml.transcript.selector.XPathSelector" />
         </list>
      </property>
   </bean>
  
</beans>

Properties
#Properties for XML Metadata Extraction
namespace.prefix.my = http://www.au.ca/transcript/content/1.0  
title = my:Title
LastName = my:LastName
FirstName = my:FirstName
BirthDate = my:DOB
OrganizationName = my:InstitutionName

XPATH Properties
#Properties for XML XPATH Metadata Extraction
LastName = /ColTrn:CollegeTranscript/TransmissionData/Student/Person/Name/LastName
FirstName = /ColTrn:CollegeTranscript/TransmissionData/Student/Person/Name/FirstName
BirthDate = /ColTrn:CollegeTranscript/TransmissionData/Student/Person/Birth/BirthDate
OrganizationName = /ColTrn:CollegeTranscript/TransmissionData/Source/OrganizationName

I am unsure how to trigger the extraction. I have tried using the "Extract Common Meta-Data" rule both before and after adding an aspect. Eventually I would like to trigger the extraction process from within an already existing workflow. The content model referenced in the document is already working correctly as well.

Thanks for any help.
Tony

Outcomes