Hi,
I am successfully configured OCR with my alfresco (windows installation). But, it only working for PNG, TIFF, JPG & GPEG. But i need it for PDF extension also because most of scanned files are in pdf format.
My tesseract-ocr-transform-context.xml is,
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<beans default-lazy-init="false" default-autowire="no" default-dependency-check="none">
<bean id="transformer.worker.ocr.tiff" class="org.alfresco.repo.content.transform.RuntimeExecutableContentTransformerWorker" lazy-init="default" autowire="default" dependency-check="default">
<property name="mimetypeService">
<ref bean="mimetypeService" />
</property>
<property name="checkCommand">
<bean class="org.alfresco.util.exec.RuntimeExec" lazy-init="default" autowire="default" dependency-check="default">
<property name="commandsAndArguments">
<map>
<entry key="Windows.*">
<list>
<value>C:\Windows\System32\cmd.exe</value>
<value>/C</value>
<value>dir C:\alfresco-community\ocr.bat</value>
</list>
</entry>
</map>
</property>
<property name="errorCodes">
<value>1</value>
</property>
</bean>
</property>
<property name="transformCommand">
<bean class="org.alfresco.util.exec.RuntimeExec" lazy-init="default" autowire="default" dependency-check="default">
<property name="commandsAndArguments">
<map>
<entry key="Windows.*">
<list>
<value>C:\Windows\System32\cmd.exe</value>
<value>/C</value>
<value>C:\alfresco-community\ocr.bat</value>
<value>"${source}"</value>
<value>"${target}"</value>
</list>
</entry>
</map>
</property>
<property name="errorCodes">
<value>1,2</value>
</property>
</bean>
</property>
<property name="explicitTransformations">
<list>
<bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
<property name="sourceMimetype">
<value>image/tiff</value>
</property>
<property name="targetMimetype">
<value>text/plain</value>
</property>
</bean>
<bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
<property name="sourceMimetype">
<value>image/png</value>
</property>
<property name="targetMimetype">
<value>text/plain</value>
</property>
</bean>
<bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
<property name="sourceMimetype">
<value>image/jpeg</value>
</property>
<property name="targetMimetype">
<value>text/plain</value>
</property>
</bean>
<bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
<property name="sourceMimetype">
<value>image/jpg</value>
</property>
<property name="targetMimetype">
<value>text/plain</value>
</property>
</bean>
</list>
</property>
</bean>
<bean id="transformer.ocr.tiff" class="org.alfresco.repo.content.transform.ProxyContentTransformer" parent="baseContentTransformer" lazy-init="default" autowire="default" dependency-check="default">
<property name="worker">
<ref bean="transformer.worker.ocr.tiff" />
</property>
</bean>
</beans>
I changed it into this including another bean (red colored),
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE beans PUBLIC '-//SPRING//DTD BEAN//EN' 'http://www.springframework.org/dtd/spring-beans.dtd'>
<beans default-lazy-init="false" default-autowire="no" default-dependency-check="none">
<bean id="transformer.worker.ocr.tiff" class="org.alfresco.repo.content.transform.RuntimeExecutableContentTransformerWorker" lazy-init="default" autowire="default" dependency-check="default">
<property name="mimetypeService">
<ref bean="mimetypeService" />
</property>
<property name="checkCommand">
<bean class="org.alfresco.util.exec.RuntimeExec" lazy-init="default" autowire="default" dependency-check="default">
<property name="commandsAndArguments">
<map>
<entry key="Windows.*">
<list>
<value>C:\Windows\System32\cmd.exe</value>
<value>/C</value>
<value>dir C:\alfresco-community\ocr.bat</value>
</list>
</entry>
</map>
</property>
<property name="errorCodes">
<value>1</value>
</property>
</bean>
</property>
<property name="transformCommand">
<bean class="org.alfresco.util.exec.RuntimeExec" lazy-init="default" autowire="default" dependency-check="default">
<property name="commandsAndArguments">
<map>
<entry key="Windows.*">
<list>
<value>C:\Windows\System32\cmd.exe</value>
<value>/C</value>
<value>C:\alfresco-community\ocr.bat</value>
<value>"${source}"</value>
<value>"${target}"</value>
</list>
</entry>
</map>
</property>
<property name="errorCodes">
<value>1,2</value>
</property>
</bean>
</property>
<property name="explicitTransformations">
<list>
<bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
<property name="sourceMimetype">
<value>image/tiff</value>
</property>
<property name="targetMimetype">
<value>text/plain</value>
</property>
</bean>
<bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
<property name="sourceMimetype">
<value>image/png</value>
</property>
<property name="targetMimetype">
<value>text/plain</value>
</property>
</bean>
<bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
<property name="sourceMimetype">
<value>image/pdf</value>
</property>
<property name="targetMimetype">
<value>text/plain</value>
</property>
</bean>
<bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
<property name="sourceMimetype">
<value>image/jpeg</value>
</property>
<property name="targetMimetype">
<value>text/plain</value>
</property>
</bean>
<bean class="org.alfresco.repo.content.transform.ExplictTransformationDetails" lazy-init="default" autowire="default" dependency-check="default">
<property name="sourceMimetype">
<value>image/jpg</value>
</property>
<property name="targetMimetype">
<value>text/plain</value>
</property>
</bean>
</list>
</property>
</bean>
<bean id="transformer.ocr.tiff" class="org.alfresco.repo.content.transform.ProxyContentTransformer" parent="baseContentTransformer" lazy-init="default" autowire="default" dependency-check="default">
<property name="worker">
<ref bean="transformer.worker.ocr.tiff" />
</property>
</bean>
</beans>
but, it gives me bellow error.
org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'transformer.ocr.tiff' defined in file [C:\alfresco-community\tomcat\shared\classes\alfresco\extension\tesseract-ocr-transform-context.xml]: Invocation of init method failed; nested exception is java.lang.IllegalArgumentException: content.transformer.ocr.tiff.extensions.bin.txt.priority has been specified more than once
2019-06-02 20:00:46,019 INFO [org.alfresco.repo.management.subsystems.ChildApplicationContextFactory] [localhost-startStop-1] Stopping 'Transformers' subsystem, ID: [Transformers, default]
2019-06-02 20:00:46,020 INFO [org.alfresco.repo.management.subsystems.ChildApplicationContextFactory] [localhost-startStop-1] Stopped 'Transformers' subsystem, ID: [Transformers, default]
2019-06-02 20:00:46,021 INFO [org.alfresco.repo.management.subsystems.ChildApplicationContextFactory] [localhost-startStop-1] Stopping 'Authentication' subsystem, ID: [Authentication, managed, alfrescoNtlm1]
2019-06-02 20:00:46,021 INFO [org.alfresco.repo.management.subsystems.ChildApplicationContextFactory] [localhost-startStop-1] Stopped 'Authentication' subsystem, ID: [Authentication, managed, alfrescoNtlm1]
2019-06-02 20:00:46,030 ERROR [org.springframework.web.context.ContextLoader] [localhost-startStop-1] Context initialization failed
org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'transformer.ocr.tiff' defined in file [C:\alfresco-community\tomcat\shared\classes\alfresco\extension\tesseract-ocr-transform-context.xml]: Invocation of init method failed; nested exception is java.lang.IllegalArgumentException: content.transformer.ocr.tiff.extensions.bin.txt.priority has been specified more than once
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1514)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:521)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:458)
at org.springframework.beans.factory.support.AbstractBeanFactory$1.getObject(AbstractBeanFactory.java:293)
at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:223)
at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:290)
at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:191)
at org.springframework.beans.factory.support.DefaultListableBeanFactory.preInstantiateSingletons(DefaultListableBeanFactory.java:636)
at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:938)
at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:479)
at org.springframework.web.context.ContextLoader.configureAndRefreshWebApplicationContext(ContextLoader.java:410)
at org.springframework.web.context.ContextLoader.initWebApplicationContext(ContextLoader.java:306)
at org.springframework.web.context.ContextLoaderListener.contextInitialized(ContextLoaderListener.java:112)
at org.alfresco.web.app.ContextLoaderListener.contextInitialized(ContextLoaderListener.java:70)
at org.apache.catalina.core.StandardContext.listenerStart(StandardContext.java:5118)
at org.apache.catalina.core.StandardContext.startInternal(StandardContext.java:5634)
at org.apache.catalina.util.LifecycleBase.start(LifecycleBase.java:145)
at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:899)
at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:875)
at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:652)
at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:679)
at org.apache.catalina.startup.HostConfig$DeployDescriptor.run(HostConfig.java:1966)
at java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source)
at java.util.concurrent.FutureTask.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.IllegalArgumentException: content.transformer.ocr.tiff.extensions.bin.txt.priority has been specified more than once
at org.alfresco.repo.content.transform.TransformerPropertySetter.setProperties(TransformerPropertySetter.java:119)
at org.alfresco.repo.content.transform.TransformerConfigImpl.setProperties(TransformerConfigImpl.java:239)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.alfresco.repo.management.subsystems.SubsystemProxyFactory$1.invoke(SubsystemProxyFactory.java:79)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:172)
at org.springframework.aop.framework.JdkDynamicAopProxy.invoke(JdkDynamicAopProxy.java:204)
at com.sun.proxy.$Proxy25.setProperties(Unknown Source)
at org.alfresco.repo.content.transform.ContentTransformerHelper.logDeprecatedSetter(ContentTransformerHelper.java:251)
at org.alfresco.repo.content.transform.ProxyContentTransformer.register(ProxyContentTransformer.java:74)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeCustomInitMethod(AbstractAutowireCapableBeanFactory.java:1640)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1581)
at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1511)
... 26 more
Did i missed anything or there is another way to do this?
please help me.
Sorry for my English.
Thank you.
Ask for and offer help to other Alfresco Content Services Users and members of the Alfresco team.
Related links:
By using this site, you are agreeing to allow us to collect and use cookies as outlined in Alfresco’s Cookie Statement and Terms of Use (and you have a legitimate interest in Alfresco and our products, authorizing us to contact you in such methods). If you are not ok with these terms, please do not use this website.