Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/dropbox #70

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions pypdfocr/pypdfocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from pypdfocr_pdffiler import PyPdfFiler
from pypdfocr_filer_dirs import PyFilerDirs
from pypdfocr_filer_evernote import PyFilerEvernote
from pypdfocr_filer_dropbox import PyFilerDropbox
from pypdfocr_preprocess import PyPreprocess

def error(text):
Expand Down Expand Up @@ -119,7 +120,8 @@ def get_options(self, argv):
:ivar config: Dict of the config file
:ivar watch: Whether folder watching mode is turned on
:ivar enable_evernote: Enable filing to evernote

:ivar enable_dropbox: Enable dropbox

"""
p = argparse.ArgumentParser(
description = "Convert scanned PDFs into their OCR equivalent. Depends on GhostScript and Tesseract-OCR being installed.",
Expand Down Expand Up @@ -166,6 +168,8 @@ def get_options(self, argv):
dest='configfile', help='Configuration file for defaults and PDF filing')
filing_group.add_argument('-e', '--evernote', action='store_true',
default=False, dest='enable_evernote', help='Enable filing to Evernote')
filing_group.add_argument('-r', '--dropbox', action='store_true',
default=False, dest='enable_dropbox', help='Enable filing to Dropbox')
filing_group.add_argument('-n', action='store_true',
default=False, dest='match_using_filename', help='Use filename to match if contents did not match anything, before filing to default folder')

Expand Down Expand Up @@ -209,7 +213,12 @@ def get_options(self, argv):
else:
self.enable_evernote = False

if args.enable_filing or args.enable_evernote:
if args.enable_dropbox:
self.enable_dropbox = True
else:
self.enable_dropbox = False

if args.enable_filing or args.enable_evernote or args.enable_dropbox:
self.enable_filing = True
if not args.configfile:
p.error("Please specify a configuration file(CONFIGFILE) to enable filing")
Expand Down Expand Up @@ -245,7 +254,8 @@ def _setup_filing(self):
"""
Instance the proper PyFiler object (either
:class:`pypdfocr.pypdfocr_filer_dirs.PyFilerDirs` or
:class:`pypdfocr.pypdfocr_filer_evernote.PyFilerEvernote`)
:class:`pypdfocr.pypdfocr_filer_evernote.PyFilerEvernote` or
:class:`pypdfocr.pypdfocr_filer_dropbox.PyFilerDropbox` )

TODO: Make this more generic to allow third-party plugin filing objects

Expand Down Expand Up @@ -280,9 +290,11 @@ def _setup_filing(self):
# --------------------------------------------------
if self.enable_evernote:
self.filer = PyFilerEvernote(self.config['evernote_developer_token'])
elif self.enable_dropbox:
self.filer = PyFilerDropbox(self.config['dropbox_developer_token'],self.config['dropbox_base_path'])
else:
self.filer = PyFilerDirs()

self.filer.target_folder = self.config['target_folder']
self.filer.default_folder = self.config['default_folder']
self.filer.original_move_folder = original_move_folder
Expand Down
145 changes: 145 additions & 0 deletions pypdfocr/pypdfocr_filer_dropbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@

# Copyright 2013 Stefan Gorling All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import shutil
import hashlib
import time
import sys

from pypdfocr_filer import PyFiler

import functools

import dropbox

"""
Implementation of a filer class
-> Files documents into Dropbox subfolders. Largely adapted from PyFilerEvernote.
"""


class PyFilerDropbox(PyFiler):
    """
        Filer that uploads processed PDFs into sub-folders of a Dropbox account.

        The destination of an upload is ``<base_path>/<folder>/<file name>``.
        Originals are never uploaded; they are only (optionally) moved on the
        local file system by :meth:`file_original`.
    """

    def get_target_folder(self):
        """ Return the stored target folder (basename only, or None/empty if unset)"""
        return self._target_folder

    def set_target_folder(self, target_folder):
        """ Override this to make sure we only have the basename"""
        print("Setting target_folder %s" % target_folder)
        if target_folder:
            self._target_folder = os.path.basename(target_folder)
        else:
            # Keep None / empty values as they are so "unset" stays detectable
            self._target_folder = target_folder

    target_folder = property(get_target_folder, set_target_folder)

    def get_default_folder(self):
        """ Return the stored default folder (basename only, or None/empty if unset)"""
        return self._default_folder

    def set_default_folder(self, default_folder):
        """ Override this to make sure we only have the basename"""
        if default_folder:
            self._default_folder = os.path.basename(default_folder)
        else:
            # Keep None / empty values as they are so "unset" stays detectable
            self._default_folder = default_folder

    default_folder = property(get_default_folder, set_default_folder)

    def __init__(self, dev_token, base_path):
        """
            Set up the filer state and create the Dropbox client.

            :param dev_token: Dropbox access token handed to ``dropbox.Dropbox``
            :param base_path: Dropbox path under which the target folders live
        """
        self.target_folder = None
        self.default_folder = None
        self.original_move_folder = None
        self.folder_targets = {}
        self.dictUserInfo = {'dev_token': dev_token}
        self.base_path = base_path
        self._connect_to_dropbox(self.dictUserInfo)

    def _connect_to_dropbox(self, dictUserInfo):
        """
            Create the Dropbox client used for uploads and store it in
            ``self.client``.  If creating the client raises, the error is
            printed and the process exits with status -1.

            NOTE(review): only the client object is constructed here; no
            request is issued by this method, so an invalid token is presumably
            only detected on the first upload -- confirm against the SDK.

            :param dictUserInfo: dict holding the access token under ``'dev_token'``
            :returns success: Always True (failure terminates the process)
            :rtype bool:
        """
        print("Authenticating to Dropbox")
        dev_token = dictUserInfo['dev_token']
        # Never write the token itself to the log: it is a credential.
        logging.debug("Authenticating using the configured developer token")
        try:
            self.client = dropbox.Dropbox(dev_token)
        except Exception as e:
            print("Error attempting to connect to Dropbox: %s " % (e))
            sys.exit(-1)

        return True

    def add_folder_target(self, folder, keywords):
        """
            Register the keywords that select ``folder`` as filing destination.

            :param folder: folder name; must not have been registered before
            :param keywords: keywords associated with the folder
        """
        assert folder not in self.folder_targets, "Target folder already defined! (%s)" % (folder)
        self.folder_targets[folder] = keywords

    def file_original(self, original_filename):
        """
            Just file it to the local file system (don't upload to dropbox)

            :param original_filename: path of the original (pre-OCR) file
            :returns: the new path of the original, or ``original_filename``
                      unchanged when no ``original_move_folder`` is configured
        """
        if not self.original_move_folder:
            logging.debug("Leaving original untouched")
            return original_filename

        tgt_path = self.original_move_folder
        logging.debug("Moving original %s to %s" % (original_filename, tgt_path))
        tgtfilename = os.path.join(tgt_path, os.path.basename(original_filename))
        # Pick a name that does not collide with a file already in the folder
        tgtfilename = self._get_unique_filename_by_appending_version_integer(tgtfilename)

        shutil.move(original_filename, tgtfilename)
        return tgtfilename

    def move_to_matching_folder(self, filename, foldername):
        """
            Upload ``filename`` to Dropbox below ``base_path``.

            :param filename: local path of the file to upload
            :param foldername: matched folder name; a falsy value means no
                               match, in which case ``default_folder`` is used
            :returns: the Dropbox destination path of the uploaded file
        """
        assert self.target_folder is not None
        assert self.default_folder is not None

        if not foldername:
            logging.info("[DEFAULT] %s --> %s" % (filename, self.default_folder))
            foldername = self.default_folder
        else:
            logging.info("[MATCH] %s --> %s" % (filename, foldername))

        # Trim separators at the joints so a base path such as "/Scans/" does
        # not produce a doubled slash; an unset base path means the root.
        base_path = (self.base_path or "").rstrip("/")
        dest_path = "/".join([base_path, foldername.strip("/"), os.path.basename(filename)])
        logging.info("Sending to dropboxy as %s" % dest_path)

        # Send file to dropbox.  PDFs are binary, so the file must be read in
        # binary mode; text mode would try to decode (or translate) the bytes.
        with open(filename, "rb") as f:
            self.client.files_upload(f.read(), dest_path, mute=True)

        return "%s" % (dest_path)


if __name__ == '__main__':  # pragma: no cover
    # Manual smoke test: uploads one sample PDF to Dropbox.
    # The constructor needs an access token and a base path; they are read
    # from the environment so that no credential ends up in the source.
    # (Only the first basicConfig call takes effect, so it is made once.)
    logging.basicConfig(level=logging.DEBUG, format='%(message)s')

    dev_token = os.environ.get('DROPBOX_DEV_TOKEN')
    if not dev_token:
        sys.exit("Please set DROPBOX_DEV_TOKEN (and optionally DROPBOX_BASE_PATH) to run this test")

    p = PyFilerDropbox(dev_token, os.environ.get('DROPBOX_BASE_PATH', ''))
    p.add_folder_target("auto", ['dmv'])
    p.target_folder = 'myuploads'
    p.default_folder = 'default'
    p.original_move_folder = None

    p.move_to_matching_folder('../dmv/dmv_ocr.pdf', 'auto')