"big_book_search":"#!/usr/bin/env python\n# vim:fileencoding=UTF-8\nfrom __future__ import absolute_import, division, print_function, unicode_literals\n\n__license__ = 'GPL v3'\n__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'\n__docformat__ = 'restructuredtext en'\n\nfrom calibre.ebooks.metadata.sources.base import Option, Source\n\n\ndef get_urls(br, tokens):\n from urllib.parse import quote_plus\n\n from html5_parser import parse\n escaped = (quote_plus(x) for x in tokens if x and x.strip())\n q = '+'.join(escaped)\n url = 'https://bigbooksearch.com/please-dont-scrape-my-site-you-will-put-my-api-key-over-the-usage-limit-and-the-site-will-break/books/'+q\n raw = br.open(url).read()\n root = parse(raw.decode('utf-8'))\n urls = [i.get('src') for i in root.xpath('//img[@src]')]\n return urls\n\n\nclass BigBookSearch(Source):\n\n name = 'Big Book Search'\n version = (1, 0, 1)\n minimum_calibre_version = (2, 80, 0)\n description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')\n capabilities = frozenset(['cover'])\n can_get_multiple_covers = True\n options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),\n _('The maximum number of covers to process from the search result')),\n )\n supports_gzip_transfer_encoding = True\n\n def download_cover(self, log, result_queue, abort,\n title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):\n if not title:\n return\n br = self.browser\n tokens = tuple(self.get_title_tokens(title)) + tuple(self.get_author_tokens(authors))\n urls = get_urls(br, tokens)\n self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)\n\n\ndef test():\n import pprint\n\n from calibre import browser\n br = browser()\n urls = get_urls(br, ['consider', 'phlebas', 'banks'])\n pprint.pprint(urls)\n\n\nif __name__ == '__main__':\n test()\n",
"edelweiss":"#!/usr/bin/env python\n# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai\nfrom __future__ import absolute_import, division, print_function, unicode_literals\n\n__license__ = 'GPL v3'\n__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'\n__docformat__ = 'restructuredtext en'\n\nimport re\nimport time\nfrom threading import Thread\n\ntry:\n from queue import Empty, Queue\nexcept ImportError:\n from Queue import Empty, Queue\n\nfrom calibre import as_unicode, random_user_agent\nfrom calibre.ebooks.metadata import check_isbn\nfrom calibre.ebooks.metadata.sources.base import Source\n\n\ndef clean_html(raw):\n from calibre.ebooks.chardet import xml_to_unicode\n from calibre.utils.cleantext import clean_ascii_chars\n return clean_ascii_chars(xml_to_unicode(raw, strip_encoding_pats=True,\n resolve_entities=True, assume_utf8=True)[0])\n\n\ndef parse_html(raw):\n raw = clean_html(raw)\n from html5_parser import parse\n return parse(raw)\n\n\ndef astext(node):\n from lxml import etree\n return etree.tostring(node, method='text', encoding='unicode',\n with_tail=False).strip()\n\n\nclass Worker(Thread): # {{{\n\n def __init__(self, basic_data, relevance, result_queue, br, timeout, log, plugin):\n Thread.__init__(self)\n self.daemon = True\n self.basic_data = basic_data\n self.br, self.log, self.timeout = br, log, timeout\n self.result_queue, self.plugin, self.sku = result_queue, plugin, self.basic_data['sku']\n self.relevance = relevance\n\n def run(self):\n url = ('https://www.edelweiss.plus/GetTreelineControl.aspx?controlName=/uc/product/two_Enhanced.ascx&'\n 'sku={0}&idPrefix=content_1_{0}&mode=0'.format(self.sku))\n try:\n raw = self.br.open_novisit(url, timeout=self.timeout).read()\n except:\n self.log.exception('Failed to load comments page: %r'%url)\n return\n\n try:\n mi = self.parse(raw)\n mi.source_relevance = self.relevance\n self.plugin.clean_downloaded_metadata(mi)\n self.result_queue.put(mi)\n except:\n self.log.exception('Failed to parse details for sku: %s'%self.sku)\n\n def parse(self, raw):\n from calibre.ebooks.metadata.book.base import Metadata\n from calibre.utils.date import UNDEFINED_DATE\n root = parse_html(raw)\n mi = Metadata(self.basic_data['title'], self.basic_data['authors'])\n\n # Identifiers\n if self.basic_data['isbns']:\n mi.isbn = self.basic_data['isbns'][0]\n mi.set_identifier('edelweiss', self.sku)\n\n # Tags\n if self.basic_data['tags']:\n mi.tags = self.basic_data['tags']\n mi.tags = [t[1:].strip() if t.startswith('&') else t for t in mi.tags]\n\n # Publisher\n mi.publisher = self.basic_data['publisher']\n\n # Pubdate\n if self.basic_data['pubdate'] and self.basic_data['pubdate'].year != UNDEFINED_DATE:\n mi.pubdate = self.basic_data['pubdate']\n\n # Rating\n if self.basic_data['rating']:\n mi.rating = self.basic_data['rating']\n\n # Comments\n comments = ''\n for cid in ('summary', 'contributorbio', 'quotes_reviews'):\n cid = 'desc_{}{}-content'.format(cid, self.sku)\n div = root.xpath('//*[@id=\"{}\"]'.format(cid))\nifdiv:\ncomments+=self.render_comments(div[0])\nifcomments:\nmi.comments=comments\n\nmi.has_cover=self.plugin.cached_identifier_to_cover_url(self.sku)isnotNone\nreturnmi\n\ndefrender_comments(self,desc):\nfromlxmlimportetree\n\nfromcalibre.library.commentsimportsanitize_comments_html\nforcindesc.xpath('descendant::noscript'):\nc.getparent().remove(c)\nforaindesc.xpath('descendant::a[@href]'):\ndela.attrib['href']\na.tag
"google":"#!/usr/bin/env python\n# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai\n# License: GPLv3 Copyright: 2011, Kovid Goyal <kovid at kovidgoyal.net>\nfrom __future__ import absolute_import, division, print_function, unicode_literals\n\nimport hashlib\nimport os\nimport re\nimport sys\nimport tempfile\nimport time\n\nimport regex\n\ntry:\n from queue import Empty, Queue\nexcept ImportError:\n from Queue import Empty, Queue\n\nfrom calibre import as_unicode, prepare_string_for_xml, replace_entities\nfrom calibre.ebooks.chardet import xml_to_unicode\nfrom calibre.ebooks.metadata import authors_to_string, check_isbn\nfrom calibre.ebooks.metadata.book.base import Metadata\nfrom calibre.ebooks.metadata.sources.base import Source\nfrom calibre.utils.cleantext import clean_ascii_chars\nfrom calibre.utils.localization import canonicalize_lang\n\nNAMESPACES = {\n 'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',\n 'atom': 'http://www.w3.org/2005/Atom',\n 'dc': 'http://purl.org/dc/terms',\n 'gd': 'http://schemas.google.com/g/2005'\n}\n\n\ndef pretty_google_books_comments(raw):\n raw = replace_entities(raw)\n # Paragraphs in the comments are removed but whatever software googl uses\n # to do this does not insert a space so we often find the pattern\n # word.Capital in the comments which can be used to find paragraph markers.\n parts = []\n for x in re.split(r'([a-z)\"”])(\\.)([A-Z(\"“])', raw):\n if x == '.':\n parts.append('.</p>\\n\\n<p>')\n else:\n parts.append(prepare_string_for_xml(x))\n raw = '<p>' + ''.join(parts) + '</p>'\n return raw\n\n\ndef get_details(browser, url, timeout): # {{{\n try:\n raw = browser.open_novisit(url, timeout=timeout).read()\n except Exception as e:\n gc = getattr(e, 'getcode', lambda: -1)\n if gc() != 403:\n raise\n # Google is throttling us, wait a little\n time.sleep(2)\n raw = browser.open_novisit(url, timeout=timeout).read()\n\n return raw\n\n\n# }}}\n\nxpath_cache = {}\n\n\ndef XPath(x):\n ans = xpath_cache.get(x)\n if ans is None:\n from lxml import etree\n ans = xpath_cache[x] = etree.XPath(x, namespaces=NAMESPACES)\n return ans\n\n\ndef to_metadata(browser, log, entry_, timeout, running_a_test=False): # {{{\n from lxml import etree\n\n # total_results = XPath('//openSearch:totalResults')\n # start_index = XPath('//openSearch:startIndex')\n # items_per_page = XPath('//openSearch:itemsPerPage')\n entry = XPath('//atom:entry')\n entry_id = XPath('descendant::atom:id')\n url = XPath('descendant::atom:link[@rel=\"self\"]/@href')\ncreator=XPath('descendant::dc:creator')\nidentifier=XPath('descendant::dc:identifier')\ntitle=XPath('descendant::dc:title')\ndate=XPath('descendant::dc:date')\npublisher=XPath('descendant::dc:publisher')\nsubject=XPath('descendant::dc:subject')\ndescription=XPath('descendant::dc:description')\nlanguage=XPath('descendant::dc:language')\n\n#print(etree.tostring(entry_,pretty_print=True))\n\ndefget_text(extra,x):\ntry:\nans=x(extra)\nifans:\nans=ans[0].text\nifansandans.strip():\nreturnans.strip()\nexcept:\nlog.exception('Programmingerror:')\nreturnNone\n\ndefget_extra_details():\nraw=get_details(browser,details_url,timeout)\nifrunning_a_test:\nwithopen(os.path.join(tempfile.gettempdir(),'Google-'+details_url.split('/')[-1]+'.xml'),'wb')asf:\nf.write(raw)\nprint('Bookdetailssavedto:',f.name,file=sys.stderr)\nfeed=etree.fromstring(\nxml_to_unicode(clean_ascii_chars(raw),strip_encoding_pats=True)[0],\nparser=etree.XMLParser(recover=True,no_network=True,resolve_entities=False)\n)\nreturnentry(feed)[0]\n\nifisinstance(entry_,str):\ngoogle_id