From 544e609d44a68911d682b30dd1b637c19bc3e78d Mon Sep 17 00:00:00 2001 From: Johannes Erwerle Date: Fri, 1 Apr 2022 12:04:54 +0200 Subject: [PATCH] fixed crashing when finding tables that do not contain links in the columns --- ilias_sync2/main.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ilias_sync2/main.py b/ilias_sync2/main.py index d402567..2b1c12b 100644 --- a/ilias_sync2/main.py +++ b/ilias_sync2/main.py @@ -153,17 +153,21 @@ class CrawlObject(): if len(items) == 4: _, name, _, download = items - link = download.find("a").attrs["href"] - name = name.find("a").text + ".mp4" # rough estimate that all files are mp4 - url = urljoin(self.url, link) + + link_element = download.find("a") + name_element = name.find("a") + if link_element is not None and name_element is not None: + link = link_element.attrs["href"] + name = name_element.text + ".mp4" # rough estimate that all files are mp4 + url = urljoin(self.url, link) - path = self.path / name + path = self.path / name - if url in self.discovered: - logging.info(f"{ url } already discovered, skipping it") - continue - self.discovered.add(url) - subitems.append(CrawlObject(path, url, self.session, discovered=self.discovered)) + if url in self.discovered: + logging.info(f"{ url } already discovered, skipping it") + continue + self.discovered.add(url) + subitems.append(CrawlObject(path, url, self.session, discovered=self.discovered)) # download things from exercise sections exercise_div = content.find("div", class_="ilExcOverview")