Fixed a crash when encountering tables whose columns do not contain links.
This commit is contained in:
Johannes Erwerle 2022-04-01 12:04:54 +02:00
parent 4742e417f7
commit 544e609d44

View file

@ -153,17 +153,21 @@ class CrawlObject():
if len(items) == 4: if len(items) == 4:
_, name, _, download = items _, name, _, download = items
link = download.find("a").attrs["href"]
name = name.find("a").text + ".mp4" # rough estimate that all files are mp4
url = urljoin(self.url, link)
path = self.path / name link_element = download.find("a")
name_element = name.find("a")
if link_element is not None and name_element is not None:
link = link_element.attrs["href"]
name = name_element.text + ".mp4" # rough estimate that all files are mp4
url = urljoin(self.url, link)
if url in self.discovered: path = self.path / name
logging.info(f"{ url } already discovered, skipping it")
continue if url in self.discovered:
self.discovered.add(url) logging.info(f"{ url } already discovered, skipping it")
subitems.append(CrawlObject(path, url, self.session, discovered=self.discovered)) continue
self.discovered.add(url)
subitems.append(CrawlObject(path, url, self.session, discovered=self.discovered))
# download things from exercise sections # download things from exercise sections
exercise_div = content.find("div", class_="ilExcOverview") exercise_div = content.find("div", class_="ilExcOverview")