7
1
mirror of https://gitlab.com/ansol/web-ansol.org.git synced 2024-12-14 08:16:20 +00:00

Add importer script

This commit is contained in:
Hugo Peixoto 2021-09-23 14:29:55 +01:00
parent a1a8eb23c1
commit 9c8fdba4a4

86
import.rb Executable file
View File

@ -0,0 +1,86 @@
#!/usr/bin/env ruby
require 'yaml'
require 'fileutils'
require 'set'
Post = Struct.new(:filename, :frontmatter, :body) do
def to_s
"#{YAML.dump(frontmatter)}---\n#{body}"
end
def self.load(filename)
parts = File.read(filename).split("---\n")
frontmatter = YAML.load(parts[1])
self.new(filename, frontmatter, parts[2])
end
def dump_hugo
File.write("content/post/#{File.basename(filename)}", to_s)
end
end
# Load every Markdown file under imported-content/_posts as a Post.
post_paths = Dir["imported-content/_posts/*.md"]
posts = post_paths.map { |path| Post.load(path) }
# Idempotency check: re-serializing a freshly loaded post must reproduce the
# file on disk byte-for-byte, otherwise the Ruby YAML round-trip would be
# changing files unnecessarily.
posts.each do |post|
  on_disk = File.read(post.filename)
  raise "not idempotent!" unless on_disk == post.to_s
end
# Handle attachments ("anexos" in the frontmatter metadata)
FileUtils.mkdir_p("static/attachments")
files_to_copy = Set.new

posts_with_attachments = posts.select do |candidate|
  candidate.frontmatter.dig("metadata", "anexos")&.any?
end

posts_with_attachments.each do |post|
  attachments = post.frontmatter.dig("metadata", "anexos")

  # Keep only the attachments flagged for display; drop the key entirely
  # when none remain.
  attachments.select! { |attachment| attachment["anexos_display"] == 1 }
  post.frontmatter["metadata"].delete("anexos") if attachments.empty?

  attachments.each do |attachment|
    uri = attachment["anexos_uri"]
    # Remember the source-relative path first, then rewrite the URI in place
    # so the frontmatter points at the new location.
    files_to_copy << uri.gsub("public://", "")
    uri.gsub!("public://", "/attachments/")
  end

  post.dump_hugo
end
# Handle links: rewrite every href in each post body, then write the post out.
posts.each do |post|
  post.body.gsub!(/href="(.*?)"/) do
    # Normalize plain-http links to the site onto https before classifying.
    target = $1.sub(/^http:\/\/ansol\.org/, "https://ansol.org")

    if target.start_with?("https://ansol.org/sites/ansol.org/files/")
      # Uploaded file: queue it for copying (with %20 turned back into a
      # space for the on-disk name) and point the link at /attachments/.
      relative = target.gsub("https://ansol.org/sites/ansol.org/files/", "")
      files_to_copy << relative.gsub("%20", " ")
      target = "/attachments/#{relative}"
    elsif target.start_with?("https://ansol.org")
      # Other links into the site are left untouched but printed.
      pp target
    end

    "href=\"#{target}\""
  end

  post.dump_hugo
end
# Copy every collected file from imported-files/ into static/attachments/,
# creating the destination directory first.
files_to_copy.each do |relative_path|
  target_dir = "static/attachments/#{File.dirname(relative_path)}"
  FileUtils.mkdir_p(target_dir)

  source = "imported-files/#{relative_path}"
  destination = "static/attachments/#{relative_path}"
  FileUtils.copy_file(source, destination)
end