setup
i have 30-odd author affiliations that look like the left panel and want to re-format to what's shown in the right panel. and i'm too lazy to do that manually ...
solution
the plan
- copy the selected texts. read texts from the clipboard.
- using regular expression (regex), replace '[affiliation number]. ' with '[affiliation number]'.
- write the resulting string in a new document.
- in the new doc, format the affiliation numbers.
- copy all contents of the new document.
programming language & module(s)
- VBA
file preps
select the texts to turn affiliation numbers to superscripts, like this:
variables to customize
none.
the script
fmtAffilNoPoster
Sub fmtAffilNoPoster() | |
Dim ret, clipboard As Object | |
Dim cbTxt As String | |
Dim newDoc As Document | |
Dim retPattern As String | |
Dim newDocEnd As Long | |
retPattern = "(\d+)\.?\s+(\w+)" | |
clearFind | |
Selection.Copy | |
Set DataObj = New MSForms.DataObject | |
DataObj.GetFromClipboard | |
txt = DataObj.GetText(1) | |
' Debug.Print txt | |
splitted = Split(txt, Chr(13)) | |
Set ret = CreateObject("VBScript.RegExp") | |
ret.pattern = retPattern | |
ret.ignorecase = True ' if needed | |
cbTxt = "" | |
For i = LBound(splitted) To UBound(splitted) | |
txt = splitted(i) | |
If ret.test(txt) Then | |
' replaces "1. Affiliation A" with "1Affiliation A". | |
' "$1" stands for submatches(0). | |
replacedTxt = ret.Replace(txt, "$1$2") | |
replacedTxt = Replace(replacedTxt, Chr(13), "") | |
replacedTxt = Trim(replacedTxt) | |
If Len(cbTxt) = 0 Then | |
cbTxt = replacedTxt | |
Else | |
cbTxt = cbTxt & Chr(13) & replacedTxt | |
End If | |
End If | |
Next | |
' Debug.Print "cbTxt: ", cbTxt | |
Set newDoc = Documents.Add | |
Selection.TypeText cbTxt | |
For Each p In newDoc.Paragraphs | |
Set rng = newDoc.Range(p.Range.Start, p.Range.Start + 1) | |
rng.MoveEndWhile "0123456789" | |
rng.Font.superScript = True | |
Next | |
With newDoc.Range.Find | |
' collapse all paragraphs into 1. | |
.Text = "^p" | |
.Replacement.Text = "; " | |
.Execute Replace:=wdReplaceAll | |
With Selection | |
.EndKey wdStory | |
' delete the last "; " | |
With .Find | |
.Text = "; " | |
.Replacement.Text = "" | |
.Forward = False | |
.Execute Replace:=wdReplaceOne | |
End With | |
.EndKey wdStory | |
.MoveStartWhile Chr(13) | |
newDoc.Range(0, .Start).Copy | |
End With | |
End With | |
newDoc.Close savechanges:=wdDoNotSaveChanges | |
clearFind | |
End Sub | |
Sub clearFind() | |
With Selection.Find | |
.Text = "" | |
.Replacement.Text = "" | |
.Forward = True | |
.Wrap = wdFindContinue | |
.Format = False | |
.MatchCase = False | |
.MatchWholeWord = False | |
.MatchByte = True | |
.MatchWildcards = False | |
.MatchSoundsLike = False | |
.MatchAllWordForms = False | |
End With | |
End Sub |
output
the result is now in the clipboard, ready to be pasted.
note to self
- a good tutorial (in Chinese) of working with regex in VBA: https://geek-docs.com/regexp/regexp-tutorials/51_vba_regular_expression.html